Linux userland API discussions

Linux userland API discussions
 help / color / mirror / Atom feed

* [PATCHv3 5/8] cgroup: introduce cgroup namespaces
From: Aditya Kali @ 2014-12-05  1:55 UTC (permalink / raw)
  To: tj-DgEjT+Ai2ygdnm+yROfE0A, lizefan-hv44wF8Li93QT0dZR+AlfA,
	serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA, luto-kltTT9wpgjJwATOyAt5JVQ,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, mingo-H+wXaHxf7aLQT0dZR+AlfA
  Cc: richard.weinberger-Re5JQEeQqe8AvxtiuMwx3w,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
In-Reply-To: <1417744550-6461-1-git-send-email-adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Introduce the ability to create a new cgroup namespace. The newly created
cgroup namespace remembers the cgroup of the process at the point
of creation of the cgroup namespace (referred to as cgroupns-root).
The main purpose of cgroup namespace is to virtualize the contents
of /proc/self/cgroup file. Processes inside a cgroup namespace
are only able to see paths relative to their namespace root
(unless they are moved outside of their cgroupns-root, at which point
 they will see a relative path from their cgroupns-root).
For a correctly set up container this enables container-tools
(like libcontainer, lxc, lmctfy, etc.) to create completely virtualized
containers without leaking system level cgroup hierarchy to the task.
This patch only implements the 'unshare' part of the cgroupns.

Signed-off-by: Aditya Kali <adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 fs/proc/namespaces.c             |   1 +
 include/linux/cgroup.h           |  29 ++++++++-
 include/linux/cgroup_namespace.h |  36 +++++++++++
 include/linux/nsproxy.h          |   2 +
 include/linux/proc_ns.h          |   4 ++
 kernel/Makefile                  |   2 +-
 kernel/cgroup.c                  |  13 ++++
 kernel/cgroup_namespace.c        | 127 +++++++++++++++++++++++++++++++++++++++
 kernel/fork.c                    |   2 +-
 kernel/nsproxy.c                 |  19 +++++-
 10 files changed, 230 insertions(+), 5 deletions(-)

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 8902609..55bc5da 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -32,6 +32,7 @@ static const struct proc_ns_operations *ns_entries[] = {
 	&userns_operations,
 #endif
 	&mntns_operations,
+	&cgroupns_operations,
 };
 
 static const struct file_operations ns_file_operations = {
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 6e7533b..94a5a0c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -22,6 +22,8 @@
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
 #include <linux/wait.h>
+#include <linux/nsproxy.h>
+#include <linux/types.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -460,6 +462,13 @@ struct cftype {
 #endif
 };
 
+struct cgroup_namespace {
+	atomic_t		count;
+	unsigned int		proc_inum;
+	struct user_namespace	*user_ns;
+	struct cgroup		*root_cgrp;
+};
+
 extern struct cgroup_root cgrp_dfl_root;
 extern struct css_set init_css_set;
 
@@ -584,10 +593,28 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
 	return kernfs_name(cgrp->kn, buf, buflen);
 }
 
+static inline char * __must_check cgroup_path_ns(struct cgroup_namespace *ns,
+						 struct cgroup *cgrp, char *buf,
+						 size_t buflen)
+{
+	if (ns) {
+		BUG_ON(!cgroup_on_dfl(cgrp));
+		return kernfs_path_from_node(ns->root_cgrp->kn, cgrp->kn, buf,
+					     buflen);
+	} else {
+		return kernfs_path(cgrp->kn, buf, buflen);
+	}
+}
+
 static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
 					      size_t buflen)
 {
-	return kernfs_path(cgrp->kn, buf, buflen);
+	if (cgroup_on_dfl(cgrp)) {
+		return cgroup_path_ns(current->nsproxy->cgroup_ns, cgrp, buf,
+				      buflen);
+	} else {
+		return cgroup_path_ns(NULL, cgrp, buf, buflen);
+	}
 }
 
 static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
diff --git a/include/linux/cgroup_namespace.h b/include/linux/cgroup_namespace.h
new file mode 100644
index 0000000..0b97b8d
--- /dev/null
+++ b/include/linux/cgroup_namespace.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_CGROUP_NAMESPACE_H
+#define _LINUX_CGROUP_NAMESPACE_H
+
+#include <linux/nsproxy.h>
+#include <linux/cgroup.h>
+#include <linux/types.h>
+#include <linux/user_namespace.h>
+
+extern struct cgroup_namespace init_cgroup_ns;
+
+static inline struct cgroup *current_cgroupns_root(void)
+{
+	return current->nsproxy->cgroup_ns->root_cgrp;
+}
+
+extern void free_cgroup_ns(struct cgroup_namespace *ns);
+
+static inline struct cgroup_namespace *get_cgroup_ns(
+		struct cgroup_namespace *ns)
+{
+	if (ns)
+		atomic_inc(&ns->count);
+	return ns;
+}
+
+static inline void put_cgroup_ns(struct cgroup_namespace *ns)
+{
+	if (ns && atomic_dec_and_test(&ns->count))
+		free_cgroup_ns(ns);
+}
+
+extern struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+					       struct user_namespace *user_ns,
+					       struct cgroup_namespace *old_ns);
+
+#endif  /* _LINUX_CGROUP_NAMESPACE_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 35fa08f..ac0d65b 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
 struct uts_namespace;
 struct ipc_namespace;
 struct pid_namespace;
+struct cgroup_namespace;
 struct fs_struct;
 
 /*
@@ -33,6 +34,7 @@ struct nsproxy {
 	struct mnt_namespace *mnt_ns;
 	struct pid_namespace *pid_ns_for_children;
 	struct net 	     *net_ns;
+	struct cgroup_namespace *cgroup_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 34a1e10..e56dd73 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -6,6 +6,8 @@
 
 struct pid_namespace;
 struct nsproxy;
+struct task_struct;
+struct inode;
 
 struct proc_ns_operations {
 	const char *name;
@@ -27,6 +29,7 @@ extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
 extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
+extern const struct proc_ns_operations cgroupns_operations;
 
 /*
  * We always define these enumerators
@@ -37,6 +40,7 @@ enum {
 	PROC_UTS_INIT_INO	= 0xEFFFFFFEU,
 	PROC_USER_INIT_INO	= 0xEFFFFFFDU,
 	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
+	PROC_CGROUP_INIT_INO	= 0xEFFFFFFBU,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/kernel/Makefile b/kernel/Makefile
index dc5c775..d9731e2 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,7 +50,7 @@ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
-obj-$(CONFIG_CGROUPS) += cgroup.o
+obj-$(CONFIG_CGROUPS) += cgroup.o cgroup_namespace.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e12d36e..b1ae6d9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,6 +57,8 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/kthread.h>
 #include <linux/delay.h>
+#include <linux/proc_ns.h>
+#include <linux/cgroup_namespace.h>
 
 #include <linux/atomic.h>
 
@@ -195,6 +197,15 @@ static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
 
+struct cgroup_namespace init_cgroup_ns = {
+	.count = {
+		.counter = 1,
+	},
+	.proc_inum = PROC_CGROUP_INIT_INO,
+	.user_ns = &init_user_ns,
+	.root_cgrp = &cgrp_dfl_root.cgrp,
+};
+
 /* IDR wrappers which synchronize using cgroup_idr_lock */
 static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
 			    gfp_t gfp_mask)
@@ -4989,6 +5000,8 @@ int __init cgroup_init(void)
 	unsigned long key;
 	int ssid, err;
 
+	get_user_ns(init_cgroup_ns.user_ns);
+
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
diff --git a/kernel/cgroup_namespace.c b/kernel/cgroup_namespace.c
new file mode 100644
index 0000000..0e0ef3a
--- /dev/null
+++ b/kernel/cgroup_namespace.c
@@ -0,0 +1,127 @@
+/*
+ *  Copyright (C) 2014 Google Inc.
+ *
+ *  Author: Aditya Kali (adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org)
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the Free
+ *  Software Foundation, version 2 of the License.
+ */
+
+#include <linux/cgroup.h>
+#include <linux/cgroup_namespace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/nsproxy.h>
+#include <linux/proc_ns.h>
+
+static struct cgroup_namespace *alloc_cgroup_ns(void)
+{
+	struct cgroup_namespace *new_ns;
+
+	new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
+	if (new_ns)
+		atomic_set(&new_ns->count, 1);
+	return new_ns;
+}
+
+void free_cgroup_ns(struct cgroup_namespace *ns)
+{
+	cgroup_put(ns->root_cgrp);
+	put_user_ns(ns->user_ns);
+	proc_free_inum(ns->proc_inum);
+	kfree(ns);
+}
+EXPORT_SYMBOL(free_cgroup_ns);
+
+struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
+					struct user_namespace *user_ns,
+					struct cgroup_namespace *old_ns)
+{
+	struct cgroup_namespace *new_ns = NULL;
+	struct cgroup *cgrp = NULL;
+	int err;
+
+	BUG_ON(!old_ns);
+
+	if (!(flags & CLONE_NEWCGROUP))
+		return get_cgroup_ns(old_ns);
+
+	/* Allow only sysadmin to create cgroup namespace. */
+	err = -EPERM;
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+		goto err_out;
+
+	/* CGROUPNS only virtualizes the cgroup path on the unified hierarchy.
+	 */
+	cgrp = get_task_cgroup(current);
+
+	err = -ENOMEM;
+	new_ns = alloc_cgroup_ns();
+	if (!new_ns)
+		goto err_out;
+
+	err = proc_alloc_inum(&new_ns->proc_inum);
+	if (err)
+		goto err_out;
+
+	new_ns->user_ns = get_user_ns(user_ns);
+	new_ns->root_cgrp = cgrp;
+
+	return new_ns;
+
+err_out:
+	if (cgrp)
+		cgroup_put(cgrp);
+	kfree(new_ns);
+	return ERR_PTR(err);
+}
+
+static int cgroupns_install(struct nsproxy *nsproxy, void *ns)
+{
+	pr_info("setns not supported for cgroup namespace");
+	return -EINVAL;
+}
+
+static void *cgroupns_get(struct task_struct *task)
+{
+	struct cgroup_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	task_lock(task);
+	nsproxy = task->nsproxy;
+	if (nsproxy) {
+		ns = nsproxy->cgroup_ns;
+		get_cgroup_ns(ns);
+	}
+	task_unlock(task);
+
+	return ns;
+}
+
+static void cgroupns_put(void *ns)
+{
+	put_cgroup_ns(ns);
+}
+
+static unsigned int cgroupns_inum(void *ns)
+{
+	struct cgroup_namespace *cgroup_ns = ns;
+
+	return cgroup_ns->proc_inum;
+}
+
+const struct proc_ns_operations cgroupns_operations = {
+	.name		= "cgroup",
+	.type		= CLONE_NEWCGROUP,
+	.get		= cgroupns_get,
+	.put		= cgroupns_put,
+	.install	= cgroupns_install,
+	.inum		= cgroupns_inum,
+};
+
+static __init int cgroup_namespaces_init(void)
+{
+	return 0;
+}
+subsys_initcall(cgroup_namespaces_init);
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b7d746..d22d793 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1797,7 +1797,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
-				CLONE_NEWUSER|CLONE_NEWPID))
+				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing to
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index ef42d0a..a8b1970 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -25,6 +25,7 @@
 #include <linux/proc_ns.h>
 #include <linux/file.h>
 #include <linux/syscalls.h>
+#include <linux/cgroup_namespace.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -39,6 +40,7 @@ struct nsproxy init_nsproxy = {
 #ifdef CONFIG_NET
 	.net_ns			= &init_net,
 #endif
+	.cgroup_ns		= &init_cgroup_ns,
 };
 
 static inline struct nsproxy *create_nsproxy(void)
@@ -92,6 +94,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_pid;
 	}
 
+	new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
+					    tsk->nsproxy->cgroup_ns);
+	if (IS_ERR(new_nsp->cgroup_ns)) {
+		err = PTR_ERR(new_nsp->cgroup_ns);
+		goto out_cgroup;
+	}
+
 	new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
 	if (IS_ERR(new_nsp->net_ns)) {
 		err = PTR_ERR(new_nsp->net_ns);
@@ -101,6 +110,9 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	return new_nsp;
 
 out_net:
+	if (new_nsp->cgroup_ns)
+		put_cgroup_ns(new_nsp->cgroup_ns);
+out_cgroup:
 	if (new_nsp->pid_ns_for_children)
 		put_pid_ns(new_nsp->pid_ns_for_children);
 out_pid:
@@ -128,7 +140,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 	struct nsproxy *new_ns;
 
 	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			      CLONE_NEWPID | CLONE_NEWNET)))) {
+			      CLONE_NEWPID | CLONE_NEWNET |
+			      CLONE_NEWCGROUP)))) {
 		get_nsproxy(old_ns);
 		return 0;
 	}
@@ -165,6 +178,8 @@ void free_nsproxy(struct nsproxy *ns)
 		put_ipc_ns(ns->ipc_ns);
 	if (ns->pid_ns_for_children)
 		put_pid_ns(ns->pid_ns_for_children);
+	if (ns->cgroup_ns)
+		put_cgroup_ns(ns->cgroup_ns);
 	put_net(ns->net_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
 }
@@ -180,7 +195,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWNET | CLONE_NEWPID)))
+			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
 		return 0;
 
 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related

* [PATCHv3 6/8] cgroup: cgroup namespace setns support
From: Aditya Kali @ 2014-12-05  1:55 UTC (permalink / raw)
  To: tj-DgEjT+Ai2ygdnm+yROfE0A, lizefan-hv44wF8Li93QT0dZR+AlfA,
	serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA, luto-kltTT9wpgjJwATOyAt5JVQ,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, mingo-H+wXaHxf7aLQT0dZR+AlfA
  Cc: richard.weinberger-Re5JQEeQqe8AvxtiuMwx3w,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
In-Reply-To: <1417744550-6461-1-git-send-email-adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

setns on a cgroup namespace is allowed only if
task has CAP_SYS_ADMIN in its current user-namespace and
over the user-namespace associated with target cgroupns.
No implicit cgroup changes happen with attaching to another
cgroupns. It is expected that someone moves the attaching
process under the target cgroupns-root.

Signed-off-by: Aditya Kali <adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 kernel/cgroup_namespace.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/kernel/cgroup_namespace.c b/kernel/cgroup_namespace.c
index 0e0ef3a..ee0cc51 100644
--- a/kernel/cgroup_namespace.c
+++ b/kernel/cgroup_namespace.c
@@ -79,8 +79,21 @@ err_out:
 
 static int cgroupns_install(struct nsproxy *nsproxy, void *ns)
 {
-	pr_info("setns not supported for cgroup namespace");
-	return -EINVAL;
+	struct cgroup_namespace *cgroup_ns = ns;
+
+	if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
+	    !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* Don't need to do anything if we are attaching to our own cgroupns. */
+	if (cgroup_ns == nsproxy->cgroup_ns)
+		return 0;
+
+	get_cgroup_ns(cgroup_ns);
+	put_cgroup_ns(nsproxy->cgroup_ns);
+	nsproxy->cgroup_ns = cgroup_ns;
+
+	return 0;
 }
 
 static void *cgroupns_get(struct task_struct *task)
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related

* [PATCHv3 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns
From: Aditya Kali @ 2014-12-05  1:55 UTC (permalink / raw)
  To: tj-DgEjT+Ai2ygdnm+yROfE0A, lizefan-hv44wF8Li93QT0dZR+AlfA,
	serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA, luto-kltTT9wpgjJwATOyAt5JVQ,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, mingo-H+wXaHxf7aLQT0dZR+AlfA
  Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	jnagal-hpIqsD4AKlfQT0dZR+AlfA, vgoyal-H+wXaHxf7aLQT0dZR+AlfA,
	richard.weinberger-Re5JQEeQqe8AvxtiuMwx3w, Aditya Kali
In-Reply-To: <1417744550-6461-1-git-send-email-adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

This patch enables cgroup mounting inside userns when a process
has appropriate privileges. The cgroup filesystem mounted is
rooted at the cgroupns-root. Thus, in a container-setup, only
the hierarchy under the cgroupns-root is exposed inside the container.
This allows container management tools to run inside the containers
without depending on any global state.
In order to support this, a new kernfs api is added to lookup the
dentry for the cgroupns-root.

Signed-off-by: Aditya Kali <adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 fs/kernfs/mount.c      | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/kernfs.h |  2 ++
 kernel/cgroup.c        | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f973ae9..efe5e15 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,6 +62,54 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
 	return NULL;
 }
 
+/**
+ * kernfs_obtain_root - get a dentry for the given kernfs_node
+ * @sb: the kernfs super_block
+ * @kn: kernfs_node for which a dentry is needed
+ *
+ * This can be used by callers which want to mount only a part of the kernfs
+ * as root of the filesystem.
+ */
+struct dentry *kernfs_obtain_root(struct super_block *sb,
+				  struct kernfs_node *kn)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	BUG_ON(sb->s_op != &kernfs_sops);
+
+	/* inode for the given kernfs_node should already exist. */
+	inode = ilookup(sb, kn->ino);
+	if (!inode) {
+		pr_debug("kernfs: could not get inode for '");
+		pr_cont_kernfs_path(kn);
+		pr_cont("'.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* instantiate and link root dentry */
+	dentry = d_obtain_root(inode);
+	if (!dentry) {
+		pr_debug("kernfs: could not get dentry for '");
+		pr_cont_kernfs_path(kn);
+		pr_cont("'.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* If this is a new dentry, set it up. We need kernfs_mutex because this
+	 * may be called by callers other than kernfs_fill_super. */
+	mutex_lock(&kernfs_mutex);
+	if (!dentry->d_fsdata) {
+		kernfs_get(kn);
+		dentry->d_fsdata = kn;
+	} else {
+		WARN_ON(dentry->d_fsdata != kn);
+	}
+	mutex_unlock(&kernfs_mutex);
+
+	return dentry;
+}
+
 static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 {
 	struct kernfs_super_info *info = kernfs_info(sb);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 3c2be75..b9538e0 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -274,6 +274,8 @@ void kernfs_put(struct kernfs_node *kn);
 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry);
 struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
 
+struct dentry *kernfs_obtain_root(struct super_block *sb,
+				  struct kernfs_node *kn);
 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 				       unsigned int flags, void *priv);
 void kernfs_destroy_root(struct kernfs_root *root);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b1ae6d9..e779890 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1438,6 +1438,14 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 			return -ENOENT;
 	}
 
+	/* If inside a non-init cgroup namespace, only allow default hierarchy
+	 * to be mounted.
+	 */
+	if ((current->nsproxy->cgroup_ns != &init_cgroup_ns) &&
+	    !(opts->flags & CGRP_ROOT_SANE_BEHAVIOR)) {
+		return -EINVAL;
+	}
+
 	if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) {
 		pr_warn("sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n");
 		if (nr_opts != 1) {
@@ -1630,6 +1638,15 @@ static void init_cgroup_root(struct cgroup_root *root,
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
+struct dentry *cgroupns_get_root(struct super_block *sb,
+				 struct cgroup_namespace *ns)
+{
+	struct dentry *nsdentry;
+
+	nsdentry = kernfs_obtain_root(sb, ns->root_cgrp->kn);
+	return nsdentry;
+}
+
 static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 {
 	LIST_HEAD(tmp_links);
@@ -1734,6 +1751,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 	int ret;
 	int i;
 	bool new_sb;
+	struct cgroup_namespace *ns =
+		get_cgroup_ns(current->nsproxy->cgroup_ns);
+
+	/* Check if the caller has permission to mount. */
+	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
+		put_cgroup_ns(ns);
+		return ERR_PTR(-EPERM);
+	}
 
 	/*
 	 * The first time anyone tries to mount a cgroup, enable the list
@@ -1866,11 +1891,28 @@ out_free:
 	kfree(opts.release_agent);
 	kfree(opts.name);
 
-	if (ret)
+	if (ret) {
+		put_cgroup_ns(ns);
 		return ERR_PTR(ret);
+	}
 
 	dentry = kernfs_mount(fs_type, flags, root->kf_root,
 				CGROUP_SUPER_MAGIC, &new_sb);
+
+	if (!IS_ERR(dentry) && (root == &cgrp_dfl_root)) {
+		/* If this mount is for the default hierarchy in non-init cgroup
+		 * namespace, then instead of root cgroup's dentry, we return
+		 * the dentry corresponding to the cgroupns->root_cgrp.
+		 */
+		if (ns != &init_cgroup_ns) {
+			struct dentry *nsdentry;
+
+			nsdentry = cgroupns_get_root(dentry->d_sb, ns);
+			dput(dentry);
+			dentry = nsdentry;
+		}
+	}
+
 	if (IS_ERR(dentry) || !new_sb)
 		cgroup_put(&root->cgrp);
 
@@ -1883,6 +1925,7 @@ out_free:
 		deactivate_super(pinned_sb);
 	}
 
+	put_cgroup_ns(ns);
 	return dentry;
 }
 
@@ -1911,6 +1954,7 @@ static struct file_system_type cgroup_fs_type = {
 	.name = "cgroup",
 	.mount = cgroup_mount,
 	.kill_sb = cgroup_kill_sb,
+	.fs_flags = FS_USERNS_MOUNT,
 };
 
 static struct kobject *cgroup_kobj;
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related

* [PATCHv3 8/8] cgroup: Add documentation for cgroup namespaces
From: Aditya Kali @ 2014-12-05  1:55 UTC (permalink / raw)
  To: tj-DgEjT+Ai2ygdnm+yROfE0A, lizefan-hv44wF8Li93QT0dZR+AlfA,
	serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA, luto-kltTT9wpgjJwATOyAt5JVQ,
	ebiederm-aS9lmoZGLiVWk0Htik3J/w, cgroups-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA, mingo-H+wXaHxf7aLQT0dZR+AlfA
  Cc: richard.weinberger-Re5JQEeQqe8AvxtiuMwx3w,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
In-Reply-To: <1417744550-6461-1-git-send-email-adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

Signed-off-by: Aditya Kali <adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 Documentation/cgroups/namespace.txt | 147 ++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 Documentation/cgroups/namespace.txt

diff --git a/Documentation/cgroups/namespace.txt b/Documentation/cgroups/namespace.txt
new file mode 100644
index 0000000..6480379
--- /dev/null
+++ b/Documentation/cgroups/namespace.txt
@@ -0,0 +1,147 @@
+			CGroup Namespaces
+
+CGroup Namespace provides a mechanism to virtualize the view of the
+/proc/<pid>/cgroup file. The CLONE_NEWCGROUP clone-flag can be used with
+clone() and unshare() syscalls to create a new cgroup namespace.
+The process running inside the cgroup namespace will have its /proc/<pid>/cgroup
+output restricted to cgroupns-root. cgroupns-root is the cgroup of the process
+at the time of creation of the cgroup namespace.
+
+Prior to CGroup Namespace, the /proc/<pid>/cgroup file used to show complete
+path of the cgroup of a process. In a container setup (where a set of cgroups
+and namespaces are intended to isolate processes), the /proc/<pid>/cgroup file
+may leak potential system level information to the isolated processes.
+
+For Example:
+  $ cat /proc/self/cgroup
+  0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1
+
+The path '/batchjobs/container_id1' can generally be considered as system-data
+and it's desirable to not expose it to the isolated process.
+
+CGroup Namespaces can be used to restrict visibility of this path.
+For Example:
+  # Before creating cgroup namespace
+  $ ls -l /proc/self/ns/cgroup
+  lrwxrwxrwx 1 root root 0 2014-07-15 10:37 /proc/self/ns/cgroup -> cgroup:[4026531835]
+  $ cat /proc/self/cgroup
+  0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1
+
+  # unshare(CLONE_NEWCGROUP) and exec /bin/bash
+  $ ~/unshare -c
+  [ns]$ ls -l /proc/self/ns/cgroup
+  lrwxrwxrwx 1 root root 0 2014-07-15 10:35 /proc/self/ns/cgroup -> cgroup:[4026532183]
+  # From within new cgroupns, process sees that it's in the root cgroup
+  [ns]$ cat /proc/self/cgroup
+  0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/
+
+  # From global cgroupns:
+  $ cat /proc/<pid>/cgroup
+  0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1
+
+  # Unshare cgroupns along with userns and mountns
+  # Following calls unshare(CLONE_NEWCGROUP|CLONE_NEWUSER|CLONE_NEWNS), then
+  # sets up uid/gid map and execs /bin/bash
+  $ ~/unshare -c -u -m
+  # Originally, we were in /batchjobs/container_id1 cgroup. Mount our own cgroup
+  # hierarchy.
+  [ns]$ mount -t cgroup cgroup /tmp/cgroup
+  [ns]$ ls -l /tmp/cgroup
+  total 0
+  -r--r--r-- 1 root root 0 2014-10-13 09:32 cgroup.controllers
+  -r--r--r-- 1 root root 0 2014-10-13 09:32 cgroup.populated
+  -rw-r--r-- 1 root root 0 2014-10-13 09:25 cgroup.procs
+  -rw-r--r-- 1 root root 0 2014-10-13 09:32 cgroup.subtree_control
+
+The cgroupns-root (/batchjobs/container_id1 in above example) becomes the
+filesystem root for the namespace specific cgroupfs mount.
+
+The virtualization of /proc/self/cgroup file combined with restricting
+the view of cgroup hierarchy by namespace-private cgroupfs mount
+should provide a completely isolated cgroup view inside the container.
+
+Note that CGroup Namespaces virtualizes the path on unified hierarchy only. If
+other hierarchies are mounted, /proc/<pid>/cgroup will continue to show the full
+cgroup path for those.
+
+In its current form, the cgroup namespaces patchset provides the following
+behavior:
+
+(1) The 'cgroupns-root' for a cgroup namespace is the cgroup in which
+    the process calling unshare is running.
+    For ex. if a process in /batchjobs/container_id1 cgroup calls unshare,
+    cgroup /batchjobs/container_id1 becomes the cgroupns-root.
+    For the init_cgroup_ns, this is the real root ('/') cgroup
+    (identified in code as cgrp_dfl_root.cgrp).
+
+(2) The cgroupns-root cgroup does not change even if the namespace
+    creator process later moves to a different cgroup.
+    $ ~/unshare -c # unshare cgroupns in some cgroup
+    [ns]$ cat /proc/self/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/
+    [ns]$ mkdir sub_cgrp_1
+    [ns]$ echo 0 > sub_cgrp_1/cgroup.procs
+    [ns]$ cat /proc/self/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1
+
+(3) Each process gets its CGROUPNS specific view of /proc/<pid>/cgroup
+(a) Processes running inside the cgroup namespace will be able to see
+    cgroup paths (in /proc/self/cgroup) only inside their root cgroup
+    [ns]$ sleep 100000 &  # From within unshared cgroupns
+    [1] 7353
+    [ns]$ echo 7353 > sub_cgrp_1/cgroup.procs
+    [ns]$ cat /proc/7353/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1
+
+(b) From global cgroupns, the real cgroup path will be visible:
+    $ cat /proc/7353/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1/sub_cgrp_1
+
+(c) From a sibling cgroupns (cgroupns root-ed at a different cgroup), cgroup
+    path relative to its own cgroupns-root will be shown:
+    # ns2's cgroupns-root is at '/batchjobs/container_id2'
+    [ns2]$ cat /proc/7353/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/../container_id1/sub_cgrp_1
+
+    Note that the relative path always starts with '/' to indicate that it's
+    relative to the cgroupns-root of the caller.
+
+(4) Processes inside a cgroupns can move in-and-out of the cgroupns-root
+    (if they have proper access to external cgroups).
+    # From inside cgroupns (with cgroupns-root at /batchjobs/container_id1), and
+    # assuming that the global hierarchy is still accessible inside cgroupns:
+    $ cat /proc/7353/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1
+    $ echo 7353 > batchjobs/container_id2/cgroup.procs
+    $ cat /proc/7353/cgroup
+    0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/../container_id2
+
+    Note that this kind of setup is not encouraged. A task inside cgroupns
+    should only be exposed to its own cgroupns hierarchy. Otherwise it makes
+    the virtualization of /proc/<pid>/cgroup less useful.
+
+(5) Setns to another cgroup namespace is allowed when:
+    (a) the process has CAP_SYS_ADMIN in its current userns
+    (b) the process has CAP_SYS_ADMIN in the target cgroupns' userns
+    No implicit cgroup changes happen with attaching to another cgroupns. It
+    is expected that someone moves the attaching process under the target
+    cgroupns-root.
+
+(6) When some thread from a multi-threaded process unshares its
+    cgroup-namespace, the new cgroupns gets applied to the entire
+    process (all the threads). This should be OK since
+    unified-hierarchy only allows process-level containerization. So
+    all the threads in the process will have the same cgroup.
+
+(7) The cgroup namespace is alive as long as there is at least 1
+    process inside it. When the last process exits, the cgroup
+    namespace is destroyed. The cgroupns-root and the actual cgroups
+    remain though.
+
+(8) Namespace specific cgroup hierarchy can be mounted by a process running
+    inside cgroupns:
+    $ mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT
+
+    This will mount the unified cgroup hierarchy with cgroupns-root as the
+    filesystem root. The process needs CAP_SYS_ADMIN in its userns and mntns.
+
-- 
2.2.0.rc0.207.ga3a616c

^ permalink raw reply related

* Re: [PATCHv3 0/8] CGroup Namespaces
From: Aditya Kali @ 2014-12-05  3:20 UTC (permalink / raw)
  To: Tejun Heo, Li Zefan, Serge Hallyn, Andy Lutomirski,
	Eric W. Biederman, cgroups mailinglist,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Linux API,
	Ingo Molnar
  Cc: Linux Containers, Rohit Jnagal, Vivek Goyal, Richard Weinberger
In-Reply-To: <1417744550-6461-1-git-send-email-adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

These patches are now also hosted on github at
https://github.com/adityakali/linux/tree/cgroupns_v3.

Thanks,

On Thu, Dec 4, 2014 at 5:55 PM, Aditya Kali <adityakali-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
> Another spin for CGroup Namespaces feature.
>
> Changes from V2:
> 1. Added documentation in Documentation/cgroups/namespace.txt
> 2. Fixed a bug that caused crash
> 3. Incorporated some other suggestions from last patchset:
>    - removed use of threadgroup_lock() while creating new cgroupns
>    - use task_lock() instead of rcu_read_lock() while accessing
>      task->nsproxy
>    - optimized setns() to own cgroupns
>    - simplified code around sane-behavior mount option parsing
> 4. Restored ACKs from Serge Hallyn from v1 on few patches that have
>    not changed since then.
>
> Changes from V1:
> 1. No pinning of processes within cgroupns. Tasks can be freely moved
>    across cgroups even outside of their cgroupns-root. Usual DAC/MAC policies
>    apply as before.
> 2. Path in /proc/<pid>/cgroup is now always shown and is relative to
>    cgroupns-root. So path can contain '/..' strings depending on cgroupns-root
>    of the reader and cgroup of <pid>.
> 3. setns() does not require the process to first move under target
>    cgroupns-root.
>
> Changes from RFC (V0):
> 1. setns support for cgroupns
> 2. 'mount -t cgroup cgroup <mntpt>' from inside a cgroupns now
>    mounts the cgroup hierarchy with cgroupns-root as the filesystem root.
> 3. writes to cgroup files outside of cgroupns-root are not allowed
> 4. visibility of /proc/<pid>/cgroup is further restricted by not showing
>    anything if the <pid> is in a sibling cgroupns and its cgroup falls outside
>    your cgroupns-root.
>
> ---
>  Documentation/cgroups/namespace.txt | 147 +++++++++++++++++++++++++++
>  fs/kernfs/dir.c                     | 195 ++++++++++++++++++++++++++++++++----
>  fs/kernfs/mount.c                   |  48 +++++++++
>  fs/proc/namespaces.c                |   1 +
>  include/linux/cgroup.h              |  52 +++++++++-
>  include/linux/cgroup_namespace.h    |  36 +++++++
>  include/linux/kernfs.h              |   5 +
>  include/linux/nsproxy.h             |   2 +
>  include/linux/proc_ns.h             |   4 +
>  include/uapi/linux/sched.h          |   3 +-
>  kernel/Makefile                     |   2 +-
>  kernel/cgroup.c                     | 106 +++++++++++++++-----
>  kernel/cgroup_namespace.c           | 140 ++++++++++++++++++++++++++
>  kernel/fork.c                       |   2 +-
>  kernel/nsproxy.c                    |  19 +++-
>  15 files changed, 711 insertions(+), 51 deletions(-)
>  create mode 100644 Documentation/cgroups/namespace.txt
>  create mode 100644 include/linux/cgroup_namespace.h
>  create mode 100644 kernel/cgroup_namespace.c
>
> [PATCHv3 1/8] kernfs: Add API to generate relative kernfs path
> [PATCHv3 2/8] sched: new clone flag CLONE_NEWCGROUP for cgroup
> [PATCHv3 3/8] cgroup: add function to get task's cgroup on default
> [PATCHv3 4/8] cgroup: export cgroup_get() and cgroup_put()
> [PATCHv3 5/8] cgroup: introduce cgroup namespaces
> [PATCHv3 6/8] cgroup: cgroup namespace setns support
> [PATCHv3 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns
> [PATCHv3 8/8] cgroup: Add documentation for cgroup namespaces



-- 
Aditya

^ permalink raw reply

* Re: [PATCH] arch: uapi: asm: mman.h: Let MADV_FREE have same value for all architectures
From: Minchan Kim @ 2014-12-05  6:54 UTC (permalink / raw)
  To: Chen Gang
  Cc: akpm@linux-foundation.org, rth@twiddle.net,
	ink@jurassic.park.msu.ru, mattst88@gmail.com, Ralf Baechle,
	jejb@parisc-linux.org, deller@gmx.de, chris@zankel.net,
	jcmvbkbc@gmail.com, Arnd Bergmann, linux-kernel@vger.kernel.org,
	linux-arch, linux-api
In-Reply-To: <5480E715.3020900@gmail.com>

On Fri, Dec 05, 2014 at 06:58:29AM +0800, Chen Gang wrote:
> For uapi, need try to let all macros have same value, and MADV_FREE is
> added into main branch recently, so need redefine MADV_FREE for it.
> 
> At present, '8' can be shared with all architectures, so redefine it to
> '8'.
> 
> Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>


Hello Chen,

Thanks for looking at this.
Feel free to add my sign.

Acked-by: Minchan Kim <minchan@kernel.org>

-- 
Kind regards,
Minchan Kim

^ permalink raw reply

* Re: [PATCH v17 1/7] mm: support madvise(MADV_FREE)
From: Minchan Kim @ 2014-12-05  7:08 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Andrew Morton, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg, Michael Kerrisk,
	linux-api-u79uwXL29TY76Z2rM5mHXA, Hugh Dickins, Johannes Weiner,
	Rik van Riel, KOSAKI Motohiro, Mel Gorman, Jason Evans,
	zhangyanfei-BthXqXjhjHXQFUHtdCDX3A, Kirill A. Shutemov,
	Kirill A. Shutemov
In-Reply-To: <20141203101329.GB23236-2MMpYkNvuYDjFM9bn6wA6Q@public.gmane.org>

On Wed, Dec 03, 2014 at 11:13:29AM +0100, Michal Hocko wrote:
> On Wed 03-12-14 09:00:26, Minchan Kim wrote:
> > On Tue, Dec 02, 2014 at 11:01:25AM +0100, Michal Hocko wrote:
> > > On Mon 01-12-14 08:56:52, Minchan Kim wrote:
> > > [...]
> > > > From 2edd6890f92fa4943ce3c452194479458582d88c Mon Sep 17 00:00:00 2001
> > > > From: Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > > > Date: Mon, 1 Dec 2014 08:53:55 +0900
> > > > Subject: [PATCH] madvise.2: Document MADV_FREE
> > > > 
> > > > Signed-off-by: Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > > > ---
> > > >  man2/madvise.2 | 13 +++++++++++++
> > > >  1 file changed, 13 insertions(+)
> > > > 
> > > > diff --git a/man2/madvise.2 b/man2/madvise.2
> > > > index 032ead7..33aa936 100644
> > > > --- a/man2/madvise.2
> > > > +++ b/man2/madvise.2
> > > > @@ -265,6 +265,19 @@ file (see
> > > >  .BR MADV_DODUMP " (since Linux 3.4)"
> > > >  Undo the effect of an earlier
> > > >  .BR MADV_DONTDUMP .
> > > > +.TP
> > > > +.BR MADV_FREE " (since Linux 3.19)"
> > > > +Gives the VM system the freedom to free pages, and tells the system that
> > > > +information in the specified page range is no longer important.
> > > > +This is an efficient way of allowing
> > > > +.BR malloc (3)
> > > 
> > > This might be rather misleading. Only some malloc implementations are
> > > using this feature (jemalloc, right?). So either be specific about which
> > > implementation or do not add it at all.
> > 
> > Make sense. I don't think it's a good idea to say specific example
> > in man page, which is rather arguable and limit the idea.
> > 
> > > 
> > > > +to free pages anywhere in the address space, while keeping the address space
> > > > +valid. The next time that the page is referenced, the page might be demand
> > > > +zeroed, or might contain the data that was there before the MADV_FREE call.
> > > > +References made to that address space range will not make the VM system page the
> > > > +information back in from backing store until the page is modified again.
> > > 
> > > I am not sure I understand the last sentence. So say I did MADV_FREE and
> > > the reclaim has dropped that page. I know that the file backed mappings
> > > are not supported yet but assume they were for a second... Now, I do
> > > read from that location again what is the result?
> > 
> > Zero page.
> 
> OK, it felt strange at first but now that I am thinking about it some
> more it starts making sense. So the semantic is: Either zero page
> (disconnected from the backing store) or the original content after
> madvise(MADV_FREE). The page gets connected to the backing store after
> it gets modified again. If this is the case then the sentence in the man
> page makes perfect sense.
> 
> What made me confused was that I expected file backed pages would get a
> fresh page from the origin but this would be awkward I guess. 
> 
> > > If we consider anon mappings then the backing store is misleading as
> > > well because memory was dropped and so always newly allocated.
> > 
> > When I read the sentence at first, I thought backing store means swap
> > so I don't have any trouble to understand it. But I agree your opinion.
> > Target for man page is not a kernel developer but application developer.
> > 
> > > I would rather drop the whole sentence and rather see an explanation
> > > of what the difference from MADV_DONTNEED is.
> > > "
> > > Unlike MADV_DONTNEED the memory is freed lazily e.g. when the VM system
> > > is under memory pressure.
> > > "
> > 
> > It's a good idea but I don't think it's enough. At least we should explain
> > cancel of delay free logic(ie, write). So, How about this?
> > 
> > MADV_FREE " (since Linux 3.19)"
> > 
> > Gives the VM system the freedom to free pages, and tells the system that
> > it's okay to free pages if the VM system has reasons(e.g., memory pressure).
> > So, it looks like delayed MADV_DONTNEED.
> > The next time that the page is referenced, the page might be demand
> > zeroed if the VM system freed the page. Otherwise, it might contain the data
> > that was there before the MADV_FREE call if the VM system didn't free the page.
> > New write in the page after the MADV_FREE call makes the VM system not free
> > the page any more.
> 
> Dunno, I guess the original content was slightly better. Or the
> following wording from UNIX man pages is even more descriptive
> (http://www.lehman.cuny.edu/cgi-bin/man-cgi?madvise+3)
> "
> Tell the kernel that contents in the specified address range are no
> longer important and the range will be overwritten. When there is
> demand for memory, the system will free pages associated with the
> specified address range. In this instance, the next time a page in the
> address range is referenced, it will contain all zeroes.  Otherwise,
> it will contain the data that was there prior to the MADV_FREE
> call. References made to the address range will not make the system read
> from backing store (swap space) until the page is modified again.
> 
> This value cannot be used on mappings that have underlying file objects.
> "

For me, it would be better.
Thanks for the heads up.

> 
> I would just clarify the last sentence with addition
> (MAP_PRIVATE|MAP_ANONYMOUS mappings in this implementation). The

I want to be consistent with KSM/THP which used "private anonymous pages".
So, I guess man page maintainer already acked the term so I want to use it,
too.


> difference to MADV_DONTNEED is more complicated now so I wouldn't make
> the text even more confusing.
> 
> Anyway the confusion started on my end so feel free to stick with the
> BSD wording (modulo malloc note which is really confusing as the default
> glibc allocator doesn't do that AFAIK).



>From cfa212d4fb307ae772b08cf564cab7e6adb8f4fc Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Date: Mon, 1 Dec 2014 08:53:55 +0900
Subject: [PATCH] madvise.2: Document MADV_FREE

Signed-off-by: Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 man2/madvise.2 | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/man2/madvise.2 b/man2/madvise.2
index 032ead7..fc1aaca 100644
--- a/man2/madvise.2
+++ b/man2/madvise.2
@@ -265,6 +265,18 @@ file (see
 .BR MADV_DODUMP " (since Linux 3.4)"
 Undo the effect of an earlier
 .BR MADV_DONTDUMP .
+.TP
+.BR MADV_FREE " (since Linux 3.19)"
+Tell the kernel that contents in the specified address range are no
+longer important and the range will be overwritten. When there is
+demand for memory, the system will free pages associated with the
+specified address range. In this instance, the next time a page in the
+address range is referenced, it will contain all zeroes.  Otherwise,
+it will contain the data that was there prior to the MADV_FREE call.
+References made to the address range will not make the system read
+from backing store (swap space) until the page is modified again.
+It works only with private anonymous pages (see
+.BR mmap (2)).
 .SH RETURN VALUE
 On success
 .BR madvise ()
-- 
2.0.0

-- 
Kind regards,
Minchan Kim

^ permalink raw reply related

* Re: [PATCH v4 0/5] Add Spreadtrum Sharkl64 Platform support
From: Olof Johansson @ 2014-12-05  7:24 UTC (permalink / raw)
  To: Chunyan Zhang
  Cc: gregkh, mark.rutland, arnd, gnomes, broonie, robh+dt, pawel.moll,
	ijc+devicetree, galak, will.deacon, catalin.marinas, jslaby,
	jason, heiko, florian.vaussard, andrew, rrichter, hytszk,
	grant.likely, orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu,
	zhang.lyra, wei.qiao, devicetree, linux-doc, linux-api,
	linux-kernel, arm, linux-serial, sprdlinux, linux-arm-kernel
In-Reply-To: <1417692860-18841-1-git-send-email-chunyan.zhang@spreadtrum.com>

On Thu, Dec 04, 2014 at 07:34:15PM +0800, Chunyan Zhang wrote:
> Spreadtrum is a rapid growing chip vendor providing smart phone total solutions.
> 
> Sharkl64 Platform is nominated as a SoC infrastructure that supports 4G/3G/2G
> standards based on ARMv8 multiple core architecture.Now we have only one
> SoC(SC9836) based on this Platform in developing.
> 
> This patchset adds Sharkl64 support in arm64 device tree and the serial driver
> of SC9836-UART.
> 
> This patchset also has patches which address "sprd" prefix and DT compatible
> strings for nodes which appear un-documented.
> 
> This version code was tested both on Fast Mode and sc9836-fpga board.
> We use the latest boot-wrapper-aarch64 as the bootloader.

Hi,

We only got 3 of the 5 patches of this version (1, 2 and 4). Can you
resend the dts patch to us (I'm guessing the serial one will go through Greg).


Thanks,

-Olof

^ permalink raw reply

* Re: [PATCH v15 0/3] Add drm driver for Rockchip Socs
From: Dave Airlie @ 2014-12-05  7:26 UTC (permalink / raw)
  To: Daniel Kurtz
  Cc: Heiko Stübner, Mark Yao, Mark Rutland, linux-doc, Kever Yang,
	dri-devel, linux-kernel@vger.kernel.org, linux-api,
	Boris BREZILLON, simon xue, open list:ARM/Rockchip SoC...,
	Grant Likely, Tao Huang, open list:OPEN FIRMWARE AND...,
	Pawel Moll, Ian Campbell, Dominik Behr, 闫孝军,
	Eddie Cai, Rob Herring, John Stultz
In-Reply-To: <CAGS+omAqkQaK99OX_uAf2Z6-fep2Euwn112Oj5gFpRca8ZAgew@mail.gmail.com>

On 5 December 2014 at 05:29, Daniel Kurtz <djkurtz@chromium.org> wrote:
> On Tue, Dec 2, 2014 at 1:31 AM, Heiko Stübner <heiko@sntech.de> wrote:
>> Hi Mark,
>>
>> Am Dienstag, 2. Dezember 2014, 17:13:20 schrieb Mark Yao:
>>> This a series of patches is a DRM Driver for Rockchip Socs, add support
>>> for vop devices. Future patches will add additional encoders/connectors,
>>> such as eDP, HDMI.
>>>
>>> The basic "crtc" for rockchip is a "VOP" - Video Output Processor.
>>> the vop devices found on Rockchip rk3288 Soc, rk3288 soc have two similar
>>> Vop devices. Vop devices support iommu mapping, we use dma-mapping API with
>>> ARM_DMA_USE_IOMMU.
>>
>> [...]
>>
>>> Changes in v15:
>>> - remove depends on ARM_DMA_USE_IOMMU & IOMMU_API which cause
>>>   recursive dependency problem
>
> I thought the recommended solution for this was to fix OMAP3 to not
> select  ARM_DMA_USE_IOMMU and OMAP_IOMMU, not to drop the 'depends on'
> in drm/rockchip?

Lets fix that up once we get this merged, if I had a tree based on the
iommu I'd have
it -next now.

can I get a pull for that?
Dave.

^ permalink raw reply

* Re: [PATCH v6 0/7] vfs: Non-blockling buffered fs read (page cache only)
From: Volker Lendecke @ 2014-12-05  8:17 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Milosz Tanski, LKML, Christoph Hellwig,
	linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, Mel Gorman,
	Tejun Heo, Jeff Moyer, Theodore Ts'o, Al Viro, Linux API,
	Michael Kerrisk, linux-arch
In-Reply-To: <20141204151102.2d7e11dca39f130c2dff2294@linux-foundation.org>

On Thu, Dec 04, 2014 at 03:11:02PM -0800, Andrew Morton wrote:
> I can see all that, but it's handwaving.  Yes, preadv2() will perform
> better in some circumstances than fincore+pread.  But how much better? 
> Enough to justify this approach, or not?
> 
> Alas, the only way to really settle that is to implement fincore() and
> to subject it to a decent amount of realistic quantitative testing.
> 
> Ho hum.
> 
> Could you please hunt down some libuv developers, see if we can solicit
> some quality input from them?  As I said, we really don't want to merge
> this then find that people don't use it for some reason, or that it
> needs changes.

All I can say from a Samba perspective is that none of the ARM based
Storage boxes I have seen so far do AIO because of the base footprint
for every read. For sequential reads kernel-level readahead could kick
in properly and we should be able to give them the best of both worlds:
No context switches in the default case but also good parallel behaviour
for other workloads.  The most important benchmark for those guys is to
read a DVD image, whether it makes sense or not.

Volker

-- 
SerNet GmbH, Bahnhofsallee 1b, 37081 Göttingen
phone: +49-551-370000-0, fax: +49-551-370000-9
AG Göttingen, HRB 2816, GF: Dr. Johannes Loxen
http://www.sernet.de, mailto:kontakt@sernet.de

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply

* Re: [PATCH v17 1/7] mm: support madvise(MADV_FREE)
From: Michal Hocko @ 2014-12-05  8:32 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Andrew Morton, linux-kernel, linux-mm, Michael Kerrisk, linux-api,
	Hugh Dickins, Johannes Weiner, Rik van Riel, KOSAKI Motohiro,
	Mel Gorman, Jason Evans, zhangyanfei, Kirill A. Shutemov,
	Kirill A. Shutemov
In-Reply-To: <20141205070816.GB3358@bbox>

On Fri 05-12-14 16:08:16, Minchan Kim wrote:
[...]
> From cfa212d4fb307ae772b08cf564cab7e6adb8f4fc Mon Sep 17 00:00:00 2001
> From: Minchan Kim <minchan@kernel.org>
> Date: Mon, 1 Dec 2014 08:53:55 +0900
> Subject: [PATCH] madvise.2: Document MADV_FREE
> 
> Signed-off-by: Minchan Kim <minchan@kernel.org>

Reviewed-by: Michal Hocko <mhocko@suse.cz>

Thanks!

> ---
>  man2/madvise.2 | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/man2/madvise.2 b/man2/madvise.2
> index 032ead7..fc1aaca 100644
> --- a/man2/madvise.2
> +++ b/man2/madvise.2
> @@ -265,6 +265,18 @@ file (see
>  .BR MADV_DODUMP " (since Linux 3.4)"
>  Undo the effect of an earlier
>  .BR MADV_DONTDUMP .
> +.TP
> +.BR MADV_FREE " (since Linux 3.19)"
> +Tell the kernel that contents in the specified address range are no
> +longer important and the range will be overwritten. When there is
> +demand for memory, the system will free pages associated with the
> +specified address range. In this instance, the next time a page in the
> +address range is referenced, it will contain all zeroes.  Otherwise,
> +it will contain the data that was there prior to the MADV_FREE call.
> +References made to the address range will not make the system read
> +from backing store (swap space) until the page is modified again.
> +It works only with private anonymous pages (see
> +.BR mmap (2)).
>  .SH RETURN VALUE
>  On success
>  .BR madvise ()
> -- 
> 2.0.0
> 
> -- 
> Kind regards,
> Minchan Kim
> --
> To unsubscribe from this list: send the line "unsubscribe linux-api" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v15 0/3] Add drm driver for Rockchip Socs
From: Heiko Stübner @ 2014-12-05  8:33 UTC (permalink / raw)
  To: Dave Airlie
  Cc: Daniel Kurtz, Mark Yao, Mark Rutland,
	linux-doc-u79uwXL29TY76Z2rM5mHXA, Kever Yang, dri-devel,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-api-u79uwXL29TY76Z2rM5mHXA, Boris BREZILLON, simon xue,
	open list:ARM/Rockchip SoC..., Grant Likely, Tao Huang,
	open list:OPEN FIRMWARE AND..., Pawel Moll, Ian Campbell,
	Dominik Behr, 闫孝军, Eddie Cai, Rob Herring,
	John Stultz
In-Reply-To: <CAPM=9tzcB0XaenK1RjJoOBpJjrq84atJP59=kh6WMA2-OcFpEA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Hi Dave,

Am Freitag, 5. Dezember 2014, 17:26:04 schrieb Dave Airlie:
> On 5 December 2014 at 05:29, Daniel Kurtz <djkurtz-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
> > I thought the recommended solution for this was to fix OMAP3 to not
> > select  ARM_DMA_USE_IOMMU and OMAP_IOMMU, not to drop the 'depends on'
> > in drm/rockchip?
> 
> Lets fix that up once we get this merged, if I had a tree based on the
> iommu I'd have
> it -next now.
> 
> can I get a pull for that?

Mark sent a pull on tuesday as reply to the cover-letter.
Message-ID: <547D88D7.5080005-TNX95d0MmH7DzftRWevZcw@public.gmane.org>

Replicated here, if you don't want to search.


Heiko

---------------- 8< -------------------------------------

Hi Dave

The following changes since commit 656d7077d8ffd1c2492d4a0a354367ab2e545059:

   dt-bindings: iommu: Add documentation for rockchip iommu (2014-11-03 
17:29:09 +0100)

are available in the git repository at:

   https://github.com/markyzq/kernel-drm-rockchip.git drm_iommu_v15

for you to fetch changes up to 5ac4837b12f533de5d9f8f66b45494c58e805536:

   dt-bindings: video: Add documentation for rockchip vop (2014-12-02 
17:29:33 +0800)

----------------------------------------------------------------
Mark Yao (3):
       drm: rockchip: Add basic drm driver
       dt-bindings: video: Add for rockchip display subsytem
       dt-bindings: video: Add documentation for rockchip vop

  .../devicetree/bindings/video/rockchip-drm.txt     |   19 +
  .../devicetree/bindings/video/rockchip-vop.txt     |   58 +
  drivers/gpu/drm/Kconfig                            |    2 +
  drivers/gpu/drm/Makefile                           |    1 +
  drivers/gpu/drm/rockchip/Kconfig                   |   17 +
  drivers/gpu/drm/rockchip/Makefile                  |    8 +
  drivers/gpu/drm/rockchip/rockchip_drm_drv.c        |  551 ++++++++
  drivers/gpu/drm/rockchip/rockchip_drm_drv.h        |   68 +
  drivers/gpu/drm/rockchip/rockchip_drm_fb.c         |  201 +++
  drivers/gpu/drm/rockchip/rockchip_drm_fb.h         |   28 +
  drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c      |  210 +++
  drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h      |   21 +
  drivers/gpu/drm/rockchip/rockchip_drm_gem.c        |  294 ++++
  drivers/gpu/drm/rockchip/rockchip_drm_gem.h        |   54 +
  drivers/gpu/drm/rockchip/rockchip_drm_vop.c        | 1455 ++++++++++++++++++++
  drivers/gpu/drm/rockchip/rockchip_drm_vop.h        |  201 +++
  16 files changed, 3188 insertions(+)
  create mode 100644 Documentation/devicetree/bindings/video/rockchip-drm.txt
  create mode 100644 Documentation/devicetree/bindings/video/rockchip-vop.txt
  create mode 100644 drivers/gpu/drm/rockchip/Kconfig
  create mode 100644 drivers/gpu/drm/rockchip/Makefile
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_drv.c
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_drv.h
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_fb.c
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_fb.h
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_gem.c
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_gem.h
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_vop.c
  create mode 100644 drivers/gpu/drm/rockchip/rockchip_drm_vop.h

^ permalink raw reply

* Re: [PATCH v2 00/13] Add kdbus implementation
From: Hillf Danton @ 2014-12-05  8:49 UTC (permalink / raw)
  To: 'Greg Kroah-Hartman'
  Cc: arnd-r2nGTMty4D4, ebiederm-aS9lmoZGLiVWk0Htik3J/w,
	gnomes-qBU/x9rampVanCEyBjwyrvXRex20P6io, teg-B22kvLQNl6c,
	jkosina-AlSwsSmVLrQ, luto-kltTT9wpgjJwATOyAt5JVQ,
	linux-api-u79uwXL29TY76Z2rM5mHXA, linux-kernel,
	daniel-cYrQPVfZoowdnm+yROfE0A, dh.herrmann-Re5JQEeQqe8AvxtiuMwx3w,
	tixxdz-Umm1ozX2/EEdnm+yROfE0A, Hillf Danton,
	赵东(辅周)

Hey all
> 
> kdbus is a kernel-level IPC implementation that aims for resemblance to
> the protocol layer with the existing userspace D-Bus daemon while
> enabling some features that couldn't be implemented before in userspace.
> 
[...]
> 
> This can also be found in a git tree, the kdbus branch of char-misc.git at:
>         https://git.kernel.org/cgit/linux/kernel/git/gregkh/char-misc.git/
> 
In the environment:
	Android Kitkat
	Linux-3.4.67
	CPU MTK MT6582 ARMv7 Processor rev 3 (v7l)

root@cwet_td_a800:/ # kdbus-test 
Testing bus make functions (bus-make) .................................. OK
Testing the HELLO command (hello) ...................................... OK
Testing the BYEBYE command (byebye) .................................... OK
Testing a chat pattern (chat) .......................................... OK
Testing a simple dameon (daemon) ....................................... OK
Testing file descriptor passing (fd-passing) ........................... OK
Testing custom endpoint (endpoint) ..................................... OK
Testing monitor functionality (monitor) ................................ OK
Testing basic name registry functions (name-basics) .................... OK
Testing name registry conflict details (name-conflict) ................. OK
Testing queuing of names (name-queue) .................................. OK
Testing basic message handling (message-basic) ......................... OK
Testing handling of messages with priority (message-prio) .............. OK
Testing message quotas are enforced (message-quota) .................... OK
Testing timeout (timeout) .............................................. OK
Testing synchronous replies vs. BYEBYE (sync-byebye) ................... OK
Testing synchronous replies (sync-reply) ............................... OK
Testing freeing of memory (message-free) ............................... OK
Testing retrieving connection information (connection-info) ............ OK
Testing updating connection information (connection-update) ............ OK
Testing verifying pools are never writable (writable-pool) ............. OK
Testing policy (policy) ................................................ OK
Testing unprivileged bus access (policy-priv) .......................... OK
Testing policy in user namespaces (policy-ns) .......................... OK
Testing metadata in user namespaces (metadata-ns) ...................... OK
Testing adding of matches by id (match-id-add) ......................... OK
Testing removing of matches by id (match-id-remove) .................... OK
Testing replace of matches with the same cookie (match-replace) ........ OK
Testing adding of matches by name (match-name-add) ..................... OK
Testing removing of matches by name (match-name-remove) ................ OK
Testing matching for name changes (match-name-change) .................. OK
Testing matching with bloom filters (match-bloom) ...................... OK
Testing activator connections (activator) .............................. OK
Testing benchmark (benchmark) .......................................... OK
Testing race multiple byebyes (race-byebye) ............................ OK
Testing race byebye vs match removal (race-byebye-match) ............... OK

SUMMARY: 36 tests passed, 0 skipped, 0 failed

And we like to test newer versions, if any, of kdbus with our phone.

Thanks
Hillf

^ permalink raw reply

* Re: [PATCH v4 0/5] Add Spreadtrum Sharkl64 Platform support
From: Mark Rutland @ 2014-12-05 10:40 UTC (permalink / raw)
  To: Chunyan Zhang
  Cc: gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org,
	arnd-r2nGTMty4D4@public.gmane.org,
	gnomes-qBU/x9rampVanCEyBjwyrvXRex20P6io@public.gmane.org,
	broonie-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	robh+dt-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org, Pawel Moll,
	ijc+devicetree-KcIKpvwj1kUDXYZnReoRVg@public.gmane.org,
	galak-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org, Will Deacon,
	Catalin Marinas, jslaby-AlSwsSmVLrQ@public.gmane.org,
	jason-NLaQJdtUoK4Be96aLqz0jA@public.gmane.org,
	heiko-4mtYJXux2i+zQB+pC5nmwQ@public.gmane.org,
	florian.vaussard-p8DiymsW2f8@public.gmane.org,
	andrew-g2DYL2Zd6BY@public.gmane.org,
	rrichter-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org,
	hytszk-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	grant.likely-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org,
	orsonzhai-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	geng.ren-lxIno14LUO0EEoCn2XhGlw
In-Reply-To: <1417692860-18841-1-git-send-email-chunyan.zhang-lxIno14LUO0EEoCn2XhGlw@public.gmane.org>

Hi,

On Thu, Dec 04, 2014 at 11:34:15AM +0000, Chunyan Zhang wrote:
> Spreadtrum is a rapid growing chip vendor providing smart phone total solutions.
> 
> Sharkl64 Platform is nominated as a SoC infrastructure that supports 4G/3G/2G
> standards based on ARMv8 multiple core architecture.Now we have only one
> SoC(SC9836) based on this Platform in developing.
> 
> This patchset adds Sharkl64 support in arm64 device tree and the serial driver
> of SC9836-UART.
> 
> This patchset also has patches which address "sprd" prefix and DT compatible
> strings for nodes which appear un-documented.
> 
> This version code was tested both on Fast Mode and sc9836-fpga board.
> We use the latest boot-wrapper-aarch64 as the bootloader.
> 
> Changes from v3:
> * Addressed review comments:
> 	- Added the description of clock property for sc9836-uart
> 	- Revised the size of GICC to be 8KiB
> 	- Added another compatible string for psci-0.1

I had open questions on v3 regarding your PSCI implementation. You
mentioned that you are using the aarch64 bootwrapper, but your DT
describes PSCI 0.2, and the (upstream) bootwrapper does not implement
PSCI 0.2. Adding the old PSCI compatible string is _not_ sufficient if
you do not have a full PSCI 0.2 implementation.

Given that PSCI 0.2 requires more functionality to be implemented, I'd
like to know that your implementation is spec-compliant (implementing
the mandatory functions, enters the kernel in the correct state, etc),
and that it has been tested.

Would you be able to look at my comments from the last posting please?

Thanks,
Mark.

^ permalink raw reply

* Re: [PATCH v6 4/7] fs/fuse: support compiling out splice
From: Miklos Szeredi @ 2014-12-05 10:44 UTC (permalink / raw)
  To: Pieter Smith
  Cc: Kernel Mailing List, open list:FUSE: FILESYSTEM...,
	open list:ABI/API, Linux-Fsdevel
In-Reply-To: <1417715473-24110-5-git-send-email-pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>

On Thu, Dec 4, 2014 at 6:50 PM, Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org> wrote:
> To implement splice support, fs/fuse makes use of nosteal_pipe_buf_ops. This
> struct is exported by fs/splice. The goal of the larger patch set is to
> completely compile out fs/splice, so uses of the exported struct need to be
> compiled out along with fs/splice.
>
> This patch therefore compiles out splice support in fs/fuse when
> CONFIG_SYSCALL_SPLICE is undefined.
>
> Signed-off-by: Pieter Smith <pieter-qeJ+1H9vRZbz+pZb47iToQ@public.gmane.org>


In the future could you PLEASE PLEASE cut the fuse-devel Cc from the
non-fuse specific patches (and I guess that goes for any other
subsystem specific lists and persons as well)?

Otherwise:

Acked-by: Miklos Szeredi <mszeredi-AlSwsSmVLrQ@public.gmane.org>

Thanks,
Miklos

^ permalink raw reply

* Re: [PATCH v3] media: platform: add VPFE capture driver support for AM437X
From: Hans Verkuil @ 2014-12-05 12:24 UTC (permalink / raw)
  To: Lad, Prabhakar, LMML, devicetree-u79uwXL29TY76Z2rM5mHXA,
	linux-api
  Cc: LKML, Hans Verkuil
In-Reply-To: <1417648378-18271-1-git-send-email-prabhakar.csengg-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Hi Prabhakar,

Sorry, there are still a few items that need to be fixed.
If you can make a v4 with these issues addressed, then I can still make a
pull request, although it depends on Mauro whether it is still accepted for
3.19.

On 12/04/2014 12:12 AM, Lad, Prabhakar wrote:
> From: Benoit Parrot <bparrot-l0cyMroinI0@public.gmane.org>
> 
> This patch adds Video Processing Front End (VPFE) driver for
> AM437X family of devices
> Driver supports the following:
> - V4L2 API using MMAP buffer access based on videobuf2 api
> - Asynchronous sensor/decoder sub device registration
> - DT support

Just to confirm: this driver only supports SDTV formats? No HDTV?
I didn't see any VIDIOC_*_DV_TIMINGS support, so I assume it really
isn't supported.

> 
> Signed-off-by: Benoit Parrot <bparrot-l0cyMroinI0@public.gmane.org>
> Signed-off-by: Darren Etheridge <detheridge-l0cyMroinI0@public.gmane.org>
> Signed-off-by: Lad, Prabhakar <prabhakar.csengg-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---

<snip>

> diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
> new file mode 100644
> index 0000000..25863e8
> --- /dev/null
> +++ b/drivers/media/platform/am437x/am437x-vpfe.c

<snip>

> +
> +static int
> +cmp_v4l2_format(const struct v4l2_format *lhs, const struct v4l2_format *rhs)
> +{
> +	return lhs->type == rhs->type &&
> +		lhs->fmt.pix.width == rhs->fmt.pix.width &&
> +		lhs->fmt.pix.height == rhs->fmt.pix.height &&
> +		lhs->fmt.pix.pixelformat == rhs->fmt.pix.pixelformat &&
> +		lhs->fmt.pix.field == rhs->fmt.pix.field &&
> +		lhs->fmt.pix.colorspace == rhs->fmt.pix.colorspace;

Add a check for pix.ycbcr_enc and pix.quantization.

<snip>

> +/*
> + * vpfe_release : This function is based on the vb2_fop_release
> + * helper function.
> + * It has been augmented to handle module power management,
> + * by disabling/enabling h/w module fcntl clock when necessary.
> + */
> +static int vpfe_release(struct file *file)
> +{
> +	struct vpfe_device *vpfe = video_drvdata(file);
> +	int ret;
> +
> +	vpfe_dbg(2, vpfe, "vpfe_release\n");
> +
> +	ret = _vb2_fop_release(file, NULL);

This isn't going to work. _vb2_fop_release calls v4l2_fh_release(), so
the v4l2_fh_is_singular_file(file) check will be wrong and you release
the fh one time too many.

I would do this:

	if (!v4l2_fh_is_singular_file(file))
		return vb2_fop_release(file);
	mutex_lock(&vpfe->lock);
	ret = _vb2_fop_release(file, NULL);
	vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev);
	mutex_unlock(&vpfe->lock);
	return ret;

> +
> +	if (v4l2_fh_is_singular_file(file)) {
> +		mutex_lock(&vpfe->lock);
> +		vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev);
> +		v4l2_fh_release(file);
> +		mutex_unlock(&vpfe->lock);
> +	}
> +
> +	return ret;
> +}

<snip>

> +static int vpfe_enum_size(struct file *file, void  *priv,
> +			  struct v4l2_frmsizeenum *fsize)
> +{
> +	struct vpfe_device *vpfe = video_drvdata(file);
> +	struct v4l2_subdev_frame_size_enum fse;
> +	struct vpfe_subdev_info *sdinfo;
> +	struct v4l2_mbus_framefmt mbus;
> +	struct v4l2_pix_format pix;
> +	struct vpfe_fmt *fmt;
> +	int ret;
> +
> +	vpfe_dbg(2, vpfe, "vpfe_enum_size\n");
> +
> +	/* check for valid format */
> +	fmt = find_format_by_pix(fsize->pixel_format);
> +	if (!fmt) {
> +		vpfe_dbg(3, vpfe, "Invalid pixel code: %x, default used instead\n",
> +			fsize->pixel_format);
> +		return -EINVAL;
> +	}
> +
> +	memset(fsize->reserved, 0x0, sizeof(fsize->reserved));
> +
> +	sdinfo = vpfe->current_subdev;
> +	if (!sdinfo->sd)
> +		return -EINVAL;
> +
> +	memset(&pix, 0x0, sizeof(pix));
> +	/* Construct pix from parameter and use default for the rest */
> +	pix.pixelformat = fsize->pixel_format;
> +	pix.width = 640;
> +	pix.height = 480;
> +	pix.colorspace = V4L2_COLORSPACE_SRGB;
> +	pix.field = V4L2_FIELD_NONE;
> +	pix_to_mbus(vpfe, &pix, &mbus);
> +
> +	memset(&fse, 0x0, sizeof(fse));
> +	fse.index = fsize->index;
> +	fse.pad = 0;
> +	fse.code = mbus.code;
> +	ret = v4l2_subdev_call(sdinfo->sd, pad, enum_frame_size, NULL, &fse);

FYI: strictly speaking this is wrong since this op theoretically expects a
v4l2_subdev_fh pointer instead of a NULL argument. However, you do not have
an alternative right now. As you know, I've been working on fixing this, so
if that gets accepted, then you need to update this code as well in a later
patch.

> +	if (ret)
> +		return -EINVAL;
> +
> +	vpfe_dbg(1, vpfe, "vpfe_enum_size: index: %d code: %x W:[%d,%d] H:[%d,%d]\n",
> +		fse.index, fse.code, fse.min_width, fse.max_width,
> +		fse.min_height, fse.max_height);
> +
> +	fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
> +	fsize->discrete.width = fse.max_width;
> +	fsize->discrete.height = fse.max_height;
> +
> +	vpfe_dbg(1, vpfe, "vpfe_enum_size: index: %d pixformat: %s size: %dx%d\n",
> +		fsize->index, print_fourcc(fsize->pixel_format),
> +		fsize->discrete.width, fsize->discrete.height);
> +
> +	return 0;
> +}
> +

<snip>

> +static int
> +vpfe_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
> +{
> +	struct vpfe_device *vpfe = video_drvdata(file);
> +
> +	switch (s->target) {
> +	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
> +	case V4L2_SEL_TGT_COMPOSE_BOUNDS:

These two COMPOSE cases should be dropped, since there is no compose support!

> +	case V4L2_SEL_TGT_CROP_BOUNDS:
> +	case V4L2_SEL_TGT_CROP_DEFAULT:
> +		s->r.left = s->r.top = 0;
> +		s->r.width = vpfe->crop.width;
> +		s->r.height = vpfe->crop.height;
> +		break;
> +
> +	case V4L2_SEL_TGT_CROP:
> +		s->r = vpfe->crop;
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}

<snip>

Regards,

	Hans

^ permalink raw reply

* Re: [PATCH v4 0/5] Add Spreadtrum Sharkl64 Platform support
From: Lyra Zhang @ 2014-12-05 12:27 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Chunyan Zhang,
	gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org,
	arnd-r2nGTMty4D4@public.gmane.org,
	gnomes-qBU/x9rampVanCEyBjwyrvXRex20P6io@public.gmane.org,
	broonie-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	robh+dt-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org, Pawel Moll,
	ijc+devicetree-KcIKpvwj1kUDXYZnReoRVg@public.gmane.org,
	galak-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org, Will Deacon,
	Catalin Marinas, jslaby-AlSwsSmVLrQ@public.gmane.org,
	jason-NLaQJdtUoK4Be96aLqz0jA@public.gmane.org,
	heiko-4mtYJXux2i+zQB+pC5nmwQ@public.gmane.org,
	florian.vaussard-p8DiymsW2f8@public.gmane.org,
	andrew-g2DYL2Zd6BY@public.gmane.org,
	rrichter-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org,
	hytszk-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	grant.likely-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org
In-Reply-To: <20141205104009.GE11889@leverpostej>

On Fri, Dec 5, 2014 at 6:40 PM, Mark Rutland <mark.rutland-5wv7dgnIgG8@public.gmane.org> wrote:
> Hi,
>
> On Thu, Dec 04, 2014 at 11:34:15AM +0000, Chunyan Zhang wrote:
>> Spreadtrum is a rapid growing chip vendor providing smart phone total solutions.
>>
>> Sharkl64 Platform is nominated as a SoC infrastructure that supports 4G/3G/2G
>> standards based on ARMv8 multiple core architecture. Now we have only one
>> SoC (SC9836) based on this Platform in development.
>>
>> This patchset adds Sharkl64 support in arm64 device tree and the serial driver
>> of SC9836-UART.
>>
>> This patchset also has patches which address "sprd" prefix and DT compatible
>> strings for nodes which appear un-documented.
>>
>> This version code was tested both on Fast Mode and sc9836-fpga board.
>> We use the latest boot-wrapper-aarch64 as the bootloader.
>>
>> Changes from v3:
>> * Addressed review comments:
>>       - Added the description of clock property for sc9836-uart
>>       - Revised the size of GICC to be 8KiB
>>       - Added another compatible string for psci-0.1
>
> I had open questions on v3 regarding your PSCI implementation. You
> mentioned that you are using the aarch64 bootwrapper, but your DT
> describes PSCI 0.2, and the (upstream) bootwrapper does not implement
> PSCI 0.2. Adding the old PSCI compatible string is _not_ sufficient if
> you do not have a full PSCI 0.2 implementation.
>
> Given that PSCI 0.2 requires more functionality to be implemented, I'd
> like to know that your implementation is spec-compliant (implementing
> the mandatory functions, enters the kernel in the correct state, etc),
> and that it has been tested.
>
> Would you be able to look at my comments from the last posting please?
>
> Thanks,
> Mark.
> --

Hi, Mark

Ok, I'll check it again with our related engineers.

Actually, I had read all of your comments carefully before sending
each version of patches, and I replied to you a few days earlier; I guess
you may have missed it :)

If we have only implemented psci-0.1 so far, can we submit this patch
without "compatible = "arm,psci-0.2"", but only with " compatible =
"arm,psci" "?



Thanks,
Chunyan

^ permalink raw reply

* Re: [PATCH v3] media: platform: add VPFE capture driver support for AM437X
From: Prabhakar Lad @ 2014-12-05 12:54 UTC (permalink / raw)
  To: Hans Verkuil
  Cc: LMML, devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-api, LKML, Hans Verkuil
In-Reply-To: <5481A3EC.8020803-qWit8jRvyhVmR6Xm/wNWPw@public.gmane.org>

Hi Hans,

On Fri, Dec 5, 2014 at 12:24 PM, Hans Verkuil <hverkuil-qWit8jRvyhVmR6Xm/wNWPw@public.gmane.org> wrote:
> Hi Prabhakar,
>
> Sorry, there are still a few items that need to be fixed.
> If you can make a v4 with these issues addressed, then I can still make a
> pull request, although it depends on Mauro whether it is still accepted for
> 3.19.
>
OK will post a v4 tonight fixing all the below issues.

FYI: Looking at the response of Mauro on 'soc-camera: 1st set for 3.19'
he won't accept it!

Thanks,
--Prabhakar Lad

> On 12/04/2014 12:12 AM, Lad, Prabhakar wrote:
>> From: Benoit Parrot <bparrot-l0cyMroinI0@public.gmane.org>
>>
>> This patch adds Video Processing Front End (VPFE) driver for
>> AM437X family of devices
>> Driver supports the following:
>> - V4L2 API using MMAP buffer access based on videobuf2 api
>> - Asynchronous sensor/decoder sub device registration
>> - DT support
>
> Just to confirm: this driver only supports SDTV formats? No HDTV?
> I didn't see any VIDIOC_*_DV_TIMINGS support, so I assume it really
> isn't supported.
>
>>
>> Signed-off-by: Benoit Parrot <bparrot-l0cyMroinI0@public.gmane.org>
>> Signed-off-by: Darren Etheridge <detheridge-l0cyMroinI0@public.gmane.org>
>> Signed-off-by: Lad, Prabhakar <prabhakar.csengg-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> ---
>
> <snip>
>
>> diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
>> new file mode 100644
>> index 0000000..25863e8
>> --- /dev/null
>> +++ b/drivers/media/platform/am437x/am437x-vpfe.c
>
> <snip>
>
>> +
>> +static int
>> +cmp_v4l2_format(const struct v4l2_format *lhs, const struct v4l2_format *rhs)
>> +{
>> +     return lhs->type == rhs->type &&
>> +             lhs->fmt.pix.width == rhs->fmt.pix.width &&
>> +             lhs->fmt.pix.height == rhs->fmt.pix.height &&
>> +             lhs->fmt.pix.pixelformat == rhs->fmt.pix.pixelformat &&
>> +             lhs->fmt.pix.field == rhs->fmt.pix.field &&
>> +             lhs->fmt.pix.colorspace == rhs->fmt.pix.colorspace;
>
> Add a check for pix.ycbcr_enc and pix.quantization.
>
OK

> <snip>
>
>> +/*
>> + * vpfe_release : This function is based on the vb2_fop_release
>> + * helper function.
>> + * It has been augmented to handle module power management,
>> + * by disabling/enabling h/w module fcntl clock when necessary.
>> + */
>> +static int vpfe_release(struct file *file)
>> +{
>> +     struct vpfe_device *vpfe = video_drvdata(file);
>> +     int ret;
>> +
>> +     vpfe_dbg(2, vpfe, "vpfe_release\n");
>> +
>> +     ret = _vb2_fop_release(file, NULL);
>
> This isn't going to work. _vb2_fop_release calls v4l2_fh_release(), so
> the v4l2_fh_is_singular_file(file) check will be wrong and you release
> the fh one time too many.
>
> I would do this:
>
>         if (!v4l2_fh_is_singular_file(file))
>                 return vb2_fop_release(file);
>         mutex_lock(&vpfe->lock);
>         ret = _vb2_fop_release(file, NULL);
>         vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev);
>         mutex_unlock(&vpfe->lock);
>         return ret;
>
>> +
>> +     if (v4l2_fh_is_singular_file(file)) {
>> +             mutex_lock(&vpfe->lock);
>> +             vpfe_ccdc_close(&vpfe->ccdc, vpfe->pdev);
>> +             v4l2_fh_release(file);
>> +             mutex_unlock(&vpfe->lock);
>> +     }
>> +
>> +     return ret;
>> +}
>
> <snip>
>
>> +static int vpfe_enum_size(struct file *file, void  *priv,
>> +                       struct v4l2_frmsizeenum *fsize)
>> +{
>> +     struct vpfe_device *vpfe = video_drvdata(file);
>> +     struct v4l2_subdev_frame_size_enum fse;
>> +     struct vpfe_subdev_info *sdinfo;
>> +     struct v4l2_mbus_framefmt mbus;
>> +     struct v4l2_pix_format pix;
>> +     struct vpfe_fmt *fmt;
>> +     int ret;
>> +
>> +     vpfe_dbg(2, vpfe, "vpfe_enum_size\n");
>> +
>> +     /* check for valid format */
>> +     fmt = find_format_by_pix(fsize->pixel_format);
>> +     if (!fmt) {
>> +             vpfe_dbg(3, vpfe, "Invalid pixel code: %x, default used instead\n",
>> +                     fsize->pixel_format);
>> +             return -EINVAL;
>> +     }
>> +
>> +     memset(fsize->reserved, 0x0, sizeof(fsize->reserved));
>> +
>> +     sdinfo = vpfe->current_subdev;
>> +     if (!sdinfo->sd)
>> +             return -EINVAL;
>> +
>> +     memset(&pix, 0x0, sizeof(pix));
>> +     /* Construct pix from parameter and use default for the rest */
>> +     pix.pixelformat = fsize->pixel_format;
>> +     pix.width = 640;
>> +     pix.height = 480;
>> +     pix.colorspace = V4L2_COLORSPACE_SRGB;
>> +     pix.field = V4L2_FIELD_NONE;
>> +     pix_to_mbus(vpfe, &pix, &mbus);
>> +
>> +     memset(&fse, 0x0, sizeof(fse));
>> +     fse.index = fsize->index;
>> +     fse.pad = 0;
>> +     fse.code = mbus.code;
>> +     ret = v4l2_subdev_call(sdinfo->sd, pad, enum_frame_size, NULL, &fse);
>
> FYI: strictly speaking this is wrong since this op theoretically expects a
> v4l2_subdev_fh pointer instead of a NULL argument. However, you do not have
> an alternative right now. As you know, I've been working on fixing this, so
> if that gets accepted, then you need to update this code as well in a later
> patch.
>
>> +     if (ret)
>> +             return -EINVAL;
>> +
>> +     vpfe_dbg(1, vpfe, "vpfe_enum_size: index: %d code: %x W:[%d,%d] H:[%d,%d]\n",
>> +             fse.index, fse.code, fse.min_width, fse.max_width,
>> +             fse.min_height, fse.max_height);
>> +
>> +     fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE;
>> +     fsize->discrete.width = fse.max_width;
>> +     fsize->discrete.height = fse.max_height;
>> +
>> +     vpfe_dbg(1, vpfe, "vpfe_enum_size: index: %d pixformat: %s size: %dx%d\n",
>> +             fsize->index, print_fourcc(fsize->pixel_format),
>> +             fsize->discrete.width, fsize->discrete.height);
>> +
>> +     return 0;
>> +}
>> +
>
> <snip>
>
>> +static int
>> +vpfe_g_selection(struct file *file, void *fh, struct v4l2_selection *s)
>> +{
>> +     struct vpfe_device *vpfe = video_drvdata(file);
>> +
>> +     switch (s->target) {
>> +     case V4L2_SEL_TGT_COMPOSE_DEFAULT:
>> +     case V4L2_SEL_TGT_COMPOSE_BOUNDS:
>
> These two COMPOSE cases should be dropped, since there is no compose support!
>
>> +     case V4L2_SEL_TGT_CROP_BOUNDS:
>> +     case V4L2_SEL_TGT_CROP_DEFAULT:
>> +             s->r.left = s->r.top = 0;
>> +             s->r.width = vpfe->crop.width;
>> +             s->r.height = vpfe->crop.height;
>> +             break;
>> +
>> +     case V4L2_SEL_TGT_CROP:
>> +             s->r = vpfe->crop;
>> +             break;
>> +
>> +     default:
>> +             return -EINVAL;
>> +     }
>> +
>> +     return 0;
>> +}
>
> <snip>
>
> Regards,
>
>         Hans

^ permalink raw reply

* Re: [tpmdd-devel] [PATCH v9 8/8] tpm: TPM 2.0 FIFO Interface
From: Jarkko Sakkinen @ 2014-12-05 13:05 UTC (permalink / raw)
  To: Scot Doyle
  Cc: Peter Huewe, Ashley Lai, Marcel Selhorst,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Will Arthur,
	tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <alpine.DEB.2.11.1412041459470.5705-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>

On Thu, Dec 04, 2014 at 03:25:12PM +0000, Scot Doyle wrote:
> Hi Jarkko,
> 
> On Thu, 4 Dec 2014, Jarkko Sakkinen wrote:
> > From: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > 
> > Detect TPM 2.0 by using the extended STS (STS3) register. For TPM 2.0,
> > instead of calling tpm_get_timeouts(), assign duration and timeout
> > values defined in the TPM 2.0 PTP specification.
> > 
> > Signed-off-by: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> > ---
> >  drivers/char/tpm/tpm_tis.c | 80 ++++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 67 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
> > index 89e1abb..71cbb2d 100644
> > --- a/drivers/char/tpm/tpm_tis.c
> > +++ b/drivers/char/tpm/tpm_tis.c
> > @@ -1,5 +1,6 @@
> >  /*
> >   * Copyright (C) 2005, 2006 IBM Corporation
> > + * Copyright (C) 2014 Intel Corporation
> >   *
> >   * Authors:
> >   * Leendert van Doorn <leendert-aZOuKsOsJu3MbYB6QlFGEg@public.gmane.org>
> > @@ -44,6 +45,10 @@ enum tis_status {
> >  	TPM_STS_DATA_EXPECT = 0x08,
> >  };
> >  
> > +enum tis_status3 {
> > +	TPM_STS3_TPM2_FAM = 0x04,
> > +};
> > +
> >  enum tis_int_flags {
> >  	TPM_GLOBAL_INT_ENABLE = 0x80000000,
> >  	TPM_INTF_BURST_COUNT_STATIC = 0x100,
> > @@ -70,6 +75,7 @@ enum tis_defaults {
> >  #define	TPM_INT_STATUS(l)		(0x0010 | ((l) << 12))
> >  #define	TPM_INTF_CAPS(l)		(0x0014 | ((l) << 12))
> >  #define	TPM_STS(l)			(0x0018 | ((l) << 12))
> > +#define	TPM_STS3(l)			(0x001b | ((l) << 12))
> >  #define	TPM_DATA_FIFO(l)		(0x0024 | ((l) << 12))
> >  
> >  #define	TPM_DID_VID(l)			(0x0F00 | ((l) << 12))
> > @@ -363,6 +369,7 @@ static int tpm_tis_send_main(struct tpm_chip *chip, u8 *buf, size_t len)
> >  {
> >  	int rc;
> >  	u32 ordinal;
> > +	unsigned long dur;
> >  
> >  	rc = tpm_tis_send_data(chip, buf, len);
> >  	if (rc < 0)
> > @@ -374,9 +381,14 @@ static int tpm_tis_send_main(struct tpm_chip *chip, u8 *buf, size_t len)
> >  
> >  	if (chip->vendor.irq) {
> >  		ordinal = be32_to_cpu(*((__be32 *) (buf + 6)));
> > +
> > +		if (chip->flags & TPM_CHIP_FLAG_TPM2)
> > +			dur = tpm2_calc_ordinal_duration(chip, ordinal);
> > +		else
> > +			dur = tpm_calc_ordinal_duration(chip, ordinal);
> > +
> >  		if (wait_for_tpm_stat
> > -		    (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID,
> > -		     tpm_calc_ordinal_duration(chip, ordinal),
> > +		    (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, dur,
> >  		     &chip->vendor.read_queue, false) < 0) {
> >  			rc = -ETIME;
> >  			goto out_err;
> > @@ -588,6 +600,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
> >  	int rc, i, irq_s, irq_e, probe;
> >  	struct tpm_chip *chip;
> >  	struct priv_data *priv;
> > +	u8 sts3;
> >  
> >  	priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL);
> >  	if (priv == NULL)
> > @@ -604,11 +617,28 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
> >  	if (!chip->vendor.iobase)
> >  		return -EIO;
> >  
> > +	sts3 = ioread8(chip->vendor.iobase + TPM_STS3(1));
> > +	if ((sts3 & TPM_STS3_TPM2_FAM) == TPM_STS3_TPM2_FAM)
> > +		chip->flags = TPM_CHIP_FLAG_TPM2;
> > +
> >  	/* Default timeouts */
> > -	chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
> > -	chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
> > -	chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
> > -	chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
> > +	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
> > +		chip->vendor.timeout_a = usecs_to_jiffies(TPM2_TIMEOUT_A);
> > +		chip->vendor.timeout_b = usecs_to_jiffies(TPM2_TIMEOUT_B);
> > +		chip->vendor.timeout_c = usecs_to_jiffies(TPM2_TIMEOUT_C);
> > +		chip->vendor.timeout_d = usecs_to_jiffies(TPM2_TIMEOUT_D);
> > +		chip->vendor.duration[TPM_SHORT] =
> > +			usecs_to_jiffies(TPM2_DURATION_SHORT);
> > +		chip->vendor.duration[TPM_MEDIUM] =
> > +			usecs_to_jiffies(TPM2_DURATION_MEDIUM);
> > +		chip->vendor.duration[TPM_LONG] =
> > +			usecs_to_jiffies(TPM2_DURATION_LONG);
> > +	} else {
> > +		chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
> > +		chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT);
> > +		chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
> > +		chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT);
> > +	}
> >  
> >  	if (wait_startup(chip, 0) != 0) {
> >  		rc = -ENODEV;
> > @@ -623,8 +653,8 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
> >  	vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0));
> >  	chip->vendor.manufacturer_id = vendor;
> >  
> > -	dev_info(dev,
> > -		 "1.2 TPM (device-id 0x%X, rev-id %d)\n",
> > +	dev_info(dev, "%s TPM (device-id 0x%X, rev-id %d)\n",
> > +		 (chip->flags & TPM_CHIP_FLAG_TPM2) ? "2.0" : "1.2",
> >  		 vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0)));
> >  
> >  	if (!itpm) {
> > @@ -665,6 +695,23 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
> >  	if (intfcaps & TPM_INTF_DATA_AVAIL_INT)
> >  		dev_dbg(dev, "\tData Avail Int Support\n");
> >  
> > +	/* get the timeouts before testing for irqs */
> > +	if (!(chip->flags & TPM_CHIP_FLAG_TPM2) && tpm_get_timeouts(chip)) {
> > +		dev_err(dev, "Could not get TPM timeouts and durations\n");
> > +		rc = -ENODEV;
> > +		goto out_err;
> > +	}
> > +
> > +	if (chip->flags & TPM_CHIP_FLAG_TPM2)
> > +		rc = tpm2_do_selftest(chip);
> > +	else
> > +		rc = tpm_do_selftest(chip);
> > +	if (rc) {
> > +		dev_err(dev, "TPM self test failed\n");
> > +		rc = -ENODEV;
> > +		goto out_err;
> > +	}
> > +
> >  	/* INTERRUPT Setup */
> >  	init_waitqueue_head(&chip->vendor.read_queue);
> >  	init_waitqueue_head(&chip->vendor.int_queue);
> 
> In Peter's for-james branch, commit 0e6cb01, the v1.2 calls to
> tpm_get_timeouts and tpm_do_selftest were moved toward the end of 
> tpm_tis_init.

Right. My bad. Could this be reverted with a separate fix or do
I prepare a new patch set? I would propose the former...

> > @@ -720,7 +767,10 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle,
> >  			chip->vendor.probed_irq = 0;
> >  
> >  			/* Generate Interrupts */
> > -			tpm_gen_interrupt(chip);
> > +			if (chip->flags & TPM_CHIP_FLAG_TPM2)
> > +				tpm2_gen_interrupt(chip);
> > +			else
> > +				tpm_gen_interrupt(chip);
> >  
> >  			chip->vendor.irq = chip->vendor.probed_irq;
> >  
> > @@ -808,14 +858,18 @@ static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
> >  static int tpm_tis_resume(struct device *dev)
> >  {
> >  	struct tpm_chip *chip = dev_get_drvdata(dev);
> > -	int ret;
> > +	int ret = 0;
> >  
> >  	if (chip->vendor.irq)
> >  		tpm_tis_reenable_interrupts(chip);
> >  
> > -	ret = tpm_pm_resume(dev);
> > -	if (!ret)
> > -		tpm_do_selftest(chip);
> > +	if (chip->flags & TPM_CHIP_FLAG_TPM2)
> > +		tpm2_do_selftest(chip);
> > +	else {
> > +		ret = tpm_pm_resume(dev);
> > +		if (!ret)
> > +			tpm_do_selftest(chip);
> > +	}
> >  
> >  	return ret;
> >  }
> > -- 
> > 2.1.0
> > 

/Jarkko

^ permalink raw reply

* Re: [PATCH v9 8/8] tpm: TPM 2.0 FIFO Interface
From: Jarkko Sakkinen @ 2014-12-05 13:06 UTC (permalink / raw)
  To: Peter Hüwe
  Cc: Ashley Lai, Marcel Selhorst,
	tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Will Arthur
In-Reply-To: <201412042246.25662.PeterHuewe-Mmb7MZpHnFY@public.gmane.org>

On Thu, Dec 04, 2014 at 10:46:25PM +0100, Peter Hüwe wrote:
> Am Donnerstag, 4. Dezember 2014, 06:55:18 schrieb Jarkko Sakkinen:
> > From: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > 
> > Detect TPM 2.0 by using the extended STS (STS3) register. For TPM 2.0,
> > instead of calling tpm_get_timeouts(), assign duration and timeout
> > values defined in the TPM 2.0 PTP specification.
> > 
> > Signed-off-by: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> > ---
> Should we change the KConfig string?
>  TPM Interface Specification 1.2 Interface
> is not quite accurate anymore after applying this patch.

Would make sense. Does this require a new patch set?

> Thanks,
> Peter

/Jarkko

^ permalink raw reply

* Re: [PATCH v9 7/8] tpm: TPM 2.0 CRB Interface
From: Jarkko Sakkinen @ 2014-12-05 13:10 UTC (permalink / raw)
  To: Peter Hüwe
  Cc: Ashley Lai, Marcel Selhorst,
	tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <201412042119.21493.PeterHuewe-Mmb7MZpHnFY@public.gmane.org>

I'll fix these (and run sparse).

/Jarkko

On Thu, Dec 04, 2014 at 09:19:21PM +0100, Peter Hüwe wrote:
> Am Donnerstag, 4. Dezember 2014, 06:55:17 schrieb Jarkko Sakkinen:
> > tpm_crb is a driver for TPM 2.0 Command Response Buffer (CRB) Interface
> > as defined in PC Client Platform TPM Profile (PTP) Specification.
> > 
> > Only polling and single locality is supported as these are the limitations
> > of the available hardware, Platform Trust Technology (PTT) in Haswell
> > CPUs.
> > 
> > The driver always applies CRB with ACPI start because PTT reports using
> > only ACPI start as start method but as a result of my testing it requires
> > also CRB start.
> > 
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> 
> :(
> 
>  make -C /data/data-old/linux-2.6/ M=$(pwd) modules C=1 CHECK=sparse
> make: Entering directory '/data/data-old/linux-2.6'
>   CHECK   /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:99:5: warning: symbol 'crb_suspend' was not declared. Should it be static?
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:121:14: warning: incorrect type in argument 1 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:121:14:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:121:14:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:137:13: warning: incorrect type in argument 1 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:137:13:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:137:13:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:173:19: warning: incorrect type in argument 1 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:173:19:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:173:19:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:177:34: warning: incorrect type in argument 1 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:177:34:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:177:34:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:187:63: warning: incorrect type in argument 2 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:187:63:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:187:63:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:199:56: warning: incorrect type in argument 2 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:199:56:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:199:56:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:207:27: warning: incorrect type in argument 2 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:207:27:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:207:27:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:277:22: warning: cast removes address space of expression
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:284:33: warning: incorrect type in argument 2 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:284:33:    expected void const volatile [noderef] <asn:2>*src
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:284:33:    got unsigned long long *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:287:56: warning: incorrect type in argument 1 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:287:56:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:287:56:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:286:19: warning: incorrect type in assignment (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:286:19:    expected unsigned char [usertype] *cmd
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:286:19:    got void [noderef] <asn:2>*
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:293:33: warning: incorrect type in argument 2 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:293:33:    expected void const volatile [noderef] <asn:2>*src
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:293:33:    got unsigned long long *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:296:56: warning: incorrect type in argument 1 (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:296:56:    expected void [noderef] <asn:2>*<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:296:56:    got unsigned int *<noident>
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:295:19: warning: incorrect type in assignment (different address spaces)
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:295:19:    expected unsigned char [usertype] *rsp
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:295:19:    got void [noderef] <asn:2>*
> /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.c:325:5: warning: symbol 'crb_acpi_remove' was not declared. Should it be static?
>   CC [M]  /data/data-old/linux-2.6/drivers/char/tpm/tpm_crb.o
> 
> :(
> Peter

^ permalink raw reply

* Re: [PATCH v9 6/8] tpm: TPM 2.0 baseline support
From: Jarkko Sakkinen @ 2014-12-05 14:13 UTC (permalink / raw)
  To: Peter Hüwe
  Cc: Ashley Lai, Marcel Selhorst,
	tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Will Arthur
In-Reply-To: <201412050017.40668.PeterHuewe-Mmb7MZpHnFY@public.gmane.org>

Hey

is it cool if I prepare a separate set of fixes for all issues
in v9? I do not see any problem that could not be fixed without
major structural changes.

/Jarkko

On Fri, Dec 05, 2014 at 12:17:40AM +0100, Peter Hüwe wrote:
> Am Donnerstag, 4. Dezember 2014, 06:55:16 schrieb Jarkko Sakkinen:
> > TPM 2.0 devices are separated by adding a field 'flags' to struct
> > tpm_chip and defining a flag TPM_CHIP_FLAG_TPM2 for tagging them.
> > 
> > This patch adds the following internal functions:
> > 
> > - tpm2_get_random()
> > - tpm2_get_tpm_pt()
> > - tpm2_pcr_extend()
> > - tpm2_pcr_read()
> > - tpm2_startup()
> > 
> > Additionally, the following exported functions are implemented for
> > implementing TPM 2.0 device drivers:
> > 
> > - tpm2_do_selftest()
> > - tpm2_calc_ordinal_duration()
> > - tpm2_gen_interrupt()
> > 
> > The existing functions that are exported for the use for existing
> > subsystems have been changed to check the flags field in struct
> > tpm_chip and use appropriate TPM 2.0 counterpart if
> > TPM_CHIP_FLAG_TPM2 is set.
> > 
> > The code for tpm2_calc_ordinal_duration() and tpm2_startup() were
> > originally written by Will Arthur.
> > 
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> > Signed-off-by: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > ---
> >  drivers/char/tpm/Makefile        |   2 +-
> >  drivers/char/tpm/tpm-chip.c      |  27 +-
> >  drivers/char/tpm/tpm-interface.c |  24 +-
> >  drivers/char/tpm/tpm.h           |  61 +++++
> >  drivers/char/tpm/tpm2-cmd.c      | 542
> > +++++++++++++++++++++++++++++++++++++++ 5 files changed, 641
> > insertions(+), 15 deletions(-)
> >  create mode 100644 drivers/char/tpm/tpm2-cmd.c
> > 
> > diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
> > index c715596..88848ed 100644
> > --- a/drivers/char/tpm/Makefile
> > +++ b/drivers/char/tpm/Makefile
> > @@ -2,7 +2,7 @@
> >  # Makefile for the kernel tpm device drivers.
> >  #
> >  obj-$(CONFIG_TCG_TPM) += tpm.o
> > -tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o
> > +tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o tpm2-cmd.o
> >  tpm-$(CONFIG_ACPI) += tpm_ppi.o
> > 
> >  ifdef CONFIG_ACPI
> > diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
> > index 7741e28..3f3f2de 100644
> > --- a/drivers/char/tpm/tpm-chip.c
> > +++ b/drivers/char/tpm/tpm-chip.c
> > @@ -195,15 +195,18 @@ int tpm_chip_register(struct tpm_chip *chip)
> >  	if (rc)
> >  		return rc;
> > 
> > -	rc = tpm_sysfs_add_device(chip);
> > -	if (rc)
> > -		goto del_misc;
> > +	/* Populate sysfs for TPM1 devices. */
> > +	if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
> > +		rc = tpm_sysfs_add_device(chip);
> > +		if (rc)
> > +			goto del_misc;
> > 
> > -	rc = tpm_add_ppi(chip);
> > -	if (rc)
> > -		goto del_sysfs;
> > +		rc = tpm_add_ppi(chip);
> > +		if (rc)
> > +			goto del_sysfs;
> > 
> > -	chip->bios_dir = tpm_bios_log_setup(chip->devname);
> > +		chip->bios_dir = tpm_bios_log_setup(chip->devname);
> > +	}
> > 
> >  	/* Make the chip available. */
> >  	spin_lock(&driver_lock);
> > @@ -236,10 +239,12 @@ void tpm_chip_unregister(struct tpm_chip *chip)
> >  	spin_unlock(&driver_lock);
> >  	synchronize_rcu();
> > 
> > -	if (chip->bios_dir)
> > -		tpm_bios_log_teardown(chip->bios_dir);
> > -	tpm_remove_ppi(chip);
> > -	tpm_sysfs_del_device(chip);
> > +	if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
> > +		if (chip->bios_dir)
> > +			tpm_bios_log_teardown(chip->bios_dir);
> > +		tpm_remove_ppi(chip);
> > +		tpm_sysfs_del_device(chip);
> > +	}
> > 
> >  	tpm_dev_del_device(chip);
> >  }
> > diff --git a/drivers/char/tpm/tpm-interface.c
> > b/drivers/char/tpm/tpm-interface.c index b6f6b17..8a14887 100644
> > --- a/drivers/char/tpm/tpm-interface.c
> > +++ b/drivers/char/tpm/tpm-interface.c
> > @@ -360,7 +360,10 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char
> > *buf, if (chip->vendor.irq)
> >  		goto out_recv;
> > 
> > -	stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
> > +	if (chip->flags & TPM_CHIP_FLAG_TPM2)
> > +		stop = jiffies + tpm2_calc_ordinal_duration(chip, ordinal);
> > +	else
> > +		stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal);
> >  	do {
> >  		u8 status = chip->ops->status(chip);
> >  		if ((status & chip->ops->req_complete_mask) ==
> > @@ -483,7 +486,7 @@ static const struct tpm_input_header tpm_startup_header
> > = { static int tpm_startup(struct tpm_chip *chip, __be16 startup_type) {
> >  	struct tpm_cmd_t start_cmd;
> > -	start_cmd.header.in = tpm_startup_header;
> > +
> WHY?!? This renders tpm_startup useless.
> 
> So NACK for this part.
> >  	start_cmd.params.startup_in.startup_type = startup_type;
> >  	return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
> >  				"attempting to start the TPM");
> 
> I'll get you a TPM1.2 :)
> 
> Thanks
> Peter

^ permalink raw reply

* Aw: Re: [PATCH v9 6/8] tpm: TPM 2.0 baseline support
From: Peter Huewe @ 2014-12-05 14:35 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Ashley Lai, Marcel Selhorst,
	tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	linux-api-u79uwXL29TY76Z2rM5mHXA,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, Will Arthur
In-Reply-To: <20141205141303.GF6993-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

Hi Jarkko,

>is it cool if I prepare a separate set of fixes for all issues
>in v9? I do not see any problem that could not be fixed without
>major structural changes.

to be honest - I would prefer a v10 with the fixes included, especially since there were quite a few issues/remarks.
If you already have fixes post them as patches on top of v9 so we can test, and then squash them in.

I think we are still on schedule if you do the fixes over the weekend - so we hopefully have a _great_ v10 on Monday/Tuesday

I'll send out the first pull request without TPM2.0 support later this evening,
and then send out another one by next week - this should be fine.

The next testing slot is Sunday evening since I'm travelling myself (and don't have TPM hardware with me).

Thanks,
Peter

^ permalink raw reply

* Aw: Re: [tpmdd-devel] [PATCH v9 8/8] tpm: TPM 2.0 FIFO Interface
From: Peter Huewe @ 2014-12-05 15:01 UTC (permalink / raw)
  To: "Peter Hüwe"
  Cc: tpmdd-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	christophe.ricard-Re5JQEeQqe8AvxtiuMwx3w,
	josh.triplett-ral2JQCrhuEAvxtiuMwx3w,
	linux-api-u79uwXL29TY76Z2rM5mHXA, Ashley Lai,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA, Will Arthur,
	jason.gunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	trousers-tech-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
In-Reply-To: <201412042328.29578.PeterHuewe-Mmb7MZpHnFY@public.gmane.org>

> 
> > Am Donnerstag, 4. Dezember 2014, 06:55:18 schrieb Jarkko Sakkinen:
> > > From: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > >
> > > Detect TPM 2.0 by using the extended STS (STS3) register. For TPM 2.0,
> > > instead of calling tpm_get_timeouts(), assign duration and timeout
> > > values defined in the TPM 2.0 PTP specification.
> > >
> > > Signed-off-by: Will Arthur <will.c.arthur-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> 
> > >
> > > + sts3 = ioread8(chip->vendor.iobase + TPM_STS3(1));
> > > + if ((sts3 & TPM_STS3_TPM2_FAM) == TPM_STS3_TPM2_FAM)
> > > + chip->flags = TPM_CHIP_FLAG_TPM2;
> > > +
> > >
> 
> >
> > When loading tpm_tis force=1 with my tpm1.2 chip on a machine without bios
> > integration, it gets detected as a TPM2.0 chip :/
> >
> > sudo rmmod tpm_tis
> > # modprobe tpm_tis force=1
> > modprobe: ERROR: could not insert 'tpm_tis': No such device
> > # dmesg
> > [ 263.903828] tpm_tis tpm_tis: 2.0 TPM (device-id 0xB, rev-id 16)
> > [ 263.948049] tpm_tis tpm_tis: A TPM error (10) occurred continue selftest
> > [ 263.948120] tpm_tis tpm_tis: TPM self test failed
> >
> >
> > sts3 is reported as 0xff from my TPM1.2
> >
> 
> 
> Hmm,
> my TPM2.0 chip also reports sts3 as 0xff (when loading with force=1 on a
> machine without bios integration)
> 
> [ 307.095344] sts3 ff
> [ 307.095366] tpm_tis tpm_tis: 2.0 TPM (device-id 0x1A, rev-id 16)
> [ 307.140047] tpm_tis tpm_tis: A TPM error (256) occurred continue selftest
> [ 307.140056] tpm_tis tpm_tis: TPM self test failed


You are reading "sts3" before requesting the locality, and thus it returns 0xff for a TPM2.0 chip as well.
--> You have to have an active locality first.


For a TPM2.0, 0xFF is not a valid value (if an active locality is set), since the commandCancel and resetEstablishment bits always return 0 on reads (according to the spec).

--> 0xFF should be treated as a TPM1.2 (older tpms with TIS 1.2)
--> 0x04 should be treated as TPM 2.0
--> 0x08 should be treated as TPM1.2 (newer tpms with TIS1.3 enhanced)



Thanks,
Peter

^ permalink raw reply

* Re: [PATCH v4 2/5] crypto: AF_ALG: add AEAD support
From: Herbert Xu @ 2014-12-05 15:46 UTC (permalink / raw)
  To: Stephan Mueller
  Cc: Daniel Borkmann, 'Quentin Gouchet', 'LKML',
	linux-crypto, linux-api
In-Reply-To: <4875720.jRoMDtjHB4@tachyon.chronox.de>

On Wed, Dec 03, 2014 at 08:57:24PM +0100, Stephan Mueller wrote:
>
> +	if (ctx->merge) {
> +		sg = sgl->sg + sgl->cur - 1;
> +		len = min_t(unsigned long, len,
> +			    PAGE_SIZE - sg->offset - sg->length);
> +
> +		err = memcpy_fromiovec(page_address(sg_page(sg)) +
> +				       sg->offset + sg->length,
> +				       msg->msg_iov, len);
> +		if (err)
> +			goto unlock;
> +
> +		sg->length += len;
> +		ctx->merge = (sg->offset + sg->length) & (PAGE_SIZE - 1);
> +
> +		ctx->used += len;
> +		copied += len;
> +		size -= len;
> +	}

Any reason why you got rid of the outer loop here? This will cause
short writes I think.

> +static struct proto_ops algif_aead_ops = {
> +	.family		=	PF_ALG,
> +
> +	.connect	=	sock_no_connect,
> +	.socketpair	=	sock_no_socketpair,
> +	.getname	=	sock_no_getname,
> +	.ioctl		=	sock_no_ioctl,
> +	.listen		=	sock_no_listen,
> +	.shutdown	=	sock_no_shutdown,
> +	.getsockopt	=	sock_no_getsockopt,
> +	.mmap		=	sock_no_mmap,
> +	.bind		=	sock_no_bind,
> +	.accept		=	sock_no_accept,
> +
> +	.release	=	af_alg_release,
> +	.sendmsg	=	aead_sendmsg,
> +	.sendpage	=	aead_sendpage,
> +	.recvmsg	=	aead_recvmsg,
> +	.poll		=	aead_poll,
> +	.setsockopt	=	aead_setsockopt,

No it should go into the parent setsockopt.  Perhaps add a setsockopt
to af_alg_type in order to keep this out of the generic code.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox