All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v2] cgroup: memcg: net: do not associate sock with unrelated cgroup
From: Roman Gushchin @ 2020-02-14 22:33 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Eric Dumazet, Tejun Heo, Greg Thelen,
	Michal Hocko, Vladimir Davydov, Andrew Morton, cgroups, linux-mm,
	linux-kernel
In-Reply-To: <20200214222415.181467-1-shakeelb@google.com>

On Fri, Feb 14, 2020 at 02:24:15PM -0800, Shakeel Butt wrote:
> We are testing network memory accounting in our setup and noticed
> inconsistent network memory usage and often unrelated cgroups network
> usage correlates with testing workload. On further inspection, it
> seems like mem_cgroup_sk_alloc() and cgroup_sk_alloc() are broken in
> irq context specially for cgroup v1.
> 
> mem_cgroup_sk_alloc() and cgroup_sk_alloc() can be called in irq context
> and kind of assumes that this can only happen from sk_clone_lock()
> and the source sock object has already associated cgroup. However in
> cgroup v1, where network memory accounting is opt-in, the source sock
> can be unassociated with any cgroup and the new cloned sock can get
> associated with unrelated interrupted cgroup.
> 
> Cgroup v2 can also suffer if the source sock object was created by
> process in the root cgroup or if sk_alloc() is called in irq context.
> The fix is to just do nothing in interrupt.
> 
> Fixes: 2d7580738345 ("mm: memcontrol: consolidate cgroup socket tracking")
> Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets")
> Signed-off-by: Shakeel Butt <shakeelb@google.com>
> ---
> 
> Changes since v1:
> - Fix cgroup_sk_alloc() too.
> 
>  kernel/cgroup/cgroup.c | 4 ++++
>  mm/memcontrol.c        | 4 ++++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 9a8a5ded3c48..46e5f5518fba 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -6449,6 +6449,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
>  		return;
>  	}
>  
> +	/* Do not associate the sock with unrelated interrupted task's memcg. */
                                                                       ^^^^^
								       cgroup?
> +	if (in_interrupt())
> +		return;
> +
>  	rcu_read_lock();
>  
>  	while (true) {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 63bb6a2aab81..f500da82bfe8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6697,6 +6697,10 @@ void mem_cgroup_sk_alloc(struct sock *sk)
>  		return;
>  	}

Can you please include the stack trace in the commit log?
Except for a minor typo (see above),
Reviewed-by: Roman Gushchin <guro@fb.com>

A really good catch.

Thank you!


^ permalink raw reply

* [PATCH] db: Fix timespec length assumptions
From: Khem Raj @ 2020-02-14 22:33 UTC (permalink / raw)
  To: openembedded-core

This should fix the vdso crashes seen with kernel 5.0+

python3[11312] general protection fault ip:b7e966b0 sp:bf8175cc error:0 in libc.so[b7e8b000+6b000]

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Cc: Bruce Ashfield <bruce.ashfield@gmail.com>
---
 ...001-clock-Do-not-define-own-timespec.patch | 45 +++++++++++++++++++
 meta/recipes-support/db/db_5.3.28.bb          |  1 +
 2 files changed, 46 insertions(+)
 create mode 100644 meta/recipes-support/db/db/0001-clock-Do-not-define-own-timespec.patch

diff --git a/meta/recipes-support/db/db/0001-clock-Do-not-define-own-timespec.patch b/meta/recipes-support/db/db/0001-clock-Do-not-define-own-timespec.patch
new file mode 100644
index 0000000000..9d51a44a26
--- /dev/null
+++ b/meta/recipes-support/db/db/0001-clock-Do-not-define-own-timespec.patch
@@ -0,0 +1,45 @@
+From 96b303caf70a7635953c36e5bfb9ad6e75cb7637 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 14 Feb 2020 14:12:59 -0800
+Subject: [PATCH] clock: Do not define own timespec
+
+timespec is provided by libc and it's best left to libc.
+os_gettime takes a db_timespec and passes its address to clock_gettime,
+which assumes that db_timespec and timespec are the same, but actually
+db_timespec is 12 bytes here while libc's timespec is 16 bytes
+
+This can cause problems especially with 64bit time_t
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/dbinc/clock.h | 17 +----------------
+ 1 file changed, 1 insertion(+), 16 deletions(-)
+
+--- a/src/dbinc/clock.h
++++ b/src/dbinc/clock.h
+@@ -44,22 +44,8 @@
+ extern "C" {
+ #endif
+ 
+-/*
+- * This declaration is POSIX-compatible.  Because there are lots of different
+- * time.h include file patterns out there, it's easier to declare our own name
+- * in all cases than to try and discover if a system has a struct timespec.
+- * For the same reason, and because we'd have to #include <sys/time.h> in db.h,
+- * we don't export any timespec structures in the DB API, even in places where
+- * it would make sense, like the replication statistics information.
+- */
+-typedef struct {
+-	time_t	tv_sec;				/* seconds */
+-#ifdef HAVE_MIXED_SIZE_ADDRESSING
+-	int32_t tv_nsec;
+-#else
+-	long	tv_nsec;			/* nanoseconds */
+-#endif
+-} db_timespec;
++#include <time.h>
++#define db_timespec struct timespec
+ 
+ /* Operations on timespecs */
+ #undef	timespecclear
diff --git a/meta/recipes-support/db/db_5.3.28.bb b/meta/recipes-support/db/db_5.3.28.bb
index a2969055a6..badb79f9ea 100644
--- a/meta/recipes-support/db/db_5.3.28.bb
+++ b/meta/recipes-support/db/db_5.3.28.bb
@@ -26,6 +26,7 @@ SRC_URI += "file://fix-parallel-build.patch \
             file://0001-configure-Add-explicit-tag-options-to-libtool-invoca.patch \
             file://sequence-type.patch \
             file://0001-Fix-libc-compatibility-by-renaming-atomic_init-API.patch \
+            file://0001-clock-Do-not-define-own-timespec.patch \
            "
 # We are not interested in official latest 6.x versions;
 # let's track what debian is using.
-- 
2.25.0



^ permalink raw reply related

* [LSF/MM/BPF] BPF: various topics
From: Daniel Borkmann @ 2020-02-14 22:33 UTC (permalink / raw)
  To: bpf; +Cc: lsf-pc

I'd like to propose various BPF core and networking related topics some of which we
also encountered during Cilium development, for example, during our recent BPF
kube-proxy replacement work:

- Cilium uses BPF cgroups programs for its Kubernetes Service implementation
   in order to select backends and directly connect to them instead of later
   having to perform NAT on the skb itself in lower layers. BPF cgroups hooks
   are not network namespace aware while Kubernetes pods are heavily built
   around network namespaces. In addition to getting BPF cgroups netns aware,
   I'd like to discuss various other needs Cilium has around its BPF cgroups
   usage in order to fix some short-comings we're facing today including
   the addition of new hooks.
- Another issue is the BPF fib lookup helper use in combination with our BPF
   based NodePort implementation, where the goal is to discuss design proposals to
   enable the Cilium agent to push L3 addresses into the kernel for its backends
   and have the neighboring subsystem self-manage & maintain their resolution.
- Third topic is to discuss a BPF-based static keys proposal in order to
   allow functionality to be enabled/disabled dynamically at runtime with very low
   overhead and without reloading programs through the verifier. This builds upon
   recent work that has been done around direct jumps for optimizing tail calls.
- Some of the LRU based maps in Cilium have interdependencies; currently, we
   use a band-aid through the means of a garbage collector in order to evict
   data from multiple maps, but what is needed is an LRU eviction callback that
   we can make use of in order to trigger deletion events in dependent maps.
   We'll discuss possible API options on how this could be addressed generically.

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH v2] cgroup: memcg: net: do not associate sock with unrelated cgroup
From: Roman Gushchin @ 2020-02-14 22:33 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Eric Dumazet, Tejun Heo, Greg Thelen,
	Michal Hocko, Vladimir Davydov, Andrew Morton,
	cgroups-u79uwXL29TY76Z2rM5mHXA, linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20200214222415.181467-1-shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

On Fri, Feb 14, 2020 at 02:24:15PM -0800, Shakeel Butt wrote:
> We are testing network memory accounting in our setup and noticed
> inconsistent network memory usage and often unrelated cgroups network
> usage correlates with testing workload. On further inspection, it
> seems like mem_cgroup_sk_alloc() and cgroup_sk_alloc() are broken in
> irq context specially for cgroup v1.
> 
> mem_cgroup_sk_alloc() and cgroup_sk_alloc() can be called in irq context
> and kind of assumes that this can only happen from sk_clone_lock()
> and the source sock object has already associated cgroup. However in
> cgroup v1, where network memory accounting is opt-in, the source sock
> can be unassociated with any cgroup and the new cloned sock can get
> associated with unrelated interrupted cgroup.
> 
> Cgroup v2 can also suffer if the source sock object was created by
> process in the root cgroup or if sk_alloc() is called in irq context.
> The fix is to just do nothing in interrupt.
> 
> Fixes: 2d7580738345 ("mm: memcontrol: consolidate cgroup socket tracking")
> Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets")
> Signed-off-by: Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> ---
> 
> Changes since v1:
> - Fix cgroup_sk_alloc() too.
> 
>  kernel/cgroup/cgroup.c | 4 ++++
>  mm/memcontrol.c        | 4 ++++
>  2 files changed, 8 insertions(+)
> 
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 9a8a5ded3c48..46e5f5518fba 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -6449,6 +6449,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
>  		return;
>  	}
>  
> +	/* Do not associate the sock with unrelated interrupted task's memcg. */
                                                                       ^^^^^
								       cgroup?
> +	if (in_interrupt())
> +		return;
> +
>  	rcu_read_lock();
>  
>  	while (true) {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 63bb6a2aab81..f500da82bfe8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6697,6 +6697,10 @@ void mem_cgroup_sk_alloc(struct sock *sk)
>  		return;
>  	}

Can you please include the stack trace in the commit log?
Except for a minor typo (see above),
Reviewed-by: Roman Gushchin <guro-b10kYP2dOMg@public.gmane.org>

A really good catch.

Thank you!

^ permalink raw reply

* Re: [PATCH v4 4/6] coresight: Expose device connections via sysfs
From: Mathieu Poirier @ 2020-02-14 22:31 UTC (permalink / raw)
  To: Mike Leach; +Cc: coresight, suzuki.poulose, linux-arm-kernel, linux-doc
In-Reply-To: <20200211105808.27884-5-mike.leach@linaro.org>

Hi Mike,

On Tue, Feb 11, 2020 at 10:58:06AM +0000, Mike Leach wrote:
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> Coresight device connections are a bit complicated and is not
> exposed currently to the user. One has to look at the platform
> descriptions (DT bindings or ACPI bindings) to make an understanding.
> Given the new naming scheme, it will be helpful to have this information
> to choose the appropriate devices for tracing. This patch exposes
> the device connections via links in the sysfs directories.
> 
> e.g, for a connection devA[OutputPort_X] -> devB[InputPort_Y]
> is represented as two symlinks:
> 
>   /sys/bus/coresight/.../devA/out:X -> /sys/bus/coresight/.../devB
>   /sys/bus/coresight/.../devB/in:Y  -> /sys/bus/coresight/.../devA
> 
> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> Revised to use the generic sysfs links functions & link structures.
> Provides a connections sysfs group to hold the links.
> 
> Co-developed-by: Mike Leach <mike.leach@linaro.org>
> Signed-off-by: Mike Leach <mike.leach@linaro.org>

Because this patch is "From:" Suzuki, this should be:

Co-developed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mike Leach <mike.leach@linaro.org>

You can expand on Suzuki's contribution or the modifications you've done to it
in the changelog.

With the above:

Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> ---
>  drivers/hwtracing/coresight/coresight-priv.h  |  5 ++
>  drivers/hwtracing/coresight/coresight-sysfs.c | 80 +++++++++++++++++++
>  drivers/hwtracing/coresight/coresight.c       | 46 ++++++++---
>  include/linux/coresight.h                     |  2 +
>  4 files changed, 121 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
> index a4a658d46045..5a36f0f50899 100644
> --- a/drivers/hwtracing/coresight/coresight-priv.h
> +++ b/drivers/hwtracing/coresight/coresight-priv.h
> @@ -157,6 +157,11 @@ int coresight_add_sysfs_link(struct coresight_sysfs_link *info);
>  void coresight_remove_sysfs_link(struct coresight_sysfs_link *info);
>  int coresight_create_conns_sysfs_group(struct coresight_device *csdev);
>  void coresight_remove_conns_sysfs_group(struct coresight_device *csdev);
> +int coresight_make_links(struct coresight_device *orig,
> +			 struct coresight_connection *conn,
> +			 struct coresight_device *target);
> +void coresight_remove_links(struct coresight_device *orig,
> +			    struct coresight_connection *conn);
>  
>  #ifdef CONFIG_CORESIGHT_SOURCE_ETM3X
>  extern int etm_readl_cp14(u32 off, unsigned int *val);
> diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c
> index 17d565941e5e..0f18332b9f19 100644
> --- a/drivers/hwtracing/coresight/coresight-sysfs.c
> +++ b/drivers/hwtracing/coresight/coresight-sysfs.c
> @@ -122,3 +122,83 @@ void coresight_remove_sysfs_link(struct coresight_sysfs_link *info)
>  	info->orig->nr_links--;
>  	info->target->nr_links--;
>  }
> +
> +/*
> + * coresight_make_links: Make a link for a connection from a @orig
> + * device to @target, represented by @conn.
> + *
> + *   e.g, for devOrig[output_X] -> devTarget[input_Y] is represented
> + *   as two symbolic links :
> + *
> + *	/sys/.../devOrig/out:X	-> /sys/.../devTarget/
> + *	/sys/.../devTarget/in:Y	-> /sys/.../devOrig/
> + *
> + * The link names are allocated for a device where it appears. i.e, the
> + * "out" link on the master and "in" link on the slave device.
> + * The link info is stored in the connection record for avoiding
> + * the reconstruction of names for removal.
> + */
> +int coresight_make_links(struct coresight_device *orig,
> +			 struct coresight_connection *conn,
> +			 struct coresight_device *target)
> +{
> +	int ret = -ENOMEM;
> +	char *outs = NULL, *ins = NULL;
> +	struct coresight_sysfs_link *link = NULL;
> +
> +	do {
> +		outs = devm_kasprintf(&orig->dev, GFP_KERNEL,
> +				      "out:%d", conn->outport);
> +		if (!outs)
> +			break;
> +		ins = devm_kasprintf(&target->dev, GFP_KERNEL,
> +				     "in:%d", conn->child_port);
> +		if (!ins)
> +			break;
> +		link = devm_kzalloc(&orig->dev,
> +				    sizeof(struct coresight_sysfs_link),
> +				    GFP_KERNEL);
> +		if (!link)
> +			break;
> +
> +		link->orig = orig;
> +		link->target = target;
> +		link->orig_name = outs;
> +		link->target_name = ins;
> +
> +		ret = coresight_add_sysfs_link(link);
> +		if (ret)
> +			break;
> +
> +		conn->link = link;
> +
> +		/*
> +		 * Install the device connection. This also indicates that
> +		 * the links are operational on both ends.
> +		 */
> +		conn->child_dev = target;
> +		return 0;
> +	} while (0);
> +
> +	return ret;
> +}
> +
> +/*
> + * coresight_remove_links: Remove the sysfs links for a given connection @conn,
> + * from @orig device to @target device. See coresight_make_links() for more
> + * details.
> + */
> +void coresight_remove_links(struct coresight_device *orig,
> +			    struct coresight_connection *conn)
> +{
> +	if (!orig || !conn->link)
> +		return;
> +
> +	coresight_remove_sysfs_link(conn->link);
> +
> +	devm_kfree(&conn->child_dev->dev, conn->link->target_name);
> +	devm_kfree(&orig->dev, conn->link->orig_name);
> +	devm_kfree(&orig->dev, conn->link);
> +	conn->link = NULL;
> +	conn->child_dev = NULL;
> +}
> diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> index 07f66a3968f1..4f10cfa9dc18 100644
> --- a/drivers/hwtracing/coresight/coresight.c
> +++ b/drivers/hwtracing/coresight/coresight.c
> @@ -1031,7 +1031,7 @@ static void coresight_device_release(struct device *dev)
>  
>  static int coresight_orphan_match(struct device *dev, void *data)
>  {
> -	int i;
> +	int i, ret = 0;
>  	bool still_orphan = false;
>  	struct coresight_device *csdev, *i_csdev;
>  	struct coresight_connection *conn;
> @@ -1056,19 +1056,23 @@ static int coresight_orphan_match(struct device *dev, void *data)
>  		/* We have found at least one orphan connection */
>  		if (conn->child_dev == NULL) {
>  			/* Does it match this newly added device? */
> -			if (conn->child_fwnode == csdev->dev.fwnode)
> -				conn->child_dev = csdev;
> -			else
> +			if (conn->child_fwnode == csdev->dev.fwnode) {
> +				ret = coresight_make_links(i_csdev,
> +							   conn, csdev);
> +				if (ret)
> +					return ret;
> +			} else {
>  				/* This component still has an orphan */
>  				still_orphan = true;
> +			}
>  		}
>  	}
>  
>  	i_csdev->orphan = still_orphan;
>  
>  	/*
> -	 * Returning '0' ensures that all known component on the
> -	 * bus will be checked.
> +	 * Returning '0' in case we didn't encounter any error,
> +	 * ensures that all known component on the bus will be checked.
>  	 */
>  	return 0;
>  }
> @@ -1082,15 +1086,21 @@ static int coresight_fixup_orphan_conns(struct coresight_device *csdev)
>  
>  static int coresight_fixup_device_conns(struct coresight_device *csdev)
>  {
> -	int i;
> +	int i, ret = 0;
>  
>  	for (i = 0; i < csdev->pdata->nr_outport; i++) {
>  		struct coresight_connection *conn = &csdev->pdata->conns[i];
>  
>  		conn->child_dev =
>  			coresight_find_csdev_by_fwnode(conn->child_fwnode);
> -		if (!conn->child_dev)
> +		if (conn->child_dev) {
> +			ret = coresight_make_links(csdev, conn,
> +						   conn->child_dev);
> +			if (ret)
> +				break;
> +		} else {
>  			csdev->orphan = true;
> +		}
>  	}
>  
>  	return 0;
> @@ -1121,7 +1131,7 @@ static int coresight_remove_match(struct device *dev, void *data)
>  
>  		if (csdev->dev.fwnode == conn->child_fwnode) {
>  			iterator->orphan = true;
> -			conn->child_dev = NULL;
> +			coresight_remove_links(iterator, conn);
>  			/*
>  			 * Drop the reference to the handle for the remote
>  			 * device acquired in parsing the connections from
> @@ -1215,13 +1225,23 @@ void coresight_release_platform_data(struct coresight_device *csdev,
>  				     struct coresight_platform_data *pdata)
>  {
>  	int i;
> +	struct coresight_connection *conns = pdata->conns;
>  
>  	for (i = 0; i < pdata->nr_outport; i++) {
> -		if (pdata->conns[i].child_fwnode) {
> -			fwnode_handle_put(pdata->conns[i].child_fwnode);
> +		/* If we have made the links, remove them now */
> +		if (csdev && conns[i].child_dev)
> +			coresight_remove_links(csdev, &conns[i]);
> +		/*
> +		 * Drop the refcount and clear the handle as this device
> +		 * is going away
> +		 */
> +		if (conns[i].child_fwnode) {
> +			fwnode_handle_put(conns[i].child_fwnode);
>  			pdata->conns[i].child_fwnode = NULL;
>  		}
>  	}
> +	if (csdev)
> +		coresight_remove_conns_sysfs_group(csdev);
>  }
>  
>  struct coresight_device *coresight_register(struct coresight_desc *desc)
> @@ -1303,7 +1323,9 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>  
>  	mutex_lock(&coresight_mutex);
>  
> -	ret = coresight_fixup_device_conns(csdev);
> +	ret = coresight_create_conns_sysfs_group(csdev);
> +	if (!ret)
> +		ret = coresight_fixup_device_conns(csdev);
>  	if (!ret)
>  		ret = coresight_fixup_orphan_conns(csdev);
>  	if (!ret)
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index a2ec25e02ca9..ccd17304d7bd 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -140,12 +140,14 @@ struct coresight_desc {
>   * @chid_fwnode: remote component's fwnode handle.
>   * @child_dev:	a @coresight_device representation of the component
>  		connected to @outport.
> + * @link: Representation of the connection as a sysfs link.
>   */
>  struct coresight_connection {
>  	int outport;
>  	int child_port;
>  	struct fwnode_handle *child_fwnode;
>  	struct coresight_device *child_dev;
> +	struct coresight_sysfs_link *link;
>  };
>  
>  /**
> -- 
> 2.17.1
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH v4 4/6] coresight: Expose device connections via sysfs
From: Mathieu Poirier @ 2020-02-14 22:31 UTC (permalink / raw)
  To: Mike Leach; +Cc: linux-arm-kernel, coresight, linux-doc, suzuki.poulose
In-Reply-To: <20200211105808.27884-5-mike.leach@linaro.org>

Hi Mike,

On Tue, Feb 11, 2020 at 10:58:06AM +0000, Mike Leach wrote:
> From: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> Coresight device connections are a bit complicated and is not
> exposed currently to the user. One has to look at the platform
> descriptions (DT bindings or ACPI bindings) to make an understanding.
> Given the new naming scheme, it will be helpful to have this information
> to choose the appropriate devices for tracing. This patch exposes
> the device connections via links in the sysfs directories.
> 
> e.g, for a connection devA[OutputPort_X] -> devB[InputPort_Y]
> is represented as two symlinks:
> 
>   /sys/bus/coresight/.../devA/out:X -> /sys/bus/coresight/.../devB
>   /sys/bus/coresight/.../devB/in:Y  -> /sys/bus/coresight/.../devA
> 
> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> 
> Revised to use the generic sysfs links functions & link structures.
> Provides a connections sysfs group to hold the links.
> 
> Co-developed-by: Mike Leach <mike.leach@linaro.org>
> Signed-off-by: Mike Leach <mike.leach@linaro.org>

Because this patch is "From:" Suzuki, this should be:

Co-developed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mike Leach <mike.leach@linaro.org>

You can expand on Suzuki's contribution or the modifications you've done to it
in the changelog.

With the above:

Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>

> ---
>  drivers/hwtracing/coresight/coresight-priv.h  |  5 ++
>  drivers/hwtracing/coresight/coresight-sysfs.c | 80 +++++++++++++++++++
>  drivers/hwtracing/coresight/coresight.c       | 46 ++++++++---
>  include/linux/coresight.h                     |  2 +
>  4 files changed, 121 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
> index a4a658d46045..5a36f0f50899 100644
> --- a/drivers/hwtracing/coresight/coresight-priv.h
> +++ b/drivers/hwtracing/coresight/coresight-priv.h
> @@ -157,6 +157,11 @@ int coresight_add_sysfs_link(struct coresight_sysfs_link *info);
>  void coresight_remove_sysfs_link(struct coresight_sysfs_link *info);
>  int coresight_create_conns_sysfs_group(struct coresight_device *csdev);
>  void coresight_remove_conns_sysfs_group(struct coresight_device *csdev);
> +int coresight_make_links(struct coresight_device *orig,
> +			 struct coresight_connection *conn,
> +			 struct coresight_device *target);
> +void coresight_remove_links(struct coresight_device *orig,
> +			    struct coresight_connection *conn);
>  
>  #ifdef CONFIG_CORESIGHT_SOURCE_ETM3X
>  extern int etm_readl_cp14(u32 off, unsigned int *val);
> diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c
> index 17d565941e5e..0f18332b9f19 100644
> --- a/drivers/hwtracing/coresight/coresight-sysfs.c
> +++ b/drivers/hwtracing/coresight/coresight-sysfs.c
> @@ -122,3 +122,83 @@ void coresight_remove_sysfs_link(struct coresight_sysfs_link *info)
>  	info->orig->nr_links--;
>  	info->target->nr_links--;
>  }
> +
> +/*
> + * coresight_make_links: Make a link for a connection from a @orig
> + * device to @target, represented by @conn.
> + *
> + *   e.g, for devOrig[output_X] -> devTarget[input_Y] is represented
> + *   as two symbolic links :
> + *
> + *	/sys/.../devOrig/out:X	-> /sys/.../devTarget/
> + *	/sys/.../devTarget/in:Y	-> /sys/.../devOrig/
> + *
> + * The link names are allocated for a device where it appears. i.e, the
> + * "out" link on the master and "in" link on the slave device.
> + * The link info is stored in the connection record for avoiding
> + * the reconstruction of names for removal.
> + */
> +int coresight_make_links(struct coresight_device *orig,
> +			 struct coresight_connection *conn,
> +			 struct coresight_device *target)
> +{
> +	int ret = -ENOMEM;
> +	char *outs = NULL, *ins = NULL;
> +	struct coresight_sysfs_link *link = NULL;
> +
> +	do {
> +		outs = devm_kasprintf(&orig->dev, GFP_KERNEL,
> +				      "out:%d", conn->outport);
> +		if (!outs)
> +			break;
> +		ins = devm_kasprintf(&target->dev, GFP_KERNEL,
> +				     "in:%d", conn->child_port);
> +		if (!ins)
> +			break;
> +		link = devm_kzalloc(&orig->dev,
> +				    sizeof(struct coresight_sysfs_link),
> +				    GFP_KERNEL);
> +		if (!link)
> +			break;
> +
> +		link->orig = orig;
> +		link->target = target;
> +		link->orig_name = outs;
> +		link->target_name = ins;
> +
> +		ret = coresight_add_sysfs_link(link);
> +		if (ret)
> +			break;
> +
> +		conn->link = link;
> +
> +		/*
> +		 * Install the device connection. This also indicates that
> +		 * the links are operational on both ends.
> +		 */
> +		conn->child_dev = target;
> +		return 0;
> +	} while (0);
> +
> +	return ret;
> +}
> +
> +/*
> + * coresight_remove_links: Remove the sysfs links for a given connection @conn,
> + * from @orig device to @target device. See coresight_make_links() for more
> + * details.
> + */
> +void coresight_remove_links(struct coresight_device *orig,
> +			    struct coresight_connection *conn)
> +{
> +	if (!orig || !conn->link)
> +		return;
> +
> +	coresight_remove_sysfs_link(conn->link);
> +
> +	devm_kfree(&conn->child_dev->dev, conn->link->target_name);
> +	devm_kfree(&orig->dev, conn->link->orig_name);
> +	devm_kfree(&orig->dev, conn->link);
> +	conn->link = NULL;
> +	conn->child_dev = NULL;
> +}
> diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
> index 07f66a3968f1..4f10cfa9dc18 100644
> --- a/drivers/hwtracing/coresight/coresight.c
> +++ b/drivers/hwtracing/coresight/coresight.c
> @@ -1031,7 +1031,7 @@ static void coresight_device_release(struct device *dev)
>  
>  static int coresight_orphan_match(struct device *dev, void *data)
>  {
> -	int i;
> +	int i, ret = 0;
>  	bool still_orphan = false;
>  	struct coresight_device *csdev, *i_csdev;
>  	struct coresight_connection *conn;
> @@ -1056,19 +1056,23 @@ static int coresight_orphan_match(struct device *dev, void *data)
>  		/* We have found at least one orphan connection */
>  		if (conn->child_dev == NULL) {
>  			/* Does it match this newly added device? */
> -			if (conn->child_fwnode == csdev->dev.fwnode)
> -				conn->child_dev = csdev;
> -			else
> +			if (conn->child_fwnode == csdev->dev.fwnode) {
> +				ret = coresight_make_links(i_csdev,
> +							   conn, csdev);
> +				if (ret)
> +					return ret;
> +			} else {
>  				/* This component still has an orphan */
>  				still_orphan = true;
> +			}
>  		}
>  	}
>  
>  	i_csdev->orphan = still_orphan;
>  
>  	/*
> -	 * Returning '0' ensures that all known component on the
> -	 * bus will be checked.
> +	 * Returning '0' in case we didn't encounter any error,
> +	 * ensures that all known component on the bus will be checked.
>  	 */
>  	return 0;
>  }
> @@ -1082,15 +1086,21 @@ static int coresight_fixup_orphan_conns(struct coresight_device *csdev)
>  
>  static int coresight_fixup_device_conns(struct coresight_device *csdev)
>  {
> -	int i;
> +	int i, ret = 0;
>  
>  	for (i = 0; i < csdev->pdata->nr_outport; i++) {
>  		struct coresight_connection *conn = &csdev->pdata->conns[i];
>  
>  		conn->child_dev =
>  			coresight_find_csdev_by_fwnode(conn->child_fwnode);
> -		if (!conn->child_dev)
> +		if (conn->child_dev) {
> +			ret = coresight_make_links(csdev, conn,
> +						   conn->child_dev);
> +			if (ret)
> +				break;
> +		} else {
>  			csdev->orphan = true;
> +		}
>  	}
>  
>  	return 0;
> @@ -1121,7 +1131,7 @@ static int coresight_remove_match(struct device *dev, void *data)
>  
>  		if (csdev->dev.fwnode == conn->child_fwnode) {
>  			iterator->orphan = true;
> -			conn->child_dev = NULL;
> +			coresight_remove_links(iterator, conn);
>  			/*
>  			 * Drop the reference to the handle for the remote
>  			 * device acquired in parsing the connections from
> @@ -1215,13 +1225,23 @@ void coresight_release_platform_data(struct coresight_device *csdev,
>  				     struct coresight_platform_data *pdata)
>  {
>  	int i;
> +	struct coresight_connection *conns = pdata->conns;
>  
>  	for (i = 0; i < pdata->nr_outport; i++) {
> -		if (pdata->conns[i].child_fwnode) {
> -			fwnode_handle_put(pdata->conns[i].child_fwnode);
> +		/* If we have made the links, remove them now */
> +		if (csdev && conns[i].child_dev)
> +			coresight_remove_links(csdev, &conns[i]);
> +		/*
> +		 * Drop the refcount and clear the handle as this device
> +		 * is going away
> +		 */
> +		if (conns[i].child_fwnode) {
> +			fwnode_handle_put(conns[i].child_fwnode);
>  			pdata->conns[i].child_fwnode = NULL;
>  		}
>  	}
> +	if (csdev)
> +		coresight_remove_conns_sysfs_group(csdev);
>  }
>  
>  struct coresight_device *coresight_register(struct coresight_desc *desc)
> @@ -1303,7 +1323,9 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>  
>  	mutex_lock(&coresight_mutex);
>  
> -	ret = coresight_fixup_device_conns(csdev);
> +	ret = coresight_create_conns_sysfs_group(csdev);
> +	if (!ret)
> +		ret = coresight_fixup_device_conns(csdev);
>  	if (!ret)
>  		ret = coresight_fixup_orphan_conns(csdev);
>  	if (!ret)
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index a2ec25e02ca9..ccd17304d7bd 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -140,12 +140,14 @@ struct coresight_desc {
>   * @chid_fwnode: remote component's fwnode handle.
>   * @child_dev:	a @coresight_device representation of the component
>  		connected to @outport.
> + * @link: Representation of the connection as a sysfs link.
>   */
>  struct coresight_connection {
>  	int outport;
>  	int child_port;
>  	struct fwnode_handle *child_fwnode;
>  	struct coresight_device *child_dev;
> +	struct coresight_sysfs_link *link;
>  };
>  
>  /**
> -- 
> 2.17.1
> 

^ permalink raw reply

* Re: [PATCH v7 00/15] add git-bugreport tool
From: Junio C Hamano @ 2020-02-14 22:30 UTC (permalink / raw)
  To: Emily Shaffer
  Cc: git, Derrick Stolee, Johannes Schindelin, Martin Ågren,
	Aaron Schrab, Danh Doan, Eric Sunshine, SZEDER Gábor,
	Andreas Schwab
In-Reply-To: <20200214220003.GL190927@google.com>

Emily Shaffer <emilyshaffer@google.com> writes:

> That's fine by me. I find the [[:syntax:]] extremely ugly,...

FWIW, I find it so too.

^ permalink raw reply

* Re: [PATCH v2 net 3/3] wireguard: send: account for mtu=0 devices
From: Eric Dumazet @ 2020-02-14 22:30 UTC (permalink / raw)
  To: Jason A. Donenfeld, Eric Dumazet; +Cc: Netdev
In-Reply-To: <CAHmME9r6gTCV8cpPgyjOVMWCbRJtswzqXMYBqTQmo001AZz05Q@mail.gmail.com>



On 2/14/20 1:57 PM, Jason A. Donenfeld wrote:

> 
> Thanks, I appreciate your scrutiny here. Right again, you are. It
> looks like that was added in 2017 after observing the pattern in other
> drivers and seeing the documentation comment, "Wait for packets
> currently being received to be done." That sounds like an important
> thing to do before tearing down a socket. But here it makes no sense
> at all, since synchronize_net() is just a wrapper around
> synchronize_rcu() (and sometimes _expedited). And here, the
> synchronize_rcu() usage makes sense to have, since this is as boring
> of an rcu pattern as can be:
> 
> mutex_lock()
> old = rcu_dereference_protected(x->y)
> rcu_assign(x->y, new)
> mutex_unlock()
> synchronize_rcu()
> free_it(old)
> 
> Straight out of the documentation. Having the extra synchronize_net()
> in there adds nothing at all. I'll send a v3 of this 5.6-rc2 cleanup
> series containing that removal.
> 

Also note that UDP sockets have the SOCK_RCU_FREE flag set, so core
networking also respects one RCU grace period before freeing them.

It is possible that no extra synchronize_{net|rcu}() call is needed,
but this is left as an exercise for future kernels :)


^ permalink raw reply

* Re: Performance issue with psusensor / dbus-sensors
From: James Feist @ 2020-02-14 22:29 UTC (permalink / raw)
  To: Peter Lundgren, openbmc, vernon.mauery, jae.hyun.yoo, Josh Lehan,
	Alex Qiu, Xiang Liu, Sui Chen
In-Reply-To: <CAK13xKNkmy5fTgi0xLK=F+fBJbA7EN3bRsinLbN2sTknfiCGwA@mail.gmail.com>

On 2/14/20 2:14 PM, Peter Lundgren wrote:
> We're running into some occasional and hard to reproduce performance 
> issues with sensors on the entity-manager/dbus-sensors/intel-ipmi-oem 
> software stack. I don't have much concrete to say on the subject, but I 
> want to put a feeler out to see if anyone else has seen similar issues. 
> Here's what we think so far:
> 
> Complaints range from IPMI sensor reads being slower than normal to 
> sensors "never" updating.
> 
> Josh got access to one machine in a bad state and observed this:
> 
>  1. All the I2C buses were working normally. i2cdetect ran successfully
>     on each bus.
>  2. hwmon was working fine. He wrote a shell script to read all of the
>     *_input sysfs files and could read every sensor in the system in 3
>     seconds.
>  3. psusensor was running.
>  4. busctl --no-pager monitor | grep -i PropertiesChanged shows no
>     traffic. On a healthy system, it shows many updates per second. No
>     obvious error messages in journalctl --no-pager -f.
>  5. Restarting psusensor alleviates the problem.

Not sure if it's 100% related but Jae is looking into an issue with the 
CPU sensor not reporting data after DC cycles. We haven't root caused it 
completely yet but we'll let you know what we find.

-James

> 

^ permalink raw reply

* Re: [PATCH v2 4/6] NFS: Add READ_PLUS data segment support
From: Chuck Lever @ 2020-02-14 22:28 UTC (permalink / raw)
  To: Anna Schumaker; +Cc: Trond.Myklebust, Linux NFS Mailing List, Anna Schumaker
In-Reply-To: <20200214211227.407836-5-Anna.Schumaker@Netapp.com>



> On Feb 14, 2020, at 4:12 PM, schumaker.anna@gmail.com wrote:
> 
> From: Anna Schumaker <Anna.Schumaker@Netapp.com>
> 
> This patch adds client support for decoding a single NFS4_CONTENT_DATA
> segment returned by the server. This is the simplest implementation
> possible, since it does not account for any hole segments in the reply.
> 
> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
> ---
> fs/nfs/nfs42xdr.c         | 138 ++++++++++++++++++++++++++++++++++++++
> fs/nfs/nfs4proc.c         |  43 +++++++++++-
> fs/nfs/nfs4xdr.c          |   1 +
> include/linux/nfs4.h      |   2 +-
> include/linux/nfs_fs_sb.h |   1 +
> include/linux/nfs_xdr.h   |   2 +-
> 6 files changed, 182 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
> index c03f3246d6c5..bf118ecabe2c 100644
> --- a/fs/nfs/nfs42xdr.c
> +++ b/fs/nfs/nfs42xdr.c
> @@ -45,6 +45,15 @@
> #define encode_deallocate_maxsz		(op_encode_hdr_maxsz + \
> 					 encode_fallocate_maxsz)
> #define decode_deallocate_maxsz		(op_decode_hdr_maxsz)
> +#define encode_read_plus_maxsz		(op_encode_hdr_maxsz + \
> +					 encode_stateid_maxsz + 3)
> +#define NFS42_READ_PLUS_SEGMENT_SIZE	(1 /* data_content4 */ + \
> +					 2 /* data_info4.di_offset */ + \
> +					 2 /* data_info4.di_length */)
> +#define decode_read_plus_maxsz		(op_decode_hdr_maxsz + \
> +					 1 /* rpr_eof */ + \
> +					 1 /* rpr_contents count */ + \
> +					 NFS42_READ_PLUS_SEGMENT_SIZE)
> #define encode_seek_maxsz		(op_encode_hdr_maxsz + \
> 					 encode_stateid_maxsz + \
> 					 2 /* offset */ + \
> @@ -128,6 +137,14 @@
> 					 decode_putfh_maxsz + \
> 					 decode_deallocate_maxsz + \
> 					 decode_getattr_maxsz)
> +#define NFS4_enc_read_plus_sz		(compound_encode_hdr_maxsz + \
> +					 encode_sequence_maxsz + \
> +					 encode_putfh_maxsz + \
> +					 encode_read_plus_maxsz)
> +#define NFS4_dec_read_plus_sz		(compound_decode_hdr_maxsz + \
> +					 decode_sequence_maxsz + \
> +					 decode_putfh_maxsz + \
> +					 decode_read_plus_maxsz)
> #define NFS4_enc_seek_sz		(compound_encode_hdr_maxsz + \
> 					 encode_sequence_maxsz + \
> 					 encode_putfh_maxsz + \
> @@ -252,6 +269,16 @@ static void encode_deallocate(struct xdr_stream *xdr,
> 	encode_fallocate(xdr, args);
> }
> 
> +static void encode_read_plus(struct xdr_stream *xdr,
> +			     const struct nfs_pgio_args *args,
> +			     struct compound_hdr *hdr)
> +{
> +	encode_op_hdr(xdr, OP_READ_PLUS, decode_read_plus_maxsz, hdr);
> +	encode_nfs4_stateid(xdr, &args->stateid);
> +	encode_uint64(xdr, args->offset);
> +	encode_uint32(xdr, args->count);
> +}
> +
> static void encode_seek(struct xdr_stream *xdr,
> 			const struct nfs42_seek_args *args,
> 			struct compound_hdr *hdr)
> @@ -446,6 +473,29 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
> 	encode_nops(&hdr);
> }
> 
> +/*
> + * Encode READ_PLUS request
> + */
> +static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
> +				   struct xdr_stream *xdr,
> +				   const void *data)
> +{
> +	const struct nfs_pgio_args *args = data;
> +	struct compound_hdr hdr = {
> +		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
> +	};
> +
> +	encode_compound_hdr(xdr, req, &hdr);
> +	encode_sequence(xdr, &args->seq_args, &hdr);
> +	encode_putfh(xdr, args->fh, &hdr);
> +	encode_read_plus(xdr, args, &hdr);
> +
> +	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
> +				args->count, hdr.replen);
> +	req->rq_rcv_buf.flags |= XDRBUF_READ;

IMO this line is incorrect.

RFC 8267 Section 6.1 does not list any part of the result of READ_PLUS
as DDP-eligible. There's no way for a client to know how to set up
Write chunks, unless it knows exactly where the file's holes are in
advance. Even then... racy.

Just curious, have you tried READ_PLUS with proto=rdma ?


> +	encode_nops(&hdr);
> +}
> +
> /*
>  * Encode SEEK request
>  */
> @@ -694,6 +744,67 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
> 	return decode_op_hdr(xdr, OP_DEALLOCATE);
> }
> 
> +static uint32_t decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
> +				      uint32_t *eof)
> +{
> +	__be32 *p;
> +	uint32_t count, recvd;
> +	uint64_t offset;
> +
> +	p = xdr_inline_decode(xdr, 8 + 4);
> +	if (unlikely(!p))
> +		return -EIO;
> +
> +	p = xdr_decode_hyper(p, &offset);
> +	count = be32_to_cpup(p);
> +	if (count == 0)
> +		return 0;
> +
> +	recvd = xdr_read_pages(xdr, count);
> +	if (count > recvd) {
> +		dprintk("NFS: server cheating in read reply: "
> +				"count %u > recvd %u\n", count, recvd);
> +		count = recvd;
> +		*eof = 0;
> +	}
> +
> +	return count;
> +}
> +
> +static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
> +{
> +	__be32 *p;
> +	uint32_t count, eof, segments, type;
> +	int status;
> +
> +	status = decode_op_hdr(xdr, OP_READ_PLUS);
> +	if (status)
> +		return status;
> +
> +	p = xdr_inline_decode(xdr, 4 + 4);
> +	if (unlikely(!p))
> +		return -EIO;
> +
> +	eof = be32_to_cpup(p++);
> +	segments = be32_to_cpup(p++);
> +	if (segments == 0)
> +		return 0;
> +
> +	p = xdr_inline_decode(xdr, 4);
> +	if (unlikely(!p))
> +		return -EIO;
> +
> +	type = be32_to_cpup(p++);
> +	if (type == NFS4_CONTENT_DATA)
> +		count = decode_read_plus_data(xdr, res, &eof);
> +	else
> +		return -EINVAL;
> +
> +	res->eof = eof;
> +	res->count = count;
> +	return 0;
> +}
> +
> static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
> {
> 	int status;
> @@ -870,6 +981,33 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
> 	return status;
> }
> 
> +/*
> + * Decode READ_PLUS request
> + */
> +static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
> +				  struct xdr_stream *xdr,
> +				  void *data)
> +{
> +	struct nfs_pgio_res *res = data;
> +	struct compound_hdr hdr;
> +	int status;
> +
> +	status = decode_compound_hdr(xdr, &hdr);
> +	if (status)
> +		goto out;
> +	status = decode_sequence(xdr, &res->seq_res, rqstp);
> +	if (status)
> +		goto out;
> +	status = decode_putfh(xdr);
> +	if (status)
> +		goto out;
> +	status = decode_read_plus(xdr, res);
> +	if (!status)
> +		status = res->count;
> +out:
> +	return status;
> +}
> +
> /*
>  * Decode SEEK request
>  */
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 95d07a3dc5d1..ed3ec8c36273 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -69,6 +69,10 @@
> 
> #include "nfs4trace.h"
> 
> +#ifdef CONFIG_NFS_V4_2
> +#include "nfs42.h"
> +#endif /* CONFIG_NFS_V4_2 */
> +
> #define NFSDBG_FACILITY		NFSDBG_PROC
> 
> #define NFS4_BITMASK_SZ		3
> @@ -5199,28 +5203,60 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
> 	return true;
> }
> 
> +static bool nfs4_read_plus_not_supported(struct rpc_task *task,
> +					 struct nfs_pgio_header *hdr)
> +{
> +	struct nfs_server *server = NFS_SERVER(hdr->inode);
> +	struct rpc_message *msg = &task->tk_msg;
> +
> +	if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] &&
> +	    server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) {
> +		server->caps &= ~NFS_CAP_READ_PLUS;
> +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +		rpc_restart_call_prepare(task);
> +		return true;
> +	}
> +	return false;
> +}
> +
> static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
> {
> -
> 	dprintk("--> %s\n", __func__);
> 
> 	if (!nfs4_sequence_done(task, &hdr->res.seq_res))
> 		return -EAGAIN;
> 	if (nfs4_read_stateid_changed(task, &hdr->args))
> 		return -EAGAIN;
> +	if (nfs4_read_plus_not_supported(task, hdr))
> +		return -EAGAIN;
> 	if (task->tk_status > 0)
> 		nfs_invalidate_atime(hdr->inode);
> 	return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
> 				    nfs4_read_done_cb(task, hdr);
> }
> 
> +#ifdef CONFIG_NFS_V4_2
> +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
> +{
> +	if (server->caps & NFS_CAP_READ_PLUS)
> +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
> +	else
> +		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +}
> +#else
> +static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
> +{
> +	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +}
> +#endif /* CONFIG_NFS_V4_2 */
> +
> static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
> 				 struct rpc_message *msg)
> {
> 	hdr->timestamp   = jiffies;
> 	if (!hdr->pgio_done_cb)
> 		hdr->pgio_done_cb = nfs4_read_done_cb;
> -	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
> +	nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
> 	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
> }
> 
> @@ -9970,7 +10006,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
> 		| NFS_CAP_SEEK
> 		| NFS_CAP_LAYOUTSTATS
> 		| NFS_CAP_CLONE
> -		| NFS_CAP_LAYOUTERROR,
> +		| NFS_CAP_LAYOUTERROR
> +		| NFS_CAP_READ_PLUS,
> 	.init_client = nfs41_init_client,
> 	.shutdown_client = nfs41_shutdown_client,
> 	.match_stateid = nfs41_match_stateid,
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 47817ef0aadb..68b2917d0537 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -7584,6 +7584,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
> 	PROC42(COPY_NOTIFY,	enc_copy_notify,	dec_copy_notify),
> 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
> 	PROC42(LAYOUTERROR,	enc_layouterror,	dec_layouterror),
> +	PROC42(READ_PLUS,	enc_read_plus,		dec_read_plus),
> };
> 
> static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 82d8fb422092..c1eeef52545c 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -540,8 +540,8 @@ enum {
> 
> 	NFSPROC4_CLNT_LOOKUPP,
> 	NFSPROC4_CLNT_LAYOUTERROR,
> -
> 	NFSPROC4_CLNT_COPY_NOTIFY,
> +	NFSPROC4_CLNT_READ_PLUS,
> };
> 
> /* nfs41 types */
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 465fa98258a3..11248c5a7b24 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -281,5 +281,6 @@ struct nfs_server {
> #define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
> #define NFS_CAP_LAYOUTERROR	(1U << 26)
> #define NFS_CAP_COPY_NOTIFY	(1U << 27)
> +#define NFS_CAP_READ_PLUS	(1U << 28)
> 
> #endif
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 94c77ed55ce1..8efbf3d8b263 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -655,7 +655,7 @@ struct nfs_pgio_args {
> struct nfs_pgio_res {
> 	struct nfs4_sequence_res	seq_res;
> 	struct nfs_fattr *	fattr;
> -	__u32			count;
> +	__u64			count;
> 	__u32			op_status;
> 	union {
> 		struct {
> -- 
> 2.25.0
> 

--
Chuck Lever




^ permalink raw reply

* [PATCH v2 0/7] New way to track mce notifier chain actions
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200212204652.1489-1-tony.luck@intel.com>

Parts 1 & 2 are just cleanup.  CEC should follow the same rules
as everyone else who wants to be on the mce notifier chain. No
real reason for it to have direct hooks into mce/core.c
	[No substantive change since RFC version 1, but note that
	 I have kept the change to make CEC a "normal" user of
	 the mce notifier chain. Result is a few checks for
	 if (mce->kflags & MCE_HANDLED_CEC) in EDAC etc. drivers.]

Part 3 adds a field to struct mce, and defines the BIT fields
for each class of notifier. All EDAC drivers share the same BIT
since only one of them should be active.
	[Boris: Changed name of new field to "kflags" and made
	        it __u64, so plenty of space for possible future
		other uses]

Part 4 Re-done since draft based on Luto and Tglx comments that
	we should kill off all usage of NOTIFY_STOP. This patch
	now gets rid of all but one.  That's an AMD case where
	it looks like they don't want to decode some particular
	errors on a specific platform.  The right fix for that
	is to take Luto's advice and filter out before that item
	gets to the notifier chain. We even already have a filter
	function (filter_mce) to do that! But that change needs
	to be handled by someone with the appropriate h/w.

Part 5	Now just checks for mce->kflags in the default handler at
	the end of the chain to decide whether to print.

Part 6	NEW - add mce=print_all option to override default and
	print everything to the console. Intended for debug, or
	desperation scenarios where other logs are lost.

Part 7	NEW - Delete the code that tries to make sure only one
	out of acpi_extlog and the current loaded EDAC driver
	deals with an error.


Tony Luck (7):
  x86/mce: Rename "first" function as "early"
  x86/mce: Convert corrected error collector to use mce notifier
  x86/mce: Add new "kflags" field to "struct mce"
  x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
  x86/mce: Change default mce logger to check mce->kflags
  x86/mce: Add mce=print_all option
  x86/mce: Drop the EDAC report status checks

 arch/x86/include/asm/mce.h           | 15 +++----
 arch/x86/include/uapi/asm/mce.h      |  9 ++++
 arch/x86/kernel/cpu/mce/core.c       | 58 ++++++++------------------
 arch/x86/kernel/cpu/mce/dev-mcelog.c |  5 +++
 arch/x86/kernel/cpu/mce/internal.h   |  1 +
 drivers/acpi/acpi_extlog.c           | 19 ++-------
 drivers/acpi/nfit/mce.c              |  1 +
 drivers/edac/edac_mc.c               | 61 ----------------------------
 drivers/edac/i7core_edac.c           |  5 ++-
 drivers/edac/mce_amd.c               |  9 +++-
 drivers/edac/pnd2_edac.c             |  8 ++--
 drivers/edac/sb_edac.c               |  7 ++--
 drivers/edac/skx_common.c            |  3 +-
 drivers/ras/cec.c                    | 29 +++++++++++++
 include/linux/edac.h                 |  8 ----
 15 files changed, 91 insertions(+), 147 deletions(-)


base-commit: b19e8c68470385dd2c5440876591fddb02c8c402
-- 
2.21.1


^ permalink raw reply

* [PATCH v2 2/7] x86/mce: Convert corrected error collector to use mce notifier
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

The CEC code has its claws in a couple of routines in mce/core.c

Convert it to just register itself on the normal mce notifier
chain.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mce/core.c | 19 -------------------
 drivers/ras/cec.c              | 26 ++++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 3366807d8e58..06240cbe6f3e 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -542,21 +542,6 @@ bool mce_is_correctable(struct mce *m)
 }
 EXPORT_SYMBOL_GPL(mce_is_correctable);
 
-static bool cec_add_mce(struct mce *m)
-{
-	if (!m)
-		return false;
-
-	/* We eat only correctable DRAM errors with usable addresses. */
-	if (mce_is_memory_error(m) &&
-	    mce_is_correctable(m)  &&
-	    mce_usable_address(m))
-		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
-			return true;
-
-	return false;
-}
-
 static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 			      void *data)
 {
@@ -565,9 +550,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 	if (!m)
 		return NOTIFY_DONE;
 
-	if (cec_add_mce(m))
-		return NOTIFY_STOP;
-
 	/* Emit the trace record: */
 	trace_mce_record(m);
 
@@ -2588,7 +2570,6 @@ static int __init mcheck_late_init(void)
 		static_branch_inc(&mcsafe_key);
 
 	mcheck_debugfs_init();
-	cec_init();
 
 	/*
 	 * Flush out everything that has been logged during early boot, now that
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index c09cf55e2d20..d7f6718cbf8d 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -527,6 +527,29 @@ static int __init create_debugfs_nodes(void)
 	return 1;
 }
 
+static int cec_notifier(struct notifier_block *nb, unsigned long val,
+			void *data)
+{
+	struct mce *m = (struct mce *)data;
+
+	if (!m)
+		return NOTIFY_DONE;
+
+	/* We eat only correctable DRAM errors with usable addresses. */
+	if (mce_is_memory_error(m) &&
+	    mce_is_correctable(m)  &&
+	    mce_usable_address(m))
+		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
+			return NOTIFY_STOP;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block cec_nb = {
+	.notifier_call	= cec_notifier,
+	.priority	= MCE_PRIO_CEC,
+};
+
 void __init cec_init(void)
 {
 	if (ce_arr.disabled)
@@ -546,8 +569,11 @@ void __init cec_init(void)
 	INIT_DELAYED_WORK(&cec_work, cec_work_fn);
 	schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
 
+	mce_register_decode_chain(&cec_nb);
+
 	pr_info("Correctable Errors collector initialized.\n");
 }
+late_initcall(cec_init);
 
 int __init parse_cec_param(char *str)
 {
-- 
2.21.1


^ permalink raw reply related

* bpf-next is OPEN too. Re: net-next is OPEN
From: Alexei Starovoitov @ 2020-02-14 22:28 UTC (permalink / raw)
  To: David Miller; +Cc: Network Development, bpf
In-Reply-To: <20200214.141328.1414498612682173242.davem@davemloft.net>

Sorry everyone for delay in opening.

^ permalink raw reply

* [PATCH v2 3/7] x86/mce: Add new "kflags" field to "struct mce"
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

There can be many different subsystems registered on the mce handler
chain. Add a new bitmask field and define values so that handlers
can indicate whether they took any action to log or otherwise
handle an error.

The default handler at the end of the chain can use this information
to decide whether to print to the console log.

Boris suggested a generic name and leaving plenty of spare bits
for possible future use.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/include/uapi/asm/mce.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 955c2a2e1cf9..f8395812520b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -35,8 +35,17 @@ struct mce {
 	__u64 ipid;		/* MCA_IPID MSR: only valid on SMCA systems */
 	__u64 ppin;		/* Protected Processor Inventory Number */
 	__u32 microcode;	/* Microcode revision */
+	__u64 kflags;		/* Internal kernel use. See below */
 };
 
+/* kflags flag bits for logging etc. */
+#define	MCE_HANDLED_CEC		BIT(0)
+#define	MCE_HANDLED_UC		BIT(1)
+#define	MCE_HANDLED_EXTLOG	BIT(2)
+#define	MCE_HANDLED_NFIT	BIT(3)
+#define	MCE_HANDLED_EDAC	BIT(4)
+#define	MCE_HANDLED_MCELOG	BIT(5)
+
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
 #define MCE_GET_LOG_LEN      _IOR('M', 2, int)
 #define MCE_GETCLEAR_FLAGS   _IOR('M', 3, int)
-- 
2.21.1


^ permalink raw reply related

* [PATCH v2 6/7] x86/mce: Add mce=print_all option
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

Sometimes, when logs are getting lost, it's nice to just
have everything dumped to the serial console.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mce/core.c     | 7 ++++++-
 arch/x86/kernel/cpu/mce/internal.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 066d3903ef97..22925fd5e189 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -590,7 +590,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
 	if (!m)
 		return NOTIFY_DONE;
 
-	if (!m->kflags)
+	if (mca_cfg.print_all || !m->kflags)
 		__print_mce(m);
 
 	return NOTIFY_DONE;
@@ -1950,6 +1950,7 @@ void mce_disable_bank(int bank)
  * mce=no_cmci Disables CMCI
  * mce=no_lmce Disables LMCE
  * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
+ * mce=print_all Print all machine check logs to console
  * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
  * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
  *	monarchtimeout is how long to wait for other CPUs on machine
@@ -1978,6 +1979,8 @@ static int __init mcheck_enable(char *str)
 		cfg->lmce_disabled = 1;
 	else if (!strcmp(str, "dont_log_ce"))
 		cfg->dont_log_ce = true;
+	else if (!strcmp(str, "print_all"))
+		cfg->print_all = true;
 	else if (!strcmp(str, "ignore_ce"))
 		cfg->ignore_ce = true;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
@@ -2244,6 +2247,7 @@ static ssize_t store_int_with_restart(struct device *s,
 static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
 static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
 static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
+static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);
 
 static struct dev_ext_attribute dev_attr_check_interval = {
 	__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2268,6 +2272,7 @@ static struct device_attribute *mce_device_attrs[] = {
 #endif
 	&dev_attr_monarch_timeout.attr,
 	&dev_attr_dont_log_ce.attr,
+	&dev_attr_print_all.attr,
 	&dev_attr_ignore_ce.attr,
 	&dev_attr_cmci_disabled.attr,
 	NULL
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b785c0d0b590..bf09b8aa8184 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -114,6 +114,7 @@ struct mca_config {
 	bool dont_log_ce;
 	bool cmci_disabled;
 	bool ignore_ce;
+	bool print_all;
 
 	__u64 lmce_disabled		: 1,
 	      disabled			: 1,
-- 
2.21.1


^ permalink raw reply related

* [PATCH v2 7/7] x86/mce: Drop the EDAC report status checks
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

When we added acpi_extlog we were worried that the same error
would be reported more than once by different subsystems. But
in the ensuing years I've seen complaints that people could not
find an error log (because this mechanism suppressed the log
they were looking for).

Rip it all out.  People are smart enough to notice the same
address from different reporting mechanisms.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/acpi/acpi_extlog.c | 14 ---------
 drivers/edac/edac_mc.c     | 61 --------------------------------------
 drivers/edac/pnd2_edac.c   |  3 --
 drivers/edac/sb_edac.c     |  4 ---
 drivers/edac/skx_common.c  |  3 --
 include/linux/edac.h       |  8 -----
 6 files changed, 93 deletions(-)

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 9cc3c1f92db5..f138e12b7b82 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -42,8 +42,6 @@ struct extlog_l1_head {
 	u8  rev1[12];
 };
 
-static int old_edac_report_status;
-
 static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";
 
 /* L1 table related physical address */
@@ -229,11 +227,6 @@ static int __init extlog_init(void)
 	if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
 		return -ENODEV;
 
-	if (edac_get_report_status() == EDAC_REPORTING_FORCE) {
-		pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
-		return -EPERM;
-	}
-
 	rc = -EINVAL;
 	/* get L1 header to fetch necessary information */
 	l1_hdr_size = sizeof(struct extlog_l1_head);
@@ -281,12 +274,6 @@ static int __init extlog_init(void)
 	if (elog_buf == NULL)
 		goto err_release_elog;
 
-	/*
-	 * eMCA event report method has higher priority than EDAC method,
-	 * unless EDAC event report method is mandatory.
-	 */
-	old_edac_report_status = edac_get_report_status();
-	edac_set_report_status(EDAC_REPORTING_DISABLED);
 	mce_register_decode_chain(&extlog_mce_dec);
 	/* enable OS to be involved to take over management from BIOS */
 	((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,7 +295,6 @@ static int __init extlog_init(void)
 
 static void __exit extlog_exit(void)
 {
-	edac_set_report_status(old_edac_report_status);
 	mce_unregister_decode_chain(&extlog_mce_dec);
 	((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
 	if (extlog_l1_addr)
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 7243b88f81d8..288ba9e0c26d 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -43,8 +43,6 @@
 int edac_op_state = EDAC_OPSTATE_INVAL;
 EXPORT_SYMBOL_GPL(edac_op_state);
 
-static int edac_report = EDAC_REPORTING_ENABLED;
-
 /* lock to memory controller's control array */
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
@@ -55,65 +53,6 @@ static LIST_HEAD(mc_devices);
  */
 static const char *edac_mc_owner;
 
-int edac_get_report_status(void)
-{
-	return edac_report;
-}
-EXPORT_SYMBOL_GPL(edac_get_report_status);
-
-void edac_set_report_status(int new)
-{
-	if (new == EDAC_REPORTING_ENABLED ||
-	    new == EDAC_REPORTING_DISABLED ||
-	    new == EDAC_REPORTING_FORCE)
-		edac_report = new;
-}
-EXPORT_SYMBOL_GPL(edac_set_report_status);
-
-static int edac_report_set(const char *str, const struct kernel_param *kp)
-{
-	if (!str)
-		return -EINVAL;
-
-	if (!strncmp(str, "on", 2))
-		edac_report = EDAC_REPORTING_ENABLED;
-	else if (!strncmp(str, "off", 3))
-		edac_report = EDAC_REPORTING_DISABLED;
-	else if (!strncmp(str, "force", 5))
-		edac_report = EDAC_REPORTING_FORCE;
-
-	return 0;
-}
-
-static int edac_report_get(char *buffer, const struct kernel_param *kp)
-{
-	int ret = 0;
-
-	switch (edac_report) {
-	case EDAC_REPORTING_ENABLED:
-		ret = sprintf(buffer, "on");
-		break;
-	case EDAC_REPORTING_DISABLED:
-		ret = sprintf(buffer, "off");
-		break;
-	case EDAC_REPORTING_FORCE:
-		ret = sprintf(buffer, "force");
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
-static const struct kernel_param_ops edac_report_ops = {
-	.set = edac_report_set,
-	.get = edac_report_get,
-};
-
-module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
-
 unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 				     unsigned int len)
 {
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 77ad315c7e8d..bfb6c88ebb28 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1396,9 +1396,6 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 	struct dram_addr daddr;
 	char *type;
 
-	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
-		return NOTIFY_DONE;
-
 	mci = pnd2_mci;
 	if (!mci || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 6e17f601ea63..898f567d5d89 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3134,8 +3134,6 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 	char *type;
 
-	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
-		return NOTIFY_DONE;
 	if (mce->kflags & MCE_HANDLED_CEC)
 		return NOTIFY_DONE;
 
@@ -3526,8 +3524,6 @@ static int __init sbridge_init(void)
 
 	if (rc >= 0) {
 		mce_register_decode_chain(&sbridge_mce_dec);
-		if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
-			sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
 		return 0;
 	}
 
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 6f08a12f6b11..423d33aef54f 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -574,9 +574,6 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 	char *type;
 
-	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
-		return NOTIFY_DONE;
-
 	if (mce->kflags & MCE_HANDLED_CEC)
 		return NOTIFY_DONE;
 
diff --git a/include/linux/edac.h b/include/linux/edac.h
index cc31b9742684..bd770e31ced6 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -31,14 +31,6 @@ struct device;
 extern int edac_op_state;
 
 struct bus_type *edac_get_sysfs_subsys(void);
-int edac_get_report_status(void);
-void edac_set_report_status(int new);
-
-enum {
-	EDAC_REPORTING_ENABLED,
-	EDAC_REPORTING_DISABLED,
-	EDAC_REPORTING_FORCE
-};
 
 static inline void opstate_init(void)
 {
-- 
2.21.1


^ permalink raw reply related

* [PATCH v2 1/7] x86/mce: Rename "first" function as "early"
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

It isn't going to be first on the notifier chain when we move
the CEC code to be a normal user of the notifier chain.

Fix the enum for the MCE_PRIO symbols to list them in reverse
order so that the compiler can give them numbers from low to
high priority. Add an entry for MCE_PRIO_CEC as the highest
priority.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/include/asm/mce.h     | 15 ++++++++-------
 arch/x86/kernel/cpu/mce/core.c | 10 +++++-----
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4359b955e0b7..6f17cc618d5e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -143,13 +143,14 @@ struct mce_log_buffer {
 };
 
 enum mce_notifier_prios {
-	MCE_PRIO_FIRST		= INT_MAX,
-	MCE_PRIO_UC		= INT_MAX - 1,
-	MCE_PRIO_EXTLOG		= INT_MAX - 2,
-	MCE_PRIO_NFIT		= INT_MAX - 3,
-	MCE_PRIO_EDAC		= INT_MAX - 4,
-	MCE_PRIO_MCELOG		= 1,
-	MCE_PRIO_LOWEST		= 0,
+	MCE_PRIO_LOWEST,
+	MCE_PRIO_MCELOG,
+	MCE_PRIO_EDAC,
+	MCE_PRIO_NFIT,
+	MCE_PRIO_EXTLOG,
+	MCE_PRIO_UC,
+	MCE_PRIO_EARLY,
+	MCE_PRIO_CEC
 };
 
 struct notifier_block;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c4f949611e4..3366807d8e58 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -557,7 +557,7 @@ static bool cec_add_mce(struct mce *m)
 	return false;
 }
 
-static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
+static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 			      void *data)
 {
 	struct mce *m = (struct mce *)data;
@@ -578,9 +578,9 @@ static int mce_first_notifier(struct notifier_block *nb, unsigned long val,
 	return NOTIFY_DONE;
 }
 
-static struct notifier_block first_nb = {
-	.notifier_call	= mce_first_notifier,
-	.priority	= MCE_PRIO_FIRST,
+static struct notifier_block early_nb = {
+	.notifier_call	= mce_early_notifier,
+	.priority	= MCE_PRIO_EARLY,
 };
 
 static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
@@ -2028,7 +2028,7 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
-	mce_register_decode_chain(&first_nb);
+	mce_register_decode_chain(&early_nb);
 	mce_register_decode_chain(&mce_uc_nb);
 	mce_register_decode_chain(&mce_default_nb);
 	mcheck_vendor_init_severity();
-- 
2.21.1


^ permalink raw reply related

* [PATCH v2 4/7] x86/mce: Fix all mce notifiers to update the mce->kflags bitmask
From: Tony Luck @ 2020-02-14 22:27 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, x86, Andy Lutomirski, linux-kernel
In-Reply-To: <20200214222720.13168-1-tony.luck@intel.com>

If the handler took any action to log or deal with the error, set
a bit in mce->kflags so that the default handler on the end of
the machine check chain can see what has been done.

Get rid of NOTIFY_STOP (well almost ... mce_amd.c is currently using
it to filter out some GART TLB errors ... need to deal with that
later).

Make the EDAC and dev-mcelog handlers skip over errors already
processed by CEC.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mce/core.c       | 4 +++-
 arch/x86/kernel/cpu/mce/dev-mcelog.c | 5 +++++
 drivers/acpi/acpi_extlog.c           | 5 +++--
 drivers/acpi/nfit/mce.c              | 1 +
 drivers/edac/i7core_edac.c           | 5 +++--
 drivers/edac/mce_amd.c               | 9 +++++++--
 drivers/edac/pnd2_edac.c             | 5 +++--
 drivers/edac/sb_edac.c               | 5 ++++-
 drivers/edac/skx_common.c            | 4 ++++
 drivers/ras/cec.c                    | 9 ++++++---
 10 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 06240cbe6f3e..d3d11d1e52b3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -579,8 +579,10 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
 		return NOTIFY_DONE;
 
 	pfn = mce->addr >> PAGE_SHIFT;
-	if (!memory_failure(pfn, 0))
+	if (!memory_failure(pfn, 0)) {
 		set_mce_nospec(pfn);
+		mce->kflags |= MCE_HANDLED_UC;
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 7c8958dee103..f1bf7535ead7 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -43,6 +43,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 	struct mce *mce = (struct mce *)data;
 	unsigned int entry;
 
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
+
 	mutex_lock(&mce_chrdev_read_mutex);
 
 	entry = mcelog.next;
@@ -60,6 +63,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
 	mcelog.entry[entry].finished = 1;
+	mcelog.entry[entry].kflags = 0;
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
@@ -67,6 +71,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 unlock:
 	mutex_unlock(&mce_chrdev_read_mutex);
 
+	mce->kflags |= MCE_HANDLED_MCELOG;
 	return NOTIFY_OK;
 }
 
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 8596a106a933..9cc3c1f92db5 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -146,7 +146,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	static u32 err_seq;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
-	if (estatus == NULL)
+	if (estatus == NULL || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
 
 	memcpy(elog_buf, (void *)estatus, ELOG_ENTRY_LEN);
@@ -176,7 +176,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	}
 
 out:
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EXTLOG;
+	return NOTIFY_OK;
 }
 
 static bool __init extlog_get_l1addr(void)
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index f0ae48515b48..ee8d9973f60b 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -76,6 +76,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 			 */
 			acpi_nfit_ars_rescan(acpi_desc, 0);
 		}
+		mce->kflags |= MCE_HANDLED_NFIT;
 		break;
 	}
 
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index b3135b208f9a..5860ca41185c 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1815,7 +1815,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 	struct mem_ctl_info *mci;
 
 	i7_dev = get_i7core_dev(mce->socketid);
-	if (!i7_dev)
+	if (!i7_dev || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
 
 	mci = i7_dev->mci;
@@ -1834,7 +1834,8 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 	i7core_check_error(mci, mce);
 
 	/* Advise mcelog that the errors were handled */
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block i7_mce_dec = {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea980c556f2e..e31e4db64e1b 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1067,8 +1067,12 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 	unsigned int fam = x86_family(m->cpuid);
 	int ecc;
 
-	if (ignore_mce(m))
+	if (ignore_mce(m)) {
+		m->kflags |= MCE_HANDLED_EDAC;
 		return NOTIFY_STOP;
+	}
+	if (m->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
 
 	pr_emerg(HW_ERR "%s\n", decode_error_status(m));
 
@@ -1170,7 +1174,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
  err_code:
 	amd_decode_err_code(m->status & 0xffff);
 
-	return NOTIFY_STOP;
+	m->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block amd_mce_dec_nb = {
diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c
index 933f7722b893..77ad315c7e8d 100644
--- a/drivers/edac/pnd2_edac.c
+++ b/drivers/edac/pnd2_edac.c
@@ -1400,7 +1400,7 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 		return NOTIFY_DONE;
 
 	mci = pnd2_mci;
-	if (!mci)
+	if (!mci || (mce->kflags & MCE_HANDLED_CEC))
 		return NOTIFY_DONE;
 
 	/*
@@ -1429,7 +1429,8 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo
 	pnd2_mce_output_error(mci, mce, &daddr);
 
 	/* Advice mcelog that the error were handled */
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block pnd2_mce_dec = {
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 4957e8ee1879..6e17f601ea63 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3136,6 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
 
 	/*
 	 * Just let mcelog handle it if the error is
@@ -3183,7 +3185,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 	sbridge_mce_output_error(mci, mce);
 
 	/* Advice mcelog that the error were handled */
-	return NOTIFY_STOP;
+	mce->kflags |= MCE_HANDLED_EDAC;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block sbridge_mce_dec = {
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 99bbaf629b8d..6f08a12f6b11 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -577,6 +577,9 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
 		return NOTIFY_DONE;
 
+	if (mce->kflags & MCE_HANDLED_CEC)
+		return NOTIFY_DONE;
+
 	/* ignore unless this is memory related with an address */
 	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
 		return NOTIFY_DONE;
@@ -616,6 +619,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	skx_mce_output_error(mci, mce, &res);
 
+	mce->kflags |= MCE_HANDLED_EDAC;
 	return NOTIFY_DONE;
 }
 
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index d7f6718cbf8d..e061962d3c58 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -538,9 +538,12 @@ static int cec_notifier(struct notifier_block *nb, unsigned long val,
 	/* We eat only correctable DRAM errors with usable addresses. */
 	if (mce_is_memory_error(m) &&
 	    mce_is_correctable(m)  &&
-	    mce_usable_address(m))
-		if (!cec_add_elem(m->addr >> PAGE_SHIFT))
-			return NOTIFY_STOP;
+	    mce_usable_address(m)) {
+		if (!cec_add_elem(m->addr >> PAGE_SHIFT)) {
+			m->kflags |= MCE_HANDLED_CEC;
+			return NOTIFY_OK;
+		}
+	}
 
 	return NOTIFY_DONE;
 }
-- 
2.21.1


^ permalink raw reply related

* [PATCH v2 39/42] example for future extension: mm:gup/writeback: add callbacks for inaccessible pages: error cases
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Andrew Morton
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik, Andrea Arcangeli, linux-mm, Will Deacon,
	Sean Christopherson
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

From: Claudio Imbrenda <imbrenda@linux.ibm.com>

This is a potential extension to do error handling if we fail to
make the page accessible if we know what others need.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 mm/gup.c            | 17 ++++++++++++-----
 mm/page-writeback.c |  6 +++++-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index a1c15d029f7c..354bcfbd844b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -193,6 +193,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t *ptep, pte;
+	int ret;
 
 	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
 	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
@@ -250,8 +251,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 		if (is_zero_pfn(pte_pfn(pte))) {
 			page = pte_page(pte);
 		} else {
-			int ret;
-
 			ret = follow_pfn_pte(vma, address, ptep, flags);
 			page = ERR_PTR(ret);
 			goto out;
@@ -259,7 +258,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	}
 
 	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
-		int ret;
 		get_page(page);
 		pte_unmap_unlock(ptep, ptl);
 		lock_page(page);
@@ -276,7 +274,12 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = ERR_PTR(-ENOMEM);
 			goto out;
 		}
-		arch_make_page_accessible(page);
+		ret = arch_make_page_accessible(page);
+		if (ret) {
+			put_page(page);
+			page = ERR_PTR(ret);
+			goto out;
+		}
 	}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
@@ -1920,7 +1923,11 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
-		arch_make_page_accessible(page);
+		ret = arch_make_page_accessible(page);
+		if (ret) {
+			put_page(head);
+			goto pte_unmap;
+		}
 		SetPageReferenced(page);
 		pages[*nr] = page;
 		(*nr)++;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4c020e4ae71c..558d7063c117 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2807,7 +2807,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
 	}
 	unlock_page_memcg(page);
-	arch_make_page_accessible(page);
+	/*
+	 * If writeback has been triggered on a page that cannot be made
+	 * accessible, it is too late.
+	 */
+	WARN_ON(arch_make_page_accessible(page));
 	return ret;
 
 }
-- 
2.25.0

^ permalink raw reply related

* [PATCH v2 40/42] example for future extension: mm:gup/writeback: add callbacks for inaccessible pages: source indication
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Andrew Morton
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik, Andrea Arcangeli, linux-mm, Will Deacon,
	Sean Christopherson
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

From: Claudio Imbrenda <imbrenda@linux.ibm.com>

We might want to do different things depending on where we are coming
from.

Cc: Will Deacon <will@kernel.org>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
[borntraeger@de.ibm.com: patch description]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/gfp.h | 8 +++++++-
 mm/gup.c            | 4 ++--
 mm/page-writeback.c | 2 +-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index be2754841369..a15fcb361e7c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -485,8 +485,14 @@ static inline void arch_free_page(struct page *page, int order) { }
 #ifndef HAVE_ARCH_ALLOC_PAGE
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
+enum access_type {
+	MAKE_ACCESSIBLE_GENERIC,
+	MAKE_ACCESSIBLE_GET,
+	MAKE_ACCESSIBLE_GET_FAST,
+	MAKE_ACCESSIBLE_WRITEBACK
+};
 #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-static inline int arch_make_page_accessible(struct page *page)
+static inline int arch_make_page_accessible(struct page *page, int where)
 {
 	return 0;
 }
diff --git a/mm/gup.c b/mm/gup.c
index 354bcfbd844b..ce962c155724 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -274,7 +274,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 			page = ERR_PTR(-ENOMEM);
 			goto out;
 		}
-		ret = arch_make_page_accessible(page);
+		ret = arch_make_page_accessible(page, MAKE_ACCESSIBLE_GET);
 		if (ret) {
 			put_page(page);
 			page = ERR_PTR(ret);
@@ -1923,7 +1923,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
 
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
-		ret = arch_make_page_accessible(page);
+		ret = arch_make_page_accessible(page, MAKE_ACCESSIBLE_GET_FAST);
 		if (ret) {
 			put_page(head);
 			goto pte_unmap;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 558d7063c117..f85148e59800 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2811,7 +2811,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 	 * If writeback has been triggered on a page that cannot be made
 	 * accessible, it is too late.
 	 */
-	WARN_ON(arch_make_page_accessible(page));
+	WARN_ON(arch_make_page_accessible(page, MAKE_ACCESSIBLE_WRITEBACK));
 	return ret;
 
 }
-- 
2.25.0

^ permalink raw reply related

* [PATCH v2 42/42] KVM: s390: rstify new ioctls in api.rst
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

We also need to rstify the new ioctls that we added in parallel to the
rstification of the kvm docs.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virt/kvm/api.rst | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index a82166e5f7d9..89e9c34bc5d3 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4613,35 +4613,38 @@ unpins the VPA pages and releases all the device pages that are used to
 track the secure pages by hypervisor.
 
 4.122 KVM_S390_NORMAL_RESET
+---------------------------
 
-Capability: KVM_CAP_S390_VCPU_RESETS
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: KVM_CAP_S390_VCPU_RESETS
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
 
 This ioctl resets VCPU registers and control structures according to
 the cpu reset definition in the POP (Principles Of Operation).
 
 4.123 KVM_S390_INITIAL_RESET
+----------------------------
 
-Capability: none
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: none
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
 
 This ioctl resets VCPU registers and control structures according to
 the initial cpu reset definition in the POP. However, the cpu is not
 put into ESA mode. This reset is a superset of the normal reset.
 
 4.124 KVM_S390_CLEAR_RESET
+--------------------------
 
-Capability: KVM_CAP_S390_VCPU_RESETS
-Architectures: s390
-Type: vcpu ioctl
-Parameters: none
-Returns: 0
+:Capability: KVM_CAP_S390_VCPU_RESETS
+:Architectures: s390
+:Type: vcpu ioctl
+:Parameters: none
+:Returns: 0
 
 This ioctl resets VCPU registers and control structures according to
 the clear cpu reset definition in the POP. However, the cpu is not put
-- 
2.25.0

^ permalink raw reply related

* [PATCH v2 41/42] potential fixup for "s390/mm: provide memory management functions for protected KVM guests"
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

From: Claudio Imbrenda <imbrenda@linux.ibm.com>

Needed when "example for future extension: mm:gup/writeback: add
callbacks for inaccessible pages: source indication" is applied.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/page.h | 2 +-
 arch/s390/kernel/uv.c        | 2 +-
 arch/s390/mm/fault.c         | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 4ebcf891ff3c..a658487fe8e7 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -154,7 +154,7 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define HAVE_ARCH_ALLOC_PAGE
 
 #if IS_ENABLED(CONFIG_PGSTE)
-int arch_make_page_accessible(struct page *page);
+int arch_make_page_accessible(struct page *page, int where);
 #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
 #endif
 
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index fb606b171f42..5ace77694ed3 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
 /**
  * To be called with the page locked or with an extra reference!
  */
-int arch_make_page_accessible(struct page *page)
+int arch_make_page_accessible(struct page *page, int where)
 {
 	int rc = 0;
 
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7bd86ebc882f..1f31025bc2cf 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -842,7 +842,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 			up_read(&mm->mmap_sem);
 			break;
 		}
-		if (arch_make_page_accessible(page))
+		if (arch_make_page_accessible(page, MAKE_ACCESSIBLE_GENERIC))
 			send_sig(SIGSEGV, current, 0);
 		put_page(page);
 		up_read(&mm->mmap_sem);
@@ -851,7 +851,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 		page = phys_to_page(addr);
 		if (unlikely(!try_get_page(page)))
 			break;
-		rc = arch_make_page_accessible(page);
+		rc = arch_make_page_accessible(page, MAKE_ACCESSIBLE_GENERIC);
 		put_page(page);
 		if (rc)
 			BUG();
-- 
2.25.0

^ permalink raw reply related

* [PATCH v2 37/42] s390/uv: Fix handling of length extensions (already in s390 tree)
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik, stable
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

The query parameter block might contain additional information and can
be extended in the future. If the size of the block does not suffice we
get an error code of rc=0x100.  The buffer will contain all information
up to the specified size and the hypervisor/guest simply do not need the
additional information as they do not know about the new data.  That
means that we can (and must) accept rc=0x100 as success.

Cc: stable@vger.kernel.org
Fixes: 5abb9351dfd9 ("s390/uv: introduce guest side ultravisor code")
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/boot/uv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index af9e1cc93c68..c003593664cd 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -21,7 +21,7 @@ void uv_query_info(void)
 	if (!test_facility(158))
 		return;
 
-	if (uv_call(0, (uint64_t)&uvcb))
+	if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
 		return;
 
 	if (IS_ENABLED(CONFIG_KVM)) {
-- 
2.25.0

^ permalink raw reply related

* [PATCH v2 36/42] DOCUMENTATION: Protected virtual machine introduction and IPL
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik, Janosch Frank
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

From: Janosch Frank <frankja@linux.ibm.com>

Add documentation about protected KVM guests and description of changes
that are necessary to move a KVM VM into Protected Virtualization mode.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
[borntraeger@de.ibm.com: fixing and conversion to rst]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virt/kvm/index.rst        |   2 +
 Documentation/virt/kvm/s390-pv-boot.rst |  83 +++++++++++++++++
 Documentation/virt/kvm/s390-pv.rst      | 116 ++++++++++++++++++++++++
 MAINTAINERS                             |   1 +
 4 files changed, 202 insertions(+)
 create mode 100644 Documentation/virt/kvm/s390-pv-boot.rst
 create mode 100644 Documentation/virt/kvm/s390-pv.rst

diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
index 774deaebf7fa..dcc252634cf9 100644
--- a/Documentation/virt/kvm/index.rst
+++ b/Documentation/virt/kvm/index.rst
@@ -18,6 +18,8 @@ KVM
    nested-vmx
    ppc-pv
    s390-diag
+   s390-pv
+   s390-pv-boot
    timekeeping
    vcpu-requests
 
diff --git a/Documentation/virt/kvm/s390-pv-boot.rst b/Documentation/virt/kvm/s390-pv-boot.rst
new file mode 100644
index 000000000000..b762df206ab7
--- /dev/null
+++ b/Documentation/virt/kvm/s390-pv-boot.rst
@@ -0,0 +1,83 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+s390 (IBM Z) Boot/IPL of Protected VMs
+======================================
+
+Summary
+-------
+The memory of Protected Virtual Machines (PVMs) is not accessible to
+I/O or the hypervisor. In those cases where the hypervisor needs to
+access the memory of a PVM, that memory must be made accessible.
+Memory made accessible to the hypervisor will be encrypted. See
+:doc:`s390-pv` for details.
+
+On IPL (boot) a small plaintext bootloader is started, which provides
+information about the encrypted components and necessary metadata to
+KVM to decrypt the protected virtual machine.
+
+Based on this data, KVM will make the protected virtual machine known
+to the Ultravisor(UV) and instruct it to secure the memory of the PVM,
+decrypt the components and verify the data and address list hashes, to
+ensure integrity. Afterwards KVM can run the PVM via the SIE
+instruction which the UV will intercept and execute on KVM's behalf.
+
+As the guest image is just like an opaque kernel image that does the
+switch into PV mode itself, the user can load encrypted guest
+executables and data via every available method (network, dasd, scsi,
+direct kernel, ...) without the need to change the boot process.
+
+
+Diag308
+-------
+This diagnose instruction is the basic mechanism to handle IPL and
+related operations for virtual machines. The VM can set and retrieve
+IPL information blocks, that specify the IPL method/devices and
+request VM memory and subsystem resets, as well as IPLs.
+
+For PVMs this concept has been extended with new subcodes:
+
+Subcode 8: Set an IPL Information Block of type 5 (information block
+for PVMs)
+Subcode 9: Store the saved block in guest memory
+Subcode 10: Move into Protected Virtualization mode
+
+The new PV load-device-specific-parameters field specifies all data
+that is necessary to move into PV mode.
+
+* PV Header origin
+* PV Header length
+* List of Components composed of
+   * AES-XTS Tweak prefix
+   * Origin
+   * Size
+
+The PV header contains the keys and hashes, which the UV will use to
+decrypt and verify the PV, as well as control flags and a start PSW.
+
+The components are for instance an encrypted kernel, kernel parameters
+and initrd. The components are decrypted by the UV.
+
+After the initial import of the encrypted data, all defined pages will
+contain the guest content. All non-specified pages will start out as
+zero pages on first access.
+
+
+When running in protected virtualization mode, some subcodes will result in
+exceptions or return error codes.
+
+Subcodes 4 and 7, which specify operations that do not clear the guest
+memory, will result in specification exceptions. This is because the
+UV will clear all memory when a secure VM is removed, and therefore
+non-clearing IPL subcodes are not allowed.
+
+Subcodes 8, 9, 10 will result in specification exceptions.
+Re-IPL into a protected mode is only possible via a detour into non
+protected mode.
+
+Keys
+----
+Every CEC will have a unique public key to enable tooling to build
+encrypted images.
+See  `s390-tools <https://github.com/ibm-s390-tools/s390-tools/>`_
+for the tooling.
diff --git a/Documentation/virt/kvm/s390-pv.rst b/Documentation/virt/kvm/s390-pv.rst
new file mode 100644
index 000000000000..27fe03eaeaad
--- /dev/null
+++ b/Documentation/virt/kvm/s390-pv.rst
@@ -0,0 +1,116 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+s390 (IBM Z) Ultravisor and Protected VMs
+=========================================
+
+Summary
+-------
+Protected virtual machines (PVM) are KVM VMs that do not allow KVM to
+access VM state like guest memory or guest registers. Instead, the
+PVMs are mostly managed by a new entity called Ultravisor (UV). The UV
+provides an API that can be used by PVMs and KVM to request management
+actions.
+
+Each guest starts in the non-protected mode and then may make a
+request to transition into protected mode. On transition, KVM
+registers the guest and its VCPUs with the Ultravisor and prepares
+everything for running it.
+
+The Ultravisor will secure and decrypt the guest's boot memory
+(i.e. kernel/initrd). It will safeguard state changes like VCPU
+starts/stops and injected interrupts while the guest is running.
+
+As access to the guest's state, such as the SIE state description, is
+normally needed to be able to run a VM, some changes have been made in
+the behavior of the SIE instruction. A new format 4 state description
+has been introduced, where some fields have different meanings for a
+PVM. SIE exits are minimized as much as possible to improve speed and
+reduce exposed guest state.
+
+
+Interrupt injection
+-------------------
+Interrupt injection is safeguarded by the Ultravisor. As KVM doesn't
+have access to the VCPUs' lowcores, injection is handled via the
+format 4 state description.
+
+Machine check, external, IO and restart interruptions each can be
+injected on SIE entry via a bit in the interrupt injection control
+field (offset 0x54). If the guest cpu is not enabled for the interrupt
+at the time of injection, a validity interception is recognized. The
+format 4 state description contains fields in the interception data
+block where data associated with the interrupt can be transported.
+
+Program and Service Call exceptions have another layer of
+safeguarding; they can only be injected for instructions that have
+been intercepted into KVM. The exceptions need to be a valid outcome
+of an instruction emulation by KVM, e.g. we can never inject an
+addressing exception as they are reported by SIE since KVM has no
+access to the guest memory.
+
+
+Mask notification interceptions
+-------------------------------
+In order to be notified when a PVM enables a certain class of
+interrupt, KVM cannot intercept lctl(g) and lpsw(e) anymore. As a
+replacement, two new interception codes have been introduced: One
+indicating that the contents of CRs 0, 6, or 14 have been changed,
+indicating different interruption subclasses; and one indicating that
+PSW bit 13 has been changed, indicating that a machine check
+intervention was requested and those are now enabled.
+
+Instruction emulation
+---------------------
+With the format 4 state description for PVMs, the SIE instruction already
+interprets more instructions than it does with format 2. It is not able
+to interpret every instruction, but needs to hand some tasks to KVM;
+therefore, the SIE and the ultravisor safeguard emulation inputs and outputs.
+
+The control structures associated with SIE provide the Secure
+Instruction Data Area (SIDA), the Interception Parameters (IP) and the
+Secure Interception General Register Save Area.  Guest GRs and most of
+the instruction data, such as I/O data structures, are filtered.
+Instruction data is copied to and from the Secure Instruction Data
+Area (SIDA) when needed.  Guest GRs are put into / retrieved from the
+Secure Interception General Register Save Area.
+
+Only GR values needed to emulate an instruction will be copied into this
+save area and the real register numbers will be hidden.
+
+The Interception Parameters state description field still contains the
+bytes of the instruction text, but with pre-set register values
+instead of the actual ones. I.e. each instruction always uses the same
+instruction text, in order not to leak guest instruction text.
+This also implies that the register content that a guest had in r<n>
+may be in r<m> from the hypervisor's point of view.
+
+The Secure Instruction Data Area contains instruction storage
+data. Instruction data, i.e. data being referenced by an instruction
+like the SCCB for sclp, is moved via the SIDA. When an instruction is
+intercepted, the SIE will only allow data and program interrupts for
+this instruction to be moved to the guest via the two data areas
+discussed before. Other data is either ignored or results in validity
+interceptions.
+
+
+Instruction emulation interceptions
+-----------------------------------
+There are two types of SIE secure instruction intercepts: the normal
+and the notification type. Normal secure instruction intercepts will
+make the guest pending for instruction completion of the intercepted
+instruction type, i.e. on SIE entry it is attempted to complete
+emulation of the instruction with the data provided by KVM. That might
+be a program exception or instruction completion.
+
+The notification type intercepts inform KVM about guest environment
+changes due to guest instruction interpretation. Such an interception
+is recognized, for example, for the store prefix instruction to provide
+the new lowcore location. On SIE reentry, any KVM data in the data areas
+is ignored and execution continues as if the guest instruction had
+completed. For that reason KVM is not allowed to inject a program
+interrupt.
+
+Links
+-----
+`KVM Forum 2019 presentation <https://static.sched.com/hosted_files/kvmforum2019/3b/ibm_protected_vms_s390x.pdf>`_
diff --git a/MAINTAINERS b/MAINTAINERS
index 38fe2f3f7b6f..115956e9ac8f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9209,6 +9209,7 @@ L:	kvm@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
 S:	Supported
+F:	Documentation/virt/kvm/s390*
 F:	arch/s390/include/uapi/asm/kvm*
 F:	arch/s390/include/asm/gmap.h
 F:	arch/s390/include/asm/kvm*
-- 
2.25.0

^ permalink raw reply related

* [PATCH v2 38/42] s390: protvirt: Add sysfs firmware interface for Ultravisor information
From: Christian Borntraeger @ 2020-02-14 22:26 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank
  Cc: KVM, Cornelia Huck, David Hildenbrand, Thomas Huth,
	Ulrich Weigand, Claudio Imbrenda, linux-s390, Michael Mueller,
	Vasily Gorbik, Janosch Frank
In-Reply-To: <20200214222658.12946-1-borntraeger@de.ibm.com>

From: Janosch Frank <frankja@linux.ibm.com>

That information, e.g. the maximum number of guests or installed
Ultravisor facilities, is interesting for QEMU, Libvirt and
administrators.

Let's provide an easily parsable API to get that information.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kernel/uv.c | 86 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 9a6c309864a0..fb606b171f42 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -322,5 +322,91 @@ int arch_make_page_accessible(struct page *page)
 	return rc;
 }
 EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+			uv_info.inst_calls_list[0],
+			uv_info.inst_calls_list[1],
+			uv_info.inst_calls_list[2],
+			uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+	__ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			uv_info.max_guest_cpus);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+	__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%d\n",
+			uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+	__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return snprintf(page, PAGE_SIZE, "%lx\n",
+			uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+	__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+	&uv_query_facilities_attr.attr,
+	&uv_query_max_guest_cpus_attr.attr,
+	&uv_query_max_guest_vms_attr.attr,
+	&uv_query_max_guest_addr_attr.attr,
+	NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+	.attrs = uv_query_attrs,
+};
 
+static struct kset *uv_query_kset;
+struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+	int rc = -ENOMEM;
+
+	if (!test_facility(158))
+		return 0;
+
+	uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+	if (!uv_kobj)
+		return -ENOMEM;
+
+	uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+	if (!uv_query_kset)
+		goto out_kobj;
+
+	rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+	if (!rc)
+		return 0;
+
+	kset_unregister(uv_query_kset);
+out_kobj:
+	kobject_del(uv_kobj);
+	kobject_put(uv_kobj);
+	return rc;
+}
+device_initcall(uv_info_init);
 #endif
-- 
2.25.0

^ permalink raw reply related


This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.