* [PATCH 11/13] random: simplify sysctl declaration with register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:41 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
From: Xiaoming Ni <nixiaoming@huawei.com>
Move random_table sysctl from kernel/sysctl.c to drivers/char/random.c
and use register_sysctl_subdir() to help remove the clutter out of
kernel/sysctl.c.
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
drivers/char/random.c | 14 ++++++++++++--
include/linux/sysctl.h | 1 -
kernel/sysctl.c | 5 -----
3 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index a7cf6aa65908..73fd4b6e9c18 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2101,8 +2101,7 @@ static int proc_do_entropy(struct ctl_table *table, int write,
}
static int sysctl_poolsize = INPUT_POOL_WORDS * 32;
-extern struct ctl_table random_table[];
-struct ctl_table random_table[] = {
+static struct ctl_table random_table[] = {
{
.procname = "poolsize",
.data = &sysctl_poolsize,
@@ -2164,6 +2163,17 @@ struct ctl_table random_table[] = {
#endif
{ }
};
+
+/*
+ * rand_initialize() is called before sysctl_init(),
+ * so we cannot call register_sysctl_subdir() in rand_initialize()
+ */
+static int __init random_sysctls_init(void)
+{
+ register_sysctl_subdir("kernel", "random", random_table);
+ return 0;
+}
+device_initcall(random_sysctls_init);
#endif /* CONFIG_SYSCTL */
struct batched_entropy {
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index e5364b69dd95..33a471b56345 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -216,7 +216,6 @@ extern int unaligned_dump_stack;
extern int no_unaligned_warning;
extern struct ctl_table sysctl_mount_point[];
-extern struct ctl_table random_table[];
#else /* CONFIG_SYSCTL */
static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5c116904feb7..f9a35325d5d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2078,11 +2078,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sysctl_max_threads,
},
- {
- .procname = "random",
- .mode = 0555,
- .child = random_table,
- },
{
.procname = "usermodehelper",
.mode = 0555,
--
2.26.2
^ permalink raw reply related
* [PATCH 10/13] eventpoll: simplify sysctl declaration with register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:41 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
From: Xiaoming Ni <nixiaoming@huawei.com>
Move epoll_table sysctl to fs/eventpoll.c and remove the
clutter out of kernel/sysctl.c by using register_sysctl_subdir().
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
fs/eventpoll.c | 10 +++++++++-
include/linux/poll.h | 2 --
include/linux/sysctl.h | 1 -
kernel/sysctl.c | 7 -------
4 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 12eebcdea9c8..957ebc9700e3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -299,7 +299,7 @@ static LIST_HEAD(tfile_check_list);
static long long_zero;
static long long_max = LONG_MAX;
-struct ctl_table epoll_table[] = {
+static struct ctl_table epoll_table[] = {
{
.procname = "max_user_watches",
.data = &max_user_watches,
@@ -311,6 +311,13 @@ struct ctl_table epoll_table[] = {
},
{ }
};
+
+static void __init epoll_sysctls_init(void)
+{
+ register_sysctl_subdir("fs", "epoll", epoll_table);
+}
+#else
+#define epoll_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static const struct file_operations eventpoll_fops;
@@ -2422,6 +2429,7 @@ static int __init eventpoll_init(void)
/* Allocates slab cache used to allocate "struct eppoll_entry" */
pwq_cache = kmem_cache_create("eventpoll_pwq",
sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL);
+ epoll_sysctls_init();
return 0;
}
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 1cdc32b1f1b0..a9e0e1c2d1f2 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -8,12 +8,10 @@
#include <linux/wait.h>
#include <linux/string.h>
#include <linux/fs.h>
-#include <linux/sysctl.h>
#include <linux/uaccess.h>
#include <uapi/linux/poll.h>
#include <uapi/linux/eventpoll.h>
-extern struct ctl_table epoll_table[]; /* for sysctl */
/* ~832 bytes of stack space used max in sys_select/sys_poll before allocating
additional memory. */
#ifdef __clang__
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index aa01f54d0442..e5364b69dd95 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -217,7 +217,6 @@ extern int no_unaligned_warning;
extern struct ctl_table sysctl_mount_point[];
extern struct ctl_table random_table[];
-extern struct ctl_table epoll_table[];
#else /* CONFIG_SYSCTL */
static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e007375c8a11..5c116904feb7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3001,13 +3001,6 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_EPOLL
- {
- .procname = "epoll",
- .mode = 0555,
- .child = epoll_table,
- },
-#endif
#endif
{
.procname = "protected_symlinks",
--
2.26.2
^ permalink raw reply related
* [PATCH 07/13] test_sysctl: use new sysctl subdir helper register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:41 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
This simplifies the code considerably. The following coccinelle
SmPL grammar rule was used to transform this code.
// pycocci sysctl-subdir.cocci lib/test_sysctl.c
@c1@
expression E1;
identifier subdir, sysctls;
@@
static struct ctl_table subdir[] = {
{
.procname = E1,
.maxlen = 0,
.mode = 0555,
.child = sysctls,
},
{ }
};
@c2@
identifier c1.subdir;
expression E2;
identifier base;
@@
static struct ctl_table base[] = {
{
.procname = E2,
.maxlen = 0,
.mode = 0555,
.child = subdir,
},
{ }
};
@c3@
identifier c2.base;
identifier header;
@@
header = register_sysctl_table(base);
@r1 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.subdir, c1.sysctls;
@@
-static struct ctl_table subdir[] = {
- {
- .procname = E1,
- .maxlen = 0,
- .mode = 0555,
- .child = sysctls,
- },
- { }
-};
@r2 depends on c1 && c2 && c3@
identifier c1.subdir;
expression c2.E2;
identifier c2.base;
@@
-static struct ctl_table base[] = {
- {
- .procname = E2,
- .maxlen = 0,
- .mode = 0555,
- .child = subdir,
- },
- { }
-};
@r3 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.sysctls;
expression c2.E2;
identifier c2.base;
identifier c3.header;
@@
header =
-register_sysctl_table(base);
+register_sysctl_subdir(E2, E1, sysctls);
Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
lib/test_sysctl.c | 23 ++---------------------
1 file changed, 2 insertions(+), 21 deletions(-)
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 84eaae22d3a6..b17581307756 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -128,26 +128,6 @@ static struct ctl_table test_table[] = {
{ }
};
-static struct ctl_table test_sysctl_table[] = {
- {
- .procname = "test_sysctl",
- .maxlen = 0,
- .mode = 0555,
- .child = test_table,
- },
- { }
-};
-
-static struct ctl_table test_sysctl_root_table[] = {
- {
- .procname = "debug",
- .maxlen = 0,
- .mode = 0555,
- .child = test_sysctl_table,
- },
- { }
-};
-
static struct ctl_table_header *test_sysctl_header;
static int __init test_sysctl_init(void)
@@ -155,7 +135,8 @@ static int __init test_sysctl_init(void)
test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL);
if (!test_data.bitmap_0001)
return -ENOMEM;
- test_sysctl_header = register_sysctl_table(test_sysctl_root_table);
+ test_sysctl_header = register_sysctl_subdir("debug", "test_sysctl",
+ test_table);
if (!test_sysctl_header) {
kfree(test_data.bitmap_0001);
return -ENOMEM;
--
2.26.2
^ permalink raw reply related
* [PATCH 08/13] inotify: simplify sysctl declaration with register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:41 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
From: Xiaoming Ni <nixiaoming@huawei.com>
Move the inotify_user sysctl to inotify_user.c and use the new
register_sysctl_subdir() helper.
Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
fs/notify/inotify/inotify_user.c | 11 ++++++++++-
include/linux/inotify.h | 3 ---
kernel/sysctl.c | 11 -----------
3 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f88bbcc9efeb..64859fbf8463 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -46,7 +46,7 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
#include <linux/sysctl.h>
-struct ctl_table inotify_table[] = {
+static struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
.data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
@@ -73,6 +73,14 @@ struct ctl_table inotify_table[] = {
},
{ }
};
+
+static void __init inotify_sysctls_init(void)
+{
+ register_sysctl_subdir("fs", "inotify", inotify_table);
+}
+
+#else
+#define inotify_sysctls_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
static inline __u32 inotify_arg_to_mask(u32 arg)
@@ -826,6 +834,7 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
+ inotify_sysctls_init();
return 0;
}
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 6a24905f6e1e..8d20caa1b268 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -7,11 +7,8 @@
#ifndef _LINUX_INOTIFY_H
#define _LINUX_INOTIFY_H
-#include <linux/sysctl.h>
#include <uapi/linux/inotify.h>
-extern struct ctl_table inotify_table[]; /* for sysctl */
-
#define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
IN_MOVED_TO | IN_CREATE | IN_DELETE | \
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 04ff032f2863..30c2d521502a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -123,10 +123,6 @@ static const int maxolduid = 65535;
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
-#ifdef CONFIG_INOTIFY_USER
-#include <linux/inotify.h>
-#endif
-
#ifdef CONFIG_PROC_SYSCTL
/**
@@ -3012,13 +3008,6 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_INOTIFY_USER
- {
- .procname = "inotify",
- .mode = 0555,
- .child = inotify_table,
- },
-#endif
#ifdef CONFIG_EPOLL
{
.procname = "epoll",
--
2.26.2
^ permalink raw reply related
* [PATCH 06/13] ocfs2: use new sysctl subdir helper register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:41 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
This simplifies the code considerably. The following coccinelle
SmPL grammar rule was used to transform this code.
// pycocci sysctl-subdir.cocci fs/ocfs2/stackglue.c
@c1@
expression E1;
identifier subdir, sysctls;
@@
static struct ctl_table subdir[] = {
{
.procname = E1,
.maxlen = 0,
.mode = 0555,
.child = sysctls,
},
{ }
};
@c2@
identifier c1.subdir;
expression E2;
identifier base;
@@
static struct ctl_table base[] = {
{
.procname = E2,
.maxlen = 0,
.mode = 0555,
.child = subdir,
},
{ }
};
@c3@
identifier c2.base;
identifier header;
@@
header = register_sysctl_table(base);
@r1 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.subdir, c1.sysctls;
@@
-static struct ctl_table subdir[] = {
- {
- .procname = E1,
- .maxlen = 0,
- .mode = 0555,
- .child = sysctls,
- },
- { }
-};
@r2 depends on c1 && c2 && c3@
identifier c1.subdir;
expression c2.E2;
identifier c2.base;
@@
-static struct ctl_table base[] = {
- {
- .procname = E2,
- .maxlen = 0,
- .mode = 0555,
- .child = subdir,
- },
- { }
-};
@r3 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.sysctls;
expression c2.E2;
identifier c2.base;
identifier c3.header;
@@
header =
-register_sysctl_table(base);
+register_sysctl_subdir(E2, E1, sysctls);
Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
fs/ocfs2/stackglue.c | 25 ++-----------------------
1 file changed, 2 insertions(+), 23 deletions(-)
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index a191094694c6..addafced7f59 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -677,28 +677,6 @@ static struct ctl_table ocfs2_mod_table[] = {
},
{ }
};
-
-static struct ctl_table ocfs2_kern_table[] = {
- {
- .procname = "ocfs2",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_mod_table
- },
- { }
-};
-
-static struct ctl_table ocfs2_root_table[] = {
- {
- .procname = "fs",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = ocfs2_kern_table
- },
- { }
-};
static struct ctl_table_header *ocfs2_table_header;
@@ -711,7 +689,8 @@ static int __init ocfs2_stack_glue_init(void)
{
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
- ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
+ ocfs2_table_header = register_sysctl_subdir("fs", "ocfs2",
+ ocfs2_mod_table);
if (!ocfs2_table_header) {
printk(KERN_ERR
"ocfs2 stack glue: unable to register sysctl\n");
--
2.26.2
^ permalink raw reply related
* [PATCH 01/13] sysctl: add new register_sysctl_subdir() helper
From: Luis Chamberlain @ 2020-05-29 7:40 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
Often enough all we need to do is create a subdirectory so that
we can stuff sysctls underneath it. However, *if* that directory
was already created early in the boot sequence we really have no
need to use the full boilerplate code for it, we can just use
local variables to help us guide sysctl to place the new leaf files.
So use a helper to do precisely this.
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
include/linux/sysctl.h | 11 +++++++++++
kernel/sysctl.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index ddaa06ddd852..58bc978d4f03 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -205,6 +205,9 @@ void unregister_sysctl_table(struct ctl_table_header * table);
extern int sysctl_init(void);
extern void register_sysctl_init(const char *path, struct ctl_table *table,
const char *table_name);
+extern struct ctl_table_header *register_sysctl_subdir(const char *base,
+ const char *subdir,
+ struct ctl_table *table);
void do_sysctl_args(void);
extern int pwrsw_enabled;
@@ -223,6 +226,14 @@ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table *
return NULL;
}
+static
+inline struct ctl_table_header *register_sysctl_subdir(const char *base,
+ const char *subdir,
+ struct ctl_table *table)
+{
+ return NULL;
+}
+
static inline struct ctl_table_header *register_sysctl_paths(
const struct ctl_path *path, struct ctl_table *table)
{
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 008ac0576ae5..04ff032f2863 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3195,6 +3195,43 @@ void __init register_sysctl_init(const char *path, struct ctl_table *table,
}
kmemleak_not_leak(hdr);
}
+
+struct ctl_table_header *register_sysctl_subdir(const char *base,
+ const char *subdir,
+ struct ctl_table *table)
+{
+ struct ctl_table_header *hdr = NULL;
+ struct ctl_table subdir_table[] = {
+ {
+ .procname = subdir,
+ .mode = 0555,
+ .child = table,
+ },
+ { }
+ };
+ struct ctl_table base_table[] = {
+ {
+ .procname = base,
+ .mode = 0555,
+ .child = subdir_table,
+ },
+ { }
+ };
+
+ if (!table->procname)
+ goto out;
+
+ hdr = register_sysctl_table(base_table);
+ if (unlikely(!hdr)) {
+ pr_err("failed when creating subdirectory sysctl %s/%s/%s\n",
+ base, subdir, table->procname);
+ goto out;
+ }
+ kmemleak_not_leak(hdr);
+out:
+ return hdr;
+}
+EXPORT_SYMBOL_GPL(register_sysctl_subdir);
#endif /* CONFIG_SYSCTL */
/*
* No sense putting this after each symbol definition, twice,
--
2.26.2
^ permalink raw reply related
* [PATCH 05/13] macintosh/mac_hid.c: use new sysctl subdir helper register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:41 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
This simplifies the code considerably. The following coccinelle
SmPL grammar rule was used to transform this code.
// pycocci sysctl-subdir.cocci drivers/macintosh/mac_hid.c
@c1@
expression E1;
identifier subdir, sysctls;
@@
static struct ctl_table subdir[] = {
{
.procname = E1,
.maxlen = 0,
.mode = 0555,
.child = sysctls,
},
{ }
};
@c2@
identifier c1.subdir;
expression E2;
identifier base;
@@
static struct ctl_table base[] = {
{
.procname = E2,
.maxlen = 0,
.mode = 0555,
.child = subdir,
},
{ }
};
@c3@
identifier c2.base;
identifier header;
@@
header = register_sysctl_table(base);
@r1 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.subdir, c1.sysctls;
@@
-static struct ctl_table subdir[] = {
- {
- .procname = E1,
- .maxlen = 0,
- .mode = 0555,
- .child = sysctls,
- },
- { }
-};
@r2 depends on c1 && c2 && c3@
identifier c1.subdir;
expression c2.E2;
identifier c2.base;
@@
-static struct ctl_table base[] = {
- {
- .procname = E2,
- .maxlen = 0,
- .mode = 0555,
- .child = subdir,
- },
- { }
-};
@r3 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.sysctls;
expression c2.E2;
identifier c2.base;
identifier c3.header;
@@
header =
-register_sysctl_table(base);
+register_sysctl_subdir(E2, E1, sysctls);
Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
drivers/macintosh/mac_hid.c | 25 ++-----------------------
1 file changed, 2 insertions(+), 23 deletions(-)
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 28b8581b44dd..736d0e151716 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -239,33 +239,12 @@ static struct ctl_table mac_hid_files[] = {
{ }
};
-/* dir in /proc/sys/dev */
-static struct ctl_table mac_hid_dir[] = {
- {
- .procname = "mac_hid",
- .maxlen = 0,
- .mode = 0555,
- .child = mac_hid_files,
- },
- { }
-};
-
-/* /proc/sys/dev itself, in case that is not there yet */
-static struct ctl_table mac_hid_root_dir[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = mac_hid_dir,
- },
- { }
-};
-
static struct ctl_table_header *mac_hid_sysctl_header;
static int __init mac_hid_init(void)
{
- mac_hid_sysctl_header = register_sysctl_table(mac_hid_root_dir);
+ mac_hid_sysctl_header = register_sysctl_subdir("dev", "mac_hid",
+ mac_hid_files);
if (!mac_hid_sysctl_header)
return -ENOMEM;
--
2.26.2
^ permalink raw reply related
* [PATCH 04/13] i915: use new sysctl subdir helper register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:40 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
This simplifies the code considerably. The following coccinelle
SmPL grammar rule was used to transform this code.
// pycocci sysctl-subdir.cocci drivers/gpu/drm/i915/i915_perf.c
@c1@
expression E1;
identifier subdir, sysctls;
@@
static struct ctl_table subdir[] = {
{
.procname = E1,
.maxlen = 0,
.mode = 0555,
.child = sysctls,
},
{ }
};
@c2@
identifier c1.subdir;
expression E2;
identifier base;
@@
static struct ctl_table base[] = {
{
.procname = E2,
.maxlen = 0,
.mode = 0555,
.child = subdir,
},
{ }
};
@c3@
identifier c2.base;
identifier header;
@@
header = register_sysctl_table(base);
@r1 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.subdir, c1.sysctls;
@@
-static struct ctl_table subdir[] = {
- {
- .procname = E1,
- .maxlen = 0,
- .mode = 0555,
- .child = sysctls,
- },
- { }
-};
@r2 depends on c1 && c2 && c3@
identifier c1.subdir;
expression c2.E2;
identifier c2.base;
@@
-static struct ctl_table base[] = {
- {
- .procname = E2,
- .maxlen = 0,
- .mode = 0555,
- .child = subdir,
- },
- { }
-};
@r3 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.sysctls;
expression c2.E2;
identifier c2.base;
identifier c3.header;
@@
header =
-register_sysctl_table(base);
+register_sysctl_subdir(E2, E1, sysctls);
Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
drivers/gpu/drm/i915/i915_perf.c | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 665bb076e84d..52509b573794 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4203,26 +4203,6 @@ static struct ctl_table oa_table[] = {
{}
};
-static struct ctl_table i915_root[] = {
- {
- .procname = "i915",
- .maxlen = 0,
- .mode = 0555,
- .child = oa_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = i915_root,
- },
- {}
-};
-
/**
* i915_perf_init - initialize i915-perf state on module bind
* @i915: i915 device instance
@@ -4383,7 +4363,7 @@ static int destroy_config(int id, void *p, void *data)
void i915_perf_sysctl_register(void)
{
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl_subdir("dev", "i915", oa_table);
}
void i915_perf_sysctl_unregister(void)
--
2.26.2
^ permalink raw reply related
* [PATCH 02/13] cdrom: use new sysctl subdir helper register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:40 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
This simplifies the code considerably. The following coccinelle
SmPL grammar rule was used to transform this code.
// pycocci sysctl-subdir.cocci drivers/cdrom/cdrom.c
@c1@
expression E1;
identifier subdir, sysctls;
@@
static struct ctl_table subdir[] = {
{
.procname = E1,
.maxlen = 0,
.mode = 0555,
.child = sysctls,
},
{ }
};
@c2@
identifier c1.subdir;
expression E2;
identifier base;
@@
static struct ctl_table base[] = {
{
.procname = E2,
.maxlen = 0,
.mode = 0555,
.child = subdir,
},
{ }
};
@c3@
identifier c2.base;
identifier header;
@@
header = register_sysctl_table(base);
@r1 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.subdir, c1.sysctls;
@@
-static struct ctl_table subdir[] = {
- {
- .procname = E1,
- .maxlen = 0,
- .mode = 0555,
- .child = sysctls,
- },
- { }
-};
@r2 depends on c1 && c2 && c3@
identifier c1.subdir;
expression c2.E2;
identifier c2.base;
@@
-static struct ctl_table base[] = {
- {
- .procname = E2,
- .maxlen = 0,
- .mode = 0555,
- .child = subdir,
- },
- { }
-};
@r3 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.sysctls;
expression c2.E2;
identifier c2.base;
identifier c3.header;
@@
header =
-register_sysctl_table(base);
+register_sysctl_subdir(E2, E1, sysctls);
Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
drivers/cdrom/cdrom.c | 23 ++---------------------
1 file changed, 2 insertions(+), 21 deletions(-)
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index a0a7ae705de8..3c638f464cef 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3719,26 +3719,6 @@ static struct ctl_table cdrom_table[] = {
{ }
};
-static struct ctl_table cdrom_cdrom_table[] = {
- {
- .procname = "cdrom",
- .maxlen = 0,
- .mode = 0555,
- .child = cdrom_table,
- },
- { }
-};
-
-/* Make sure that /proc/sys/dev is there */
-static struct ctl_table cdrom_root_table[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = cdrom_cdrom_table,
- },
- { }
-};
static struct ctl_table_header *cdrom_sysctl_header;
static void cdrom_sysctl_register(void)
@@ -3748,7 +3728,8 @@ static void cdrom_sysctl_register(void)
if (!atomic_add_unless(&initialized, 1, 1))
return;
- cdrom_sysctl_header = register_sysctl_table(cdrom_root_table);
+ cdrom_sysctl_header = register_sysctl_subdir("dev", "cdrom",
+ cdrom_table);
/* set the defaults */
cdrom_sysctl_settings.autoclose = autoclose;
--
2.26.2
^ permalink raw reply related
* [PATCH 03/13] hpet: use new sysctl subdir helper register_sysctl_subdir()
From: Luis Chamberlain @ 2020-05-29 7:40 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
In-Reply-To: <20200529074108.16928-1-mcgrof@kernel.org>
This simplifies the code considerably. The following coccinelle
SmPL grammar rule was used to transform this code.
// pycocci sysctl-subdir.cocci drivers/char/hpet.c
@c1@
expression E1;
identifier subdir, sysctls;
@@
static struct ctl_table subdir[] = {
{
.procname = E1,
.maxlen = 0,
.mode = 0555,
.child = sysctls,
},
{ }
};
@c2@
identifier c1.subdir;
expression E2;
identifier base;
@@
static struct ctl_table base[] = {
{
.procname = E2,
.maxlen = 0,
.mode = 0555,
.child = subdir,
},
{ }
};
@c3@
identifier c2.base;
identifier header;
@@
header = register_sysctl_table(base);
@r1 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.subdir, c1.sysctls;
@@
-static struct ctl_table subdir[] = {
- {
- .procname = E1,
- .maxlen = 0,
- .mode = 0555,
- .child = sysctls,
- },
- { }
-};
@r2 depends on c1 && c2 && c3@
identifier c1.subdir;
expression c2.E2;
identifier c2.base;
@@
-static struct ctl_table base[] = {
- {
- .procname = E2,
- .maxlen = 0,
- .mode = 0555,
- .child = subdir,
- },
- { }
-};
@r3 depends on c1 && c2 && c3@
expression c1.E1;
identifier c1.sysctls;
expression c2.E2;
identifier c2.base;
identifier c3.header;
@@
header =
-register_sysctl_table(base);
+register_sysctl_subdir(E2, E1, sysctls);
Generated-by: Coccinelle SmPL
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---
drivers/char/hpet.c | 22 +---------------------
1 file changed, 1 insertion(+), 21 deletions(-)
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index ed3b7dab678d..169c970d5ff8 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -746,26 +746,6 @@ static struct ctl_table hpet_table[] = {
{}
};
-static struct ctl_table hpet_root[] = {
- {
- .procname = "hpet",
- .maxlen = 0,
- .mode = 0555,
- .child = hpet_table,
- },
- {}
-};
-
-static struct ctl_table dev_root[] = {
- {
- .procname = "dev",
- .maxlen = 0,
- .mode = 0555,
- .child = hpet_root,
- },
- {}
-};
-
static struct ctl_table_header *sysctl_header;
/*
@@ -1059,7 +1039,7 @@ static int __init hpet_init(void)
if (result < 0)
return -ENODEV;
- sysctl_header = register_sysctl_table(dev_root);
+ sysctl_header = register_sysctl_subdir("dev", "hpet", hpet_table);
result = acpi_bus_register_driver(&hpet_acpi_driver);
if (result < 0) {
--
2.26.2
^ permalink raw reply related
* [PATCH 00/13] sysctl: spring cleaning
From: Luis Chamberlain @ 2020-05-29 7:40 UTC (permalink / raw)
To: keescook, yzaikin, nixiaoming, ebiederm, axboe, clemens, arnd,
gregkh, jani.nikula, joonas.lahtinen, rodrigo.vivi, airlied,
daniel, benh, rdna, viro, mark, jlbec, joseph.qi, vbabka, sfr,
jack, amir73il, rafael, tytso
Cc: intel-gfx, linux-kernel, dri-devel, julia.lawall,
Luis Chamberlain, akpm, linuxppc-dev, ocfs2-devel
Xiaoming and I are working on some kernel/sysctl.c spring cleaning.
During a recent linux-next merge conflict it became clear that
the kitchen sink on kernel/sysctl.c creates too many conflicts,
and so we need to do away with stuffing everyone's knobs on this
one file.
This is part of that work. This is not expected to get merged yet, but
since our delta is pretty considerable at this point, we need to post
this piecemeal and collect reviews for what we have so far. This follows up
on some of Xiaoming's recent work.
This series focuses on a new helper to deal with subdirectories and
empty subdirectories. The terminology that we will embrace will be
that things like "fs", "kernel", "debug" are base directories, and
directories underneath these are subdirectories.
In this case, the cleanup ends up also trimming the amount of
code we have for sysctls.
If this seems reasonable we'll kdocify this a bit too.
This code has been boot tested without issues, and I'm letting 0day do
its thing to test against many kconfig builds. If you however spot
any issues please let us know.
Luis Chamberlain (9):
sysctl: add new register_sysctl_subdir() helper
cdrom: use new sysctl subdir helper register_sysctl_subdir()
hpet: use new sysctl subdir helper register_sysctl_subdir()
i915: use new sysctl subdir helper register_sysctl_subdir()
macintosh/mac_hid.c: use new sysctl subdir helper
register_sysctl_subdir()
ocfs2: use new sysctl subdir helper register_sysctl_subdir()
test_sysctl: use new sysctl subdir helper register_sysctl_subdir()
sysctl: add helper to register empty subdir
fs: move binfmt_misc sysctl to its own file
Xiaoming Ni (4):
inotify: simplify sysctl declaration with register_sysctl_subdir()
firmware_loader: simplify sysctl declaration with
register_sysctl_subdir()
eventpoll: simplify sysctl declaration with register_sysctl_subdir()
random: simplify sysctl declaration with register_sysctl_subdir()
drivers/base/firmware_loader/fallback.c | 4 +
drivers/base/firmware_loader/fallback.h | 11 +++
drivers/base/firmware_loader/fallback_table.c | 22 ++++-
drivers/cdrom/cdrom.c | 23 +----
drivers/char/hpet.c | 22 +----
drivers/char/random.c | 14 +++-
drivers/gpu/drm/i915/i915_perf.c | 22 +----
drivers/macintosh/mac_hid.c | 25 +-----
fs/binfmt_misc.c | 1 +
fs/eventpoll.c | 10 ++-
fs/notify/inotify/inotify_user.c | 11 ++-
fs/ocfs2/stackglue.c | 27 +-----
include/linux/inotify.h | 3 -
include/linux/poll.h | 2 -
include/linux/sysctl.h | 21 ++++-
kernel/sysctl.c | 84 +++++++++++--------
lib/test_sysctl.c | 23 +----
17 files changed, 144 insertions(+), 181 deletions(-)
--
2.26.2
^ permalink raw reply
* [PATCH] powerpc/64/syscall: Disable sanitisers for C syscall entry/exit code
From: Daniel Axtens @ 2020-05-29 6:14 UTC (permalink / raw)
To: linuxppc-dev, npiggin; +Cc: ajd, Daniel Axtens
syzkaller is picking up a bunch of crashes that look like this:
Unrecoverable exception 380 at c00000000037ed60 (msr=8000000000001031)
Oops: Unrecoverable exception, sig: 6 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
Modules linked in:
CPU: 0 PID: 874 Comm: syz-executor.0 Not tainted 5.7.0-rc7-syzkaller-00016-gb0c3ba31be3e #0
NIP: c00000000037ed60 LR: c00000000004bac8 CTR: c000000000030990
REGS: c0000000555a7230 TRAP: 0380 Not tainted (5.7.0-rc7-syzkaller-00016-gb0c3ba31be3e)
MSR: 8000000000001031 <SF,ME,IR,DR,LE> CR: 48222882 XER: 20000000
CFAR: c00000000004bac4 IRQMASK: 0
GPR00: c00000000004bb68 c0000000555a74c0 c0000000024b3500 0000000000000005
GPR04: 0000000000000000 0000000000000000 c00000000004bb88 c008000000910000
GPR08: 00000000000b0000 c00000000004bac8 0000000000016000 c000000002503500
GPR12: c000000000030990 c000000003190000 00000000106a5898 00000000106a0000
GPR16: 00000000106a5890 c000000007a92000 c000000008180e00 c000000007a8f700
GPR20: c000000007a904b0 0000000010110000 c00000000259d318 5deadbeef0000100
GPR24: 5deadbeef0000122 c000000078422700 c000000009ee88b8 c000000078422778
GPR28: 0000000000000001 800000000280b033 0000000000000000 c0000000555a75a0
NIP [c00000000037ed60] __sanitizer_cov_trace_pc+0x40/0x50
LR [c00000000004bac8] interrupt_exit_kernel_prepare+0x118/0x310
Call Trace:
[c0000000555a74c0] [c00000000004bb68] interrupt_exit_kernel_prepare+0x1b8/0x310 (unreliable)
[c0000000555a7530] [c00000000000f9a8] interrupt_return+0x118/0x1c0
--- interrupt: 900 at __sanitizer_cov_trace_pc+0x0/0x50
...<random previous call chain>...
That looks like the KCOV helper accessing memory that's not safe to
access in the interrupt handling context.
Do not instrument the new syscall entry/exit code with KCOV, GCOV or
UBSAN.
Cc: Nicholas Piggin <npiggin@gmail.com>
Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C")
Signed-off-by: Daniel Axtens <dja@axtens.net>
---
be warned: I haven't attempted to reproduce the crash yet,
nor have I been able to test that this fixes it. I will attempt to do
that soon. Logically though, it does seem like this would be a
good thing to do regardless.
---
arch/powerpc/kernel/Makefile | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1c4385852d3d..1d443a7dc8a7 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -156,12 +156,19 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o
GCOV_PROFILE_prom_init.o := n
KCOV_INSTRUMENT_prom_init.o := n
UBSAN_SANITIZE_prom_init.o := n
+
GCOV_PROFILE_kprobes.o := n
KCOV_INSTRUMENT_kprobes.o := n
UBSAN_SANITIZE_kprobes.o := n
+
GCOV_PROFILE_kprobes-ftrace.o := n
KCOV_INSTRUMENT_kprobes-ftrace.o := n
UBSAN_SANITIZE_kprobes-ftrace.o := n
+
+GCOV_PROFILE_syscall_64.o := n
+KCOV_INSTRUMENT_syscall_64.o := n
+UBSAN_SANITIZE_syscall_64.o := n
+
UBSAN_SANITIZE_vdso.o := n
# Necessary for booting with kcov enabled on book3e machines
--
2.20.1
^ permalink raw reply related
* [RFC PATCH 2/2] powerpc/pmem: Disable synchronous fault by default.
From: Aneesh Kumar K.V @ 2020-05-29 5:41 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams; +Cc: Aneesh Kumar K.V, oohall
In-Reply-To: <20200529054141.156384-1-aneesh.kumar@linux.ibm.com>
This adds a kernel config option that controls whether MAP_SYNC is enabled by
default. With POWER10, the architecture is adding new pmem flush and sync
instructions. The kernel should prevent the usage of MAP_SYNC if applications
are not using the new instructions on newer hardware.
This config allows user to control whether MAP_SYNC should be enabled by
default or not.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/platforms/Kconfig.cputype | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 27a81c291be8..f8694838ad4e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -383,6 +383,15 @@ config PPC_KUEP
If you're unsure, say Y.
+config ARCH_MAP_SYNC_DISABLE
+ bool "Disable synchronous fault support (MAP_SYNC)"
+ default y
+ help
+ Disable support for synchronous fault with nvdimm namespaces.
+
+ If you're unsure, say Y.
+
+
config PPC_HAVE_KUAP
bool
--
2.26.2
^ permalink raw reply related
* [RFC PATCH 1/2] libnvdimm: Add prctl control for disabling synchronous fault support.
From: Aneesh Kumar K.V @ 2020-05-29 5:41 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams; +Cc: Aneesh Kumar K.V, oohall
With POWER10, the architecture is adding new pmem flush and sync instructions.
The kernel should prevent the usage of MAP_SYNC if applications are not using
the new instructions on newer hardware.
This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable
the usage of MAP_SYNC. The kernel config option is added to allow the user
to control whether MAP_SYNC should be enabled by default or not.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
include/linux/sched/coredump.h | 13 ++++++++++---
include/uapi/linux/prctl.h | 3 +++
kernel/fork.c | 8 +++++++-
kernel/sys.c | 18 ++++++++++++++++++
mm/Kconfig | 3 +++
mm/mmap.c | 4 ++++
6 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ecdc6542070f..9ba6b3d5f991 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */
-#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
+#define MMF_DISABLE_MAP_SYNC 27 /* disable MAP_SYNC for this mm */
+#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
+#define MMF_DISABLE_MAP_SYNC_MASK (1 << MMF_DISABLE_MAP_SYNC)
-#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
- MMF_DISABLE_THP_MASK)
+#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \
+ MMF_DISABLE_THP_MASK | MMF_DISABLE_MAP_SYNC_MASK)
+
+static inline bool map_sync_enabled(struct mm_struct *mm)
+{
+ return !(mm->flags & MMF_DISABLE_MAP_SYNC_MASK);
+}
#endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..ee4cde32d5cf 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,4 +238,7 @@ struct prctl_mm_map {
#define PR_SET_IO_FLUSHER 57
#define PR_GET_IO_FLUSHER 58
+#define PR_SET_MAP_SYNC_ENABLE 59
+#define PR_GET_MAP_SYNC_ENABLE 60
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..d5a9a363e81e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
+#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE
+unsigned long default_map_sync_mask = MMF_DISABLE_MAP_SYNC_MASK;
+#else
+unsigned long default_map_sync_mask = 0;
+#endif
+
static int __init coredump_filter_setup(char *s)
{
default_dump_filter =
@@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm->flags = current->mm->flags & MMF_INIT_MASK;
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
} else {
- mm->flags = default_dump_filter;
+ mm->flags = default_dump_filter | default_map_sync_mask;
mm->def_flags = 0;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a..f6127cf4128b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
clear_bit(MMF_DISABLE_THP, &me->mm->flags);
up_write(&me->mm->mmap_sem);
break;
+
+ case PR_GET_MAP_SYNC_ENABLE:
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+ error = !test_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags);
+ break;
+ case PR_SET_MAP_SYNC_ENABLE:
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+ if (down_write_killable(&me->mm->mmap_sem))
+ return -EINTR;
+ if (arg2)
+ clear_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags);
+ else
+ set_bit(MMF_DISABLE_MAP_SYNC, &me->mm->flags);
+ up_write(&me->mm->mmap_sem);
+ break;
+
case PR_MPX_ENABLE_MANAGEMENT:
case PR_MPX_DISABLE_MANAGEMENT:
/* No longer implemented: */
diff --git a/mm/Kconfig b/mm/Kconfig
index c1acc34c1c35..38fd7cfbfca8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD
config MAPPING_DIRTY_HELPERS
bool
+config ARCH_MAP_SYNC_DISABLE
+ bool
+
endmenu
diff --git a/mm/mmap.c b/mm/mmap.c
index f609e9ec4a25..613e5894f178 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1464,6 +1464,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
case MAP_SHARED_VALIDATE:
if (flags & ~flags_mask)
return -EOPNOTSUPP;
+
+ if ((flags & MAP_SYNC) && !map_sync_enabled(mm))
+ return -EOPNOTSUPP;
+
if (prot & PROT_WRITE) {
if (!(file->f_mode & FMODE_WRITE))
return -EACCES;
--
2.26.2
^ permalink raw reply related
* [PATCH v4 8/8] powerpc/pmem: Initialize pmem device on newer hardware
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
With kernel now supporting new pmem flush/sync instructions, we can now
enable the kernel to initialize the device. On P10 these devices would
appear with a new compatible string. For PAPR device we have
compatible "ibm,pmemory-v2"
and for OF pmem device we have
compatible "pmem-region-v2"
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/platforms/pseries/papr_scm.c | 1 +
drivers/nvdimm/of_pmem.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index ad506e7003c9..407e08f08157 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -483,6 +483,7 @@ static int papr_scm_remove(struct platform_device *pdev)
static const struct of_device_id papr_scm_match[] = {
{ .compatible = "ibm,pmemory" },
+ { .compatible = "ibm,pmemory-v2" },
{ },
};
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 6826a274a1f1..10dbdcdfb9ce 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -90,6 +90,7 @@ static int of_pmem_region_remove(struct platform_device *pdev)
static const struct of_device_id of_pmem_region_match[] = {
{ .compatible = "pmem-region" },
+ { .compatible = "pmem-region-v2" },
{ },
};
--
2.26.2
^ permalink raw reply related
* [PATCH v4 7/8] powerpc/book3s/pmem: Add WARN_ONCE to catch the wrong usage of pmem flush functions.
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
We only support persistent memory on P8 and above. This is enforced by the
firmware and further checked on virtualized platforms during platform init.
Add WARN_ONCE in pmem flush routines to catch wrong usage of these.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/cacheflush.h | 2 ++
arch/powerpc/lib/pmem.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index bc3ea009cf14..865fae8a226e 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -125,6 +125,8 @@ static inline void arch_pmem_flush_barrier(void)
{
if (cpu_has_feature(CPU_FTR_ARCH_207S))
asm volatile(PPC_PHWSYNC ::: "memory");
+ else
+ WARN_ONCE(1, "Using pmem flush on older hardware.");
}
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 21210fa676e5..f40bd908d28d 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -37,12 +37,14 @@ static inline void clean_pmem_range(unsigned long start, unsigned long stop)
{
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return __clean_pmem_range(start, stop);
+ WARN_ONCE(1, "Using pmem flush on older hardware.");
}
static inline void flush_pmem_range(unsigned long start, unsigned long stop)
{
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return __flush_pmem_range(start, stop);
+ WARN_ONCE(1, "Using pmem flush on older hardware.");
}
/*
--
2.26.2
^ permalink raw reply related
* [PATCH v4 6/8] powerpc/pmem: Avoid the barrier in flush routines
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
nvdimm expects the flush routines to just mark the cache clean. The barrier
that marks the store globally visible is done in nvdimm_flush().
Update the papr_scm driver to use a simplified nvdimm_flush callback that does
only the required barrier.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/lib/pmem.c | 6 ------
arch/powerpc/platforms/pseries/papr_scm.c | 13 +++++++++++++
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 5a61aaeb6930..21210fa676e5 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
for (i = 0; i < size >> shift; i++, addr += bytes)
asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
-
-
- asm volatile(PPC_PHWSYNC ::: "memory");
}
static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
@@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
for (i = 0; i < size >> shift; i++, addr += bytes)
asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
-
-
- asm volatile(PPC_PHWSYNC ::: "memory");
}
static inline void clean_pmem_range(unsigned long start, unsigned long stop)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f35592423380..ad506e7003c9 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -285,6 +285,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
+/*
+ * We have made sure the pmem writes are done such that before calling this
+ * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. Here
+ * we just need to add the necessary barrier to make sure the above flushes
+ * have updated persistent storage before any data access or data transfer
+ * caused by subsequent instructions is initiated.
+ */
+static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
+{
+ arch_pmem_flush_barrier();
+ return 0;
+}
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
{
@@ -340,6 +352,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
ndr_desc.mapping = &mapping;
ndr_desc.num_mappings = 1;
ndr_desc.nd_set = &p->nd_set;
+ ndr_desc.flush = papr_scm_flush_sync;
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
--
2.26.2
^ permalink raw reply related
* [PATCH v4 4/8] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
Architectures like ppc64 provide persistent memory specific barriers
that will ensure that all stores for which the modifications are
written to persistent storage by preceding dcbfps and dcbstps
instructions have updated persistent storage before any data
access or data transfer caused by subsequent instructions is initiated.
This is in addition to the ordering done by wmb().
Update nvdimm core such that architecture can use barriers other than
wmb to ensure all previous writes are architecturally visible for
the platform buffer flush.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
drivers/md/dm-writecache.c | 2 +-
drivers/nvdimm/region_devs.c | 8 ++++----
include/linux/libnvdimm.h | 4 ++++
3 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 613c171b1b6d..904fdbf2b089 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -540,7 +540,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
{
if (WC_MODE_PMEM(wc))
- wmb();
+ arch_pmem_flush_barrier();
else
ssd_commit_flushed(wc, wait_for_ios);
}
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index ccbb5b43b8b2..88ea34a9c7fd 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1216,13 +1216,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
/*
- * The first wmb() is needed to 'sfence' all previous writes
- * such that they are architecturally visible for the platform
- * buffer flush. Note that we've already arranged for pmem
+ * The first arch_pmem_flush_barrier() is needed to 'sfence' all
+ * previous writes such that they are architecturally visible for
+ * the platform buffer flush. Note that we've already arranged for pmem
* writes to avoid the cache via memcpy_flushcache(). The final
* wmb() ensures ordering for the NVDIMM flush write.
*/
- wmb();
+ arch_pmem_flush_barrier();
for (i = 0; i < nd_region->ndr_mappings; i++)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 18da4059be09..66f6c65bd789 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -286,4 +286,8 @@ static inline void arch_invalidate_pmem(void *addr, size_t size)
}
#endif
+#ifndef arch_pmem_flush_barrier
+#define arch_pmem_flush_barrier() wmb()
+#endif
+
#endif /* __LIBNVDIMM_H__ */
--
2.26.2
^ permalink raw reply related
* [PATCH v4 5/8] powerpc/pmem/of_pmem: Update of_pmem to use the new barrier instruction.
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
of_pmem on POWER10 can now use phwsync instead of hwsync to ensure
all previous writes are architecturally visible for the platform
buffer flush.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/cacheflush.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index e92191b390f3..bc3ea009cf14 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -119,6 +119,13 @@ static inline void invalidate_dcache_range(unsigned long start,
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy(dst, src, len)
+
+#define arch_pmem_flush_barrier arch_pmem_flush_barrier
+static inline void arch_pmem_flush_barrier(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ asm volatile(PPC_PHWSYNC ::: "memory");
+}
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_CACHEFLUSH_H */
--
2.26.2
^ permalink raw reply related
* [PATCH v4 3/8] powerpc/pmem: Add flush routines using new pmem store and sync instruction
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
Start using the dcbstps; phwsync; sequence for flushing a persistent memory range.
The new instructions are implemented as variants of dcbf and hwsync and on
P8 and P9 they will be executed as those instructions. We avoid using them on
older hardware. This helps to avoid difficult-to-debug bugs.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/lib/pmem.c | 50 +++++++++++++++++++++++++++++++++++++----
1 file changed, 46 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d29596..5a61aaeb6930 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -9,20 +9,62 @@
#include <asm/cacheflush.h>
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+
+
+ asm volatile(PPC_PHWSYNC ::: "memory");
+}
+
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+
+
+ asm volatile(PPC_PHWSYNC ::: "memory");
+}
+
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __clean_pmem_range(start, stop);
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __flush_pmem_range(start, stop);
+}
+
/*
* CONFIG_ARCH_HAS_PMEM_API symbols
*/
void arch_wb_cache_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
void arch_invalidate_pmem(void *addr, size_t size)
{
unsigned long start = (unsigned long) addr;
- flush_dcache_range(start, start + size);
+ flush_pmem_range(start, start + size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
@@ -35,7 +77,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src,
unsigned long copied, start = (unsigned long) dest;
copied = __copy_from_user(dest, src, size);
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
return copied;
}
@@ -45,7 +87,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size)
unsigned long start = (unsigned long) dest;
memcpy(dest, src, size);
- flush_dcache_range(start, start + size);
+ clean_pmem_range(start, start + size);
return dest;
}
--
2.26.2
^ permalink raw reply related
* [PATCH v4 2/8] powerpc/pmem: Add new instructions for persistent storage and sync
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
POWER10 introduces two new variants of dcbf instructions (dcbstps and dcbfps)
that can be used to write modified locations back to persistent storage.
Additionally, POWER10 also introduces phwsync and plwsync which can be used
to establish order of these writes to persistent storage.
This patch exposes these instructions to the rest of the kernel. The existing
dcbf and hwsync instructions in P8 and P9 are adequate to enable appropriate
synchronization with OpenCAPI-hosted persistent storage. Hence the new
instructions are added as a variant of the old ones that old hardware
won't differentiate.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/ppc-opcode.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 2a39c716c343..1ad014e4633e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -219,6 +219,8 @@
#define PPC_INST_STWCX 0x7c00012d
#define PPC_INST_LWSYNC 0x7c2004ac
#define PPC_INST_SYNC 0x7c0004ac
+#define PPC_INST_PHWSYNC 0x7c8004ac
+#define PPC_INST_PLWSYNC 0x7ca004ac
#define PPC_INST_SYNC_MASK 0xfc0007fe
#define PPC_INST_ISYNC 0x4c00012c
#define PPC_INST_LXVD2X 0x7c000698
@@ -284,6 +286,8 @@
#define PPC_INST_TABORT 0x7c00071d
#define PPC_INST_TSR 0x7c0005dd
+#define PPC_INST_DCBF 0x7c0000ac
+
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
#define PPC_INST_WINKLE 0x4c0003e4
@@ -532,6 +536,14 @@
#define STBCIX(s,a,b) stringify_in_c(.long PPC_INST_STBCIX | \
__PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_DCBFPS(a, b) stringify_in_c(.long PPC_INST_DCBF | \
+ ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
+#define PPC_DCBSTPS(a, b) stringify_in_c(.long PPC_INST_DCBF | \
+ ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+
+#define PPC_PHWSYNC stringify_in_c(.long PPC_INST_PHWSYNC)
+#define PPC_PLWSYNC stringify_in_c(.long PPC_INST_PLWSYNC)
+
/*
* Define what the VSX XX1 form instructions will look like, then add
* the 128 bit load store instructions based on that.
--
2.26.2
^ permalink raw reply related
* [PATCH v4 0/8] Support new pmem flush and sync instructions for POWER
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
This patch series enables the usage of new pmem flush and sync instructions on POWER
architecture. POWER10 introduces two new variants of dcbf instructions (dcbstps and dcbfps)
that can be used to write modified locations back to persistent storage. Additionally,
POWER10 also introduces phwsync and plwsync which can be used to establish order of these
writes to persistent storage.
This series exposes these instructions to the rest of the kernel. The existing
dcbf and hwsync instructions in P8 and P9 are adequate to enable appropriate
synchronization with OpenCAPI-hosted persistent storage. Hence the new instructions
are added as a variant of the old ones that old hardware won't differentiate.
On POWER10, pmem devices will be represented by a different device tree compat
strings. This ensures that older kernels won't initialize pmem devices on POWER10.
Changes from V3:
* Add new compat string to be used for the device.
* Use arch_pmem_flush_barrier() in dm-writecache.
Aneesh Kumar K.V (8):
powerpc/pmem: Restrict papr_scm to P8 and above.
powerpc/pmem: Add new instructions for persistent storage and sync
powerpc/pmem: Add flush routines using new pmem store and sync
instruction
libnvdimm/nvdimm/flush: Allow architecture to override the flush
barrier
powerpc/pmem/of_pmem: Update of_pmem to use the new barrier
instruction.
powerpc/pmem: Avoid the barrier in flush routines
powerpc/book3s/pmem: Add WARN_ONCE to catch the wrong usage of pmem
flush functions.
powerpc/pmem: Initialize pmem device on newer hardware
arch/powerpc/include/asm/cacheflush.h | 9 +++++
arch/powerpc/include/asm/ppc-opcode.h | 12 ++++++
arch/powerpc/lib/pmem.c | 46 +++++++++++++++++++++--
arch/powerpc/platforms/pseries/papr_scm.c | 14 +++++++
arch/powerpc/platforms/pseries/pmem.c | 6 +++
drivers/md/dm-writecache.c | 2 +-
drivers/nvdimm/of_pmem.c | 1 +
drivers/nvdimm/region_devs.c | 8 ++--
include/linux/libnvdimm.h | 4 ++
9 files changed, 93 insertions(+), 9 deletions(-)
--
2.26.2
^ permalink raw reply
* [PATCH v4 1/8] powerpc/pmem: Restrict papr_scm to P8 and above.
From: Aneesh Kumar K.V @ 2020-05-29 5:28 UTC (permalink / raw)
To: linuxppc-dev, mpe, linux-nvdimm; +Cc: Aneesh Kumar K.V, dan.j.williams, oohall
In-Reply-To: <20200529052820.151651-1-aneesh.kumar@linux.ibm.com>
The PAPR based virtualized persistent memory devices are only supported on
POWER9 and above. In the followup patch, the kernel will switch the persistent
memory cache flush functions to use a new `dcbf` variant instruction. The new
instructions, even though added in ISA 3.1, work even on P8 and P9 because they
are implemented as variants of the existing `dcbf` and `hwsync` and, on P8 and
P9, behave as such.
Considering these devices are only supported on P8 and above, update the driver
to prevent a P7-compat guest from using persistent memory devices.
We don't update of_pmem driver with the same condition, because, on bare-metal,
the firmware enables pmem support only on P9 and above. There the kernel depends
on OPAL firmware to restrict exposing persistent memory related device tree
entries on older hardware. of_pmem.ko is written without any arch dependency and
we don't want to add ppc64 specific cpu feature check in of_pmem driver.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/platforms/pseries/pmem.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index f860a897a9e0..2347e1038f58 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -147,6 +147,12 @@ const struct of_device_id drc_pmem_match[] = {
static int pseries_pmem_init(void)
{
+ /*
+ * Only supported on POWER8 and above.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return 0;
+
pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
if (!pmem_node)
return 0;
--
2.26.2
^ permalink raw reply related
* Re: [PATCH] powerpc/xive: Enforce load-after-store ordering when StoreEOI is active
From: Michael Ellerman @ 2020-05-29 4:30 UTC (permalink / raw)
To: Cédric Le Goater
Cc: Alistair Popple, linuxppc-dev, Greg Kurz, Paul Mackerras,
Cédric Le Goater
In-Reply-To: <20200220081506.31209-1-clg@kaod.org>
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 2040 bytes --]
On Thu, 2020-02-20 at 08:15:06 UTC, Cédric Le Goater wrote:
> When an interrupt has been handled, the OS notifies the interrupt
> controller with an EOI sequence. On a POWER9 system using the XIVE
> interrupt controller, this can be done with a load or a store
> operation on the ESB interrupt management page of the interrupt. The
> StoreEOI operation has less latency and improves interrupt handling
> performance but it was deactivated during the POWER9 DD2.0 timeframe
> because of ordering issues. We use the LoadEOI today but we plan to
> reactivate StoreEOI in future architectures.
>
> There is usually no need to enforce ordering between ESB load and
> store operations as they should lead to the same result. E.g. a store
> trigger and a load EOI can be executed in any order. Assuming the
> interrupt state is PQ=10, a store trigger followed by a load EOI will
> return a Q bit. In the reverse order, it will create a new interrupt
> trigger from HW. In both cases, the handler processing interrupts is
> notified.
>
> In some cases, the XIVE_ESB_SET_PQ_10 load operation is used to
> temporarily disable the interrupt source (mask/unmask). When the
> source is reenabled, the OS can detect if interrupts were received
> while the source was disabled and reinject them. This process needs
> special care when StoreEOI is activated. The ESB load and store
> operations should be correctly ordered because a XIVE_ESB_STORE_EOI
> operation could leave the source enabled if it has not completed
> before the loads.
>
> For those cases, we enforce Load-after-Store ordering with a special
> load operation offset. To avoid performance impact, this ordering is
> only enforced when really needed, that is when interrupt sources are
> temporarily disabled with the XIVE_ESB_SET_PQ_10 load. It should not
> be needed for other loads.
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
Applied to powerpc topic/ppc-kvm, thanks.
https://git.kernel.org/powerpc/c/b1f9be9392f090f08e4ad9e2c68963aeff03bd67
cheers
^ permalink raw reply
* Re: [PATCH] powerpc/uaccess: Don't use "m<>" constraint
From: Michael Ellerman @ 2020-05-29 4:24 UTC (permalink / raw)
To: Michael Ellerman, linuxppc-dev
In-Reply-To: <20200507123324.2250024-1-mpe@ellerman.id.au>
On Thu, 2020-05-07 at 12:33:24 UTC, Michael Ellerman wrote:
> The "m<>" constraint breaks compilation with GCC 4.6.x era compilers.
>
> The use of the constraint allows the compiler to use update-form
> instructions, however in practice current compilers never generate
> those forms for any of the current uses of __put_user_asm_goto().
>
> We anticipate that GCC 4.6 will be declared unsupported for building
> the kernel in the not too distant future. So for now just switch to
> the "m" constraint.
>
> Fixes: 334710b1496a ("powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'")
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Applied to powerpc topic/uaccess-ppc.
https://git.kernel.org/powerpc/c/e2a8b49e79553bd8ec48f73cead84e6146c09408
cheers
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox