* [PATCH 6/9] virtio_balloon: remove the balloon-kvm file system
From: Christoph Hellwig @ 2021-03-09 15:53 UTC (permalink / raw)
To: Al Viro
Cc: Jason Gunthorpe, David Hildenbrand, VMware, Inc.,
Michael S. Tsirkin, linux-kernel, dri-devel, virtualization,
linux-mm, Minchan Kim, Alex Williamson, Nadav Amit, Daniel Vetter,
linux-fsdevel, Andrew Morton, linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-1-hch@lst.de>
Just use the generic anon_inode file system.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/virtio/virtio_balloon.c | 30 +++---------------------------
1 file changed, 3 insertions(+), 27 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index cae76ee5bdd688..1efb890cd3ff09 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -6,6 +6,7 @@
* Copyright 2008 Rusty Russell IBM Corporation
*/
+#include <linux/anon_inodes.h>
#include <linux/virtio.h>
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
@@ -42,10 +43,6 @@
(1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
-#ifdef CONFIG_BALLOON_COMPACTION
-static struct vfsmount *balloon_mnt;
-#endif
-
enum virtio_balloon_vq {
VIRTIO_BALLOON_VQ_INFLATE,
VIRTIO_BALLOON_VQ_DEFLATE,
@@ -805,18 +802,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
return MIGRATEPAGE_SUCCESS;
}
-
-static int balloon_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type balloon_fs = {
- .name = "balloon-kvm",
- .init_fs_context = balloon_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
#endif /* CONFIG_BALLOON_COMPACTION */
static unsigned long shrink_free_pages(struct virtio_balloon *vb,
@@ -909,17 +894,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_free_vb;
#ifdef CONFIG_BALLOON_COMPACTION
- balloon_mnt = kern_mount(&balloon_fs);
- if (IS_ERR(balloon_mnt)) {
- err = PTR_ERR(balloon_mnt);
- goto out_del_vqs;
- }
-
vb->vb_dev_info.migratepage = virtballoon_migratepage;
- vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
+ vb->vb_dev_info.inode = alloc_anon_inode();
if (IS_ERR(vb->vb_dev_info.inode)) {
err = PTR_ERR(vb->vb_dev_info.inode);
- goto out_kern_unmount;
+ goto out_del_vqs;
}
vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
#endif
@@ -1016,8 +995,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
out_iput:
#ifdef CONFIG_BALLOON_COMPACTION
iput(vb->vb_dev_info.inode);
-out_kern_unmount:
- kern_unmount(balloon_mnt);
out_del_vqs:
#endif
vdev->config->del_vqs(vdev);
@@ -1070,7 +1047,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
if (vb->vb_dev_info.inode)
iput(vb->vb_dev_info.inode);
- kern_unmount(balloon_mnt);
#endif
kfree(vb);
}
--
2.30.1
^ permalink raw reply related
* [PATCH 7/9] iomem: remove the iomem file system
From: Christoph Hellwig @ 2021-03-09 15:53 UTC (permalink / raw)
To: Al Viro
Cc: Jason Gunthorpe, David Hildenbrand, VMware, Inc.,
Michael S. Tsirkin, linux-kernel, dri-devel, virtualization,
linux-mm, Minchan Kim, Alex Williamson, Nadav Amit, Daniel Vetter,
linux-fsdevel, Andrew Morton, linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-1-hch@lst.de>
Just use the generic anon_inode file system.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
kernel/resource.c | 30 ++++--------------------------
1 file changed, 4 insertions(+), 26 deletions(-)
diff --git a/kernel/resource.c b/kernel/resource.c
index 0fd091a3f2fc66..12560553c26796 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/resource_ext.h>
+#include <linux/anon_inodes.h>
#include <uapi/linux/magic.h>
#include <asm/io.h>
@@ -1838,37 +1839,14 @@ static int __init strict_iomem(char *str)
return 1;
}
-static int iomem_fs_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, DEVMEM_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type iomem_fs_type = {
- .name = "iomem",
- .owner = THIS_MODULE,
- .init_fs_context = iomem_fs_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
static int __init iomem_init_inode(void)
{
- static struct vfsmount *iomem_vfs_mount;
- static int iomem_fs_cnt;
struct inode *inode;
- int rc;
-
- rc = simple_pin_fs(&iomem_fs_type, &iomem_vfs_mount, &iomem_fs_cnt);
- if (rc < 0) {
- pr_err("Cannot mount iomem pseudo filesystem: %d\n", rc);
- return rc;
- }
- inode = alloc_anon_inode_sb(iomem_vfs_mount->mnt_sb);
+ inode = alloc_anon_inode();
if (IS_ERR(inode)) {
- rc = PTR_ERR(inode);
- pr_err("Cannot allocate inode for iomem: %d\n", rc);
- simple_release_fs(&iomem_vfs_mount, &iomem_fs_cnt);
- return rc;
+ pr_err("Cannot allocate inode for iomem: %zd\n", PTR_ERR(inode));
+ return PTR_ERR(inode);
}
/*
--
2.30.1
^ permalink raw reply related
* [PATCH 8/9] z3fold: remove the z3fold file system
From: Christoph Hellwig @ 2021-03-09 15:53 UTC (permalink / raw)
To: Al Viro
Cc: Jason Gunthorpe, David Hildenbrand, VMware, Inc.,
Michael S. Tsirkin, linux-kernel, dri-devel, virtualization,
linux-mm, Minchan Kim, Alex Williamson, Nadav Amit, Daniel Vetter,
linux-fsdevel, Andrew Morton, linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-1-hch@lst.de>
Just use the generic anon_inode file system.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
mm/z3fold.c | 38 ++------------------------------------
1 file changed, 2 insertions(+), 36 deletions(-)
diff --git a/mm/z3fold.c b/mm/z3fold.c
index e7cd9298b221f5..e0749a3d8987de 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -23,6 +23,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/anon_inodes.h>
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
@@ -345,38 +346,10 @@ static inline void free_handle(unsigned long handle, struct z3fold_header *zhdr)
}
}
-static int z3fold_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, Z3FOLD_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type z3fold_fs = {
- .name = "z3fold",
- .init_fs_context = z3fold_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-static struct vfsmount *z3fold_mnt;
-static int z3fold_mount(void)
-{
- int ret = 0;
-
- z3fold_mnt = kern_mount(&z3fold_fs);
- if (IS_ERR(z3fold_mnt))
- ret = PTR_ERR(z3fold_mnt);
-
- return ret;
-}
-
-static void z3fold_unmount(void)
-{
- kern_unmount(z3fold_mnt);
-}
-
static const struct address_space_operations z3fold_aops;
static int z3fold_register_migration(struct z3fold_pool *pool)
{
- pool->inode = alloc_anon_inode_sb(z3fold_mnt->mnt_sb);
+ pool->inode = alloc_anon_inode();
if (IS_ERR(pool->inode)) {
pool->inode = NULL;
return 1;
@@ -1787,22 +1760,15 @@ MODULE_ALIAS("zpool-z3fold");
static int __init init_z3fold(void)
{
- int ret;
-
/* Make sure the z3fold header is not larger than the page size */
BUILD_BUG_ON(ZHDR_SIZE_ALIGNED > PAGE_SIZE);
- ret = z3fold_mount();
- if (ret)
- return ret;
zpool_register_driver(&z3fold_zpool_driver);
-
return 0;
}
static void __exit exit_z3fold(void)
{
- z3fold_unmount();
zpool_unregister_driver(&z3fold_zpool_driver);
}
--
2.30.1
^ permalink raw reply related
* [PATCH 9/9] zsmalloc: remove the zsmalloc file system
From: Christoph Hellwig @ 2021-03-09 15:53 UTC (permalink / raw)
To: Al Viro
Cc: Jason Gunthorpe, David Hildenbrand, VMware, Inc.,
Michael S. Tsirkin, linux-kernel, dri-devel, virtualization,
linux-mm, Minchan Kim, Alex Williamson, Nadav Amit, Daniel Vetter,
linux-fsdevel, Andrew Morton, linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-1-hch@lst.de>
Just use the generic anon_inode file system.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
mm/zsmalloc.c | 48 +++---------------------------------------------
1 file changed, 3 insertions(+), 45 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a6449a2ad861de..a7d2f471935447 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/pgtable.h>
#include <asm/tlbflush.h>
+#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
@@ -176,10 +177,6 @@ struct zs_size_stat {
static struct dentry *zs_stat_root;
#endif
-#ifdef CONFIG_COMPACTION
-static struct vfsmount *zsmalloc_mnt;
-#endif
-
/*
* We assign a page to ZS_ALMOST_EMPTY fullness group when:
* n <= N / f, where
@@ -308,8 +305,6 @@ static void kick_deferred_free(struct zs_pool *pool);
static void init_deferred_free(struct zs_pool *pool);
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
#else
-static int zsmalloc_mount(void) { return 0; }
-static void zsmalloc_unmount(void) {}
static int zs_register_migration(struct zs_pool *pool) { return 0; }
static void zs_unregister_migration(struct zs_pool *pool) {}
static void migrate_lock_init(struct zspage *zspage) {}
@@ -1751,33 +1746,6 @@ static void lock_zspage(struct zspage *zspage)
} while ((page = get_next_page(page)) != NULL);
}
-static int zs_init_fs_context(struct fs_context *fc)
-{
- return init_pseudo(fc, ZSMALLOC_MAGIC) ? 0 : -ENOMEM;
-}
-
-static struct file_system_type zsmalloc_fs = {
- .name = "zsmalloc",
- .init_fs_context = zs_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-static int zsmalloc_mount(void)
-{
- int ret = 0;
-
- zsmalloc_mnt = kern_mount(&zsmalloc_fs);
- if (IS_ERR(zsmalloc_mnt))
- ret = PTR_ERR(zsmalloc_mnt);
-
- return ret;
-}
-
-static void zsmalloc_unmount(void)
-{
- kern_unmount(zsmalloc_mnt);
-}
-
static void migrate_lock_init(struct zspage *zspage)
{
rwlock_init(&zspage->lock);
@@ -2086,7 +2054,7 @@ static const struct address_space_operations zsmalloc_aops = {
static int zs_register_migration(struct zs_pool *pool)
{
- pool->inode = alloc_anon_inode_sb(zsmalloc_mnt->mnt_sb);
+ pool->inode = alloc_anon_inode();
if (IS_ERR(pool->inode)) {
pool->inode = NULL;
return 1;
@@ -2506,14 +2474,10 @@ static int __init zs_init(void)
{
int ret;
- ret = zsmalloc_mount();
- if (ret)
- goto out;
-
ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
zs_cpu_prepare, zs_cpu_dead);
if (ret)
- goto hp_setup_fail;
+ return ret;
#ifdef CONFIG_ZPOOL
zpool_register_driver(&zs_zpool_driver);
@@ -2522,11 +2486,6 @@ static int __init zs_init(void)
zs_stat_init();
return 0;
-
-hp_setup_fail:
- zsmalloc_unmount();
-out:
- return ret;
}
static void __exit zs_exit(void)
@@ -2534,7 +2493,6 @@ static void __exit zs_exit(void)
#ifdef CONFIG_ZPOOL
zpool_unregister_driver(&zs_zpool_driver);
#endif
- zsmalloc_unmount();
cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
zs_stat_exit();
--
2.30.1
^ permalink raw reply related
* Re: [PATCH v3] powerpc/32: remove bogus ppc_select syscall
From: Christophe Leroy @ 2021-03-09 15:59 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: linuxppc-dev, linux-kernel@vger.kernel.org
In-Reply-To: <CAK8P3a34cnCk4=Xyxvib57JLN-ck4T0-FUZRAQT_L6MDKjE+-w@mail.gmail.com>
Le 05/03/2021 à 13:03, Arnd Bergmann a écrit :
> On Fri, Mar 5, 2021 at 11:15 AM Christophe Leroy
> <christophe.leroy@csgroup.eu> wrote:
>> Le 05/03/2021 à 11:06, Arnd Bergmann a écrit :
>>> On Fri, Mar 5, 2021 at 9:40 AM Christophe Leroy <christophe.leroy@csgroup.eu> wrote:
>>> - glibc support for ppc32 gets merged during the linux-2.5 days, supporting
>>> only #142 with the new behavior.
>
> It turns out to be older than I said. This was actually in glibc-1.94
> from 1997, so during
> the linux-2.1 days, not 2.5!
>
>> Whaou, nice archeology, thanks. Do you mind if I copy the history you established ?
>
> That's fine, please copy it.
>
>> In your commit, you said 2.3.48. Here in the history you say 2.1.48. Which one is correct ?
>
> 2.1.48 is correct.
>
>> Regardless of whethere binaries are broken or not for other reason, is that worth expecting an
>> almost 25 yr old binary to run on future kernels ? If one is able to put the necessary effort to
>> port you hardware to the latest kernel, can't he really port the binary as well ?
>
> I think the questions of supporting old hardware with new software and
> supporting old
> binaries on modern kernels are largely orthogonal. The policy we have
> is that we don't
> break existing user setups, and it really seems unlikely that anyone
> still uses pre-1997
> executables for anything that requires a modern kernel!
>
> I now checked the oldest mklinux I could find (DR2.1 from 1997), and
> even has the
> modern glibc and linux-2.0.28 kernel patched to provide the modern semantics at
> syscall #142 for glibc, with the same (already unused) compatibility hack at #82
> that we still have for ppc32 today. This made mklinux DR2.1 binaries
> incompatible
> with mainline linux-2.0 kernels, but they might still work with modern kernels,
> regardless of whether we remove support for binaries that worked with mainline
> linux-2.0.
I had another look. In fact x86, arm and m68k still have the #82 syscall, but they don't have the
hack we have on powerpc to "guess" that something is calling the old select with the arguments of
the new select.
As part of my series of user accesses cleanup, I'll replace the open coded stuff by a call to
sys_old_select(), see below.
Maybe at the end we should keep the #82 syscall, but do we need to keep the powerpc hack really ?
Maybe the best is to drop ppc_select() function but mention sys_old_select() instead of ni_syscall
for entry #82 in the syscall table ?
Christophe
---
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 700fcdac2e3c..b541c690a31c 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -40,6 +40,7 @@
#define __ARCH_WANT_SYS_SIGPROCMASK
#ifdef CONFIG_PPC32
#define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_SYS_OLD_SELECT
#endif
#ifdef CONFIG_PPC64
#define __ARCH_WANT_SYS_TIME
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 078608ec2e92..a552c9e68d7e 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -82,16 +82,8 @@ int
ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct
__kernel_old_timeval __user *tvp)
{
if ( (unsigned long)n >= 4096 )
- {
- unsigned long __user *buffer = (unsigned long __user *)n;
- if (!access_ok(buffer, 5*sizeof(unsigned long))
- || __get_user(n, buffer)
- || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
- || __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
- || __get_user(exp, ((fd_set __user * __user *)(buffer+3)))
- || __get_user(tvp, ((struct __kernel_old_timeval __user * __user *)(buffer+4))))
- return -EFAULT;
- }
+ return sys_old_select((void __user *)n);
+
return sys_select(n, inp, outp, exp, tvp);
}
#endif
^ permalink raw reply related
* Re: [PATCH v1] powerpc: Include running function as first entry in save_stack_trace() and friends
From: Mark Rutland @ 2021-03-09 16:05 UTC (permalink / raw)
To: Segher Boessenkool
Cc: Marco Elver, Catalin Marinas, linuxppc-dev, LKML, kasan-dev,
broonie, Paul Mackerras, Will Deacon, linux-arm-kernel
In-Reply-To: <20210304215448.GU29191@gate.crashing.org>
On Thu, Mar 04, 2021 at 03:54:48PM -0600, Segher Boessenkool wrote:
> Hi!
Hi Segher,
> On Thu, Mar 04, 2021 at 02:57:30PM +0000, Mark Rutland wrote:
> > It looks like GCC is happy to give us the function-entry-time FP if we use
> > __builtin_frame_address(1),
>
> From the GCC manual:
> Calling this function with a nonzero argument can have
> unpredictable effects, including crashing the calling program. As
> a result, calls that are considered unsafe are diagnosed when the
> '-Wframe-address' option is in effect. Such calls should only be
> made in debugging situations.
>
> It *does* warn (the warning is in -Wall btw), on both powerpc and
> aarch64. Furthermore, using this builtin causes lousy code (it forces
> the use of a frame pointer, which we normally try very hard to optimise
> away, for good reason).
>
> And, that warning is not an idle warning. Non-zero arguments to
> __builtin_frame_address can crash the program. It won't on simpler
> functions, but there is no real definition of what a simpler function
> *is*. It is meant for debugging, not for production use (this is also
> why no one has bothered to make it faster).
>
> On Power it should work, but on pretty much any other arch it won't.
I understand this is true generally, and cannot be relied upon in
portable code. However as you hint here for Power, I believe that on
arm64 __builtin_frame_address(1) shouldn't crash the program due to the
way frame records work on arm64, but I'll go check with some local
compiler folk. I agree that __builtin_frame_address(2) and beyond
certainly can, e.g. by NULL dereference and similar.
For context, why do you think this would work on power specifically? I
wonder if our rationale is similar.
Are you aware of anything in particular that breaks using
__builtin_frame_address(1) in non-portable code, or is this just a
general sentiment of this not being a supported use-case?
> > Unless we can get some strong guarantees from compiler folk such that we
> > can guarantee a specific function acts boundary for unwinding (and
> > doesn't itself get split, etc), the only reliable way I can think to
> > solve this requires an assembly trampoline. Whatever we do is liable to
> > need some invasive rework.
>
> You cannot get such a guarantee, other than not letting the compiler
> see into the routine at all, like with assembler code (not inline asm,
> real assembler code).
If we cannot reliably ensure this then I'm happy to go write an assembly
trampoline to snapshot the state at a function call boundary (where our
procedure call standard mandates the state of the LR, FP, and frame
records pointed to by the FP). This'll require reworking a reasonable
amount of code cross-architecture, so I'll need to get some more
concrete justification (e.g. examples of things that can go wrong in
practice).
> The real way forward is to bite the bullet and to no longer pretend you
> can do a full backtrace from just the stack contents. You cannot.
I think what you mean here is that there's no reliable way to handle the
current/leaf function, right? If so I do agree.
Beyond that I believe that arm64's frame records should be sufficient.
Thanks,
Mark.
^ permalink raw reply
* Re: [PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system
From: David Hildenbrand @ 2021-03-09 16:26 UTC (permalink / raw)
To: Christoph Hellwig, Al Viro
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc., linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-4-hch@lst.de>
On 09.03.21 16:53, Christoph Hellwig wrote:
> Just use the generic anon_inode file system.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> arch/powerpc/platforms/pseries/cmm.c | 27 ++-------------------------
> 1 file changed, 2 insertions(+), 25 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
> index 6d36b858b14df1..9d07e6bea7126c 100644
> --- a/arch/powerpc/platforms/pseries/cmm.c
> +++ b/arch/powerpc/platforms/pseries/cmm.c
> @@ -6,6 +6,7 @@
> * Author(s): Brian King (brking@linux.vnet.ibm.com),
> */
>
> +#include <linux/anon_inodes.h>
> #include <linux/ctype.h>
> #include <linux/delay.h>
> #include <linux/errno.h>
> @@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = {
> };
>
> #ifdef CONFIG_BALLOON_COMPACTION
> -static struct vfsmount *balloon_mnt;
> -
> -static int cmm_init_fs_context(struct fs_context *fc)
> -{
> - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
> -}
> -
> -static struct file_system_type balloon_fs = {
> - .name = "ppc-cmm",
> - .init_fs_context = cmm_init_fs_context,
> - .kill_sb = kill_anon_super,
> -};
> -
> static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
> struct page *newpage, struct page *page,
> enum migrate_mode mode)
> @@ -573,19 +561,10 @@ static int cmm_balloon_compaction_init(void)
> balloon_devinfo_init(&b_dev_info);
> b_dev_info.migratepage = cmm_migratepage;
>
> - balloon_mnt = kern_mount(&balloon_fs);
> - if (IS_ERR(balloon_mnt)) {
> - rc = PTR_ERR(balloon_mnt);
> - balloon_mnt = NULL;
> - return rc;
> - }
> -
> - b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
> + b_dev_info.inode = alloc_anon_inode();
> if (IS_ERR(b_dev_info.inode)) {
> rc = PTR_ERR(b_dev_info.inode);
> b_dev_info.inode = NULL;
> - kern_unmount(balloon_mnt);
> - balloon_mnt = NULL;
> return rc;
> }
>
> @@ -597,8 +576,6 @@ static void cmm_balloon_compaction_deinit(void)
> if (b_dev_info.inode)
> iput(b_dev_info.inode);
> b_dev_info.inode = NULL;
> - kern_unmount(balloon_mnt);
> - balloon_mnt = NULL;
> }
> #else /* CONFIG_BALLOON_COMPACTION */
> static int cmm_balloon_compaction_init(void)
>
I always wondered why that was necessary after all (with my limited fs
knowledge :) ).
a) I assume you want to remove PPC_CMM_MAGIC from
include/uapi/linux/magic.h as well?
b) Do we still need #include <linux/magic.h>, #include <linux/mount.h>
and #include <linux/pseudo_fs.h>?
Apart from that looks much cleaner.
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb
From: David Hildenbrand @ 2021-03-09 16:21 UTC (permalink / raw)
To: Christoph Hellwig, Al Viro
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc., linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-2-hch@lst.de>
On 09.03.21 16:53, Christoph Hellwig wrote:
> Rename alloc_inode to free the name for a new variant that does not
> need boilerplate to create a super_block first.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> arch/powerpc/platforms/pseries/cmm.c | 2 +-
> drivers/dma-buf/dma-buf.c | 2 +-
> drivers/gpu/drm/drm_drv.c | 2 +-
> drivers/misc/cxl/api.c | 2 +-
> drivers/misc/vmw_balloon.c | 2 +-
> drivers/scsi/cxlflash/ocxl_hw.c | 2 +-
> drivers/virtio/virtio_balloon.c | 2 +-
> fs/aio.c | 2 +-
> fs/anon_inodes.c | 4 ++--
> fs/libfs.c | 2 +-
> include/linux/fs.h | 2 +-
> kernel/resource.c | 2 +-
> mm/z3fold.c | 2 +-
> mm/zsmalloc.c | 2 +-
> 14 files changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
> index 45a3a3022a85c9..6d36b858b14df1 100644
> --- a/arch/powerpc/platforms/pseries/cmm.c
> +++ b/arch/powerpc/platforms/pseries/cmm.c
> @@ -580,7 +580,7 @@ static int cmm_balloon_compaction_init(void)
> return rc;
> }
>
> - b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
> + b_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
> if (IS_ERR(b_dev_info.inode)) {
> rc = PTR_ERR(b_dev_info.inode);
> b_dev_info.inode = NULL;
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index f264b70c383eb4..dedcc9483352dc 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -445,7 +445,7 @@ static inline int is_dma_buf_file(struct file *file)
> static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
> {
> struct file *file;
> - struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
> + struct inode *inode = alloc_anon_inode_sb(dma_buf_mnt->mnt_sb);
>
> if (IS_ERR(inode))
> return ERR_CAST(inode);
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 20d22e41d7ce74..87e7214a8e3565 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -519,7 +519,7 @@ static struct inode *drm_fs_inode_new(void)
> return ERR_PTR(r);
> }
>
> - inode = alloc_anon_inode(drm_fs_mnt->mnt_sb);
> + inode = alloc_anon_inode_sb(drm_fs_mnt->mnt_sb);
> if (IS_ERR(inode))
> simple_release_fs(&drm_fs_mnt, &drm_fs_cnt);
>
> diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
> index b493de962153ba..2efbf6c98028ef 100644
> --- a/drivers/misc/cxl/api.c
> +++ b/drivers/misc/cxl/api.c
> @@ -73,7 +73,7 @@ static struct file *cxl_getfile(const char *name,
> goto err_module;
> }
>
> - inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
> + inode = alloc_anon_inode_sb(cxl_vfs_mount->mnt_sb);
> if (IS_ERR(inode)) {
> file = ERR_CAST(inode);
> goto err_fs;
> diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
> index b837e7eba5f7dc..5d057a05ddbee8 100644
> --- a/drivers/misc/vmw_balloon.c
> +++ b/drivers/misc/vmw_balloon.c
> @@ -1900,7 +1900,7 @@ static __init int vmballoon_compaction_init(struct vmballoon *b)
> return PTR_ERR(vmballoon_mnt);
>
> b->b_dev_info.migratepage = vmballoon_migratepage;
> - b->b_dev_info.inode = alloc_anon_inode(vmballoon_mnt->mnt_sb);
> + b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
>
> if (IS_ERR(b->b_dev_info.inode))
> return PTR_ERR(b->b_dev_info.inode);
> diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
> index 244fc27215dc79..40184ed926b557 100644
> --- a/drivers/scsi/cxlflash/ocxl_hw.c
> +++ b/drivers/scsi/cxlflash/ocxl_hw.c
> @@ -88,7 +88,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
> goto err2;
> }
>
> - inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb);
> + inode = alloc_anon_inode_sb(ocxlflash_vfs_mount->mnt_sb);
> if (IS_ERR(inode)) {
> rc = PTR_ERR(inode);
> dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n",
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8985fc2cea8615..cae76ee5bdd688 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -916,7 +916,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
> }
>
> vb->vb_dev_info.migratepage = virtballoon_migratepage;
> - vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
> + vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
> if (IS_ERR(vb->vb_dev_info.inode)) {
> err = PTR_ERR(vb->vb_dev_info.inode);
> goto out_kern_unmount;
> diff --git a/fs/aio.c b/fs/aio.c
> index 1f32da13d39ee6..d1c2aa7fd6de7c 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -234,7 +234,7 @@ static const struct address_space_operations aio_ctx_aops;
> static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
> {
> struct file *file;
> - struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
> + struct inode *inode = alloc_anon_inode_sb(aio_mnt->mnt_sb);
> if (IS_ERR(inode))
> return ERR_CAST(inode);
>
> diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
> index a280156138ed89..4745fc37014332 100644
> --- a/fs/anon_inodes.c
> +++ b/fs/anon_inodes.c
> @@ -63,7 +63,7 @@ static struct inode *anon_inode_make_secure_inode(
> const struct qstr qname = QSTR_INIT(name, strlen(name));
> int error;
>
> - inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
> + inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
> if (IS_ERR(inode))
> return inode;
> inode->i_flags &= ~S_PRIVATE;
> @@ -231,7 +231,7 @@ static int __init anon_inode_init(void)
> if (IS_ERR(anon_inode_mnt))
> panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt));
>
> - anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
> + anon_inode_inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
> if (IS_ERR(anon_inode_inode))
> panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
>
> diff --git a/fs/libfs.c b/fs/libfs.c
> index e2de5401abca5a..600bebc1cd847f 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -1216,7 +1216,7 @@ static int anon_set_page_dirty(struct page *page)
> return 0;
> };
>
> -struct inode *alloc_anon_inode(struct super_block *s)
> +struct inode *alloc_anon_inode_sb(struct super_block *s)
> {
> static const struct address_space_operations anon_aops = {
> .set_page_dirty = anon_set_page_dirty,
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ec8f3ddf4a6aa8..52387368af3c00 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3286,7 +3286,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping,
> loff_t pos, unsigned len, unsigned copied,
> struct page *page, void *fsdata);
> extern int always_delete_dentry(const struct dentry *);
> -extern struct inode *alloc_anon_inode(struct super_block *);
> +extern struct inode *alloc_anon_inode_sb(struct super_block *);
> extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
> extern const struct dentry_operations simple_dentry_operations;
>
> diff --git a/kernel/resource.c b/kernel/resource.c
> index 627e61b0c12418..0fd091a3f2fc66 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -1863,7 +1863,7 @@ static int __init iomem_init_inode(void)
> return rc;
> }
>
> - inode = alloc_anon_inode(iomem_vfs_mount->mnt_sb);
> + inode = alloc_anon_inode_sb(iomem_vfs_mount->mnt_sb);
> if (IS_ERR(inode)) {
> rc = PTR_ERR(inode);
> pr_err("Cannot allocate inode for iomem: %d\n", rc);
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index b5dafa7e44e429..e7cd9298b221f5 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -376,7 +376,7 @@ static void z3fold_unmount(void)
> static const struct address_space_operations z3fold_aops;
> static int z3fold_register_migration(struct z3fold_pool *pool)
> {
> - pool->inode = alloc_anon_inode(z3fold_mnt->mnt_sb);
> + pool->inode = alloc_anon_inode_sb(z3fold_mnt->mnt_sb);
> if (IS_ERR(pool->inode)) {
> pool->inode = NULL;
> return 1;
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 30c358b7202510..a6449a2ad861de 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -2086,7 +2086,7 @@ static const struct address_space_operations zsmalloc_aops = {
>
> static int zs_register_migration(struct zs_pool *pool)
> {
> - pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb);
> + pool->inode = alloc_anon_inode_sb(zsmalloc_mnt->mnt_sb);
> if (IS_ERR(pool->inode)) {
> pool->inode = NULL;
> return 1;
>
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH 2/9] fs: add an argument-less alloc_anon_inode
From: David Hildenbrand @ 2021-03-09 16:22 UTC (permalink / raw)
To: Christoph Hellwig, Al Viro
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc., linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-3-hch@lst.de>
On 09.03.21 16:53, Christoph Hellwig wrote:
> Add a new alloc_anon_inode helper that allocates an inode on
> the anon_inode file system.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/anon_inodes.c | 15 +++++++++++++--
> include/linux/anon_inodes.h | 1 +
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
> index 4745fc37014332..b6a8ea71920bc3 100644
> --- a/fs/anon_inodes.c
> +++ b/fs/anon_inodes.c
> @@ -63,7 +63,7 @@ static struct inode *anon_inode_make_secure_inode(
> const struct qstr qname = QSTR_INIT(name, strlen(name));
> int error;
>
> - inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
> + inode = alloc_anon_inode();
> if (IS_ERR(inode))
> return inode;
> inode->i_flags &= ~S_PRIVATE;
> @@ -225,13 +225,24 @@ int anon_inode_getfd_secure(const char *name, const struct file_operations *fops
> }
> EXPORT_SYMBOL_GPL(anon_inode_getfd_secure);
>
> +/**
> + * alloc_anon_inode - create a new anonymous inode
> + *
> + * Create an inode on the anon_inode file system and return it.
> + */
> +struct inode *alloc_anon_inode(void)
> +{
> + return alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
> +}
> +EXPORT_SYMBOL_GPL(alloc_anon_inode);
> +
> static int __init anon_inode_init(void)
> {
> anon_inode_mnt = kern_mount(&anon_inode_fs_type);
> if (IS_ERR(anon_inode_mnt))
> panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt));
>
> - anon_inode_inode = alloc_anon_inode_sb(anon_inode_mnt->mnt_sb);
> + anon_inode_inode = alloc_anon_inode();
> if (IS_ERR(anon_inode_inode))
> panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
>
> diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
> index 71881a2b6f7860..b5ae9a6eda9923 100644
> --- a/include/linux/anon_inodes.h
> +++ b/include/linux/anon_inodes.h
> @@ -21,6 +21,7 @@ int anon_inode_getfd_secure(const char *name,
> const struct file_operations *fops,
> void *priv, int flags,
> const struct inode *context_inode);
> +struct inode *alloc_anon_inode(void);
>
> #endif /* _LINUX_ANON_INODES_H */
>
>
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH 6/9] virtio_balloon: remove the balloon-kvm file system
From: David Hildenbrand @ 2021-03-09 16:29 UTC (permalink / raw)
To: Christoph Hellwig, Al Viro
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc., linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-7-hch@lst.de>
On 09.03.21 16:53, Christoph Hellwig wrote:
> Just use the generic anon_inode file system.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> drivers/virtio/virtio_balloon.c | 30 +++---------------------------
> 1 file changed, 3 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index cae76ee5bdd688..1efb890cd3ff09 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -6,6 +6,7 @@
> * Copyright 2008 Rusty Russell IBM Corporation
> */
>
> +#include <linux/anon_inodes.h>
> #include <linux/virtio.h>
> #include <linux/virtio_balloon.h>
> #include <linux/swap.h>
> @@ -42,10 +43,6 @@
> (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
> #define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
>
> -#ifdef CONFIG_BALLOON_COMPACTION
> -static struct vfsmount *balloon_mnt;
> -#endif
> -
> enum virtio_balloon_vq {
> VIRTIO_BALLOON_VQ_INFLATE,
> VIRTIO_BALLOON_VQ_DEFLATE,
> @@ -805,18 +802,6 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
>
> return MIGRATEPAGE_SUCCESS;
> }
> -
> -static int balloon_init_fs_context(struct fs_context *fc)
> -{
> - return init_pseudo(fc, BALLOON_KVM_MAGIC) ? 0 : -ENOMEM;
> -}
> -
> -static struct file_system_type balloon_fs = {
> - .name = "balloon-kvm",
> - .init_fs_context = balloon_init_fs_context,
> - .kill_sb = kill_anon_super,
> -};
> -
> #endif /* CONFIG_BALLOON_COMPACTION */
>
> static unsigned long shrink_free_pages(struct virtio_balloon *vb,
> @@ -909,17 +894,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
> goto out_free_vb;
>
> #ifdef CONFIG_BALLOON_COMPACTION
> - balloon_mnt = kern_mount(&balloon_fs);
> - if (IS_ERR(balloon_mnt)) {
> - err = PTR_ERR(balloon_mnt);
> - goto out_del_vqs;
> - }
> -
> vb->vb_dev_info.migratepage = virtballoon_migratepage;
> - vb->vb_dev_info.inode = alloc_anon_inode_sb(balloon_mnt->mnt_sb);
> + vb->vb_dev_info.inode = alloc_anon_inode();
> if (IS_ERR(vb->vb_dev_info.inode)) {
> err = PTR_ERR(vb->vb_dev_info.inode);
> - goto out_kern_unmount;
> + goto out_del_vqs;
> }
> vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
> #endif
> @@ -1016,8 +995,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
> out_iput:
> #ifdef CONFIG_BALLOON_COMPACTION
> iput(vb->vb_dev_info.inode);
> -out_kern_unmount:
> - kern_unmount(balloon_mnt);
> out_del_vqs:
> #endif
> vdev->config->del_vqs(vdev);
> @@ -1070,7 +1047,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
> if (vb->vb_dev_info.inode)
> iput(vb->vb_dev_info.inode);
>
> - kern_unmount(balloon_mnt);
> #endif
> kfree(vb);
> }
>
... you might know what I am going to say :)
Apart from that LGTM.
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH 5/9] vmw_balloon: remove the balloon-vmware file system
From: David Hildenbrand @ 2021-03-09 16:28 UTC (permalink / raw)
To: Christoph Hellwig, Al Viro
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc., linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-6-hch@lst.de>
On 09.03.21 16:53, Christoph Hellwig wrote:
> Just use the generic anon_inode file system.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> drivers/misc/vmw_balloon.c | 24 ++----------------------
> 1 file changed, 2 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
> index 5d057a05ddbee8..be4be32f858253 100644
> --- a/drivers/misc/vmw_balloon.c
> +++ b/drivers/misc/vmw_balloon.c
> @@ -16,6 +16,7 @@
> //#define DEBUG
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
> +#include <linux/anon_inodes.h>
> #include <linux/types.h>
> #include <linux/io.h>
> #include <linux/kernel.h>
> @@ -1735,20 +1736,6 @@ static inline void vmballoon_debugfs_exit(struct vmballoon *b)
>
>
> #ifdef CONFIG_BALLOON_COMPACTION
> -
> -static int vmballoon_init_fs_context(struct fs_context *fc)
> -{
> - return init_pseudo(fc, BALLOON_VMW_MAGIC) ? 0 : -ENOMEM;
> -}
> -
> -static struct file_system_type vmballoon_fs = {
> - .name = "balloon-vmware",
> - .init_fs_context = vmballoon_init_fs_context,
> - .kill_sb = kill_anon_super,
> -};
> -
> -static struct vfsmount *vmballoon_mnt;
> -
> /**
> * vmballoon_migratepage() - migrates a balloon page.
> * @b_dev_info: balloon device information descriptor.
> @@ -1878,8 +1865,6 @@ static void vmballoon_compaction_deinit(struct vmballoon *b)
> iput(b->b_dev_info.inode);
>
> b->b_dev_info.inode = NULL;
> - kern_unmount(vmballoon_mnt);
> - vmballoon_mnt = NULL;
> }
>
> /**
> @@ -1895,13 +1880,8 @@ static void vmballoon_compaction_deinit(struct vmballoon *b)
> */
> static __init int vmballoon_compaction_init(struct vmballoon *b)
> {
> - vmballoon_mnt = kern_mount(&vmballoon_fs);
> - if (IS_ERR(vmballoon_mnt))
> - return PTR_ERR(vmballoon_mnt);
> -
> b->b_dev_info.migratepage = vmballoon_migratepage;
> - b->b_dev_info.inode = alloc_anon_inode_sb(vmballoon_mnt->mnt_sb);
> -
> + b->b_dev_info.inode = alloc_anon_inode();
> if (IS_ERR(b->b_dev_info.inode))
> return PTR_ERR(b->b_dev_info.inode);
>
>
Same comment regarding BALLOON_VMW_MAGIC and includes (mount.h,
pseudo_fs.h).
Apart from that looks good.
--
Thanks,
David / dhildenb
^ permalink raw reply
* Re: [PATCH 3/9] powerpc/pseries: remove the ppc-cmm file system
From: Jason Gunthorpe @ 2021-03-09 16:30 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Michael S. Tsirkin, VMware, Inc., David Hildenbrand, linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Al Viro, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-4-hch@lst.de>
On Tue, Mar 09, 2021 at 04:53:42PM +0100, Christoph Hellwig wrote:
> Just use the generic anon_inode file system.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> arch/powerpc/platforms/pseries/cmm.c | 27 ++-------------------------
> 1 file changed, 2 insertions(+), 25 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
> index 6d36b858b14df1..9d07e6bea7126c 100644
> +++ b/arch/powerpc/platforms/pseries/cmm.c
> @@ -6,6 +6,7 @@
> * Author(s): Brian King (brking@linux.vnet.ibm.com),
> */
>
> +#include <linux/anon_inodes.h>
> #include <linux/ctype.h>
> #include <linux/delay.h>
> #include <linux/errno.h>
> @@ -502,19 +503,6 @@ static struct notifier_block cmm_mem_nb = {
> };
>
> #ifdef CONFIG_BALLOON_COMPACTION
> -static struct vfsmount *balloon_mnt;
> -
> -static int cmm_init_fs_context(struct fs_context *fc)
> -{
> - return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
Should we clean these unusued magic constants too?
include/uapi/linux/magic.h:#define PPC_CMM_MAGIC 0xc7571590
Jason
^ permalink raw reply
* Re: make alloc_anon_inode more useful
From: Jason Gunthorpe @ 2021-03-09 16:54 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Michael S. Tsirkin, VMware, Inc., David Hildenbrand, linux-kernel,
dri-devel, virtualization, linux-mm, Minchan Kim, Alex Williamson,
Nadav Amit, Al Viro, Daniel Vetter, linux-fsdevel, Andrew Morton,
linuxppc-dev, Nitin Gupta
In-Reply-To: <20210309155348.974875-1-hch@lst.de>
On Tue, Mar 09, 2021 at 04:53:39PM +0100, Christoph Hellwig wrote:
> Hi all,
>
> this series first renames the existing alloc_anon_inode to
> alloc_anon_inode_sb to clearly mark it as requiring a superblock.
>
> It then adds a new alloc_anon_inode that works on the anon_inode
> file system super block, thus removing tons of boilerplate code.
>
> The few remainig callers of alloc_anon_inode_sb all use alloc_file_pseudo
> later, but might also be ripe for some cleanup.
I like it
For a submission plan can we have this on a git branch please? I will
need a copy for RDMA and Alex will need one for vfio..
Thanks,
Jason
^ permalink raw reply
* Re: [PATCH v2 8/8] powerpc/xive: Map one IPI interrupt per node
From: Cédric Le Goater @ 2021-03-09 15:52 UTC (permalink / raw)
To: Greg Kurz; +Cc: linuxppc-dev
In-Reply-To: <20210309142331.1b9456c2@bahia.lan>
On 3/9/21 2:23 PM, Greg Kurz wrote:
> On Wed, 3 Mar 2021 18:48:57 +0100
> Cédric Le Goater <clg@kaod.org> wrote:
>
>> ipistorm [*] can be used to benchmark the raw interrupt rate of an
>> interrupt controller by measuring the number of IPIs a system can
>> sustain. When applied to the XIVE interrupt controller of POWER9 and
>> POWER10 systems, a significant drop of the interrupt rate can be
>> observed when crossing the second node boundary.
>>
>> This is due to the fact that a single IPI interrupt is used for all
>> CPUs of the system. The structure is shared and the cache line updates
>> impact greatly the traffic between nodes and the overall IPI
>> performance.
>>
>> As a workaround, the impact can be reduced by deactivating the IRQ
>> lockup detector ("noirqdebug") which does a lot of accounting in the
>> Linux IRQ descriptor structure and is responsible for most of the
>> performance penalty.
>>
>> As a fix, this proposal allocates an IPI interrupt per node, to be
>> shared by all CPUs of that node. It solves the scaling issue, the IRQ
>> lockup detector still has an impact but the XIVE interrupt rate scales
>> linearly. It also improves the "noirqdebug" case as showed in the
>> tables below.
>>
>> * P9 DD2.2 - 2s * 64 threads
>>
>> "noirqdebug"
>> Mint/s Mint/s
>> chips cpus IPI/sys IPI/chip IPI/chip IPI/sys
>> --------------------------------------------------------------
>> 1 0-15 4.984023 4.875405 4.996536 5.048892
>> 0-31 10.879164 10.544040 10.757632 11.037859
>> 0-47 15.345301 14.688764 14.926520 15.310053
>> 0-63 17.064907 17.066812 17.613416 17.874511
>> 2 0-79 11.768764 21.650749 22.689120 22.566508
>> 0-95 10.616812 26.878789 28.434703 28.320324
>> 0-111 10.151693 31.397803 31.771773 32.388122
>> 0-127 9.948502 33.139336 34.875716 35.224548
>>
>> * P10 DD1 - 4s (not homogeneous) 352 threads
>>
>> "noirqdebug"
>> Mint/s Mint/s
>> chips cpus IPI/sys IPI/chip IPI/chip IPI/sys
>> --------------------------------------------------------------
>> 1 0-15 2.409402 2.364108 2.383303 2.395091
>> 0-31 6.028325 6.046075 6.089999 6.073750
>> 0-47 8.655178 8.644531 8.712830 8.724702
>> 0-63 11.629652 11.735953 12.088203 12.055979
>> 0-79 14.392321 14.729959 14.986701 14.973073
>> 0-95 12.604158 13.004034 17.528748 17.568095
>> 2 0-111 9.767753 13.719831 19.968606 20.024218
>> 0-127 6.744566 16.418854 22.898066 22.995110
>> 0-143 6.005699 19.174421 25.425622 25.417541
>> 0-159 5.649719 21.938836 27.952662 28.059603
>> 0-175 5.441410 24.109484 31.133915 31.127996
>> 3 0-191 5.318341 24.405322 33.999221 33.775354
>> 0-207 5.191382 26.449769 36.050161 35.867307
>> 0-223 5.102790 29.356943 39.544135 39.508169
>> 0-239 5.035295 31.933051 42.135075 42.071975
>> 0-255 4.969209 34.477367 44.655395 44.757074
>> 4 0-271 4.907652 35.887016 47.080545 47.318537
>> 0-287 4.839581 38.076137 50.464307 50.636219
>> 0-303 4.786031 40.881319 53.478684 53.310759
>> 0-319 4.743750 43.448424 56.388102 55.973969
>> 0-335 4.709936 45.623532 59.400930 58.926857
>> 0-351 4.681413 45.646151 62.035804 61.830057
>>
>> [*] https://github.com/antonblanchard/ipistorm
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>> arch/powerpc/sysdev/xive/xive-internal.h | 2 --
>> arch/powerpc/sysdev/xive/common.c | 39 ++++++++++++++++++------
>> 2 files changed, 30 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
>> index 9cf57c722faa..b3a456fdd3a5 100644
>> --- a/arch/powerpc/sysdev/xive/xive-internal.h
>> +++ b/arch/powerpc/sysdev/xive/xive-internal.h
>> @@ -5,8 +5,6 @@
>> #ifndef __XIVE_INTERNAL_H
>> #define __XIVE_INTERNAL_H
>>
>> -#define XIVE_IPI_HW_IRQ 0 /* interrupt source # for IPIs */
>> -
>> /*
>> * A "disabled" interrupt should never fire, to catch problems
>> * we set its logical number to this
>> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
>> index 8eefd152b947..c27f7bb0494b 100644
>> --- a/arch/powerpc/sysdev/xive/common.c
>> +++ b/arch/powerpc/sysdev/xive/common.c
>> @@ -65,8 +65,16 @@ static struct irq_domain *xive_irq_domain;
>> #ifdef CONFIG_SMP
>> static struct irq_domain *xive_ipi_irq_domain;
>>
>> -/* The IPIs all use the same logical irq number */
>> -static u32 xive_ipi_irq;
>> +/* The IPIs use the same logical irq number when on the same chip */
>> +static struct xive_ipi_desc {
>> + unsigned int irq;
>> + char name[8]; /* enough bytes to fit IPI-XXX */
>
> So this assumes that the node number that node is <= 999 ? This
> is certainly the case for now since CONFIG_NODES_SHIFT is 8
> on ppc64 but starting with 10, you'd have truncated names.
It should be harmless though. I agree this is a useless optimization.
> What about deriving the size of name[] from CONFIG_NODES_SHIFT ?
Yes.
> Apart from that, LGTM. Probably not worth to respin just for
> this.
>
> I also could give a try in a KVM guest.
>
> Topology passed to QEMU:
>
> -smp 8,maxcpus=8,cores=2,threads=2,sockets=2 \
> -numa node,nodeid=0,cpus=0-4 \
> -numa node,nodeid=1,cpus=4-7
>
> Topology observed in guest with lstopo :
>
> Package L#0
> NUMANode L#0 (P#0 30GB)
> L1d L#0 (32KB) + L1i L#0 (32KB) + Core L#0
> PU L#0 (P#0)
> PU L#1 (P#1)
> L1d L#1 (32KB) + L1i L#1 (32KB) + Core L#1
> PU L#2 (P#2)
> PU L#3 (P#3)
> Package L#1
> NUMANode L#1 (P#1 32GB)
> L1d L#2 (32KB) + L1i L#2 (32KB) + Core L#2
> PU L#4 (P#4)
> PU L#5 (P#5)
> L1d L#3 (32KB) + L1i L#3 (32KB) + Core L#3
> PU L#6 (P#6)
> PU L#7 (P#7)
>
> Interrupts in guest:
>
> $ cat /proc/interrupts
> CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
> 16: 1023 871 1042 749 0 0 0 0 XIVE-IPI 0 Edge IPI-0
> 17: 0 0 0 0 2123 1019 1263 1288 XIVE-IPI 1 Edge IPI-1
>
> IPIs are mapped to the appropriate nodes, and the numbers indicate
> that everything is working as expected.
You should see the same on 2 socket PowerNV QEMU machine.
> Reviewed-and-tested-by: Greg Kurz <groug@kaod.org>
Thanks,
C.
>
>> +} *xive_ipis;
>> +
>> +static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
>> +{
>> + return xive_ipis[cpu_to_node(cpu)].irq;
>> +}
>> #endif
>>
>> /* Xive state for each CPU */
>> @@ -1106,25 +1114,36 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
>>
>> static void __init xive_request_ipi(void)
>> {
>> - unsigned int virq;
>> + unsigned int node;
>>
>> - xive_ipi_irq_domain = irq_domain_add_linear(NULL, 1,
>> + xive_ipi_irq_domain = irq_domain_add_linear(NULL, nr_node_ids,
>> &xive_ipi_irq_domain_ops, NULL);
>> if (WARN_ON(xive_ipi_irq_domain == NULL))
>> return;
>>
>> - /* Initialize it */
>> - virq = irq_create_mapping(xive_ipi_irq_domain, XIVE_IPI_HW_IRQ);
>> - xive_ipi_irq = virq;
>> + xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
>> + for_each_node(node) {
>> + struct xive_ipi_desc *xid = &xive_ipis[node];
>> + irq_hw_number_t node_ipi_hwirq = node;
>> +
>> + /*
>> + * Map one IPI interrupt per node for all cpus of that node.
>> + * Since the HW interrupt number doesn't have any meaning,
>> + * simply use the node number.
>> + */
>> + xid->irq = irq_create_mapping(xive_ipi_irq_domain, node_ipi_hwirq);
>> + snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
>>
>> - WARN_ON(request_irq(virq, xive_muxed_ipi_action,
>> - IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
>> + WARN_ON(request_irq(xid->irq, xive_muxed_ipi_action,
>> + IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL));
>> + }
>> }
>>
>> static int xive_setup_cpu_ipi(unsigned int cpu)
>> {
>> struct xive_cpu *xc;
>> int rc;
>> + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
>>
>> pr_debug("Setting up IPI for CPU %d\n", cpu);
>>
>> @@ -1165,6 +1184,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
>>
>> static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
>> {
>> + unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
>> +
>> /* Disable the IPI and free the IRQ data */
>>
>> /* Already cleaned up ? */
>
^ permalink raw reply
* Re: [PATCH 3/3] powerpc/qspinlock: Use generic smp_cond_load_relaxed
From: Michal Suchánek @ 2021-03-09 17:30 UTC (permalink / raw)
To: Davidlohr Bueso
Cc: Davidlohr Bueso, peterz, will, linux-kernel, npiggin, mingo,
paulus, longman, linuxppc-dev
In-Reply-To: <20210309154611.kbxzx65auzvmfqnt@offworld>
On Tue, Mar 09, 2021 at 07:46:11AM -0800, Davidlohr Bueso wrote:
> On Tue, 09 Mar 2021, Michal Such�nek wrote:
>
> > On Mon, Mar 08, 2021 at 05:59:50PM -0800, Davidlohr Bueso wrote:
> > > 49a7d46a06c3 (powerpc: Implement smp_cond_load_relaxed()) added
> > > busy-waiting pausing with a preferred SMT priority pattern, lowering
> > > the priority (reducing decode cycles) during the whole loop slowpath.
> > >
> > > However, data shows that while this pattern works well with simple
> > ^^^^^^^^^^^^^^^^^^^^^^
> > > spinlocks, queued spinlocks benefit more being kept in medium priority,
> > > with a cpu_relax() instead, being a low+medium combo on powerpc.
> > ...
> > >
> > > diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
> > > index aecfde829d5d..7ae29cfb06c0 100644
> > > --- a/arch/powerpc/include/asm/barrier.h
> > > +++ b/arch/powerpc/include/asm/barrier.h
> > > @@ -80,22 +80,6 @@ do { \
> > > ___p1; \
> > > })
> > >
> > > -#ifdef CONFIG_PPC64
> > Maybe it should be kept for the simple spinlock case then?
>
> It is kept, note that simple spinlocks don't use smp_cond_load_relaxed,
> but instead deal with the priorities in arch_spin_lock(), so it will
> spin in low priority until it sees a chance to take the lock, where
> it switches back to medium.
Indeed, thanks for the clarification.
Michal
^ permalink raw reply
* [PATCH] powerpc/xmon: Check cpu id in commands "c#", "dp#" and "dx#"
From: Greg Kurz @ 2021-03-09 18:11 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Cédric Le Goater
All these commands end up peeking into the PACA using the user originated
cpu id as an index. Check the cpu id is valid in order to prevent xmon to
crash. Instead of printing an error, this follows the same behavior as the
"lp s #" command : ignore the buggy cpu id parameter and fall back to the
#-less version of the command.
Signed-off-by: Greg Kurz <groug@kaod.org>
---
arch/powerpc/xmon/xmon.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 80fbf8968f77..d3d6e044228e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1248,7 +1248,7 @@ static int cpu_cmd(void)
unsigned long cpu, first_cpu, last_cpu;
int timeout;
- if (!scanhex(&cpu)) {
+ if (!scanhex(&cpu) || cpu >= num_possible_cpus()) {
/* print cpus waiting or in xmon */
printf("cpus stopped:");
last_cpu = first_cpu = NR_CPUS;
@@ -2678,7 +2678,7 @@ static void dump_pacas(void)
termch = c; /* Put c back, it wasn't 'a' */
- if (scanhex(&num))
+ if (scanhex(&num) && num < num_possible_cpus())
dump_one_paca(num);
else
dump_one_paca(xmon_owner);
@@ -2751,7 +2751,7 @@ static void dump_xives(void)
termch = c; /* Put c back, it wasn't 'a' */
- if (scanhex(&num))
+ if (scanhex(&num) && num < num_possible_cpus())
dump_one_xive(num);
else
dump_one_xive(xmon_owner);
^ permalink raw reply related
* Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
From: Cédric Le Goater @ 2021-03-09 17:26 UTC (permalink / raw)
To: Daniel Henrique Barboza, Greg Kurz
Cc: list@suse.de:PowerPC, linuxppc-dev, QEMU Developers, David Gibson
In-Reply-To: <3180b5c6-e61f-9c5f-3c80-f10e69dc5785@linux.ibm.com>
On 3/9/21 6:08 PM, Daniel Henrique Barboza wrote:
>
>
> On 3/9/21 12:33 PM, Cédric Le Goater wrote:
>> On 3/8/21 6:13 PM, Greg Kurz wrote:
>>> On Wed, 3 Mar 2021 18:48:50 +0100
>>> Cédric Le Goater <clg@kaod.org> wrote:
>>>
>>>> The 'chip_id' field of the XIVE CPU structure is used to choose a
>>>> target for a source located on the same chip when possible. This field
>>>> is assigned on the PowerNV platform using the "ibm,chip-id" property
>>>> on pSeries under KVM when NUMA nodes are defined but it is undefined
>>>
>>> This sentence seems to have a syntax problem... like it is missing an
>>> 'and' before 'on pSeries'.
>>
>> ah yes, or simply a comma.
>>
>>>> under PowerVM. The XIVE source structure has a similar field
>>>> 'src_chip' which is only assigned on the PowerNV platform.
>>>>
>>>> cpu_to_node() returns a compatible value on all platforms, 0 being the
>>>> default node. It will also give us the opportunity to set the affinity
>>>> of a source on pSeries when we can localize them.
>>>>
>>>
>>> IIUC this relies on the fact that the NUMA node id is == to chip id
>>> on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable
>>> with this change.
>>
>> Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall
>> H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel
>> in Cc:)
>
> That's correct. H_HOME_NODE_ASSOCIATIVITY returns not only the node_id, but
> a list with the ibm,associativity domains of the CPU that "proc-no" (processor
> identifier) is mapped to inside QEMU.
>
> node_id in this case, considering that we're working with a reference-points
> of size 4, is the 4th element of the returned list. The last element is
> "procno" itself.
>
>
>>
>> On PowerNV, Linux uses "ibm,associativity" property of the CPU to find
>> the node id. This value is built from the chip id in OPAL, so the
>> value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id"
>> property are unlikely to be different.
>>
>> cpu_to_node(cpu) is used in many places to allocate the structures
>> locally to the owning node. XIVE is not an exception (see below in the
>> same patch), it is better to be consistent and get the same information
>> (node id) using the same routine.
>>
>>
>> In Linux, "ibm,chip-id" is only used in low level PowerNV drivers :
>> LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot
>> unifies the controllers of the system to only expose one the OS. This
>> is problematic and should be changed but it's another topic.
>>
>>
>>> On the other hand, you have the pSeries case under PowerVM that
>>> doesn't xc->chip_id, which isn't passed to any hcall AFAICT.
>>
>> yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning
>> under PAPR. xc->chip_id on pseries (PowerVM) will contains an invalid
>> chip id.
>>
>> QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not
>> always correct btw)
>
>
> If you have a way to reliably reproduce this, let me know and I'll fix it
> up in QEMU.
with :
-smp 4,cores=1,maxcpus=8 -object memory-backend-ram,id=ram-node0,size=2G -numa node,nodeid=0,cpus=0-1,cpus=4-5,memdev=ram-node0 -object memory-backend-ram,id=ram-node1,size=2G -numa node,nodeid=1,cpus=2-3,cpus=6-7,memdev=ram-node1
# dmesg | grep numa
[ 0.013106] numa: Node 0 CPUs: 0-1
[ 0.013136] numa: Node 1 CPUs: 2-3
# dtc -I fs /proc/device-tree/cpus/ -f | grep ibm,chip-id
ibm,chip-id = <0x01>;
ibm,chip-id = <0x02>;
ibm,chip-id = <0x00>;
ibm,chip-id = <0x03>;
with :
-smp 4,cores=4,maxcpus=8,threads=1 -object memory-backend-ram,id=ram-node0,size=2G -numa node,nodeid=0,cpus=0-1,cpus=4-5,memdev=ram-node0 -object memory-backend-ram,id=ram-node1,size=2G -numa node,nodeid=1,cpus=2-3,cpus=6-7,memdev=ram-node1
# dmesg | grep numa
[ 0.013106] numa: Node 0 CPUs: 0-1
[ 0.013136] numa: Node 1 CPUs: 2-3
# dtc -I fs /proc/device-tree/cpus/ -f | grep ibm,chip-id
ibm,chip-id = <0x00>;
ibm,chip-id = <0x00>;
ibm,chip-id = <0x00>;
ibm,chip-id = <0x00>;
I think we should simply remove "ibm,chip-id" since it's not used and
not in the PAPR spec.
Thanks,
C.
>
> Thanks,
>
>
> DHB
>
>
>>
>>> It looks like the chip id is only used for localization purpose in
>>> this case, right ?
>>
>> Yes and PAPR sources are not localized. So it's not used. MSI sources
>> could be if we rewrote the MSI driver.
>>
>>> In this case, what about doing this change for pSeries only,
>>> somewhere in spapr.c ?
>>
>> The IPI code is common to all platforms and all have the same issue.
>> I rather not.
>>
>> Thanks,
>>
>> C.
>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>> arch/powerpc/sysdev/xive/common.c | 7 +------
>>>> 1 file changed, 1 insertion(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
>>>> index 595310e056f4..b8e456da28aa 100644
>>>> --- a/arch/powerpc/sysdev/xive/common.c
>>>> +++ b/arch/powerpc/sysdev/xive/common.c
>>>> @@ -1335,16 +1335,11 @@ static int xive_prepare_cpu(unsigned int cpu)
>>>> xc = per_cpu(xive_cpu, cpu);
>>>> if (!xc) {
>>>> - struct device_node *np;
>>>> -
>>>> xc = kzalloc_node(sizeof(struct xive_cpu),
>>>> GFP_KERNEL, cpu_to_node(cpu));
>>>> if (!xc)
>>>> return -ENOMEM;
>>>> - np = of_get_cpu_node(cpu, NULL);
>>>> - if (np)
>>>> - xc->chip_id = of_get_ibm_chip_id(np);
>>>> - of_node_put(np);
>>>> + xc->chip_id = cpu_to_node(cpu);
>>>> xc->hw_ipi = XIVE_BAD_IRQ;
>>>> per_cpu(xive_cpu, cpu) = xc;
>>>
>>
^ permalink raw reply
* Re: [PATCH v2 3/7] powerpc: convert config files to generic cmdline
From: Daniel Walker @ 2021-03-09 21:29 UTC (permalink / raw)
To: Christophe Leroy
Cc: Rob Herring, Daniel Gimpelevich, linuxppc-dev, x86, linux-mips,
linux-kernel, Paul Mackerras, xe-linux-external, Andrew Morton,
Will Deacon
In-Reply-To: <5f865584-09c9-d21f-ffb7-23cf07cf058e@csgroup.eu>
On Tue, Mar 09, 2021 at 08:47:09AM +0100, Christophe Leroy wrote:
>
>
> Le 09/03/2021 à 01:02, Daniel Walker a écrit :
> > This is a scripted mass convert of the config files to use
> > the new generic cmdline. There is a bit of a trim effect here.
> > It would seems that some of the config haven't been trimmed in
> > a while.
>
> If you do that in a separate patch, you loose bisectability.
>
> I think it would have been better to do things in a different way, more or less like I did in my series:
> 1/ Provide GENERIC cmdline at the same functionnality level as what is
> spread in the different architectures
> 2/ Convert architectures to the generic with least churn.
> 3/ Add new features to the generic
You have to have the churn eventually, no matter how you do it. The only way you
don't have churn is if you never upgrade the feature set.
> >
> > The bash script used to convert is as follows,
> >
> > if [[ -z "$1" || -z "$2" ]]; then
> > echo "Two arguments are needed."
> > exit 1
> > fi
> > mkdir $1
> > cp $2 $1/.config
> > sed -i 's/CONFIG_CMDLINE=/CONFIG_CMDLINE_BOOL=y\nCONFIG_CMDLINE_PREPEND=/g' $1/.config
>
> This is not correct.
>
> By default, on powerpc the provided command line is used only if the bootloader doesn't provide one.
>
> Otherwise:
> - the builtin command line is appended to the one provided by the bootloader
> if CONFIG_CMDLINE_EXTEND is selected
> - the builtin command line replaces to the one provided by the bootloader if
> CONFIG_CMDLINE_FORCE is selected
I think my changes maintain most of this due to the override of
CONFIG_CMDLINE_PREPEND. This is an upgrade and the inflexibility in powerpc is
an example of why these changes were created in the first place.
For example , say the default command line is "root=/dev/issblk0" from iss476
platform. And the bootloader adds "root=/dev/sda1"
The result is <prepend><bootloader><append>.
Then you have,
root=/dev/issblk0 root=/dev/sda1
and the bootloader has precedent over the default command line. So root= in the
above cases is defined by the bootloader.
The only issue would be if a person wants to override the default command line
with an unrelated bootloader command line. I don't know how many people do this,
but I doubt it's many. Can you think of any use cases like this?
I would imagine there are many more people who have to entirely duplicate the
default command line in the boot loader when they really just want to change a
single part of it like the root= device or console device or speed.
Daniel
^ permalink raw reply
* Re: [PATCH v2 4/7] CMDLINE: powerpc: convert to generic builtin command line
From: Daniel Walker @ 2021-03-09 21:40 UTC (permalink / raw)
To: Christophe Leroy
Cc: Rob Herring, Ruslan Ruslichenko, Ruslan Bilovol,
Daniel Gimpelevich, linuxppc-dev, x86, linux-mips, linux-kernel,
Paul Mackerras, xe-linux-external, Andrew Morton, Will Deacon
In-Reply-To: <c5c8b57e-7954-ec02-188a-7f85cb0af731@csgroup.eu>
On Tue, Mar 09, 2021 at 08:56:47AM +0100, Christophe Leroy wrote:
>
>
> Le 09/03/2021 à 01:02, Daniel Walker a écrit :
> > This updates the powerpc code to use the CONFIG_GENERIC_CMDLINE
> > option.
> >
> > Cc: xe-linux-external@cisco.com
> > Signed-off-by: Ruslan Ruslichenko <rruslich@cisco.com>
> > Signed-off-by: Ruslan Bilovol <rbilovol@cisco.com>
> > Signed-off-by: Daniel Walker <danielwa@cisco.com>
> > ---
> > arch/powerpc/Kconfig | 37 +--------------------------------
> > arch/powerpc/kernel/prom.c | 1 +
> > arch/powerpc/kernel/prom_init.c | 35 ++++++++++++++++++-------------
> > 3 files changed, 23 insertions(+), 50 deletions(-)
> >
> > diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> > index 107bb4319e0e..276b06d5c961 100644
> > --- a/arch/powerpc/Kconfig
> > +++ b/arch/powerpc/Kconfig
> > @@ -167,6 +167,7 @@ config PPC
> > select EDAC_SUPPORT
> > select GENERIC_ATOMIC64 if PPC32
> > select GENERIC_CLOCKEVENTS_BROADCAST if SMP
> > + select GENERIC_CMDLINE
> > select GENERIC_CMOS_UPDATE
> > select GENERIC_CPU_AUTOPROBE
> > select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
> > @@ -906,42 +907,6 @@ config PPC_DENORMALISATION
> > Add support for handling denormalisation of single precision
> > values. Useful for bare metal only. If unsure say Y here.
> > -config CMDLINE
> > - string "Initial kernel command string"
> > - default ""
> > - help
> > - On some platforms, there is currently no way for the boot loader to
> > - pass arguments to the kernel. For these platforms, you can supply
> > - some command-line options at build time by entering them here. In
> > - most cases you will need to specify the root device here.
> > -
> > -choice
> > - prompt "Kernel command line type" if CMDLINE != ""
> > - default CMDLINE_FROM_BOOTLOADER
> > -
> > -config CMDLINE_FROM_BOOTLOADER
> > - bool "Use bootloader kernel arguments if available"
> > - help
> > - Uses the command-line options passed by the boot loader. If
> > - the boot loader doesn't provide any, the default kernel command
> > - string provided in CMDLINE will be used.
> > -
> > -config CMDLINE_EXTEND
> > - bool "Extend bootloader kernel arguments"
> > - help
> > - The command-line arguments provided by the boot loader will be
> > - appended to the default kernel command string.
> > -
> > -config CMDLINE_FORCE
> > - bool "Always use the default kernel command string"
> > - help
> > - Always use the default kernel command string, even if the boot
> > - loader passes other arguments to the kernel.
> > - This is useful if you cannot or don't want to change the
> > - command-line options your boot loader passes to the kernel.
> > -
> > -endchoice
> > -
> > config EXTRA_TARGETS
> > string "Additional default image types"
> > help
> > diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
> > index ae3c41730367..96d0a01be1b4 100644
> > --- a/arch/powerpc/kernel/prom.c
> > +++ b/arch/powerpc/kernel/prom.c
> > @@ -27,6 +27,7 @@
> > #include <linux/irq.h>
> > #include <linux/memblock.h>
> > #include <linux/of.h>
> > +#include <linux/cmdline.h>
>
> Why is this needed in prom.c ?
Must have been a mistake, I don't think it's needed.
> > #include <linux/of_fdt.h>
> > #include <linux/libfdt.h>
> > #include <linux/cpu.h>
> > diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> > index e9d4eb6144e1..657241534d69 100644
> > --- a/arch/powerpc/kernel/prom_init.c
> > +++ b/arch/powerpc/kernel/prom_init.c
> > @@ -27,6 +27,7 @@
> > #include <linux/initrd.h>
> > #include <linux/bitops.h>
> > #include <linux/pgtable.h>
> > +#include <linux/cmdline.h>
> > #include <asm/prom.h>
> > #include <asm/rtas.h>
> > #include <asm/page.h>
> > @@ -242,15 +243,6 @@ static int __init prom_strcmp(const char *cs, const char *ct)
> > return 0;
> > }
> > -static char __init *prom_strcpy(char *dest, const char *src)
> > -{
> > - char *tmp = dest;
> > -
> > - while ((*dest++ = *src++) != '\0')
> > - /* nothing */;
> > - return tmp;
> > -}
> > -
>
> This game with prom_strcpy() should go a separate preceeding patch.
>
> Also, it looks like checkpatch.pl recommends to use strscpy() instead of strlcpy().
strscpy() is very large. I'm not sure it's compatible with this prom_init.c
environment.
> > static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
> > {
> > unsigned char c1, c2;
> > @@ -276,6 +268,20 @@ static size_t __init prom_strlen(const char *s)
> > return sc - s;
> > }
> > +static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
> > +{
> > + size_t ret = prom_strlen(src);
> > +
> > + if (size) {
> > + size_t len = (ret >= size) ? size - 1 : ret;
> > +
> > + memcpy(dest, src, len);
> > + dest[len] = '\0';
> > + }
> > + return ret;
> > +}
> > +
> > +
> > static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
> > {
> > const unsigned char *su1, *su2;
> > @@ -304,6 +310,7 @@ static char __init *prom_strstr(const char *s1, const char *s2)
> > return NULL;
> > }
> > +#ifdef GENERIC_CMDLINE_NEED_STRLCAT
> > static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
> > {
> > size_t dsize = prom_strlen(dest);
> > @@ -323,6 +330,7 @@ static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
> > return res;
> > }
> > +#endif
> > #ifdef CONFIG_PPC_PSERIES
> > static int __init prom_strtobool(const char *s, bool *res)
> > @@ -775,12 +783,11 @@ static void __init early_cmdline_parse(void)
> > prom_cmd_line[0] = 0;
> > p = prom_cmd_line;
> > - if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
> > + if ((long)prom.chosen > 0)
> > l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
> > - if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
> > - prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
> > - sizeof(prom_cmd_line));
> > + cmdline_add_builtin_custom(prom_cmd_line, (l > 0 ? p : NULL), sizeof(prom_cmd_line),
> > + __prombss, prom_strlcpy, prom_strlcat);
>
> So we are referencing a function that doesn't exist (namely prom_strlcat).
> But it works because cmdline_add_builtin_custom() looks like a function but
> is in fact an obscure macro that doesn't use prom_strlcat() unless
> GENERIC_CMDLINE_NEED_STRLCAT is defined.
>
> IMHO that's awful for readability and code maintenance.
powerpc is a special case, there's no other users like this. The reason is
because of all the difficulty in this prom_init.c code. A lot of the generic
code has similar kind of changes to work across architectures.
> > prom_printf("command line: %s\n", prom_cmd_line);
> > @@ -2706,7 +2713,7 @@ static void __init flatten_device_tree(void)
> > /* Add "phandle" in there, we'll need it */
> > namep = make_room(&mem_start, &mem_end, 16, 1);
> > - prom_strcpy(namep, "phandle");
> > + prom_strlcpy(namep, "phandle", 8);
>
> Should be in a separate patch.
I can move it, I missed that from the first round.
Daniel
^ permalink raw reply
* Re: [PATCH] ethernet: ucc_geth: Use kmemdup instead of kmalloc and memcpy
From: Rasmus Villemoes @ 2021-03-09 21:41 UTC (permalink / raw)
To: angkery, leoyang.li, davem, kuba
Cc: netdev, linuxppc-dev, linux-kernel, Junlin Yang
In-Reply-To: <20210305142711.3022-1-angkery@163.com>
On 05/03/2021 15.27, angkery wrote:
> From: Junlin Yang <yangjunlin@yulong.com>
>
> Fixes coccicheck warnings:
> ./drivers/net/ethernet/freescale/ucc_geth.c:3594:11-18:
> WARNING opportunity for kmemdup
>
> Signed-off-by: Junlin Yang <yangjunlin@yulong.com>
> ---
> drivers/net/ethernet/freescale/ucc_geth.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
> index ef4e2fe..2c079ad 100644
> --- a/drivers/net/ethernet/freescale/ucc_geth.c
> +++ b/drivers/net/ethernet/freescale/ucc_geth.c
> @@ -3591,10 +3591,9 @@ static int ucc_geth_probe(struct platform_device* ofdev)
> if ((ucc_num < 0) || (ucc_num > 7))
> return -ENODEV;
>
> - ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL);
> + ug_info = kmemdup(&ugeth_primary_info, sizeof(*ug_info), GFP_KERNEL);
> if (ug_info == NULL)
> return -ENOMEM;
> - memcpy(ug_info, &ugeth_primary_info, sizeof(*ug_info));
>
> ug_info->uf_info.ucc_num = ucc_num;
>
>
Ah, yes, of course, I should have used that.
Acked-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
^ permalink raw reply
* Re: [PATCH v2 1/8] powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
From: Daniel Henrique Barboza @ 2021-03-09 17:08 UTC (permalink / raw)
To: Cédric Le Goater, Greg Kurz; +Cc: linuxppc-dev
In-Reply-To: <8dd98e22-1f10-e87b-3fe3-e786bc9a8d71@kaod.org>
On 3/9/21 12:33 PM, Cédric Le Goater wrote:
> On 3/8/21 6:13 PM, Greg Kurz wrote:
>> On Wed, 3 Mar 2021 18:48:50 +0100
>> Cédric Le Goater <clg@kaod.org> wrote:
>>
>>> The 'chip_id' field of the XIVE CPU structure is used to choose a
>>> target for a source located on the same chip when possible. This field
>>> is assigned on the PowerNV platform using the "ibm,chip-id" property
>>> on pSeries under KVM when NUMA nodes are defined but it is undefined
>>
>> This sentence seems to have a syntax problem... like it is missing an
>> 'and' before 'on pSeries'.
>
> ah yes, or simply a comma.
>
>>> under PowerVM. The XIVE source structure has a similar field
>>> 'src_chip' which is only assigned on the PowerNV platform.
>>>
>>> cpu_to_node() returns a compatible value on all platforms, 0 being the
>>> default node. It will also give us the opportunity to set the affinity
>>> of a source on pSeries when we can localize them.
>>>
>>
>> IIUC this relies on the fact that the NUMA node id is == to chip id
>> on PowerNV, i.e. xc->chip_id which is passed to OPAL remain stable
>> with this change.
>
> Linux sets the NUMA node in numa_setup_cpu(). On pseries, the hcall
> H_HOME_NODE_ASSOCIATIVITY returns the node id if I am correct (Daniel
> in Cc:)
That's correct. H_HOME_NODE_ASSOCIATIVITY returns not only the node_id, but
a list with the ibm,associativity domains of the CPU that "proc-no" (processor
identifier) is mapped to inside QEMU.
node_id in this case, considering that we're working with a reference-points
of size 4, is the 4th element of the returned list. The last element is
"procno" itself.
>
> On PowerNV, Linux uses "ibm,associativity" property of the CPU to find
> the node id. This value is built from the chip id in OPAL, so the
> value returned by cpu_to_node(cpu) and the value of the "ibm,chip-id"
> property are unlikely to be different.
>
> cpu_to_node(cpu) is used in many places to allocate the structures
> locally to the owning node. XIVE is not an exception (see below in the
> same patch), it is better to be consistent and get the same information
> (node id) using the same routine.
>
>
> In Linux, "ibm,chip-id" is only used in low level PowerNV drivers :
> LPC, XSCOM, RNG, VAS, NX. XIVE should be in that list also but skiboot
> unifies the controllers of the system to only expose one the OS. This
> is problematic and should be changed but it's another topic.
>
>
>> On the other hand, you have the pSeries case under PowerVM that
>> doesn't xc->chip_id, which isn't passed to any hcall AFAICT.
>
> yes "ibm,chip-id" is an OPAL concept unfortunately and it has no meaning
> under PAPR. xc->chip_id on pseries (PowerVM) will contains an invalid
> chip id.
>
> QEMU/KVM exposes "ibm,chip-id" but it's not used. (its value is not
> always correct btw)
If you have a way to reliably reproduce this, let me know and I'll fix it
up in QEMU.
Thanks,
DHB
>
>> It looks like the chip id is only used for localization purpose in
>> this case, right ?
>
> Yes and PAPR sources are not localized. So it's not used. MSI sources
> could be if we rewrote the MSI driver.
>
>> In this case, what about doing this change for pSeries only,
>> somewhere in spapr.c ?
>
> The IPI code is common to all platforms and all have the same issue.
> I rather not.
>
> Thanks,
>
> C.
>
>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>> ---
>>> arch/powerpc/sysdev/xive/common.c | 7 +------
>>> 1 file changed, 1 insertion(+), 6 deletions(-)
>>>
>>> diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
>>> index 595310e056f4..b8e456da28aa 100644
>>> --- a/arch/powerpc/sysdev/xive/common.c
>>> +++ b/arch/powerpc/sysdev/xive/common.c
>>> @@ -1335,16 +1335,11 @@ static int xive_prepare_cpu(unsigned int cpu)
>>>
>>> xc = per_cpu(xive_cpu, cpu);
>>> if (!xc) {
>>> - struct device_node *np;
>>> -
>>> xc = kzalloc_node(sizeof(struct xive_cpu),
>>> GFP_KERNEL, cpu_to_node(cpu));
>>> if (!xc)
>>> return -ENOMEM;
>>> - np = of_get_cpu_node(cpu, NULL);
>>> - if (np)
>>> - xc->chip_id = of_get_ibm_chip_id(np);
>>> - of_node_put(np);
>>> + xc->chip_id = cpu_to_node(cpu);
>>> xc->hw_ipi = XIVE_BAD_IRQ;
>>>
>>> per_cpu(xive_cpu, cpu) = xc;
>>
>
^ permalink raw reply
* Re: [PATCH 2/9] fs: add an argument-less alloc_anon_inode
From: Gao Xiang @ 2021-03-09 19:34 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc.,
David Hildenbrand, linux-kernel, dri-devel, virtualization,
linux-mm, Minchan Kim, Alex Williamson, Nadav Amit, Al Viro,
Daniel Vetter, linux-fsdevel, Andrew Morton, linuxppc-dev,
Nitin Gupta
In-Reply-To: <20210309155348.974875-3-hch@lst.de>
On Tue, Mar 09, 2021 at 04:53:41PM +0100, Christoph Hellwig wrote:
> Add a new alloc_anon_inode helper that allocates an inode on
> the anon_inode file system.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
Thanks,
Gao Xiang
^ permalink raw reply
* Re: [PATCH 1/9] fs: rename alloc_anon_inode to alloc_anon_inode_sb
From: Gao Xiang @ 2021-03-09 19:33 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jason Gunthorpe, Michael S. Tsirkin, VMware, Inc.,
David Hildenbrand, linux-kernel, dri-devel, virtualization,
linux-mm, Minchan Kim, Alex Williamson, Nadav Amit, Al Viro,
Daniel Vetter, linux-fsdevel, Andrew Morton, linuxppc-dev,
Nitin Gupta
In-Reply-To: <20210309155348.974875-2-hch@lst.de>
On Tue, Mar 09, 2021 at 04:53:40PM +0100, Christoph Hellwig wrote:
> Rename alloc_inode to free the name for a new variant that does not
> need boilerplate to create a super_block first.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
That is a nice idea as well to avoid sb by introducing an unique
fs...
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
Thanks,
Gao Xiang
^ permalink raw reply
* Re: [PATCH v1] powerpc: Include running function as first entry in save_stack_trace() and friends
From: Segher Boessenkool @ 2021-03-09 22:05 UTC (permalink / raw)
To: Mark Rutland
Cc: Marco Elver, Catalin Marinas, linuxppc-dev, LKML, kasan-dev,
broonie, Paul Mackerras, Will Deacon, linux-arm-kernel
In-Reply-To: <20210309160505.GA4979@C02TD0UTHF1T.local>
Hi!
On Tue, Mar 09, 2021 at 04:05:23PM +0000, Mark Rutland wrote:
> On Thu, Mar 04, 2021 at 03:54:48PM -0600, Segher Boessenkool wrote:
> > On Thu, Mar 04, 2021 at 02:57:30PM +0000, Mark Rutland wrote:
> > > It looks like GCC is happy to give us the function-entry-time FP if we use
> > > __builtin_frame_address(1),
> >
> > From the GCC manual:
> > Calling this function with a nonzero argument can have
> > unpredictable effects, including crashing the calling program. As
> > a result, calls that are considered unsafe are diagnosed when the
> > '-Wframe-address' option is in effect. Such calls should only be
> > made in debugging situations.
> >
> > It *does* warn (the warning is in -Wall btw), on both powerpc and
> > aarch64. Furthermore, using this builtin causes lousy code (it forces
> > the use of a frame pointer, which we normally try very hard to optimise
> > away, for good reason).
> >
> > And, that warning is not an idle warning. Non-zero arguments to
> > __builtin_frame_address can crash the program. It won't on simpler
> > functions, but there is no real definition of what a simpler function
> > *is*. It is meant for debugging, not for production use (this is also
> > why no one has bothered to make it faster).
> >
> > On Power it should work, but on pretty much any other arch it won't.
>
> I understand this is true generally, and cannot be relied upon in
> portable code. However as you hint here for Power, I believe that on
> arm64 __builtin_frame_address(1) shouldn't crash the program due to the
> way frame records work on arm64, but I'll go check with some local
> compiler folk. I agree that __builtin_frame_address(2) and beyond
> certainly can, e.g. by NULL dereference and similar.
I still do not know the aarch64 ABI well enough. If only I had time!
> For context, why do you think this would work on power specifically? I
> wonder if our rationale is similar.
On most 64-bit Power ABIs all stack frames are connected together as a
linked list (which is updated atomically, importantly). This makes it
possible to always find all previous stack frames.
> Are you aware of anything in particular that breaks using
> __builtin_frame_address(1) in non-portable code, or is this just a
> general sentiment of this not being a supported use-case?
It is not supported, and trying to do it anyway can crash: it can use
random stack contents as pointer! Not really "random" of course, but
where it thinks to find a pointer into the previous frame, which is not
something it can rely on (unless the ABI guarantees it somehow).
See gcc.gnu.org/PR60109 for example.
> > > Unless we can get some strong guarantees from compiler folk such that we
> > > can guarantee a specific function acts boundary for unwinding (and
> > > doesn't itself get split, etc), the only reliable way I can think to
> > > solve this requires an assembly trampoline. Whatever we do is liable to
> > > need some invasive rework.
> >
> > You cannot get such a guarantee, other than not letting the compiler
> > see into the routine at all, like with assembler code (not inline asm,
> > real assembler code).
>
> If we cannot reliably ensure this then I'm happy to go write an assembly
> trampoline to snapshot the state at a function call boundary (where our
> procedure call standard mandates the state of the LR, FP, and frame
> records pointed to by the FP).
Is the frame pointer required?!
> This'll require reworking a reasonable
> amount of code cross-architecture, so I'll need to get some more
> concrete justification (e.g. examples of things that can go wrong in
> practice).
Say you have a function that does dynamic stack allocation, then there
is usually no way to find the previous stack frame (without function-
specific knowledge). So __builtin_frame_address cannot work (it knows
nothing about frames further up).
Dynamic stack allocation (alloca, or variable length automatic arrays)
is just the most common and most convenient example; it is not the only
case you have problems here.
> > The real way forward is to bite the bullet and to no longer pretend you
> > can do a full backtrace from just the stack contents. You cannot.
>
> I think what you mean here is that there's no reliable way to handle the
> current/leaf function, right? If so I do agree.
No, I meant what I said.
There is the separate issue that you do not know where the return
address (etc.) is stored in a function that has not yet done a call
itself, sure. You cannot assume anything the ABI does not tell you you
can depend on.
> Beyond that I believe that arm64's frame records should be sufficient.
Do you have a simple linked list connecting all frames? The aarch64 GCC
port does not define anything special here (DYNAMIC_CHAIN_ADDRESS), so
the default will be used: every frame pointer has to point to the
previous one, no exceptions whatsoever.
Segher
^ permalink raw reply
* Re: Errant readings on LM81 with T2080 SoC
From: Chris Packham @ 2021-03-09 23:35 UTC (permalink / raw)
To: Guenter Roeck, jdelvare@suse.com
Cc: linux-hwmon@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
linux-kernel@vger.kernel.org, linux-i2c@vger.kernel.org
In-Reply-To: <96d660bc-17ab-4e0e-9a94-bce1737a8da1@roeck-us.net>
On 8/03/21 1:31 pm, Guenter Roeck wrote:
> On 3/7/21 2:52 PM, Chris Packham wrote:
>> Fundamentally I think this is a problem with the fact that the LM81 is
>> an SMBus device but the T2080 (and other Freescale SoCs) uses i2c and we
>> emulate SMBus. I suspect the errant readings are when we don't get round
>> to completing the read within the timeout specified by the SMBus
>> specification. Depending on when that happens we either fail the
>> transfer or interpret the result as all-1s.
> That is quite unlikely. Many sensor chips are SMBus chips connected to
> i2c busses. It is much more likely that there is a bug in the T2080 i2c driver,
> that the chip doesn't like the bulk read command issued through regmap, that
> the chip has problems with the i2c bus speed, or that the i2c bus is noisy.
I have noticed that with the switch to regmap we end up using plain i2c
instead of SMBUS. There appears to be no way of saying use SMBUS
semantics if the i2c adapter reports I2C_FUNC_I2C.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox