Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence

Linux toolchain discussions
 help / color / mirror / Atom feed

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
       [not found]         ` <9b6d585a-c17f-4dd1-9518-b28ac2dfd855@paulmck-laptop>
@ 2024-12-18 17:12           ` Peter Zijlstra
  2024-12-18 19:15             ` Paul E. McKenney
                               ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Peter Zijlstra @ 2024-12-18 17:12 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Daniel Xu, mingo, will, longman, boqun.feng, linux-kernel,
	linux-toolchains


+linux-toolchains

On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:

> > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> > I'd have to check what the compiler makes of that.
> > 
> > /me mucks about with godbolt for a bit...
> > 
> > GCC doesn't optimize that, but Clang does.
> > 
> > I would still very much refrain from making this change until both
> > compilers can generate sane code for it.
> 
> Is GCC on track to do this, or do we need to encourage them?

I have no clue; probably wise to offer encouragement.

> Just to make sure I understand, your proposal is to create an INC_ONCE()
> or similar, and add it once compiler support is there?  Seems reasonable
> to me, just checking.

I suppose we can work in parallel, add INC_ONCE() now, but not have a
proper definition for GCC.

---
 arch/riscv/kvm/vmid.c                     | 2 +-
 arch/s390/kernel/idle.c                   | 2 +-
 drivers/md/dm-vdo/indexer/index-session.c | 2 +-
 fs/xfs/libxfs/xfs_iext_tree.c             | 2 +-
 include/linux/compiler-gcc.h              | 5 +++++
 include/linux/compiler.h                  | 4 ++++
 include/linux/rcupdate_trace.h            | 2 +-
 include/linux/srcutiny.h                  | 2 +-
 io_uring/io_uring.c                       | 2 +-
 kernel/rcu/srcutree.c                     | 4 ++--
 kernel/rcu/tree_plugin.h                  | 2 +-
 kernel/sched/fair.c                       | 2 +-
 mm/kfence/kfence_test.c                   | 2 +-
 security/apparmor/apparmorfs.c            | 2 +-
 14 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index ddc98714ce8e..805a5acf669c 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -90,7 +90,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
 
 	/* First user of a new VMID version? */
 	if (unlikely(vmid_next == 0)) {
-		WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
+		INC_ONCE(vmid_version);
 		vmid_next = 1;
 
 		/*
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 39cb8d0ae348..8fb7cd75fe62 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -45,7 +45,7 @@ void account_idle_time_irq(void)
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
 	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
-	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
+	INC_ONC(idle->idle_count);
 	account_idle_time(cputime_to_nsecs(idle_time));
 }
 
diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c
index aee0914d604a..c5a7dee9dc66 100644
--- a/drivers/md/dm-vdo/indexer/index-session.c
+++ b/drivers/md/dm-vdo/indexer/index-session.c
@@ -152,7 +152,7 @@ static void enter_callback_stage(struct uds_request *request)
 
 static inline void count_once(u64 *count_ptr)
 {
-	WRITE_ONCE(*count_ptr, READ_ONCE(*count_ptr) + 1);
+	INC_ONCE(*count_ptr);
 }
 
 static void update_session_stats(struct uds_request *request)
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 8796f2b3e534..a1fcd4cf2424 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -626,7 +626,7 @@ xfs_iext_realloc_root(
  */
 static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
 {
-	WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
+	INC_ONCE(ifp->if_seq);
 }
 
 void
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c9b58188ec61..77c253e29758 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -137,3 +137,8 @@
 #if GCC_VERSION < 90100
 #undef __alloc_size__
 #endif
+
+/*
+ * GCC can't properly optimize the real one with volatile on.
+ */
+#define INC_ONCE(v) (v)++
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index efd43df3a99a..b1b13dac1b9e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -8,6 +8,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef INC_ONCE
+#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
+#endif
+
 /*
  * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
  * to disable branch tracing on a per file basis.
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index e6c44eb428ab..adb12e7304da 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -50,7 +50,7 @@ static inline void rcu_read_lock_trace(void)
 {
 	struct task_struct *t = current;
 
-	WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
+	INC_ONCE(t->trc_reader_nesting);
 	barrier();
 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
 	    t->trc_reader_special.b.need_mb)
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 1321da803274..bd4362323733 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -66,7 +66,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
 
 	preempt_disable();  // Needed for PREEMPT_AUTO
 	idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
-	WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
+	INC_ONCE(ssp->srcu_lock_nesting[idx]);
 	preempt_enable();
 	return idx;
 }
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 06ff41484e29..ef3d4871e775 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -373,7 +373,7 @@ static void io_account_cq_overflow(struct io_ring_ctx *ctx)
 {
 	struct io_rings *r = ctx->rings;
 
-	WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
+	INC_ONCE(r->cq_overflow);
 	ctx->cq_extra--;
 }
 
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 5e2e53464794..a812af81dbff 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -656,7 +656,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
 			jbase += j - gpstart;
 		if (!jbase) {
 			ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay);
-			WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
+			INC_ONCE(sup->srcu_n_exp_nodelay);
 			if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
 				jbase = 1;
 		}
@@ -1856,7 +1856,7 @@ static void process_srcu(struct work_struct *work)
 		j = jiffies;
 		if (READ_ONCE(sup->reschedule_jiffies) == j) {
 			ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count);
-			WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
+			INC_ONCE(sup->reschedule_count);
 			if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
 				curdelay = 1;
 		} else {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3927ea5f7955..51f525089c27 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -387,7 +387,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 
 static void rcu_preempt_read_enter(void)
 {
-	WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
+	INC_ONCE(current->rcu_read_lock_nesting);
 }
 
 static int rcu_preempt_read_exit(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f5329672815b..f0927407abbe 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3242,7 +3242,7 @@ static void reset_ptenuma_scan(struct task_struct *p)
 	 * statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not
 	 * expensive, to avoid any form of compiler optimizations:
 	 */
-	WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
+	INC_ONCE(p->mm->numa_scan_seq);
 	p->mm->numa_scan_offset = 0;
 }
 
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index f65fb182466d..e0bf31b1875e 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -517,7 +517,7 @@ static void test_kmalloc_aligned_oob_write(struct kunit *test)
 	 * fault immediately after it.
 	 */
 	expect.addr = buf + size;
-	WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
+	INC_ONCE(*expect.addr);
 	KUNIT_EXPECT_FALSE(test, report_available());
 	test_free(buf);
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 2c0185ebc900..63f5c8387764 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -596,7 +596,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)
 
 void __aa_bump_ns_revision(struct aa_ns *ns)
 {
-	WRITE_ONCE(ns->revision, READ_ONCE(ns->revision) + 1);
+	INC_ONCE(ns->revision);
 	wake_up_interruptible(&ns->wait);
 }
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-18 17:12           ` [PATCH] seqlock: Use WRITE_ONCE() when updating sequence Peter Zijlstra
@ 2024-12-18 19:15             ` Paul E. McKenney
  2024-12-18 19:56             ` Florian Weimer
  2024-12-19 16:34             ` Will Deacon
  2 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-18 19:15 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Daniel Xu, mingo, will, longman, boqun.feng, linux-kernel,
	linux-toolchains

On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> 
> +linux-toolchains
> 
> On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
> 
> > > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> > > I'd have to check what the compiler makes of that.
> > > 
> > > /me mucks about with godbolt for a bit...
> > > 
> > > GCC doesn't optimize that, but Clang does.
> > > 
> > > I would still very much refrain from making this change until both
> > > compilers can generate sane code for it.
> > 
> > Is GCC on track to do this, or do we need to encourage them?
> 
> I have no clue; probably wise to offer encouragement.

Hopefully your +linux-toolchain is a start.

> > Just to make sure I understand, your proposal is to create an INC_ONCE()
> > or similar, and add it once compiler support is there?  Seems reasonable
> > to me, just checking.
> 
> I suppose we can work in parallel, add INC_ONCE() now, but not have a
> proper definition for GCC.

This passes an rcutorture smoke test, so feel free to add:

Reviewed-by: Paul E. McKenney <paulmck@kernel.org>

> ---
>  arch/riscv/kvm/vmid.c                     | 2 +-
>  arch/s390/kernel/idle.c                   | 2 +-
>  drivers/md/dm-vdo/indexer/index-session.c | 2 +-
>  fs/xfs/libxfs/xfs_iext_tree.c             | 2 +-
>  include/linux/compiler-gcc.h              | 5 +++++
>  include/linux/compiler.h                  | 4 ++++
>  include/linux/rcupdate_trace.h            | 2 +-
>  include/linux/srcutiny.h                  | 2 +-
>  io_uring/io_uring.c                       | 2 +-
>  kernel/rcu/srcutree.c                     | 4 ++--
>  kernel/rcu/tree_plugin.h                  | 2 +-
>  kernel/sched/fair.c                       | 2 +-
>  mm/kfence/kfence_test.c                   | 2 +-
>  security/apparmor/apparmorfs.c            | 2 +-
>  14 files changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
> index ddc98714ce8e..805a5acf669c 100644
> --- a/arch/riscv/kvm/vmid.c
> +++ b/arch/riscv/kvm/vmid.c
> @@ -90,7 +90,7 @@ void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
>  
>  	/* First user of a new VMID version? */
>  	if (unlikely(vmid_next == 0)) {
> -		WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
> +		INC_ONCE(vmid_version);
>  		vmid_next = 1;
>  
>  		/*
> diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> index 39cb8d0ae348..8fb7cd75fe62 100644
> --- a/arch/s390/kernel/idle.c
> +++ b/arch/s390/kernel/idle.c
> @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
>  
>  	/* Account time spent with enabled wait psw loaded as idle time. */
>  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> +	INC_ONC(idle->idle_count);
>  	account_idle_time(cputime_to_nsecs(idle_time));
>  }
>  
> diff --git a/drivers/md/dm-vdo/indexer/index-session.c b/drivers/md/dm-vdo/indexer/index-session.c
> index aee0914d604a..c5a7dee9dc66 100644
> --- a/drivers/md/dm-vdo/indexer/index-session.c
> +++ b/drivers/md/dm-vdo/indexer/index-session.c
> @@ -152,7 +152,7 @@ static void enter_callback_stage(struct uds_request *request)
>  
>  static inline void count_once(u64 *count_ptr)
>  {
> -	WRITE_ONCE(*count_ptr, READ_ONCE(*count_ptr) + 1);
> +	INC_ONCE(*count_ptr);
>  }
>  
>  static void update_session_stats(struct uds_request *request)
> diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
> index 8796f2b3e534..a1fcd4cf2424 100644
> --- a/fs/xfs/libxfs/xfs_iext_tree.c
> +++ b/fs/xfs/libxfs/xfs_iext_tree.c
> @@ -626,7 +626,7 @@ xfs_iext_realloc_root(
>   */
>  static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
>  {
> -	WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
> +	INC_ONCE(ifp->if_seq);
>  }
>  
>  void
> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index c9b58188ec61..77c253e29758 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -137,3 +137,8 @@
>  #if GCC_VERSION < 90100
>  #undef __alloc_size__
>  #endif
> +
> +/*
> + * GCC can't properly optimize the real one with volatile on.
> + */
> +#define INC_ONCE(v) (v)++
> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index efd43df3a99a..b1b13dac1b9e 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -8,6 +8,10 @@
>  
>  #ifdef __KERNEL__
>  
> +#ifndef INC_ONCE
> +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> +#endif
> +
>  /*
>   * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code
>   * to disable branch tracing on a per file basis.
> diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
> index e6c44eb428ab..adb12e7304da 100644
> --- a/include/linux/rcupdate_trace.h
> +++ b/include/linux/rcupdate_trace.h
> @@ -50,7 +50,7 @@ static inline void rcu_read_lock_trace(void)
>  {
>  	struct task_struct *t = current;
>  
> -	WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
> +	INC_ONCE(t->trc_reader_nesting);
>  	barrier();
>  	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
>  	    t->trc_reader_special.b.need_mb)
> diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
> index 1321da803274..bd4362323733 100644
> --- a/include/linux/srcutiny.h
> +++ b/include/linux/srcutiny.h
> @@ -66,7 +66,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp)
>  
>  	preempt_disable();  // Needed for PREEMPT_AUTO
>  	idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1;
> -	WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1);
> +	INC_ONCE(ssp->srcu_lock_nesting[idx]);
>  	preempt_enable();
>  	return idx;
>  }
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 06ff41484e29..ef3d4871e775 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -373,7 +373,7 @@ static void io_account_cq_overflow(struct io_ring_ctx *ctx)
>  {
>  	struct io_rings *r = ctx->rings;
>  
> -	WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
> +	INC_ONCE(r->cq_overflow);
>  	ctx->cq_extra--;
>  }
>  
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 5e2e53464794..a812af81dbff 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -656,7 +656,7 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
>  			jbase += j - gpstart;
>  		if (!jbase) {
>  			ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay);
> -			WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1);
> +			INC_ONCE(sup->srcu_n_exp_nodelay);
>  			if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
>  				jbase = 1;
>  		}
> @@ -1856,7 +1856,7 @@ static void process_srcu(struct work_struct *work)
>  		j = jiffies;
>  		if (READ_ONCE(sup->reschedule_jiffies) == j) {
>  			ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count);
> -			WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1);
> +			INC_ONCE(sup->reschedule_count);
>  			if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay)
>  				curdelay = 1;
>  		} else {
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 3927ea5f7955..51f525089c27 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -387,7 +387,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
>  
>  static void rcu_preempt_read_enter(void)
>  {
> -	WRITE_ONCE(current->rcu_read_lock_nesting, READ_ONCE(current->rcu_read_lock_nesting) + 1);
> +	INC_ONCE(current->rcu_read_lock_nesting);
>  }
>  
>  static int rcu_preempt_read_exit(void)
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index f5329672815b..f0927407abbe 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3242,7 +3242,7 @@ static void reset_ptenuma_scan(struct task_struct *p)
>  	 * statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not
>  	 * expensive, to avoid any form of compiler optimizations:
>  	 */
> -	WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
> +	INC_ONCE(p->mm->numa_scan_seq);
>  	p->mm->numa_scan_offset = 0;
>  }
>  
> diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
> index f65fb182466d..e0bf31b1875e 100644
> --- a/mm/kfence/kfence_test.c
> +++ b/mm/kfence/kfence_test.c
> @@ -517,7 +517,7 @@ static void test_kmalloc_aligned_oob_write(struct kunit *test)
>  	 * fault immediately after it.
>  	 */
>  	expect.addr = buf + size;
> -	WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
> +	INC_ONCE(*expect.addr);
>  	KUNIT_EXPECT_FALSE(test, report_available());
>  	test_free(buf);
>  	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
> diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
> index 2c0185ebc900..63f5c8387764 100644
> --- a/security/apparmor/apparmorfs.c
> +++ b/security/apparmor/apparmorfs.c
> @@ -596,7 +596,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)
>  
>  void __aa_bump_ns_revision(struct aa_ns *ns)
>  {
> -	WRITE_ONCE(ns->revision, READ_ONCE(ns->revision) + 1);
> +	INC_ONCE(ns->revision);
>  	wake_up_interruptible(&ns->wait);
>  }
>  

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-18 17:12           ` [PATCH] seqlock: Use WRITE_ONCE() when updating sequence Peter Zijlstra
  2024-12-18 19:15             ` Paul E. McKenney
@ 2024-12-18 19:56             ` Florian Weimer
  2024-12-19 16:10               ` Paul E. McKenney
  2024-12-19 16:34             ` Will Deacon
  2 siblings, 1 reply; 13+ messages in thread
From: Florian Weimer @ 2024-12-18 19:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul E. McKenney, Daniel Xu, mingo, will, longman, boqun.feng,
	linux-kernel, linux-toolchains

* Peter Zijlstra:

> +linux-toolchains
>
> On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
>
>> > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
>> > I'd have to check what the compiler makes of that.
>> > 
>> > /me mucks about with godbolt for a bit...
>> > 
>> > GCC doesn't optimize that, but Clang does.
>> > 
>> > I would still very much refrain from making this change until both
>> > compilers can generate sane code for it.
>> 
>> Is GCC on track to do this, or do we need to encourage them?
>
> I have no clue; probably wise to offer encouragement.

What do you consider sane code?

Clang's choice to generate an incl instruction (on x86-64 at least) is a
bit surprising.  Curiously, the C11 abstract machine has a value-less
increment-in-place operation, so it's probably not in violation of the
volatile rules.  (C doesn't specify x++ in terms of ++x and x += 1.)

Thanks,
Florian


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-18 19:56             ` Florian Weimer
@ 2024-12-19 16:10               ` Paul E. McKenney
  2024-12-19 16:45                 ` Florian Weimer
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-19 16:10 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Peter Zijlstra, Daniel Xu, mingo, will, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Wed, Dec 18, 2024 at 08:56:07PM +0100, Florian Weimer wrote:
> * Peter Zijlstra:
> 
> > +linux-toolchains
> >
> > On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
> >
> >> > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> >> > I'd have to check what the compiler makes of that.
> >> > 
> >> > /me mucks about with godbolt for a bit...
> >> > 
> >> > GCC doesn't optimize that, but Clang does.
> >> > 
> >> > I would still very much refrain from making this change until both
> >> > compilers can generate sane code for it.
> >> 
> >> Is GCC on track to do this, or do we need to encourage them?
> >
> > I have no clue; probably wise to offer encouragement.
> 
> What do you consider sane code?

Peter's "(*(volatile unsigned int *)&s->sequence)++;" qualifies as sane.

> Clang's choice to generate an incl instruction (on x86-64 at least) is a
> bit surprising.  Curiously, the C11 abstract machine has a value-less
> increment-in-place operation, so it's probably not in violation of the
> volatile rules.  (C doesn't specify x++ in terms of ++x and x += 1.)

Very good!  Should I do something like file a bug somewhere to help
this along?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 16:10               ` Paul E. McKenney
@ 2024-12-19 16:45                 ` Florian Weimer
  2024-12-19 17:48                   ` Paul E. McKenney
  2024-12-19 17:53                   ` Peter Zijlstra
  0 siblings, 2 replies; 13+ messages in thread
From: Florian Weimer @ 2024-12-19 16:45 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Peter Zijlstra, Daniel Xu, mingo, will, longman, boqun.feng,
	linux-kernel, linux-toolchains

* Paul E. McKenney:

> On Wed, Dec 18, 2024 at 08:56:07PM +0100, Florian Weimer wrote:
>> * Peter Zijlstra:
>> 
>> > +linux-toolchains
>> >
>> > On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
>> >
>> >> > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
>> >> > I'd have to check what the compiler makes of that.
>> >> > 
>> >> > /me mucks about with godbolt for a bit...
>> >> > 
>> >> > GCC doesn't optimize that, but Clang does.
>> >> > 
>> >> > I would still very much refrain from making this change until both
>> >> > compilers can generate sane code for it.
>> >> 
>> >> Is GCC on track to do this, or do we need to encourage them?
>> >
>> > I have no clue; probably wise to offer encouragement.
>> 
>> What do you consider sane code?
>
> Peter's "(*(volatile unsigned int *)&s->sequence)++;" qualifies as sane.

I think the reference was originally to machine code.

>> Clang's choice to generate an incl instruction (on x86-64 at least) is a
>> bit surprising.  Curiously, the C11 abstract machine has a value-less
>> increment-in-place operation, so it's probably not in violation of the
>> volatile rules.  (C doesn't specify x++ in terms of ++x and x += 1.)
>
> Very good!  Should I do something like file a bug somewhere to help
> this along?

I don't know.  It seems that Clang/LLVM is cheating.  It's doing this
optimization even for

  i = i + 1;

with a volatile i.  That doesn't look like “strictly according to the
abstract machine” anymore.  A proper implementation would need explicit
representation of volatile increment/decrement in the IR.  Given that
volatile increment/decrement is deprecated, that seems quite a bit of
effort.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 16:45                 ` Florian Weimer
@ 2024-12-19 17:48                   ` Paul E. McKenney
  2024-12-19 17:53                   ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-19 17:48 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Peter Zijlstra, Daniel Xu, mingo, will, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 05:45:15PM +0100, Florian Weimer wrote:
> * Paul E. McKenney:
> 
> > On Wed, Dec 18, 2024 at 08:56:07PM +0100, Florian Weimer wrote:
> >> * Peter Zijlstra:
> >> 
> >> > +linux-toolchains
> >> >
> >> > On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
> >> >
> >> >> > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> >> >> > I'd have to check what the compiler makes of that.
> >> >> > 
> >> >> > /me mucks about with godbolt for a bit...
> >> >> > 
> >> >> > GCC doesn't optimize that, but Clang does.
> >> >> > 
> >> >> > I would still very much refrain from making this change until both
> >> >> > compilers can generate sane code for it.
> >> >> 
> >> >> Is GCC on track to do this, or do we need to encourage them?
> >> >
> >> > I have no clue; probably wise to offer encouragement.
> >> 
> >> What do you consider sane code?
> >
> > Peter's "(*(volatile unsigned int *)&s->sequence)++;" qualifies as sane.
> 
> I think the reference was originally to machine code.

Very well, then compiling this to a to-memory increment instruction
qualifies as sane.

> >> Clang's choice to generate an incl instruction (on x86-64 at least) is a
> >> bit surprising.  Curiously, the C11 abstract machine has a value-less
> >> increment-in-place operation, so it's probably not in violation of the
> >> volatile rules.  (C doesn't specify x++ in terms of ++x and x += 1.)
> >
> > Very good!  Should I do something like file a bug somewhere to help
> > this along?
> 
> I don't know.  It seems that Clang/LLVM is cheating.  It's doing this
> optimization even for
> 
>   i = i + 1;
> 
> with a volatile i.  That doesn't look like “strictly according to the
> abstract machine” anymore.

How so?  It is in fact adding one to that volatile variable, which is
in accord with the abstract machine.

>                             A proper implementation would need explicit
> representation of volatile increment/decrement in the IR.  Given that
> volatile increment/decrement is deprecated, that seems quite a bit of
> effort.

If I remember correctly, there was a discussion in SG21 about
de-deprecating volatile increment/decrement.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 16:45                 ` Florian Weimer
  2024-12-19 17:48                   ` Paul E. McKenney
@ 2024-12-19 17:53                   ` Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Peter Zijlstra @ 2024-12-19 17:53 UTC (permalink / raw)
  To: Florian Weimer
  Cc: Paul E. McKenney, Daniel Xu, mingo, will, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 05:45:15PM +0100, Florian Weimer wrote:

> > Peter's "(*(volatile unsigned int *)&s->sequence)++;" qualifies as sane.
> 
> I think the reference was originally to machine code.

Correct; however more tinkering with godbolt got me:

  https://godbolt.org/z/6xoxjdYPx

Where we can see that clang can even optimize the WRITE_ONCE(foo,
READ_ONCE(foo) + 1) case, which I though it would not be able to do.

So perhaps we just need to get GCC fixed.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-18 17:12           ` [PATCH] seqlock: Use WRITE_ONCE() when updating sequence Peter Zijlstra
  2024-12-18 19:15             ` Paul E. McKenney
  2024-12-18 19:56             ` Florian Weimer
@ 2024-12-19 16:34             ` Will Deacon
  2024-12-19 17:53               ` Paul E. McKenney
  2 siblings, 1 reply; 13+ messages in thread
From: Will Deacon @ 2024-12-19 16:34 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul E. McKenney, Daniel Xu, mingo, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> 
> +linux-toolchains
> 
> On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
> 
> > > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> > > I'd have to check what the compiler makes of that.
> > > 
> > > /me mucks about with godbolt for a bit...
> > > 
> > > GCC doesn't optimize that, but Clang does.
> > > 
> > > I would still very much refrain from making this change until both
> > > compilers can generate sane code for it.
> > 
> > Is GCC on track to do this, or do we need to encourage them?
> 
> I have no clue; probably wise to offer encouragement.
> 
> > Just to make sure I understand, your proposal is to create an INC_ONCE()
> > or similar, and add it once compiler support is there?  Seems reasonable
> > to me, just checking.
> 
> I suppose we can work in parallel, add INC_ONCE() now, but not have a
> proper definition for GCC.

[...]

> diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> index 39cb8d0ae348..8fb7cd75fe62 100644
> --- a/arch/s390/kernel/idle.c
> +++ b/arch/s390/kernel/idle.c
> @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
>  
>  	/* Account time spent with enabled wait psw loaded as idle time. */
>  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> +	INC_ONC(idle->idle_count);

(nit: drive-by typo)

[...]

> diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> index c9b58188ec61..77c253e29758 100644
> --- a/include/linux/compiler-gcc.h
> +++ b/include/linux/compiler-gcc.h
> @@ -137,3 +137,8 @@
>  #if GCC_VERSION < 90100
>  #undef __alloc_size__
>  #endif
> +
> +/*
> + * GCC can't properly optimize the real one with volatile on.
> + */
> +#define INC_ONCE(v) (v)++

I think I'm missing what we're trying to do here, but how is this a
safe fallback? I'd have thought we'd need the whole READ_ONCE() +
WRITE_ONCE() sequence if the compiler doesn't give us what we need...

> diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> index efd43df3a99a..b1b13dac1b9e 100644
> --- a/include/linux/compiler.h
> +++ b/include/linux/compiler.h
> @@ -8,6 +8,10 @@
>  
>  #ifdef __KERNEL__
>  
> +#ifndef INC_ONCE
> +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> +#endif

... but I'm honestly not sure what this is aiming to elide. Is this about
having the compiler emit an increment instruction with memory operands
on architectures that support it? If so, what about architectures that
_don't_ support that? We still need to guarantee that the load/store
instructions are single-copy atomic for the type in that case.

Given how hard it can be to get the compiler to break atomicity for
plain accesses, I'm pretty worried about the robustness of this.

Will

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 16:34             ` Will Deacon
@ 2024-12-19 17:53               ` Paul E. McKenney
  2024-12-19 17:58                 ` Will Deacon
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-19 17:53 UTC (permalink / raw)
  To: Will Deacon
  Cc: Peter Zijlstra, Daniel Xu, mingo, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 04:34:55PM +0000, Will Deacon wrote:
> On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> > 
> > +linux-toolchains
> > 
> > On Wed, Dec 18, 2024 at 08:59:47AM -0800, Paul E. McKenney wrote:
> > 
> > > > Perhaps something like: (*(volatile unsigned int *)&s->sequence)++; ?
> > > > I'd have to check what the compiler makes of that.
> > > > 
> > > > /me mucks about with godbolt for a bit...
> > > > 
> > > > GCC doesn't optimize that, but Clang does.
> > > > 
> > > > I would still very much refrain from making this change until both
> > > > compilers can generate sane code for it.
> > > 
> > > Is GCC on track to do this, or do we need to encourage them?
> > 
> > I have no clue; probably wise to offer encouragement.
> > 
> > > Just to make sure I understand, your proposal is to create an INC_ONCE()
> > > or similar, and add it once compiler support is there?  Seems reasonable
> > > to me, just checking.
> > 
> > I suppose we can work in parallel, add INC_ONCE() now, but not have a
> > proper definition for GCC.
> 
> [...]
> 
> > diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> > index 39cb8d0ae348..8fb7cd75fe62 100644
> > --- a/arch/s390/kernel/idle.c
> > +++ b/arch/s390/kernel/idle.c
> > @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
> >  
> >  	/* Account time spent with enabled wait psw loaded as idle time. */
> >  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> > -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> > +	INC_ONC(idle->idle_count);
> 
> (nit: drive-by typo)

Heh!  I was clearly building with GCC.  ;-)

> [...]
> 
> > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> > index c9b58188ec61..77c253e29758 100644
> > --- a/include/linux/compiler-gcc.h
> > +++ b/include/linux/compiler-gcc.h
> > @@ -137,3 +137,8 @@
> >  #if GCC_VERSION < 90100
> >  #undef __alloc_size__
> >  #endif
> > +
> > +/*
> > + * GCC can't properly optimize the real one with volatile on.
> > + */
> > +#define INC_ONCE(v) (v)++
> 
> I think I'm missing what we're trying to do here, but how is this a
> safe fallback? I'd have thought we'd need the whole READ_ONCE() +
> WRITE_ONCE() sequence if the compiler doesn't give us what we need...
> 
> > diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> > index efd43df3a99a..b1b13dac1b9e 100644
> > --- a/include/linux/compiler.h
> > +++ b/include/linux/compiler.h
> > @@ -8,6 +8,10 @@
> >  
> >  #ifdef __KERNEL__
> >  
> > +#ifndef INC_ONCE
> > +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> > +#endif
> 
> ... but I'm honestly not sure what this is aiming to elide. Is this about
> having the compiler emit an increment instruction with memory operands
> on architectures that support it? If so, what about architectures that
> _don't_ support that? We still need to guarantee that the load/store
> instructions are single-copy atomic for the type in that case.

Architectures whose increment instructions lack memory operands can still
load, increment, then store.  So yes, if two of these run concurrently,
you can lose counts when incrementing normal memory.  (Of course, with
device memory, you get what you get.)

> Given how hard it can be to get the compiler to break atomicity for
> plain accesses, I'm pretty worried about the robustness of this.

Your point being that the current plain C-language postfix ++ is unlikely
to be adversely optimized?  If so, although I agree here in 2024,
the years pass quickly and increasingly clever compiler optimizations
accumulate.

							Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 17:53               ` Paul E. McKenney
@ 2024-12-19 17:58                 ` Will Deacon
  2024-12-19 18:06                   ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Will Deacon @ 2024-12-19 17:58 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Peter Zijlstra, Daniel Xu, mingo, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 09:53:48AM -0800, Paul E. McKenney wrote:
> On Thu, Dec 19, 2024 at 04:34:55PM +0000, Will Deacon wrote:
> > On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> > > diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> > > index 39cb8d0ae348..8fb7cd75fe62 100644
> > > --- a/arch/s390/kernel/idle.c
> > > +++ b/arch/s390/kernel/idle.c
> > > @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
> > >  
> > >  	/* Account time spent with enabled wait psw loaded as idle time. */
> > >  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> > > -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> > > +	INC_ONC(idle->idle_count);
> > 
> > (nit: drive-by typo)
> 
> Heh!  I was clearly building with GCC.  ;-)

And not for S390 :p

> > > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> > > index c9b58188ec61..77c253e29758 100644
> > > --- a/include/linux/compiler-gcc.h
> > > +++ b/include/linux/compiler-gcc.h
> > > @@ -137,3 +137,8 @@
> > >  #if GCC_VERSION < 90100
> > >  #undef __alloc_size__
> > >  #endif
> > > +
> > > +/*
> > > + * GCC can't properly optimize the real one with volatile on.
> > > + */
> > > +#define INC_ONCE(v) (v)++
> > 
> > I think I'm missing what we're trying to do here, but how is this a
> > safe fallback? I'd have thought we'd need the whole READ_ONCE() +
> > WRITE_ONCE() sequence if the compiler doesn't give us what we need...
> > 
> > > diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> > > index efd43df3a99a..b1b13dac1b9e 100644
> > > --- a/include/linux/compiler.h
> > > +++ b/include/linux/compiler.h
> > > @@ -8,6 +8,10 @@
> > >  
> > >  #ifdef __KERNEL__
> > >  
> > > +#ifndef INC_ONCE
> > > +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> > > +#endif
> > 
> > ... but I'm honestly not sure what this is aiming to elide. Is this about
> > having the compiler emit an increment instruction with memory operands
> > on architectures that support it? If so, what about architectures that
> > _don't_ support that? We still need to guarantee that the load/store
> > instructions are single-copy atomic for the type in that case.
> 
> Architectures whose increment instructions lack memory operands can still
> load, increment, then store.  So yes, if two of these run concurrently,
> you can lose counts when incrementing normal memory.  (Of course, with
> device memory, you get what you get.)

... but can we guarantee that those load and store instructions won't
now be torn?

> > Given how hard it can be to get the compiler to break atomicity for
> > plain accesses, I'm pretty worried about the robustness of this.
> 
> Your point being that the current plain C-language postfix ++ is unlikely
> to be adversely optimized?  If so, although I agree here in 2024,
> the years pass quickly and increasingly clever compiler optimizations
> accumulate.

Right, I think we're in agreement. I'm saying that just because this
proposed INC_ONCE() macro appears to generate the assembly code we want,
I don't have even an instinctive feeling that it's deliberate or
something that we should/can rely upon.

Will

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 17:58                 ` Will Deacon
@ 2024-12-19 18:06                   ` Paul E. McKenney
  2024-12-19 18:31                     ` Will Deacon
  0 siblings, 1 reply; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-19 18:06 UTC (permalink / raw)
  To: Will Deacon
  Cc: Peter Zijlstra, Daniel Xu, mingo, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 05:58:03PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2024 at 09:53:48AM -0800, Paul E. McKenney wrote:
> > On Thu, Dec 19, 2024 at 04:34:55PM +0000, Will Deacon wrote:
> > > On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> > > > diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> > > > index 39cb8d0ae348..8fb7cd75fe62 100644
> > > > --- a/arch/s390/kernel/idle.c
> > > > +++ b/arch/s390/kernel/idle.c
> > > > @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
> > > >  
> > > >  	/* Account time spent with enabled wait psw loaded as idle time. */
> > > >  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> > > > -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> > > > +	INC_ONC(idle->idle_count);
> > > 
> > > (nit: drive-by typo)
> > 
> > Heh!  I was clearly building with GCC.  ;-)
> 
> And not for S390 :p

True enough!  ;-)

> > > > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> > > > index c9b58188ec61..77c253e29758 100644
> > > > --- a/include/linux/compiler-gcc.h
> > > > +++ b/include/linux/compiler-gcc.h
> > > > @@ -137,3 +137,8 @@
> > > >  #if GCC_VERSION < 90100
> > > >  #undef __alloc_size__
> > > >  #endif
> > > > +
> > > > +/*
> > > > + * GCC can't properly optimize the real one with volatile on.
> > > > + */
> > > > +#define INC_ONCE(v) (v)++
> > > 
> > > I think I'm missing what we're trying to do here, but how is this a
> > > safe fallback? I'd have thought we'd need the whole READ_ONCE() +
> > > WRITE_ONCE() sequence if the compiler doesn't give us what we need...
> > > 
> > > > diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> > > > index efd43df3a99a..b1b13dac1b9e 100644
> > > > --- a/include/linux/compiler.h
> > > > +++ b/include/linux/compiler.h
> > > > @@ -8,6 +8,10 @@
> > > >  
> > > >  #ifdef __KERNEL__
> > > >  
> > > > +#ifndef INC_ONCE
> > > > +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> > > > +#endif
> > > 
> > > ... but I'm honestly not sure what this is aiming to elide. Is this about
> > > having the compiler emit an increment instruction with memory operands
> > > on architectures that support it? If so, what about architectures that
> > > _don't_ support that? We still need to guarantee that the load/store
> > > instructions are single-copy atomic for the type in that case.
> > 
> > Architectures whose increment instructions lack memory operands can still
> > load, increment, then store.  So yes, if two of these run concurrently,
> > you can lose counts when incrementing normal memory.  (Of course, with
> > device memory, you get what you get.)
> 
> ... but can we guarantee that those load and store instructions won't
> now be torn?

The volatile type qualifier must take care of that, or device drivers
no longer work.

Or are you worried about the C-language ++ fallback?  In that case,
as I understand it, this fallback is only there until GCC learns to
generate a increment-to-memory instruction for x86.

> > > Given how hard it can be to get the compiler to break atomicity for
> > > plain accesses, I'm pretty worried about the robustness of this.
> > 
> > Your point being that the current plain C-language postfix ++ is unlikely
> > to be adversely optimized?  If so, although I agree here in 2024,
> > the years pass quickly and increasingly clever compiler optimizations
> > accumulate.
> 
> Right, I think we're in agreement. I'm saying that just because this
> proposed INC_ONCE() macro appears to generate the assembly code we want,
> I don't have even an instinctive feeling that it's deliberate or
> something that we should/can rely upon.

My perhaps naive hope is that GCC updates permit the plain C-language
postfix ++ fallback goes away sooner rather than later.

Or am I still missing your point?

							Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 18:06                   ` Paul E. McKenney
@ 2024-12-19 18:31                     ` Will Deacon
  2024-12-20 17:40                       ` Paul E. McKenney
  0 siblings, 1 reply; 13+ messages in thread
From: Will Deacon @ 2024-12-19 18:31 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Peter Zijlstra, Daniel Xu, mingo, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 10:06:23AM -0800, Paul E. McKenney wrote:
> On Thu, Dec 19, 2024 at 05:58:03PM +0000, Will Deacon wrote:
> > On Thu, Dec 19, 2024 at 09:53:48AM -0800, Paul E. McKenney wrote:
> > > On Thu, Dec 19, 2024 at 04:34:55PM +0000, Will Deacon wrote:
> > > > On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> > > > > diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> > > > > index 39cb8d0ae348..8fb7cd75fe62 100644
> > > > > --- a/arch/s390/kernel/idle.c
> > > > > +++ b/arch/s390/kernel/idle.c
> > > > > @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
> > > > >  
> > > > >  	/* Account time spent with enabled wait psw loaded as idle time. */
> > > > >  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> > > > > -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> > > > > +	INC_ONC(idle->idle_count);
> > > > 
> > > > (nit: drive-by typo)
> > > 
> > > Heh!  I was clearly building with GCC.  ;-)
> > 
> > And not for S390 :p
> 
> True enough!  ;-)
> 
> > > > > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> > > > > index c9b58188ec61..77c253e29758 100644
> > > > > --- a/include/linux/compiler-gcc.h
> > > > > +++ b/include/linux/compiler-gcc.h
> > > > > @@ -137,3 +137,8 @@
> > > > >  #if GCC_VERSION < 90100
> > > > >  #undef __alloc_size__
> > > > >  #endif
> > > > > +
> > > > > +/*
> > > > > + * GCC can't properly optimize the real one with volatile on.
> > > > > + */
> > > > > +#define INC_ONCE(v) (v)++
> > > > 
> > > > I think I'm missing what we're trying to do here, but how is this a
> > > > safe fallback? I'd have thought we'd need the whole READ_ONCE() +
> > > > WRITE_ONCE() sequence if the compiler doesn't give us what we need...
> > > > 
> > > > > diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> > > > > index efd43df3a99a..b1b13dac1b9e 100644
> > > > > --- a/include/linux/compiler.h
> > > > > +++ b/include/linux/compiler.h
> > > > > @@ -8,6 +8,10 @@
> > > > >  
> > > > >  #ifdef __KERNEL__
> > > > >  
> > > > > +#ifndef INC_ONCE
> > > > > +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> > > > > +#endif
> > > > 
> > > > ... but I'm honestly not sure what this is aiming to elide. Is this about
> > > > having the compiler emit an increment instruction with memory operands
> > > > on architectures that support it? If so, what about architectures that
> > > > _don't_ support that? We still need to guarantee that the load/store
> > > > instructions are single-copy atomic for the type in that case.
> > > 
> > > Architectures whose increment instructions lack memory operands can still
> > > load, increment, then store.  So yes, if two of these run concurrently,
> > > you can lose counts when incrementing normal memory.  (Of course, with
> > > device memory, you get what you get.)
> > 
> > ... but can we guarantee that those load and store instructions won't
> > now be torn?
> 
> The volatile type qualifier must take care of that, or device drivers
> no longer work.
> 
> Or are you worried about the C-language ++ fallback?  In that case,
> as I understand it, this fallback is only there until GCC learns to
> generate a increment-to-memory instruction for x86.

As I understand it, both the clang and the GCC implementations in the
proposed diff are written using the C-language ++ operator, no?

The GCC one doesn't have 'volatile' at all:

	#define INC_ONCE(v) (v)++

so I don't understand why it's a correct replacement for e.g.:

	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);

If it was, then we wouldn't need any of these macros in the first place!

The clang version does have 'volatile':

	#define INC_ONCE(v)  (*(volatile typeof(v) *)&(v))++

but then I don't understand why this is enough for the compiler to
generate an 'inc' with memory operands on x86 if it cannot do that for
the READ_ONCE + WRITE_ONCE implementation. If somehow the above is more
"optimisable" then I worry about the atomicity being preserved as well.

So, basically, I don't understand what's going on here :)

I think I was expecting to see arch-specific implementations of
INC_ONCE() written in assembly where they have the relevant instructions
but that doesn't seem to be the case.

Will

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] seqlock: Use WRITE_ONCE() when updating sequence
  2024-12-19 18:31                     ` Will Deacon
@ 2024-12-20 17:40                       ` Paul E. McKenney
  0 siblings, 0 replies; 13+ messages in thread
From: Paul E. McKenney @ 2024-12-20 17:40 UTC (permalink / raw)
  To: Will Deacon
  Cc: Peter Zijlstra, Daniel Xu, mingo, longman, boqun.feng,
	linux-kernel, linux-toolchains

On Thu, Dec 19, 2024 at 06:31:15PM +0000, Will Deacon wrote:
> On Thu, Dec 19, 2024 at 10:06:23AM -0800, Paul E. McKenney wrote:
> > On Thu, Dec 19, 2024 at 05:58:03PM +0000, Will Deacon wrote:
> > > On Thu, Dec 19, 2024 at 09:53:48AM -0800, Paul E. McKenney wrote:
> > > > On Thu, Dec 19, 2024 at 04:34:55PM +0000, Will Deacon wrote:
> > > > > On Wed, Dec 18, 2024 at 06:12:41PM +0100, Peter Zijlstra wrote:
> > > > > > diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
> > > > > > index 39cb8d0ae348..8fb7cd75fe62 100644
> > > > > > --- a/arch/s390/kernel/idle.c
> > > > > > +++ b/arch/s390/kernel/idle.c
> > > > > > @@ -45,7 +45,7 @@ void account_idle_time_irq(void)
> > > > > >  
> > > > > >  	/* Account time spent with enabled wait psw loaded as idle time. */
> > > > > >  	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> > > > > > -	WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
> > > > > > +	INC_ONC(idle->idle_count);
> > > > > 
> > > > > (nit: drive-by typo)
> > > > 
> > > > Heh!  I was clearly building with GCC.  ;-)
> > > 
> > > And not for S390 :p
> > 
> > True enough!  ;-)
> > 
> > > > > > diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
> > > > > > index c9b58188ec61..77c253e29758 100644
> > > > > > --- a/include/linux/compiler-gcc.h
> > > > > > +++ b/include/linux/compiler-gcc.h
> > > > > > @@ -137,3 +137,8 @@
> > > > > >  #if GCC_VERSION < 90100
> > > > > >  #undef __alloc_size__
> > > > > >  #endif
> > > > > > +
> > > > > > +/*
> > > > > > + * GCC can't properly optimize the real one with volatile on.
> > > > > > + */
> > > > > > +#define INC_ONCE(v) (v)++
> > > > > 
> > > > > I think I'm missing what we're trying to do here, but how is this a
> > > > > safe fallback? I'd have thought we'd need the whole READ_ONCE() +
> > > > > WRITE_ONCE() sequence if the compiler doesn't give us what we need...
> > > > > 
> > > > > > diff --git a/include/linux/compiler.h b/include/linux/compiler.h
> > > > > > index efd43df3a99a..b1b13dac1b9e 100644
> > > > > > --- a/include/linux/compiler.h
> > > > > > +++ b/include/linux/compiler.h
> > > > > > @@ -8,6 +8,10 @@
> > > > > >  
> > > > > >  #ifdef __KERNEL__
> > > > > >  
> > > > > > +#ifndef INC_ONCE
> > > > > > +#define INC_ONCE(v)	(*(volatile typeof(v) *)&(v))++
> > > > > > +#endif
> > > > > 
> > > > > ... but I'm honestly not sure what this is aiming to elide. Is this about
> > > > > having the compiler emit an increment instruction with memory operands
> > > > > on architectures that support it? If so, what about architectures that
> > > > > _don't_ support that? We still need to guarantee that the load/store
> > > > > instructions are single-copy atomic for the type in that case.
> > > > 
> > > > Architectures whose increment instructions lack memory operands can still
> > > > load, increment, then store.  So yes, if two of these run concurrently,
> > > > you can lose counts when incrementing normal memory.  (Of course, with
> > > > device memory, you get what you get.)
> > > 
> > > ... but can we guarantee that those load and store instructions won't
> > > now be torn?
> > 
> > The volatile type qualifier must take care of that, or device drivers
> > no longer work.
> > 
> > Or are you worried about the C-language ++ fallback?  In that case,
> > as I understand it, this fallback is only there until GCC learns to
> > generate a increment-to-memory instruction for x86.
> 
> As I understand it, both the clang and the GCC implementations in the
> proposed diff are written using the C-language ++ operator, no?
> 
> The GCC one doesn't have 'volatile' at all:
> 
> 	#define INC_ONCE(v) (v)++
> 
> so I don't understand why it's a correct replacement for e.g.:
> 
> 	WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
> 
> If it was, then we wouldn't need any of these macros in the first place!

You are right, it is not a correct replacement.  It is instead a
behavior-preserving fallback until GCC does the optimizations.
Here the behavior is the code generation.

> The clang version does have 'volatile':
> 
> 	#define INC_ONCE(v)  (*(volatile typeof(v) *)&(v))++
> 
> but then I don't understand why this is enough for the compiler to
> generate an 'inc' with memory operands on x86 if it cannot do that for
> the READ_ONCE + WRITE_ONCE implementation. If somehow the above is more
> "optimisable" then I worry about the atomicity being preserved as well.
> 
> So, basically, I don't understand what's going on here :)

Peter learned that clang does in fact optimize the original READ_ONCE +
WRITE_ONCE implementation.

> I think I was expecting to see arch-specific implementations of
> INC_ONCE() written in assembly where they have the relevant instructions
> but that doesn't seem to be the case.

That is yet another fallback, though with quite a bit more code.  :-(

							Thanx, Paul

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2024-12-20 17:40 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <0eaea03ecc9df536649763cfecda356fc38b6938.1734477414.git.dxu@dxuuu.xyz>
     [not found] ` <20241218103000.GK11133@noisy.programming.kicks-ass.net>
     [not found]   ` <ff2aecbc-cd5d-4a9c-92b1-792555300301@paulmck-laptop>
     [not found]     ` <20241218162325.GH2354@noisy.programming.kicks-ass.net>
     [not found]       ` <20241218162934.GJ12500@noisy.programming.kicks-ass.net>
     [not found]         ` <9b6d585a-c17f-4dd1-9518-b28ac2dfd855@paulmck-laptop>
2024-12-18 17:12           ` [PATCH] seqlock: Use WRITE_ONCE() when updating sequence Peter Zijlstra
2024-12-18 19:15             ` Paul E. McKenney
2024-12-18 19:56             ` Florian Weimer
2024-12-19 16:10               ` Paul E. McKenney
2024-12-19 16:45                 ` Florian Weimer
2024-12-19 17:48                   ` Paul E. McKenney
2024-12-19 17:53                   ` Peter Zijlstra
2024-12-19 16:34             ` Will Deacon
2024-12-19 17:53               ` Paul E. McKenney
2024-12-19 17:58                 ` Will Deacon
2024-12-19 18:06                   ` Paul E. McKenney
2024-12-19 18:31                     ` Will Deacon
2024-12-20 17:40                       ` Paul E. McKenney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox