All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marco Elver <elver@google.com>
To: elver@google.com, Peter Zijlstra <peterz@infradead.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Mark Rutland <mark.rutland@arm.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Dmitry Vyukov <dvyukov@google.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	linuxppc-dev@lists.ozlabs.org, linux-perf-users@vger.kernel.org,
	x86@kernel.org, linux-sh@vger.kernel.org,
	kasan-dev@googlegroups.com, linux-kernel@vger.kernel.org
Subject: [PATCH v3 13/14] perf/hw_breakpoint: Optimize max_bp_pinned_slots() for CPU-independent task targets
Date: Mon,  4 Jul 2022 17:05:13 +0200	[thread overview]
Message-ID: <20220704150514.48816-14-elver@google.com> (raw)
In-Reply-To: <20220704150514.48816-1-elver@google.com>

Running the perf benchmark with (note: more aggressive parameters vs.
preceding changes, but same 256 CPUs host):

 | $> perf bench -r 100 breakpoint thread -b 4 -p 128 -t 512
 | # Running 'breakpoint/thread' benchmark:
 | # Created/joined 100 threads with 4 breakpoints and 128 parallelism
 |      Total time: 1.989 [sec]
 |
 |       38.854160 usecs/op
 |     4973.332500 usecs/op/cpu

    20.43%  [kernel]       [k] queued_spin_lock_slowpath
    18.75%  [kernel]       [k] osq_lock
    16.98%  [kernel]       [k] rhashtable_jhash2
     8.34%  [kernel]       [k] task_bp_pinned
     4.23%  [kernel]       [k] smp_cfm_core_cond
     3.65%  [kernel]       [k] bcmp
     2.83%  [kernel]       [k] toggle_bp_slot
     1.87%  [kernel]       [k] find_next_bit
     1.49%  [kernel]       [k] __reserve_bp_slot

We can see that a majority of the time is now spent hashing task
pointers to index into task_bps_ht in task_bp_pinned().

Obtaining the max_bp_pinned_slots() for CPU-independent task targets
currently is O(#cpus), and calls task_bp_pinned() for each CPU, even if
the result of task_bp_pinned() is CPU-independent.

The loop in max_bp_pinned_slots() wants to compute the maximum slots
across all CPUs. If task_bp_pinned() is CPU-independent, we can do so by
obtaining the max slots across all CPUs and adding task_bp_pinned().

To do so in O(1), use a bp_slots_histogram for CPU-pinned slots.

After this optimization:

 | $> perf bench -r 100 breakpoint thread -b 4 -p 128 -t 512
 | # Running 'breakpoint/thread' benchmark:
 | # Created/joined 100 threads with 4 breakpoints and 128 parallelism
 |      Total time: 1.930 [sec]
 |
 |       37.697832 usecs/op
 |     4825.322500 usecs/op/cpu

    19.13%  [kernel]       [k] queued_spin_lock_slowpath
    18.21%  [kernel]       [k] rhashtable_jhash2
    15.46%  [kernel]       [k] osq_lock
     6.27%  [kernel]       [k] toggle_bp_slot
     5.91%  [kernel]       [k] task_bp_pinned
     5.05%  [kernel]       [k] smp_cfm_core_cond
     1.78%  [kernel]       [k] update_sg_lb_stats
     1.36%  [kernel]       [k] llist_reverse_order
     1.34%  [kernel]       [k] find_next_bit
     1.19%  [kernel]       [k] bcmp

Suggesting that time spent in task_bp_pinned() has been reduced.
However, we're still hashing too much, which will be addressed in the
subsequent change.

Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
---
v3:
* Update hw_breakpoint_is_used() to include global cpu_pinned.

v2:
* New patch.
---
 kernel/events/hw_breakpoint.c | 57 ++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 03ebecf048c0..a489f31fe147 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -64,6 +64,9 @@ static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 	return per_cpu_ptr(bp_cpuinfo + type, cpu);
 }
 
+/* Number of pinned CPU breakpoints globally. */
+static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
+
 /* Keep track of the breakpoints attached to tasks */
 static struct rhltable task_bps_ht;
 static const struct rhashtable_params task_bps_ht_params = {
@@ -194,6 +197,10 @@ static __init int init_breakpoint_slots(void)
 				goto err;
 		}
 	}
+	for (i = 0; i < TYPE_MAX; i++) {
+		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
+			goto err;
+	}
 
 	return 0;
 err:
@@ -203,6 +210,8 @@ static __init int init_breakpoint_slots(void)
 		if (err_cpu == cpu)
 			break;
 	}
+	for (i = 0; i < TYPE_MAX; i++)
+		bp_slots_histogram_free(&cpu_pinned[i]);
 
 	return -ENOMEM;
 }
@@ -270,6 +279,9 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 /*
  * Count the number of breakpoints of the same type and same task.
  * The given event must be not on the list.
+ *
+ * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
+ * returns a negative value.
  */
 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 {
@@ -288,9 +300,18 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 		goto out;
 
 	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
-		if (find_slot_idx(iter->attr.bp_type) == type &&
-		    (iter->cpu < 0 || cpu == iter->cpu))
-			count += hw_breakpoint_weight(iter);
+		if (find_slot_idx(iter->attr.bp_type) != type)
+			continue;
+
+		if (iter->cpu >= 0) {
+			if (cpu == -1) {
+				count = -1;
+				goto out;
+			} else if (cpu != iter->cpu)
+				continue;
+		}
+
+		count += hw_breakpoint_weight(iter);
 	}
 
 out:
@@ -316,6 +337,19 @@ max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 	int pinned_slots = 0;
 	int cpu;
 
+	if (bp->hw.target && bp->cpu < 0) {
+		int max_pinned = task_bp_pinned(-1, bp, type);
+
+		if (max_pinned >= 0) {
+			/*
+			 * Fast path: task_bp_pinned() is CPU-independent and
+			 * returns the same value for any CPU.
+			 */
+			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
+			return max_pinned;
+		}
+	}
+
 	for_each_cpu(cpu, cpumask) {
 		struct bp_cpuinfo *info = get_bp_info(cpu, type);
 		int nr;
@@ -366,8 +400,11 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 
 	/* Pinned counter cpu profiling */
 	if (!bp->hw.target) {
+		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
+
 		lockdep_assert_held_write(&bp_cpuinfo_sem);
-		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
+		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
+		info->cpu_pinned += weight;
 		return 0;
 	}
 
@@ -804,6 +841,18 @@ bool hw_breakpoint_is_used(void)
 		}
 	}
 
+	for (int type = 0; type < TYPE_MAX; ++type) {
+		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
+			/*
+			 * Warn, because if there are CPU pinned counters,
+			 * should never get here; bp_cpuinfo::cpu_pinned should
+			 * be consistent with the global cpu_pinned histogram.
+			 */
+			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
+				return true;
+		}
+	}
+
 	return false;
 }
 
-- 
2.37.0.rc0.161.g10f37bed90-goog


WARNING: multiple messages have this Message-ID (diff)
From: Marco Elver <elver@google.com>
To: elver@google.com, Peter Zijlstra <peterz@infradead.org>,
	 Frederic Weisbecker <frederic@kernel.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>,
	linux-sh@vger.kernel.org,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	x86@kernel.org, linuxppc-dev@lists.ozlabs.org,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	kasan-dev@googlegroups.com, Namhyung Kim <namhyung@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Jiri Olsa <jolsa@redhat.com>, Dmitry Vyukov <dvyukov@google.com>
Subject: [PATCH v3 13/14] perf/hw_breakpoint: Optimize max_bp_pinned_slots() for CPU-independent task targets
Date: Mon,  4 Jul 2022 17:05:13 +0200	[thread overview]
Message-ID: <20220704150514.48816-14-elver@google.com> (raw)
In-Reply-To: <20220704150514.48816-1-elver@google.com>

Running the perf benchmark with (note: more aggressive parameters vs.
preceding changes, but same 256 CPUs host):

 | $> perf bench -r 100 breakpoint thread -b 4 -p 128 -t 512
 | # Running 'breakpoint/thread' benchmark:
 | # Created/joined 100 threads with 4 breakpoints and 128 parallelism
 |      Total time: 1.989 [sec]
 |
 |       38.854160 usecs/op
 |     4973.332500 usecs/op/cpu

    20.43%  [kernel]       [k] queued_spin_lock_slowpath
    18.75%  [kernel]       [k] osq_lock
    16.98%  [kernel]       [k] rhashtable_jhash2
     8.34%  [kernel]       [k] task_bp_pinned
     4.23%  [kernel]       [k] smp_cfm_core_cond
     3.65%  [kernel]       [k] bcmp
     2.83%  [kernel]       [k] toggle_bp_slot
     1.87%  [kernel]       [k] find_next_bit
     1.49%  [kernel]       [k] __reserve_bp_slot

We can see that a majority of the time is now spent hashing task
pointers to index into task_bps_ht in task_bp_pinned().

Obtaining the max_bp_pinned_slots() for CPU-independent task targets
currently is O(#cpus), and calls task_bp_pinned() for each CPU, even if
the result of task_bp_pinned() is CPU-independent.

The loop in max_bp_pinned_slots() wants to compute the maximum slots
across all CPUs. If task_bp_pinned() is CPU-independent, we can do so by
obtaining the max slots across all CPUs and adding task_bp_pinned().

To do so in O(1), use a bp_slots_histogram for CPU-pinned slots.

After this optimization:

 | $> perf bench -r 100 breakpoint thread -b 4 -p 128 -t 512
 | # Running 'breakpoint/thread' benchmark:
 | # Created/joined 100 threads with 4 breakpoints and 128 parallelism
 |      Total time: 1.930 [sec]
 |
 |       37.697832 usecs/op
 |     4825.322500 usecs/op/cpu

    19.13%  [kernel]       [k] queued_spin_lock_slowpath
    18.21%  [kernel]       [k] rhashtable_jhash2
    15.46%  [kernel]       [k] osq_lock
     6.27%  [kernel]       [k] toggle_bp_slot
     5.91%  [kernel]       [k] task_bp_pinned
     5.05%  [kernel]       [k] smp_cfm_core_cond
     1.78%  [kernel]       [k] update_sg_lb_stats
     1.36%  [kernel]       [k] llist_reverse_order
     1.34%  [kernel]       [k] find_next_bit
     1.19%  [kernel]       [k] bcmp

Suggesting that time spent in task_bp_pinned() has been reduced.
However, we're still hashing too much, which will be addressed in the
subsequent change.

Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
---
v3:
* Update hw_breakpoint_is_used() to include global cpu_pinned.

v2:
* New patch.
---
 kernel/events/hw_breakpoint.c | 57 ++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 03ebecf048c0..a489f31fe147 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -64,6 +64,9 @@ static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
 	return per_cpu_ptr(bp_cpuinfo + type, cpu);
 }
 
+/* Number of pinned CPU breakpoints globally. */
+static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
+
 /* Keep track of the breakpoints attached to tasks */
 static struct rhltable task_bps_ht;
 static const struct rhashtable_params task_bps_ht_params = {
@@ -194,6 +197,10 @@ static __init int init_breakpoint_slots(void)
 				goto err;
 		}
 	}
+	for (i = 0; i < TYPE_MAX; i++) {
+		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
+			goto err;
+	}
 
 	return 0;
 err:
@@ -203,6 +210,8 @@ static __init int init_breakpoint_slots(void)
 		if (err_cpu == cpu)
 			break;
 	}
+	for (i = 0; i < TYPE_MAX; i++)
+		bp_slots_histogram_free(&cpu_pinned[i]);
 
 	return -ENOMEM;
 }
@@ -270,6 +279,9 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 /*
  * Count the number of breakpoints of the same type and same task.
  * The given event must be not on the list.
+ *
+ * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
+ * returns a negative value.
  */
 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 {
@@ -288,9 +300,18 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 		goto out;
 
 	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
-		if (find_slot_idx(iter->attr.bp_type) == type &&
-		    (iter->cpu < 0 || cpu == iter->cpu))
-			count += hw_breakpoint_weight(iter);
+		if (find_slot_idx(iter->attr.bp_type) != type)
+			continue;
+
+		if (iter->cpu >= 0) {
+			if (cpu == -1) {
+				count = -1;
+				goto out;
+			} else if (cpu != iter->cpu)
+				continue;
+		}
+
+		count += hw_breakpoint_weight(iter);
 	}
 
 out:
@@ -316,6 +337,19 @@ max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 	int pinned_slots = 0;
 	int cpu;
 
+	if (bp->hw.target && bp->cpu < 0) {
+		int max_pinned = task_bp_pinned(-1, bp, type);
+
+		if (max_pinned >= 0) {
+			/*
+			 * Fast path: task_bp_pinned() is CPU-independent and
+			 * returns the same value for any CPU.
+			 */
+			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
+			return max_pinned;
+		}
+	}
+
 	for_each_cpu(cpu, cpumask) {
 		struct bp_cpuinfo *info = get_bp_info(cpu, type);
 		int nr;
@@ -366,8 +400,11 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 
 	/* Pinned counter cpu profiling */
 	if (!bp->hw.target) {
+		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
+
 		lockdep_assert_held_write(&bp_cpuinfo_sem);
-		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
+		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
+		info->cpu_pinned += weight;
 		return 0;
 	}
 
@@ -804,6 +841,18 @@ bool hw_breakpoint_is_used(void)
 		}
 	}
 
+	for (int type = 0; type < TYPE_MAX; ++type) {
+		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
+			/*
+			 * Warn, because if there are CPU pinned counters,
+			 * should never get here; bp_cpuinfo::cpu_pinned should
+			 * be consistent with the global cpu_pinned histogram.
+			 */
+			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
+				return true;
+		}
+	}
+
 	return false;
 }
 
-- 
2.37.0.rc0.161.g10f37bed90-goog


  parent reply	other threads:[~2022-07-04 15:07 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-04 15:05 [PATCH v3 00/14] perf/hw_breakpoint: Optimize for thousands of tasks Marco Elver
2022-07-04 15:05 ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 01/14] perf/hw_breakpoint: Add KUnit test for constraints accounting Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-04 15:10   ` Dmitry Vyukov
2022-07-04 15:10     ` Dmitry Vyukov
2022-07-20 15:22     ` Ian Rogers
2022-07-20 15:22       ` Ian Rogers
2022-07-21 16:22   ` Mark Rutland
2022-07-21 16:22     ` Mark Rutland
2022-07-22  9:10     ` Will Deacon
2022-07-22  9:10       ` Will Deacon
2022-07-22  9:20       ` Dmitry Vyukov
2022-07-22  9:20         ` Dmitry Vyukov
2022-07-22 10:10         ` Will Deacon
2022-07-22 10:10           ` Will Deacon
2022-07-22 10:31           ` Dmitry Vyukov
2022-07-22 10:31             ` Dmitry Vyukov
2022-07-22 11:03             ` Will Deacon
2022-07-22 11:03               ` Will Deacon
2022-07-22 13:41               ` Dmitry Vyukov
2022-07-22 13:41                 ` Dmitry Vyukov
2022-07-25 11:00     ` Marco Elver
2022-07-25 11:00       ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 02/14] perf/hw_breakpoint: Provide hw_breakpoint_is_used() and use in test Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-04 15:09   ` Dmitry Vyukov
2022-07-04 15:09     ` Dmitry Vyukov
2022-07-20 15:22     ` Ian Rogers
2022-07-20 15:22       ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 03/14] perf/hw_breakpoint: Clean up headers Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:23   ` Ian Rogers
2022-07-20 15:23     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 04/14] perf/hw_breakpoint: Optimize list of per-task breakpoints Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:29   ` Ian Rogers
2022-07-20 15:29     ` Ian Rogers
2022-07-20 15:39     ` Marco Elver
2022-07-20 15:39       ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 05/14] perf/hw_breakpoint: Mark data __ro_after_init Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:30   ` Ian Rogers
2022-07-20 15:30     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 06/14] perf/hw_breakpoint: Optimize constant number of breakpoint slots Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:31   ` Ian Rogers
2022-07-20 15:31     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 07/14] perf/hw_breakpoint: Make hw_breakpoint_weight() inlinable Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:32   ` Ian Rogers
2022-07-20 15:32     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 08/14] perf/hw_breakpoint: Remove useless code related to flexible breakpoints Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:34   ` Ian Rogers
2022-07-20 15:34     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 09/14] powerpc/hw_breakpoint: Avoid relying on caller synchronization Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:35   ` Ian Rogers
2022-07-20 15:35     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 10/14] locking/percpu-rwsem: Add percpu_is_write_locked() and percpu_is_read_locked() Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:36   ` Ian Rogers
2022-07-20 15:36     ` Ian Rogers
2022-08-17 12:47   ` Peter Zijlstra
2022-08-17 12:47     ` Peter Zijlstra
2022-08-29  6:00     ` Marco Elver
2022-08-29  6:00       ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 11/14] perf/hw_breakpoint: Reduce contention with large number of tasks Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:38   ` Ian Rogers
2022-07-20 15:38     ` Ian Rogers
2022-08-17 13:03   ` Peter Zijlstra
2022-08-17 13:03     ` Peter Zijlstra
2022-08-17 13:14     ` Marco Elver
2022-08-17 13:14       ` Marco Elver
2022-08-29  8:38       ` Peter Zijlstra
2022-08-29  8:38         ` Peter Zijlstra
2022-08-29  9:38         ` Marco Elver
2022-08-29  9:38           ` Marco Elver
2022-07-04 15:05 ` [PATCH v3 12/14] perf/hw_breakpoint: Introduce bp_slots_histogram Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:40   ` Ian Rogers
2022-07-20 15:40     ` Ian Rogers
2022-07-04 15:05 ` Marco Elver [this message]
2022-07-04 15:05   ` [PATCH v3 13/14] perf/hw_breakpoint: Optimize max_bp_pinned_slots() for CPU-independent task targets Marco Elver
2022-07-20 15:42   ` Ian Rogers
2022-07-20 15:42     ` Ian Rogers
2022-07-04 15:05 ` [PATCH v3 14/14] perf/hw_breakpoint: Optimize toggle_bp_slot() " Marco Elver
2022-07-04 15:05   ` Marco Elver
2022-07-20 15:44   ` Ian Rogers
2022-07-20 15:44     ` Ian Rogers
2022-07-12 13:39 ` [PATCH v3 00/14] perf/hw_breakpoint: Optimize for thousands of tasks Marco Elver
2022-07-12 13:39   ` Marco Elver
2022-07-20 15:47   ` Ian Rogers
2022-07-20 15:47     ` Ian Rogers
2022-08-16 14:12     ` Marco Elver
2022-08-16 14:12       ` Marco Elver

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220704150514.48816-14-elver@google.com \
    --to=elver@google.com \
    --cc=acme@kernel.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dvyukov@google.com \
    --cc=frederic@kernel.org \
    --cc=jolsa@redhat.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.