linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 08/40] perf/arm: use atomic find_bit() API
       [not found] <20240620175703.605111-1-yury.norov@gmail.com>
@ 2024-06-20 17:56 ` Yury Norov
  2024-06-20 17:56 ` [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit() Yury Norov
  2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
  2 siblings, 0 replies; 4+ messages in thread
From: Yury Norov @ 2024-06-20 17:56 UTC (permalink / raw)
  To: linux-kernel, Will Deacon, Mark Rutland, linux-arm-kernel
  Cc: Yury Norov, Alexey Klimov, Bart Van Assche, Jan Kara,
	Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
	Rasmus Villemoes, Sergey Shtylyov

Simplify the subsystem by using atomic find_bit() or the atomic API where
applicable.

CC: Will Deacon <will@kernel.org>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 drivers/perf/arm-cci.c        | 25 +++++++------------------
 drivers/perf/arm-ccn.c        | 11 +++--------
 drivers/perf/arm_dmc620_pmu.c | 10 +++-------
 drivers/perf/arm_pmuv3.c      |  9 +++------
 4 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index c76bac668dea..4c5d23942352 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -4,6 +4,7 @@
 // Author: Punit Agrawal <punit.agrawal@arm.com>, Suzuki Poulose <suzuki.poulose@arm.com>
 
 #include <linux/arm-cci.h>
+#include <linux/find_atomic.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
@@ -318,12 +319,9 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
 		return CCI400_PMU_CYCLE_CNTR_IDX;
 	}
 
-	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
-		if (!test_and_set_bit(idx, hw->used_mask))
-			return idx;
-
-	/* No counters available */
-	return -EAGAIN;
+	idx = find_and_set_next_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1,
+							CCI400_PMU_CNTR0_IDX);
+	return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
 }
 
 static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
@@ -792,13 +790,8 @@ static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
 	if (cci_pmu->model->get_event_idx)
 		return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
 
-	/* Generic code to find an unused idx from the mask */
-	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
-		if (!test_and_set_bit(idx, hw->used_mask))
-			return idx;
-
-	/* No counters available */
-	return -EAGAIN;
+	idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+	return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
 }
 
 static int pmu_map_event(struct perf_event *event)
@@ -851,12 +844,8 @@ static void pmu_free_irq(struct cci_pmu *cci_pmu)
 {
 	int i;
 
-	for (i = 0; i < cci_pmu->nr_irqs; i++) {
-		if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
-			continue;
-
+	for_each_test_and_clear_bit(i, &cci_pmu->active_irqs, cci_pmu->nr_irqs)
 		free_irq(cci_pmu->irqs[i], cci_pmu);
-	}
 }
 
 static u32 pmu_read_counter(struct perf_event *event)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 86ef31ac7503..bd66d90dfda6 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/ctype.h>
+#include <linux/find_atomic.h>
 #include <linux/hrtimer.h>
 #include <linux/idr.h>
 #include <linux/interrupt.h>
@@ -580,15 +581,9 @@ static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
 
 static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size)
 {
-	int bit;
-
-	do {
-		bit = find_first_zero_bit(bitmap, size);
-		if (bit >= size)
-			return -EAGAIN;
-	} while (test_and_set_bit(bit, bitmap));
+	int bit = find_and_set_bit(bitmap, size);
 
-	return bit;
+	return bit < size ? bit : -EAGAIN;
 }
 
 /* All RN-I and RN-D nodes have identical PMUs */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 7e5f1d4fca0f..f41cc2ee9564 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -16,6 +16,7 @@
 #include <linux/cpumask.h>
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/find_atomic.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -303,13 +304,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
 		end_idx = DMC620_PMU_MAX_COUNTERS;
 	}
 
-	for (idx = start_idx; idx < end_idx; ++idx) {
-		if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
-			return idx;
-	}
-
-	/* The counters are all in use. */
-	return -EAGAIN;
+	idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
+	return idx < end_idx ? idx : -EAGAIN;
 }
 
 static inline
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 23fa6c5da82c..f3b20a3b1d9c 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/bitfield.h>
 #include <linux/clocksource.h>
+#include <linux/find_atomic.h>
 #include <linux/of.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/perf/arm_pmuv3.h>
@@ -903,13 +904,9 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
 				    struct arm_pmu *cpu_pmu)
 {
-	int idx;
+	int idx = find_and_set_next_bit(cpuc->used_mask, cpu_pmu->num_events, ARMV8_IDX_COUNTER0);
 
-	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
-		if (!test_and_set_bit(idx, cpuc->used_mask))
-			return idx;
-	}
-	return -EAGAIN;
+	return idx < cpu_pmu->num_events ? idx : -EAGAIN;
 }
 
 static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit()
       [not found] <20240620175703.605111-1-yury.norov@gmail.com>
  2024-06-20 17:56 ` [PATCH v4 08/40] perf/arm: use atomic find_bit() API Yury Norov
@ 2024-06-20 17:56 ` Yury Norov
  2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
  2 siblings, 0 replies; 4+ messages in thread
From: Yury Norov @ 2024-06-20 17:56 UTC (permalink / raw)
  To: linux-kernel, Shuai Xue, Will Deacon, Mark Rutland,
	linux-arm-kernel
  Cc: Yury Norov, Alexey Klimov, Bart Van Assche, Jan Kara,
	Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
	Rasmus Villemoes, Sergey Shtylyov

The function searches used_mask for a clear bit in a for-loop, bit by bit.
Simplify it by using atomic find_and_set_bit().

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Will Deacon <will@kernel.org>
---
 drivers/perf/alibaba_uncore_drw_pmu.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index 38a2947ae813..1516f2c3d58f 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -17,6 +17,7 @@
 #include <linux/cpumask.h>
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/find_atomic.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
@@ -266,15 +267,9 @@ static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
 static int ali_drw_get_counter_idx(struct perf_event *event)
 {
 	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
-	int idx;
+	int idx = find_and_set_bit(drw_pmu->used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS);
 
-	for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) {
-		if (!test_and_set_bit(idx, drw_pmu->used_mask))
-			return idx;
-	}
-
-	/* The counters are all in use. */
-	return -EBUSY;
+	return idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS ? idx : -EBUSY;
 }
 
 static u64 ali_drw_pmu_read_counter(struct perf_event *event)
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API
       [not found] <20240620175703.605111-1-yury.norov@gmail.com>
  2024-06-20 17:56 ` [PATCH v4 08/40] perf/arm: use atomic find_bit() API Yury Norov
  2024-06-20 17:56 ` [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit() Yury Norov
@ 2024-06-20 17:56 ` Yury Norov
  2024-06-25 12:16   ` Joerg Roedel
  2 siblings, 1 reply; 4+ messages in thread
From: Yury Norov @ 2024-06-20 17:56 UTC (permalink / raw)
  To: linux-kernel, Will Deacon, Robin Murphy, Joerg Roedel, Andy Gross,
	Bjorn Andersson, Konrad Dybcio, linux-arm-kernel, iommu,
	linux-arm-msm
  Cc: Yury Norov, Alexey Klimov, Bart Van Assche, Jan Kara,
	Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
	Rasmus Villemoes, Sergey Shtylyov

Simplify __arm_smmu_alloc_bitmap() and msm_iommu_alloc_ctx() by using
a dedicated API, and make them nice one-liner wrappers.

While here, refactor msm_iommu_attach_dev() and msm_iommu_alloc_ctx()
so that error codes don't mismatch.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu.h | 11 +++--------
 drivers/iommu/msm_iommu.c             | 19 +++++--------------
 2 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 4765c6945c34..c74d0300b64b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -15,6 +15,7 @@
 #include <linux/bits.h>
 #include <linux/clk.h>
 #include <linux/device.h>
+#include <linux/find_atomic.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/io-pgtable.h>
 #include <linux/iommu.h>
@@ -455,15 +456,9 @@ struct arm_smmu_impl {
 
 static inline int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
 {
-	int idx;
+	int idx = find_and_set_next_bit(map, end, start);
 
-	do {
-		idx = find_next_zero_bit(map, end, start);
-		if (idx == end)
-			return -ENOSPC;
-	} while (test_and_set_bit(idx, map));
-
-	return idx;
+	return idx < end ? idx : -ENOSPC;
 }
 
 static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 989e0869d805..4299e6a5b2ec 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/errno.h>
+#include <linux/find_atomic.h>
 #include <linux/io.h>
 #include <linux/io-pgtable.h>
 #include <linux/interrupt.h>
@@ -185,17 +186,9 @@ static const struct iommu_flush_ops msm_iommu_flush_ops = {
 	.tlb_add_page = __flush_iotlb_page,
 };
 
-static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
+static int msm_iommu_alloc_ctx(struct msm_iommu_dev *iommu)
 {
-	int idx;
-
-	do {
-		idx = find_next_zero_bit(map, end, start);
-		if (idx == end)
-			return -ENOSPC;
-	} while (test_and_set_bit(idx, map));
-
-	return idx;
+	return find_and_set_bit(iommu->context_map, iommu->ncb);
 }
 
 static void msm_iommu_free_ctx(unsigned long *map, int idx)
@@ -418,10 +411,8 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 					ret = -EEXIST;
 					goto fail;
 				}
-				master->num =
-					msm_iommu_alloc_ctx(iommu->context_map,
-							    0, iommu->ncb);
-				if (IS_ERR_VALUE(master->num)) {
+				master->num = msm_iommu_alloc_ctx(iommu);
+				if (master->num >= iommu->ncb) {
 					ret = -ENODEV;
 					goto fail;
 				}
-- 
2.43.0



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API
  2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
@ 2024-06-25 12:16   ` Joerg Roedel
  0 siblings, 0 replies; 4+ messages in thread
From: Joerg Roedel @ 2024-06-25 12:16 UTC (permalink / raw)
  To: Yury Norov
  Cc: linux-kernel, Will Deacon, Robin Murphy, Andy Gross,
	Bjorn Andersson, Konrad Dybcio, linux-arm-kernel, iommu,
	linux-arm-msm, Alexey Klimov, Bart Van Assche, Jan Kara,
	Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
	Rasmus Villemoes, Sergey Shtylyov

On Thu, Jun 20, 2024 at 10:56:42AM -0700, Yury Norov wrote:
>  drivers/iommu/arm/arm-smmu/arm-smmu.h | 11 +++--------
>  drivers/iommu/msm_iommu.c             | 19 +++++--------------

Please split that up into an arm-smmu and msm part, so that these can be
reviewed and merged via separate branches.

Thanks,

	Joerg


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-06-25 12:17 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240620175703.605111-1-yury.norov@gmail.com>
2024-06-20 17:56 ` [PATCH v4 08/40] perf/arm: use atomic find_bit() API Yury Norov
2024-06-20 17:56 ` [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit() Yury Norov
2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
2024-06-25 12:16   ` Joerg Roedel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).