* [PATCH v4 08/40] perf/arm: use atomic find_bit() API
[not found] <20240620175703.605111-1-yury.norov@gmail.com>
@ 2024-06-20 17:56 ` Yury Norov
2024-06-20 17:56 ` [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit() Yury Norov
2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
2 siblings, 0 replies; 4+ messages in thread
From: Yury Norov @ 2024-06-20 17:56 UTC (permalink / raw)
To: linux-kernel, Will Deacon, Mark Rutland, linux-arm-kernel
Cc: Yury Norov, Alexey Klimov, Bart Van Assche, Jan Kara,
Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
Rasmus Villemoes, Sergey Shtylyov
Simplify the subsystem by using atomic find_bit() or other atomic APIs
where applicable.
CC: Will Deacon <will@kernel.org>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/perf/arm-cci.c | 25 +++++++------------------
drivers/perf/arm-ccn.c | 11 +++--------
drivers/perf/arm_dmc620_pmu.c | 10 +++-------
drivers/perf/arm_pmuv3.c | 9 +++------
4 files changed, 16 insertions(+), 39 deletions(-)
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index c76bac668dea..4c5d23942352 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -4,6 +4,7 @@
// Author: Punit Agrawal <punit.agrawal@arm.com>, Suzuki Poulose <suzuki.poulose@arm.com>
#include <linux/arm-cci.h>
+#include <linux/find_atomic.h>
#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/module.h>
@@ -318,12 +319,9 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
return CCI400_PMU_CYCLE_CNTR_IDX;
}
- for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
- if (!test_and_set_bit(idx, hw->used_mask))
- return idx;
-
- /* No counters available */
- return -EAGAIN;
+ idx = find_and_set_next_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1,
+ CCI400_PMU_CNTR0_IDX);
+ return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
}
static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
@@ -792,13 +790,8 @@ static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
if (cci_pmu->model->get_event_idx)
return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
- /* Generic code to find an unused idx from the mask */
- for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
- if (!test_and_set_bit(idx, hw->used_mask))
- return idx;
-
- /* No counters available */
- return -EAGAIN;
+ idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+ return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
}
static int pmu_map_event(struct perf_event *event)
@@ -851,12 +844,8 @@ static void pmu_free_irq(struct cci_pmu *cci_pmu)
{
int i;
- for (i = 0; i < cci_pmu->nr_irqs; i++) {
- if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
- continue;
-
+ for_each_test_and_clear_bit(i, &cci_pmu->active_irqs, cci_pmu->nr_irqs)
free_irq(cci_pmu->irqs[i], cci_pmu);
- }
}
static u32 pmu_read_counter(struct perf_event *event)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 86ef31ac7503..bd66d90dfda6 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -5,6 +5,7 @@
*/
#include <linux/ctype.h>
+#include <linux/find_atomic.h>
#include <linux/hrtimer.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
@@ -580,15 +581,9 @@ static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size)
{
- int bit;
-
- do {
- bit = find_first_zero_bit(bitmap, size);
- if (bit >= size)
- return -EAGAIN;
- } while (test_and_set_bit(bit, bitmap));
+ int bit = find_and_set_bit(bitmap, size);
- return bit;
+ return bit < size ? bit : -EAGAIN;
}
/* All RN-I and RN-D nodes have identical PMUs */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 7e5f1d4fca0f..f41cc2ee9564 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -16,6 +16,7 @@
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
+#include <linux/find_atomic.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
@@ -303,13 +304,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
end_idx = DMC620_PMU_MAX_COUNTERS;
}
- for (idx = start_idx; idx < end_idx; ++idx) {
- if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EAGAIN;
+ idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
+ return idx < end_idx ? idx : -EAGAIN;
}
static inline
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 23fa6c5da82c..f3b20a3b1d9c 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -17,6 +17,7 @@
#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/clocksource.h>
+#include <linux/find_atomic.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/perf/arm_pmuv3.h>
@@ -903,13 +904,9 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
struct arm_pmu *cpu_pmu)
{
- int idx;
+ int idx = find_and_set_next_bit(cpuc->used_mask, cpu_pmu->num_events, ARMV8_IDX_COUNTER0);
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
- return -EAGAIN;
+ return idx < cpu_pmu->num_events ? idx : -EAGAIN;
}
static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
--
2.43.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit()
[not found] <20240620175703.605111-1-yury.norov@gmail.com>
2024-06-20 17:56 ` [PATCH v4 08/40] perf/arm: use atomic find_bit() API Yury Norov
@ 2024-06-20 17:56 ` Yury Norov
2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
2 siblings, 0 replies; 4+ messages in thread
From: Yury Norov @ 2024-06-20 17:56 UTC (permalink / raw)
To: linux-kernel, Shuai Xue, Will Deacon, Mark Rutland,
linux-arm-kernel
Cc: Yury Norov, Alexey Klimov, Bart Van Assche, Jan Kara,
Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
Rasmus Villemoes, Sergey Shtylyov
The function searches used_mask for a clear bit in a for-loop, bit by bit,
and sets the first one it finds. Simplify it by using atomic find_and_set_bit().
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Will Deacon <will@kernel.org>
---
drivers/perf/alibaba_uncore_drw_pmu.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index 38a2947ae813..1516f2c3d58f 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -17,6 +17,7 @@
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/errno.h>
+#include <linux/find_atomic.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
@@ -266,15 +267,9 @@ static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
static int ali_drw_get_counter_idx(struct perf_event *event)
{
struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
- int idx;
+ int idx = find_and_set_bit(drw_pmu->used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS);
- for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) {
- if (!test_and_set_bit(idx, drw_pmu->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EBUSY;
+ return idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS ? idx : -EBUSY;
}
static u64 ali_drw_pmu_read_counter(struct perf_event *event)
--
2.43.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API
[not found] <20240620175703.605111-1-yury.norov@gmail.com>
2024-06-20 17:56 ` [PATCH v4 08/40] perf/arm: use atomic find_bit() API Yury Norov
2024-06-20 17:56 ` [PATCH v4 09/40] drivers/perf: optimize ali_drw_get_counter_idx() by using find_and_set_bit() Yury Norov
@ 2024-06-20 17:56 ` Yury Norov
2024-06-25 12:16 ` Joerg Roedel
2 siblings, 1 reply; 4+ messages in thread
From: Yury Norov @ 2024-06-20 17:56 UTC (permalink / raw)
To: linux-kernel, Will Deacon, Robin Murphy, Joerg Roedel, Andy Gross,
Bjorn Andersson, Konrad Dybcio, linux-arm-kernel, iommu,
linux-arm-msm
Cc: Yury Norov, Alexey Klimov, Bart Van Assche, Jan Kara,
Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
Rasmus Villemoes, Sergey Shtylyov
Simplify __arm_smmu_alloc_bitmap() and msm_iommu_alloc_ctx() by using
a dedicated API, and make them nice one-liner wrappers.
While here, refactor msm_iommu_attach_dev() and msm_iommu_alloc_ctx()
so that their error codes are consistent.
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/iommu/arm/arm-smmu/arm-smmu.h | 11 +++--------
drivers/iommu/msm_iommu.c | 19 +++++--------------
2 files changed, 8 insertions(+), 22 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 4765c6945c34..c74d0300b64b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -15,6 +15,7 @@
#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/device.h>
+#include <linux/find_atomic.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
@@ -455,15 +456,9 @@ struct arm_smmu_impl {
static inline int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
- int idx;
+ int idx = find_and_set_next_bit(map, end, start);
- do {
- idx = find_next_zero_bit(map, end, start);
- if (idx == end)
- return -ENOSPC;
- } while (test_and_set_bit(idx, map));
-
- return idx;
+ return idx < end ? idx : -ENOSPC;
}
static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 989e0869d805..4299e6a5b2ec 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/errno.h>
+#include <linux/find_atomic.h>
#include <linux/io.h>
#include <linux/io-pgtable.h>
#include <linux/interrupt.h>
@@ -185,17 +186,9 @@ static const struct iommu_flush_ops msm_iommu_flush_ops = {
.tlb_add_page = __flush_iotlb_page,
};
-static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
+static int msm_iommu_alloc_ctx(struct msm_iommu_dev *iommu)
{
- int idx;
-
- do {
- idx = find_next_zero_bit(map, end, start);
- if (idx == end)
- return -ENOSPC;
- } while (test_and_set_bit(idx, map));
-
- return idx;
+ return find_and_set_bit(iommu->context_map, iommu->ncb);
}
static void msm_iommu_free_ctx(unsigned long *map, int idx)
@@ -418,10 +411,8 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
ret = -EEXIST;
goto fail;
}
- master->num =
- msm_iommu_alloc_ctx(iommu->context_map,
- 0, iommu->ncb);
- if (IS_ERR_VALUE(master->num)) {
+ master->num = msm_iommu_alloc_ctx(iommu);
+ if (master->num >= iommu->ncb) {
ret = -ENODEV;
goto fail;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API
2024-06-20 17:56 ` [PATCH v4 19/40] iommu: optimize subsystem by using atomic find_bit() API Yury Norov
@ 2024-06-25 12:16 ` Joerg Roedel
0 siblings, 0 replies; 4+ messages in thread
From: Joerg Roedel @ 2024-06-25 12:16 UTC (permalink / raw)
To: Yury Norov
Cc: linux-kernel, Will Deacon, Robin Murphy, Andy Gross,
Bjorn Andersson, Konrad Dybcio, linux-arm-kernel, iommu,
linux-arm-msm, Alexey Klimov, Bart Van Assche, Jan Kara,
Linus Torvalds, Matthew Wilcox, Mirsad Todorovac,
Rasmus Villemoes, Sergey Shtylyov
On Thu, Jun 20, 2024 at 10:56:42AM -0700, Yury Norov wrote:
> drivers/iommu/arm/arm-smmu/arm-smmu.h | 11 +++--------
> drivers/iommu/msm_iommu.c | 19 +++++--------------
Please split that up into an arm-smmu and msm part, so that these can be
reviewed and merged via separate branches.
Thanks,
Joerg
^ permalink raw reply [flat|nested] 4+ messages in thread