* [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API
[not found] <20231118155105.25678-1-yury.norov@gmail.com>
@ 2023-11-18 15:50 ` Yury Norov
2023-11-21 15:53 ` Will Deacon
2023-11-18 15:50 ` [PATCH 08/34] drivers/perf: optimize ali_drw_get_counter_idx() by using find_bit() Yury Norov
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Yury Norov @ 2023-11-18 15:50 UTC (permalink / raw)
To: linux-kernel, Will Deacon, Mark Rutland, linux-arm-kernel
Cc: Yury Norov, Jan Kara, Mirsad Todorovac, Matthew Wilcox,
Rasmus Villemoes, Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
Switch subsystem to use atomic find_bit() or atomic iterators as
appropriate.
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/perf/arm-cci.c | 23 +++++------------------
drivers/perf/arm-ccn.c | 10 ++--------
drivers/perf/arm_dmc620_pmu.c | 9 ++-------
drivers/perf/arm_pmuv3.c | 8 ++------
4 files changed, 11 insertions(+), 39 deletions(-)
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 61de861eaf91..70fbf9d09d37 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
return CCI400_PMU_CYCLE_CNTR_IDX;
}
- for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
- if (!test_and_set_bit(idx, hw->used_mask))
- return idx;
-
- /* No counters available */
- return -EAGAIN;
+ idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+ return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
}
static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
@@ -802,13 +798,8 @@ static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
if (cci_pmu->model->get_event_idx)
return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
- /* Generic code to find an unused idx from the mask */
- for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
- if (!test_and_set_bit(idx, hw->used_mask))
- return idx;
-
- /* No counters available */
- return -EAGAIN;
+ idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+ return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
}
static int pmu_map_event(struct perf_event *event)
@@ -861,12 +852,8 @@ static void pmu_free_irq(struct cci_pmu *cci_pmu)
{
int i;
- for (i = 0; i < cci_pmu->nr_irqs; i++) {
- if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
- continue;
-
+ for_each_test_and_clear_bit(i, &cci_pmu->active_irqs, cci_pmu->nr_irqs)
free_irq(cci_pmu->irqs[i], cci_pmu);
- }
}
static u32 pmu_read_counter(struct perf_event *event)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 728d13d8e98a..d657701b1f23 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -589,15 +589,9 @@ static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size)
{
- int bit;
-
- do {
- bit = find_first_zero_bit(bitmap, size);
- if (bit >= size)
- return -EAGAIN;
- } while (test_and_set_bit(bit, bitmap));
+ int bit = find_and_set_bit(bitmap, size);
- return bit;
+ return bit < size ? bit : -EAGAIN;
}
/* All RN-I and RN-D nodes have identical PMUs */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 30cea6859574..e41c84dabc3e 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
end_idx = DMC620_PMU_MAX_COUNTERS;
}
- for (idx = start_idx; idx < end_idx; ++idx) {
- if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EAGAIN;
+ idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
+ return idx < end_idx ? idx : -EAGAIN;
}
static inline
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 18b91b56af1d..784b0383e9f8 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -825,13 +825,9 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
struct arm_pmu *cpu_pmu)
{
- int idx;
+ int idx = find_and_set_next_bit(cpuc->used_mask, cpu_pmu->num_events, ARMV8_IDX_COUNTER0);
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
- return -EAGAIN;
+ return idx < cpu_pmu->num_events ? idx : -EAGAIN;
}
static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
--
2.39.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 08/34] drivers/perf: optimize ali_drw_get_counter_idx() by using find_bit()
[not found] <20231118155105.25678-1-yury.norov@gmail.com>
2023-11-18 15:50 ` [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API Yury Norov
@ 2023-11-18 15:50 ` Yury Norov
2023-11-21 15:54 ` Will Deacon
2023-11-18 15:50 ` [PATCH 17/34] iommu: use atomic find_bit() API where appropriate Yury Norov
2023-11-18 15:51 ` [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API Yury Norov
3 siblings, 1 reply; 10+ messages in thread
From: Yury Norov @ 2023-11-18 15:50 UTC (permalink / raw)
To: linux-kernel, Shuai Xue, Will Deacon, Mark Rutland,
linux-arm-kernel
Cc: Yury Norov, Jan Kara, Mirsad Todorovac, Matthew Wilcox,
Rasmus Villemoes, Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
The function searches used_mask for a set bit in a for-loop bit by bit.
We can do it faster by using atomic find_and_set_bit().
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/perf/alibaba_uncore_drw_pmu.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
index 19d459a36be5..2a3b7701d568 100644
--- a/drivers/perf/alibaba_uncore_drw_pmu.c
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -274,15 +274,9 @@ static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
static int ali_drw_get_counter_idx(struct perf_event *event)
{
struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
- int idx;
+ int idx = find_and_set_bit(drw_pmu->used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS);
- for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) {
- if (!test_and_set_bit(idx, drw_pmu->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EBUSY;
+ return idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS ? idx : -EBUSY;
}
static u64 ali_drw_pmu_read_counter(struct perf_event *event)
--
2.39.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 17/34] iommu: use atomic find_bit() API where appropriate
[not found] <20231118155105.25678-1-yury.norov@gmail.com>
2023-11-18 15:50 ` [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API Yury Norov
2023-11-18 15:50 ` [PATCH 08/34] drivers/perf: optimize ali_drw_get_counter_idx() by using find_bit() Yury Norov
@ 2023-11-18 15:50 ` Yury Norov
2023-11-18 15:51 ` [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API Yury Norov
3 siblings, 0 replies; 10+ messages in thread
From: Yury Norov @ 2023-11-18 15:50 UTC (permalink / raw)
To: linux-kernel, Will Deacon, Robin Murphy, Joerg Roedel, Andy Gross,
Bjorn Andersson, Konrad Dybcio, Yury Norov, linux-arm-kernel,
iommu, linux-arm-msm
Cc: Jan Kara, Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
Fix opencoded find_and_set_next_bit() in __arm_smmu_alloc_bitmap()
and msm_iommu_alloc_ctx(), and make them nice one-liner wrappers.
While here, refactor msm_iommu_attach_dev() and msm_iommu_alloc_ctx()
so that error codes don't mismatch.
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/iommu/arm/arm-smmu/arm-smmu.h | 10 ++--------
drivers/iommu/msm_iommu.c | 18 ++++--------------
2 files changed, 6 insertions(+), 22 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index 703fd5817ec1..004a4704ebf1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -453,15 +453,9 @@ struct arm_smmu_impl {
static inline int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
- int idx;
+ int idx = find_and_set_next_bit(map, end, start);
- do {
- idx = find_next_zero_bit(map, end, start);
- if (idx == end)
- return -ENOSPC;
- } while (test_and_set_bit(idx, map));
-
- return idx;
+ return idx < end ? idx : -ENOSPC;
}
static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index f86af9815d6f..67124f4228b1 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -185,17 +185,9 @@ static const struct iommu_flush_ops msm_iommu_flush_ops = {
.tlb_add_page = __flush_iotlb_page,
};
-static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
+static int msm_iommu_alloc_ctx(struct msm_iommu_dev *iommu)
{
- int idx;
-
- do {
- idx = find_next_zero_bit(map, end, start);
- if (idx == end)
- return -ENOSPC;
- } while (test_and_set_bit(idx, map));
-
- return idx;
+ return find_and_set_bit(iommu->context_map, iommu->ncb);
}
static void msm_iommu_free_ctx(unsigned long *map, int idx)
@@ -418,10 +410,8 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
ret = -EEXIST;
goto fail;
}
- master->num =
- msm_iommu_alloc_ctx(iommu->context_map,
- 0, iommu->ncb);
- if (IS_ERR_VALUE(master->num)) {
+ master->num = msm_iommu_alloc_ctx(iommu);
+ if (master->num >= iommu->ncb) {
ret = -ENODEV;
goto fail;
}
--
2.39.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API
[not found] <20231118155105.25678-1-yury.norov@gmail.com>
` (2 preceding siblings ...)
2023-11-18 15:50 ` [PATCH 17/34] iommu: use atomic find_bit() API where appropriate Yury Norov
@ 2023-11-18 15:51 ` Yury Norov
2023-11-18 18:40 ` Marc Zyngier
3 siblings, 1 reply; 10+ messages in thread
From: Yury Norov @ 2023-11-18 15:51 UTC (permalink / raw)
To: linux-kernel, Will Deacon, Mark Rutland, Marc Zyngier,
linux-arm-kernel
Cc: Yury Norov, Jan Kara, Mirsad Todorovac, Matthew Wilcox,
Rasmus Villemoes, Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
The function searches used_mask for a bit in a for-loop bit by bit.
We can do it faster by using atomic find_and_set_bit().
The comment to the function says that it searches for the first free
counter, but obviously for_each_set_bit() searches for the first set
counter. The following test_and_set_bit() tries to enable already set
bit, which is weird.
This patch, by using find_and_set_bit(), fixes this automatically.
Fixes: a639027a1be1 ("drivers/perf: Add Apple icestorm/firestorm CPU PMU driver")
Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
drivers/perf/apple_m1_cpu_pmu.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
index cd2de44b61b9..2d50670ffb01 100644
--- a/drivers/perf/apple_m1_cpu_pmu.c
+++ b/drivers/perf/apple_m1_cpu_pmu.c
@@ -447,12 +447,8 @@ static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
* counting on the PMU at any given time, and by placing the
* most constraining events first.
*/
- for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
-
- return -EAGAIN;
+ idx = find_and_set_bit(cpuc->used_mask, M1_PMU_NR_COUNTERS);
+ return idx < M1_PMU_NR_COUNTERS ? idx : -EAGAIN;
}
static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
--
2.39.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API
2023-11-18 15:51 ` [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API Yury Norov
@ 2023-11-18 18:40 ` Marc Zyngier
2023-11-18 18:45 ` Yury Norov
0 siblings, 1 reply; 10+ messages in thread
From: Marc Zyngier @ 2023-11-18 18:40 UTC (permalink / raw)
To: Yury Norov
Cc: linux-kernel, Will Deacon, Mark Rutland, linux-arm-kernel,
Jan Kara, Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
On Sat, 18 Nov 2023 15:51:02 +0000,
Yury Norov <yury.norov@gmail.com> wrote:
>
> The function searches used_mask for a bit in a for-loop bit by bit.
> We can do it faster by using atomic find_and_set_bit().
Sure, let's do things fast. Correctness is overrated anyway.
>
> The comment to the function says that it searches for the first free
> counter, but obviously for_each_set_bit() searches for the first set
> counter.
No it doesn't. It iterates over the counters the event can count on.
> The following test_and_set_bit() tries to enable already set
> bit, which is weird.
Maybe you could try to actually read the code?
>
> This patch, by using find_and_set_bit(), fixes this automatically.
This doesn't fix anything, but instead actively breaks the driver.
>
> Fixes: a639027a1be1 ("drivers/perf: Add Apple icestorm/firestorm CPU PMU driver")
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
> drivers/perf/apple_m1_cpu_pmu.c | 8 ++------
> 1 file changed, 2 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
> index cd2de44b61b9..2d50670ffb01 100644
> --- a/drivers/perf/apple_m1_cpu_pmu.c
> +++ b/drivers/perf/apple_m1_cpu_pmu.c
> @@ -447,12 +447,8 @@ static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
> * counting on the PMU at any given time, and by placing the
> * most constraining events first.
> */
> - for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
> - if (!test_and_set_bit(idx, cpuc->used_mask))
> - return idx;
> - }
> -
> - return -EAGAIN;
> + idx = find_and_set_bit(cpuc->used_mask, M1_PMU_NR_COUNTERS);
> + return idx < M1_PMU_NR_COUNTERS ? idx : -EAGAIN;
So now you're picking any possible counter, irrespective of the
possible affinity of the event. This is great.
M.
--
Without deviation from the norm, progress is not possible.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API
2023-11-18 18:40 ` Marc Zyngier
@ 2023-11-18 18:45 ` Yury Norov
0 siblings, 0 replies; 10+ messages in thread
From: Yury Norov @ 2023-11-18 18:45 UTC (permalink / raw)
To: Marc Zyngier
Cc: linux-kernel, Will Deacon, Mark Rutland, linux-arm-kernel,
Jan Kara, Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
On Sat, Nov 18, 2023 at 06:40:43PM +0000, Marc Zyngier wrote:
> On Sat, 18 Nov 2023 15:51:02 +0000,
> Yury Norov <yury.norov@gmail.com> wrote:
> >
> > The function searches used_mask for a bit in a for-loop bit by bit.
> > We can do it faster by using atomic find_and_set_bit().
>
> Sure, let's do things fast. Correctness is overrated anyway.
>
> >
> > The comment to the function says that it searches for the first free
> > counter, but obviously for_each_set_bit() searches for the first set
> > counter.
>
> No it doesn't. It iterates over the counters the event can count on.
>
> > The following test_and_set_bit() tries to enable already set
> > bit, which is weird.
>
> Maybe you could try to actually read the code?
>
> >
> > This patch, by using find_and_set_bit(), fixes this automatically.
>
> This doesn't fix anything, but instead actively breaks the driver.
>
> >
> > Fixes: a639027a1be1 ("drivers/perf: Add Apple icestorm/firestorm CPU PMU driver")
> > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > ---
> > drivers/perf/apple_m1_cpu_pmu.c | 8 ++------
> > 1 file changed, 2 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c
> > index cd2de44b61b9..2d50670ffb01 100644
> > --- a/drivers/perf/apple_m1_cpu_pmu.c
> > +++ b/drivers/perf/apple_m1_cpu_pmu.c
> > @@ -447,12 +447,8 @@ static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
> > * counting on the PMU at any given time, and by placing the
> > * most constraining events first.
> > */
> > - for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
> > - if (!test_and_set_bit(idx, cpuc->used_mask))
> > - return idx;
> > - }
> > -
> > - return -EAGAIN;
> > + idx = find_and_set_bit(cpuc->used_mask, M1_PMU_NR_COUNTERS);
> > + return idx < M1_PMU_NR_COUNTERS ? idx : -EAGAIN;
>
> So now you're picking any possible counter, irrespective of the
> possible affinity of the event. This is great.
Ok, I'll drop the patch. Sorry.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API
2023-11-18 15:50 ` [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API Yury Norov
@ 2023-11-21 15:53 ` Will Deacon
2023-11-21 16:16 ` Yury Norov
0 siblings, 1 reply; 10+ messages in thread
From: Will Deacon @ 2023-11-21 15:53 UTC (permalink / raw)
To: Yury Norov
Cc: linux-kernel, Mark Rutland, linux-arm-kernel, Jan Kara,
Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> Switch subsystem to use atomic find_bit() or atomic iterators as
> appropriate.
>
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
> drivers/perf/arm-cci.c | 23 +++++------------------
> drivers/perf/arm-ccn.c | 10 ++--------
> drivers/perf/arm_dmc620_pmu.c | 9 ++-------
> drivers/perf/arm_pmuv3.c | 8 ++------
> 4 files changed, 11 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> index 61de861eaf91..70fbf9d09d37 100644
> --- a/drivers/perf/arm-cci.c
> +++ b/drivers/perf/arm-cci.c
> @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> return CCI400_PMU_CYCLE_CNTR_IDX;
> }
>
> - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> - if (!test_and_set_bit(idx, hw->used_mask))
> - return idx;
> -
> - /* No counters available */
> - return -EAGAIN;
> + idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
[...]
> diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> index 30cea6859574..e41c84dabc3e 100644
> --- a/drivers/perf/arm_dmc620_pmu.c
> +++ b/drivers/perf/arm_dmc620_pmu.c
> @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> end_idx = DMC620_PMU_MAX_COUNTERS;
> }
>
> - for (idx = start_idx; idx < end_idx; ++idx) {
> - if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> - return idx;
> - }
> -
> - /* The counters are all in use. */
> - return -EAGAIN;
> + idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
It might just be me, but I'd find this a tonne easier to read if you swapped
the last two arguments around so that the offset came before the limit in
the new function.
Will
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 08/34] drivers/perf: optimize ali_drw_get_counter_idx() by using find_bit()
2023-11-18 15:50 ` [PATCH 08/34] drivers/perf: optimize ali_drw_get_counter_idx() by using find_bit() Yury Norov
@ 2023-11-21 15:54 ` Will Deacon
0 siblings, 0 replies; 10+ messages in thread
From: Will Deacon @ 2023-11-21 15:54 UTC (permalink / raw)
To: Yury Norov
Cc: linux-kernel, Shuai Xue, Mark Rutland, linux-arm-kernel, Jan Kara,
Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
On Sat, Nov 18, 2023 at 07:50:39AM -0800, Yury Norov wrote:
> The function searches used_mask for a set bit in a for-loop bit by bit.
> We can do it faster by using atomic find_and_set_bit().
>
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
> drivers/perf/alibaba_uncore_drw_pmu.c | 10 ++--------
> 1 file changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
> index 19d459a36be5..2a3b7701d568 100644
> --- a/drivers/perf/alibaba_uncore_drw_pmu.c
> +++ b/drivers/perf/alibaba_uncore_drw_pmu.c
> @@ -274,15 +274,9 @@ static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
> static int ali_drw_get_counter_idx(struct perf_event *event)
> {
> struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
> - int idx;
> + int idx = find_and_set_bit(drw_pmu->used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS);
>
> - for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) {
> - if (!test_and_set_bit(idx, drw_pmu->used_mask))
> - return idx;
> - }
> -
> - /* The counters are all in use. */
> - return -EBUSY;
> + return idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS ? idx : -EBUSY;
> }
>
> static u64 ali_drw_pmu_read_counter(struct perf_event *event)
> --
> 2.39.2
Acked-by: Will Deacon <will@kernel.org>
Will
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API
2023-11-21 15:53 ` Will Deacon
@ 2023-11-21 16:16 ` Yury Norov
2023-11-21 16:17 ` Will Deacon
0 siblings, 1 reply; 10+ messages in thread
From: Yury Norov @ 2023-11-21 16:16 UTC (permalink / raw)
To: Will Deacon
Cc: linux-kernel, Mark Rutland, linux-arm-kernel, Jan Kara,
Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
On Tue, Nov 21, 2023 at 03:53:44PM +0000, Will Deacon wrote:
> On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> > Switch subsystem to use atomic find_bit() or atomic iterators as
> > appropriate.
> >
> > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > ---
> > drivers/perf/arm-cci.c | 23 +++++------------------
> > drivers/perf/arm-ccn.c | 10 ++--------
> > drivers/perf/arm_dmc620_pmu.c | 9 ++-------
> > drivers/perf/arm_pmuv3.c | 8 ++------
> > 4 files changed, 11 insertions(+), 39 deletions(-)
> >
> > diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> > index 61de861eaf91..70fbf9d09d37 100644
> > --- a/drivers/perf/arm-cci.c
> > +++ b/drivers/perf/arm-cci.c
> > @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> > return CCI400_PMU_CYCLE_CNTR_IDX;
> > }
> >
> > - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> > - if (!test_and_set_bit(idx, hw->used_mask))
> > - return idx;
> > -
> > - /* No counters available */
> > - return -EAGAIN;
> > + idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
>
> CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
You're right. Will fix in v2
> [...]
>
> > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > index 30cea6859574..e41c84dabc3e 100644
> > --- a/drivers/perf/arm_dmc620_pmu.c
> > +++ b/drivers/perf/arm_dmc620_pmu.c
> > @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> > end_idx = DMC620_PMU_MAX_COUNTERS;
> > }
> >
> > - for (idx = start_idx; idx < end_idx; ++idx) {
> > - if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> > - return idx;
> > - }
> > -
> > - /* The counters are all in use. */
> > - return -EAGAIN;
> > + idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
>
> It might just be me, but I'd find this a tonne easier to read if you swapped
> the last two arguments around so that the offset came before the limit in
> the new function.
I personally agree, but we already have find_next_*_bit(addr, nbits, offset)
functions, and having atomic versions of the same with different order
of arguments will make it even more messy...
Thanks,
Yury
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API
2023-11-21 16:16 ` Yury Norov
@ 2023-11-21 16:17 ` Will Deacon
0 siblings, 0 replies; 10+ messages in thread
From: Will Deacon @ 2023-11-21 16:17 UTC (permalink / raw)
To: Yury Norov
Cc: linux-kernel, Mark Rutland, linux-arm-kernel, Jan Kara,
Mirsad Todorovac, Matthew Wilcox, Rasmus Villemoes,
Andy Shevchenko, Maxim Kuvyrkov, Alexey Klimov
On Tue, Nov 21, 2023 at 08:16:13AM -0800, Yury Norov wrote:
> On Tue, Nov 21, 2023 at 03:53:44PM +0000, Will Deacon wrote:
> > On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> > > Switch subsystem to use atomic find_bit() or atomic iterators as
> > > appropriate.
> > >
> > > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > > ---
> > > drivers/perf/arm-cci.c | 23 +++++------------------
> > > drivers/perf/arm-ccn.c | 10 ++--------
> > > drivers/perf/arm_dmc620_pmu.c | 9 ++-------
> > > drivers/perf/arm_pmuv3.c | 8 ++------
> > > 4 files changed, 11 insertions(+), 39 deletions(-)
> > >
> > > diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> > > index 61de861eaf91..70fbf9d09d37 100644
> > > --- a/drivers/perf/arm-cci.c
> > > +++ b/drivers/perf/arm-cci.c
> > > @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> > > return CCI400_PMU_CYCLE_CNTR_IDX;
> > > }
> > >
> > > - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> > > - if (!test_and_set_bit(idx, hw->used_mask))
> > > - return idx;
> > > -
> > > - /* No counters available */
> > > - return -EAGAIN;
> > > + idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
> >
> > CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
>
> You're right. Will fix in v2
>
> > [...]
> >
> > > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > > index 30cea6859574..e41c84dabc3e 100644
> > > --- a/drivers/perf/arm_dmc620_pmu.c
> > > +++ b/drivers/perf/arm_dmc620_pmu.c
> > > @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> > > end_idx = DMC620_PMU_MAX_COUNTERS;
> > > }
> > >
> > > - for (idx = start_idx; idx < end_idx; ++idx) {
> > > - if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> > > - return idx;
> > > - }
> > > -
> > > - /* The counters are all in use. */
> > > - return -EAGAIN;
> > > + idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
> >
> > It might just be me, but I'd find this a tonne easier to read if you swapped
> > the last two arguments around so that the offset came before the limit in
> > the new function.
>
> I personally agree, but we already have find_next_*_bit(addr, nbits, offset)
> functions, and having atomic versions of the same with different order
> of arguments will make it even more messy...
Urgh, and there's loads of them too :(
Will
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2023-11-21 16:18 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20231118155105.25678-1-yury.norov@gmail.com>
2023-11-18 15:50 ` [PATCH 07/34] perf/arm: optimize opencoded atomic find_bit() API Yury Norov
2023-11-21 15:53 ` Will Deacon
2023-11-21 16:16 ` Yury Norov
2023-11-21 16:17 ` Will Deacon
2023-11-18 15:50 ` [PATCH 08/34] drivers/perf: optimize ali_drw_get_counter_idx() by using find_bit() Yury Norov
2023-11-21 15:54 ` Will Deacon
2023-11-18 15:50 ` [PATCH 17/34] iommu: use atomic find_bit() API where appropriate Yury Norov
2023-11-18 15:51 ` [PATCH 31/34] drivers/perf: optimize m1_pmu_get_event_idx() by using find_bit() API Yury Norov
2023-11-18 18:40 ` Marc Zyngier
2023-11-18 18:45 ` Yury Norov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).