* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 13:53 ` Marc Gonzalez
0 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 13:53 UTC (permalink / raw)
To: Dmitry Torokhov, Robin Murphy
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette, linux-clk,
LKML, Bjorn Andersson, Russell King, Linux ARM, Sudip Mukherjee,
Guenter Roeck
On 11/12/2019 23:28, Dmitry Torokhov wrote:
> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>
>> What is the rationale for the devm_add_action API?
>
> For one-off and maybe complex unwind actions in drivers that wish to use
> devm API (as mixing devm and manual release is verboten). Also is often
> used when some core subsystem does not provide enough devm APIs.
Thanks for the insight, Dmitry. Thanks to Robin too.
This is what I understand so far:
devm_add_action() is nice because it hides/factorizes the complexity
of the devres API, but it incurs a small storage overhead of one
pointer per call, which makes it unfit for frequently used actions,
such as clk_get.
Is that correct?
My question is: why not design the API without the small overhead?
Proof of concept below:
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 0bbb328bd17f..76392dd6273b 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
}
EXPORT_SYMBOL_GPL(devres_release_group);
+void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
+{
+ void *data = devres_alloc(func, size, GFP_KERNEL);
+
+ if (data) {
+ memcpy(data, arg, size);
+ devres_add(dev, data);
+ } else
+ func(dev, arg);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(devm_add);
+
/*
* Custom devres actions allow inserting a simple function call
* into the teadown sequence.
diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index be160764911b..8db671823126 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -4,6 +4,11 @@
#include <linux/export.h>
#include <linux/gfp.h>
+static void __clk_put(struct device *dev, void *data)
+{
+ clk_put(*(struct clk **)data);
+}
+
static void devm_clk_release(struct device *dev, void *res)
{
clk_put(*(struct clk **)res);
@@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
struct clk *devm_clk_get(struct device *dev, const char *id)
{
- struct clk **ptr, *clk;
-
- ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
- if (!ptr)
- return ERR_PTR(-ENOMEM);
+ struct clk *clk = clk_get(dev, id);
- clk = clk_get(dev, id);
- if (!IS_ERR(clk)) {
- *ptr = clk;
- devres_add(dev, ptr);
- } else {
- devres_free(ptr);
- }
+ if (!IS_ERR(clk))
+ if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
+ clk = ERR_PTR(-ENOMEM);
return clk;
}
diff --git a/include/linux/device.h b/include/linux/device.h
index e226030c1df3..5acb61ec39ab 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -970,6 +970,7 @@ void __iomem *devm_of_iomap(struct device *dev,
resource_size_t *size);
/* allows to add/remove a custom action to devres stack */
+void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size);
int devm_add_action(struct device *dev, void (*action)(void *), void *data);
void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
void devm_release_action(struct device *dev, void (*action)(void *), void *data);
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 13:53 ` Marc Gonzalez
@ 2019-12-12 14:17 ` Russell King - ARM Linux admin
-1 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 14:17 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Dmitry Torokhov, Robin Murphy, Bjorn Andersson, Kuninori Morimoto,
Stephen Boyd, Michael Turquette, LKML, Sudip Mukherjee,
Guenter Roeck, linux-clk, Linux ARM
On Thu, Dec 12, 2019 at 02:53:40PM +0100, Marc Gonzalez wrote:
> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>
> > On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> >
> >> What is the rationale for the devm_add_action API?
> >
> > For one-off and maybe complex unwind actions in drivers that wish to use
> > devm API (as mixing devm and manual release is verboten). Also is often
> > used when some core subsystem does not provide enough devm APIs.
>
> Thanks for the insight, Dmitry. Thanks to Robin too.
>
> This is what I understand so far:
>
> devm_add_action() is nice because it hides/factorizes the complexity
> of the devres API, but it incurs a small storage overhead of one
> pointer per call, which makes it unfit for frequently used actions,
> such as clk_get.
>
> Is that correct?
>
> My question is: why not design the API without the small overhead?
>
> Proof of concept below:
>
>
> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> index 0bbb328bd17f..76392dd6273b 100644
> --- a/drivers/base/devres.c
> +++ b/drivers/base/devres.c
> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
> }
> EXPORT_SYMBOL_GPL(devres_release_group);
>
> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
> +{
> + void *data = devres_alloc(func, size, GFP_KERNEL);
> +
> + if (data) {
> + memcpy(data, arg, size);
> + devres_add(dev, data);
> + } else
> + func(dev, arg);
> +
> + return data;
> +}
> +EXPORT_SYMBOL_GPL(devm_add);
> +
> /*
> * Custom devres actions allow inserting a simple function call
> * into the teadown sequence.
> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
> index be160764911b..8db671823126 100644
> --- a/drivers/clk/clk-devres.c
> +++ b/drivers/clk/clk-devres.c
> @@ -4,6 +4,11 @@
> #include <linux/export.h>
> #include <linux/gfp.h>
>
> +static void __clk_put(struct device *dev, void *data)
> +{
> + clk_put(*(struct clk **)data);
> +}
> +
> static void devm_clk_release(struct device *dev, void *res)
> {
> clk_put(*(struct clk **)res);
> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
>
> struct clk *devm_clk_get(struct device *dev, const char *id)
> {
> - struct clk **ptr, *clk;
> -
> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
> - if (!ptr)
> - return ERR_PTR(-ENOMEM);
> + struct clk *clk = clk_get(dev, id);
>
> - clk = clk_get(dev, id);
> - if (!IS_ERR(clk)) {
> - *ptr = clk;
> - devres_add(dev, ptr);
> - } else {
> - devres_free(ptr);
> - }
> + if (!IS_ERR(clk))
> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> + clk = ERR_PTR(-ENOMEM);
You leak clk here.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 14:17 ` Russell King - ARM Linux admin
0 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 14:17 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette,
Dmitry Torokhov, linux-clk, LKML, Bjorn Andersson, Linux ARM,
Robin Murphy, Sudip Mukherjee, Guenter Roeck
On Thu, Dec 12, 2019 at 02:53:40PM +0100, Marc Gonzalez wrote:
> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>
> > On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> >
> >> What is the rationale for the devm_add_action API?
> >
> > For one-off and maybe complex unwind actions in drivers that wish to use
> > devm API (as mixing devm and manual release is verboten). Also is often
> > used when some core subsystem does not provide enough devm APIs.
>
> Thanks for the insight, Dmitry. Thanks to Robin too.
>
> This is what I understand so far:
>
> devm_add_action() is nice because it hides/factorizes the complexity
> of the devres API, but it incurs a small storage overhead of one
> pointer per call, which makes it unfit for frequently used actions,
> such as clk_get.
>
> Is that correct?
>
> My question is: why not design the API without the small overhead?
>
> Proof of concept below:
>
>
> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> index 0bbb328bd17f..76392dd6273b 100644
> --- a/drivers/base/devres.c
> +++ b/drivers/base/devres.c
> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
> }
> EXPORT_SYMBOL_GPL(devres_release_group);
>
> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
> +{
> + void *data = devres_alloc(func, size, GFP_KERNEL);
> +
> + if (data) {
> + memcpy(data, arg, size);
> + devres_add(dev, data);
> + } else
> + func(dev, arg);
> +
> + return data;
> +}
> +EXPORT_SYMBOL_GPL(devm_add);
> +
> /*
> * Custom devres actions allow inserting a simple function call
> * into the teadown sequence.
> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
> index be160764911b..8db671823126 100644
> --- a/drivers/clk/clk-devres.c
> +++ b/drivers/clk/clk-devres.c
> @@ -4,6 +4,11 @@
> #include <linux/export.h>
> #include <linux/gfp.h>
>
> +static void __clk_put(struct device *dev, void *data)
> +{
> + clk_put(*(struct clk **)data);
> +}
> +
> static void devm_clk_release(struct device *dev, void *res)
> {
> clk_put(*(struct clk **)res);
> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
>
> struct clk *devm_clk_get(struct device *dev, const char *id)
> {
> - struct clk **ptr, *clk;
> -
> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
> - if (!ptr)
> - return ERR_PTR(-ENOMEM);
> + struct clk *clk = clk_get(dev, id);
>
> - clk = clk_get(dev, id);
> - if (!IS_ERR(clk)) {
> - *ptr = clk;
> - devres_add(dev, ptr);
> - } else {
> - devres_free(ptr);
> - }
> + if (!IS_ERR(clk))
> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> + clk = ERR_PTR(-ENOMEM);
You leak clk here.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 14:17 ` Russell King - ARM Linux admin
@ 2019-12-12 14:41 ` Marc Gonzalez
-1 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 14:41 UTC (permalink / raw)
To: Russell King - ARM Linux admin
Cc: Dmitry Torokhov, Robin Murphy, Bjorn Andersson, Kuninori Morimoto,
Stephen Boyd, Michael Turquette, LKML, Sudip Mukherjee,
Guenter Roeck, linux-clk, Linux ARM
On 12/12/2019 15:17, Russell King - ARM Linux admin wrote:
> On Thu, Dec 12, 2019 at 02:53:40PM +0100, Marc Gonzalez wrote:
>
>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>
>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>
>>>> What is the rationale for the devm_add_action API?
>>>
>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>> devm API (as mixing devm and manual release is verboten). Also is often
>>> used when some core subsystem does not provide enough devm APIs.
>>
>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>
>> This is what I understand so far:
>>
>> devm_add_action() is nice because it hides/factorizes the complexity
>> of the devres API, but it incurs a small storage overhead of one
>> pointer per call, which makes it unfit for frequently used actions,
>> such as clk_get.
>>
>> Is that correct?
>>
>> My question is: why not design the API without the small overhead?
>>
>> Proof of concept below:
>>
>>
>> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
>> index 0bbb328bd17f..76392dd6273b 100644
>> --- a/drivers/base/devres.c
>> +++ b/drivers/base/devres.c
>> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
>> }
>> EXPORT_SYMBOL_GPL(devres_release_group);
>>
>> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
>> +{
>> + void *data = devres_alloc(func, size, GFP_KERNEL);
>> +
>> + if (data) {
>> + memcpy(data, arg, size);
>> + devres_add(dev, data);
>> + } else
>> + func(dev, arg);
>> +
>> + return data;
>> +}
>> +EXPORT_SYMBOL_GPL(devm_add);
>> +
>> /*
>> * Custom devres actions allow inserting a simple function call
>> * into the teadown sequence.
>> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
>> index be160764911b..8db671823126 100644
>> --- a/drivers/clk/clk-devres.c
>> +++ b/drivers/clk/clk-devres.c
>> @@ -4,6 +4,11 @@
>> #include <linux/export.h>
>> #include <linux/gfp.h>
>>
>> +static void __clk_put(struct device *dev, void *data)
>> +{
>> + clk_put(*(struct clk **)data);
>> +}
>> +
>> static void devm_clk_release(struct device *dev, void *res)
>> {
>> clk_put(*(struct clk **)res);
>> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
>>
>> struct clk *devm_clk_get(struct device *dev, const char *id)
>> {
>> - struct clk **ptr, *clk;
>> -
>> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
>> - if (!ptr)
>> - return ERR_PTR(-ENOMEM);
>> + struct clk *clk = clk_get(dev, id);
>>
>> - clk = clk_get(dev, id);
>> - if (!IS_ERR(clk)) {
>> - *ptr = clk;
>> - devres_add(dev, ptr);
>> - } else {
>> - devres_free(ptr);
>> - }
>> + if (!IS_ERR(clk))
>> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
>> + clk = ERR_PTR(-ENOMEM);
>
> You leak clk here.
I don't think so ;-)
If devm_add() returns NULL, then we have called __clk_put(dev, &clk);
Regards.
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 14:41 ` Marc Gonzalez
0 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 14:41 UTC (permalink / raw)
To: Russell King - ARM Linux admin
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette,
Dmitry Torokhov, linux-clk, LKML, Bjorn Andersson, Linux ARM,
Robin Murphy, Sudip Mukherjee, Guenter Roeck
On 12/12/2019 15:17, Russell King - ARM Linux admin wrote:
> On Thu, Dec 12, 2019 at 02:53:40PM +0100, Marc Gonzalez wrote:
>
>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>
>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>
>>>> What is the rationale for the devm_add_action API?
>>>
>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>> devm API (as mixing devm and manual release is verboten). Also is often
>>> used when some core subsystem does not provide enough devm APIs.
>>
>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>
>> This is what I understand so far:
>>
>> devm_add_action() is nice because it hides/factorizes the complexity
>> of the devres API, but it incurs a small storage overhead of one
>> pointer per call, which makes it unfit for frequently used actions,
>> such as clk_get.
>>
>> Is that correct?
>>
>> My question is: why not design the API without the small overhead?
>>
>> Proof of concept below:
>>
>>
>> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
>> index 0bbb328bd17f..76392dd6273b 100644
>> --- a/drivers/base/devres.c
>> +++ b/drivers/base/devres.c
>> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
>> }
>> EXPORT_SYMBOL_GPL(devres_release_group);
>>
>> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
>> +{
>> + void *data = devres_alloc(func, size, GFP_KERNEL);
>> +
>> + if (data) {
>> + memcpy(data, arg, size);
>> + devres_add(dev, data);
>> + } else
>> + func(dev, arg);
>> +
>> + return data;
>> +}
>> +EXPORT_SYMBOL_GPL(devm_add);
>> +
>> /*
>> * Custom devres actions allow inserting a simple function call
>> * into the teadown sequence.
>> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
>> index be160764911b..8db671823126 100644
>> --- a/drivers/clk/clk-devres.c
>> +++ b/drivers/clk/clk-devres.c
>> @@ -4,6 +4,11 @@
>> #include <linux/export.h>
>> #include <linux/gfp.h>
>>
>> +static void __clk_put(struct device *dev, void *data)
>> +{
>> + clk_put(*(struct clk **)data);
>> +}
>> +
>> static void devm_clk_release(struct device *dev, void *res)
>> {
>> clk_put(*(struct clk **)res);
>> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
>>
>> struct clk *devm_clk_get(struct device *dev, const char *id)
>> {
>> - struct clk **ptr, *clk;
>> -
>> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
>> - if (!ptr)
>> - return ERR_PTR(-ENOMEM);
>> + struct clk *clk = clk_get(dev, id);
>>
>> - clk = clk_get(dev, id);
>> - if (!IS_ERR(clk)) {
>> - *ptr = clk;
>> - devres_add(dev, ptr);
>> - } else {
>> - devres_free(ptr);
>> - }
>> + if (!IS_ERR(clk))
>> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
>> + clk = ERR_PTR(-ENOMEM);
>
> You leak clk here.
I don't think so ;-)
If devm_add() returns NULL, then we have called __clk_put(dev, &clk);
Regards.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 14:41 ` Marc Gonzalez
@ 2019-12-12 14:46 ` Russell King - ARM Linux admin
-1 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 14:46 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Dmitry Torokhov, Robin Murphy, Bjorn Andersson, Kuninori Morimoto,
Stephen Boyd, Michael Turquette, LKML, Sudip Mukherjee,
Guenter Roeck, linux-clk, Linux ARM
On Thu, Dec 12, 2019 at 03:41:20PM +0100, Marc Gonzalez wrote:
> On 12/12/2019 15:17, Russell King - ARM Linux admin wrote:
>
> > On Thu, Dec 12, 2019 at 02:53:40PM +0100, Marc Gonzalez wrote:
> >
> >> On 11/12/2019 23:28, Dmitry Torokhov wrote:
> >>
> >>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> >>>
> >>>> What is the rationale for the devm_add_action API?
> >>>
> >>> For one-off and maybe complex unwind actions in drivers that wish to use
> >>> devm API (as mixing devm and manual release is verboten). Also is often
> >>> used when some core subsystem does not provide enough devm APIs.
> >>
> >> Thanks for the insight, Dmitry. Thanks to Robin too.
> >>
> >> This is what I understand so far:
> >>
> >> devm_add_action() is nice because it hides/factorizes the complexity
> >> of the devres API, but it incurs a small storage overhead of one
> >> pointer per call, which makes it unfit for frequently used actions,
> >> such as clk_get.
> >>
> >> Is that correct?
> >>
> >> My question is: why not design the API without the small overhead?
> >>
> >> Proof of concept below:
> >>
> >>
> >> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> >> index 0bbb328bd17f..76392dd6273b 100644
> >> --- a/drivers/base/devres.c
> >> +++ b/drivers/base/devres.c
> >> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
> >> }
> >> EXPORT_SYMBOL_GPL(devres_release_group);
> >>
> >> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
> >> +{
> >> + void *data = devres_alloc(func, size, GFP_KERNEL);
> >> +
> >> + if (data) {
> >> + memcpy(data, arg, size);
> >> + devres_add(dev, data);
> >> + } else
> >> + func(dev, arg);
> >> +
> >> + return data;
> >> +}
> >> +EXPORT_SYMBOL_GPL(devm_add);
> >> +
> >> /*
> >> * Custom devres actions allow inserting a simple function call
> >> * into the teadown sequence.
> >> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
> >> index be160764911b..8db671823126 100644
> >> --- a/drivers/clk/clk-devres.c
> >> +++ b/drivers/clk/clk-devres.c
> >> @@ -4,6 +4,11 @@
> >> #include <linux/export.h>
> >> #include <linux/gfp.h>
> >>
> >> +static void __clk_put(struct device *dev, void *data)
> >> +{
> >> + clk_put(*(struct clk **)data);
> >> +}
> >> +
> >> static void devm_clk_release(struct device *dev, void *res)
> >> {
> >> clk_put(*(struct clk **)res);
> >> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
> >>
> >> struct clk *devm_clk_get(struct device *dev, const char *id)
> >> {
> >> - struct clk **ptr, *clk;
> >> -
> >> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
> >> - if (!ptr)
> >> - return ERR_PTR(-ENOMEM);
> >> + struct clk *clk = clk_get(dev, id);
> >>
> >> - clk = clk_get(dev, id);
> >> - if (!IS_ERR(clk)) {
> >> - *ptr = clk;
> >> - devres_add(dev, ptr);
> >> - } else {
> >> - devres_free(ptr);
> >> - }
> >> + if (!IS_ERR(clk))
> >> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> >> + clk = ERR_PTR(-ENOMEM);
> >
> > You leak clk here.
>
> I don't think so ;-)
>
> If devm_add() returns NULL, then we have called __clk_put(dev, &clk);
Okay.
However, please don't call this __clk_put(). git grep __clk_put will
tell you why. Thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 14:46 ` Russell King - ARM Linux admin
0 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 14:46 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette,
Dmitry Torokhov, linux-clk, LKML, Bjorn Andersson, Linux ARM,
Robin Murphy, Sudip Mukherjee, Guenter Roeck
On Thu, Dec 12, 2019 at 03:41:20PM +0100, Marc Gonzalez wrote:
> On 12/12/2019 15:17, Russell King - ARM Linux admin wrote:
>
> > On Thu, Dec 12, 2019 at 02:53:40PM +0100, Marc Gonzalez wrote:
> >
> >> On 11/12/2019 23:28, Dmitry Torokhov wrote:
> >>
> >>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> >>>
> >>>> What is the rationale for the devm_add_action API?
> >>>
> >>> For one-off and maybe complex unwind actions in drivers that wish to use
> >>> devm API (as mixing devm and manual release is verboten). Also is often
> >>> used when some core subsystem does not provide enough devm APIs.
> >>
> >> Thanks for the insight, Dmitry. Thanks to Robin too.
> >>
> >> This is what I understand so far:
> >>
> >> devm_add_action() is nice because it hides/factorizes the complexity
> >> of the devres API, but it incurs a small storage overhead of one
> >> pointer per call, which makes it unfit for frequently used actions,
> >> such as clk_get.
> >>
> >> Is that correct?
> >>
> >> My question is: why not design the API without the small overhead?
> >>
> >> Proof of concept below:
> >>
> >>
> >> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> >> index 0bbb328bd17f..76392dd6273b 100644
> >> --- a/drivers/base/devres.c
> >> +++ b/drivers/base/devres.c
> >> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
> >> }
> >> EXPORT_SYMBOL_GPL(devres_release_group);
> >>
> >> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
> >> +{
> >> + void *data = devres_alloc(func, size, GFP_KERNEL);
> >> +
> >> + if (data) {
> >> + memcpy(data, arg, size);
> >> + devres_add(dev, data);
> >> + } else
> >> + func(dev, arg);
> >> +
> >> + return data;
> >> +}
> >> +EXPORT_SYMBOL_GPL(devm_add);
> >> +
> >> /*
> >> * Custom devres actions allow inserting a simple function call
> >> * into the teadown sequence.
> >> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
> >> index be160764911b..8db671823126 100644
> >> --- a/drivers/clk/clk-devres.c
> >> +++ b/drivers/clk/clk-devres.c
> >> @@ -4,6 +4,11 @@
> >> #include <linux/export.h>
> >> #include <linux/gfp.h>
> >>
> >> +static void __clk_put(struct device *dev, void *data)
> >> +{
> >> + clk_put(*(struct clk **)data);
> >> +}
> >> +
> >> static void devm_clk_release(struct device *dev, void *res)
> >> {
> >> clk_put(*(struct clk **)res);
> >> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
> >>
> >> struct clk *devm_clk_get(struct device *dev, const char *id)
> >> {
> >> - struct clk **ptr, *clk;
> >> -
> >> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
> >> - if (!ptr)
> >> - return ERR_PTR(-ENOMEM);
> >> + struct clk *clk = clk_get(dev, id);
> >>
> >> - clk = clk_get(dev, id);
> >> - if (!IS_ERR(clk)) {
> >> - *ptr = clk;
> >> - devres_add(dev, ptr);
> >> - } else {
> >> - devres_free(ptr);
> >> - }
> >> + if (!IS_ERR(clk))
> >> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> >> + clk = ERR_PTR(-ENOMEM);
> >
> > You leak clk here.
>
> I don't think so ;-)
>
> If devm_add() returns NULL, then we have called __clk_put(dev, &clk);
Okay.
However, please don't call this __clk_put(). git grep __clk_put will
tell you why. Thanks.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 14:46 ` Russell King - ARM Linux admin
@ 2019-12-12 15:51 ` Marc Gonzalez
-1 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 15:51 UTC (permalink / raw)
To: Russell King - ARM Linux admin
Cc: Dmitry Torokhov, Robin Murphy, Bjorn Andersson, Kuninori Morimoto,
Stephen Boyd, Michael Turquette, LKML, Sudip Mukherjee,
Guenter Roeck, linux-clk, Linux ARM
On 12/12/2019 15:46, Russell King - ARM Linux admin wrote:
> However, please don't call this __clk_put().
> git grep __clk_put will tell you why. Thanks.
$ git grep __clk_put
drivers/clk/clk-devres.c:static void __clk_put(struct device *dev, void *data)
drivers/clk/clk-devres.c: if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
drivers/clk/clk.c:void __clk_put(struct clk *clk)
drivers/clk/clk.h:void __clk_put(struct clk *clk);
drivers/clk/clk.h:static inline void __clk_put(struct clk *clk) { }
drivers/clk/clkdev.c: __clk_put(clk);
I see. I will s/__clk_put/my_clk_put/ in my proposal.
Out of curiosity...
$ git grep __clk_put v2.6.29-rc1
v2.6.29-rc1:arch/arm/common/clkdev.c: __clk_put(clk);
v2.6.29-rc1:arch/arm/mach-ep93xx/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
v2.6.29-rc1:arch/arm/mach-integrator/include/mach/clkdev.h:static inline void __clk_put(struct clk *clk)
v2.6.29-rc1:arch/arm/mach-pxa/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
v2.6.29-rc1:arch/arm/mach-realview/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
v2.6.29-rc1:arch/arm/mach-versatile/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
Genesis seems to be 0318e693d3a56
The clkdev API expected platforms to export a __clk_put method?
Regards.
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 15:51 ` Marc Gonzalez
0 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 15:51 UTC (permalink / raw)
To: Russell King - ARM Linux admin
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette,
Dmitry Torokhov, linux-clk, LKML, Bjorn Andersson, Linux ARM,
Robin Murphy, Sudip Mukherjee, Guenter Roeck
On 12/12/2019 15:46, Russell King - ARM Linux admin wrote:
> However, please don't call this __clk_put().
> git grep __clk_put will tell you why. Thanks.
$ git grep __clk_put
drivers/clk/clk-devres.c:static void __clk_put(struct device *dev, void *data)
drivers/clk/clk-devres.c: if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
drivers/clk/clk.c:void __clk_put(struct clk *clk)
drivers/clk/clk.h:void __clk_put(struct clk *clk);
drivers/clk/clk.h:static inline void __clk_put(struct clk *clk) { }
drivers/clk/clkdev.c: __clk_put(clk);
I see. I will s/__clk_put/my_clk_put/ in my proposal.
Out of curiosity...
$ git grep __clk_put v2.6.29-rc1
v2.6.29-rc1:arch/arm/common/clkdev.c: __clk_put(clk);
v2.6.29-rc1:arch/arm/mach-ep93xx/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
v2.6.29-rc1:arch/arm/mach-integrator/include/mach/clkdev.h:static inline void __clk_put(struct clk *clk)
v2.6.29-rc1:arch/arm/mach-pxa/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
v2.6.29-rc1:arch/arm/mach-realview/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
v2.6.29-rc1:arch/arm/mach-versatile/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
Genesis seems to be 0318e693d3a56
The clkdev API expected platforms to export a __clk_put method?
Regards.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 15:51 ` Marc Gonzalez
@ 2019-12-12 16:13 ` Russell King - ARM Linux admin
-1 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 16:13 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Dmitry Torokhov, Robin Murphy, Bjorn Andersson, Kuninori Morimoto,
Stephen Boyd, Michael Turquette, LKML, Sudip Mukherjee,
Guenter Roeck, linux-clk, Linux ARM
On Thu, Dec 12, 2019 at 04:51:25PM +0100, Marc Gonzalez wrote:
> On 12/12/2019 15:46, Russell King - ARM Linux admin wrote:
>
> > However, please don't call this __clk_put().
> > git grep __clk_put will tell you why. Thanks.
>
> $ git grep __clk_put
> drivers/clk/clk-devres.c:static void __clk_put(struct device *dev, void *data)
> drivers/clk/clk-devres.c: if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> drivers/clk/clk.c:void __clk_put(struct clk *clk)
> drivers/clk/clk.h:void __clk_put(struct clk *clk);
> drivers/clk/clk.h:static inline void __clk_put(struct clk *clk) { }
> drivers/clk/clkdev.c: __clk_put(clk);
>
> I see. I will s/__clk_put/my_clk_put/ in my proposal.
>
> Out of curiosity...
>
> $ git grep __clk_put v2.6.29-rc1
> v2.6.29-rc1:arch/arm/common/clkdev.c: __clk_put(clk);
> v2.6.29-rc1:arch/arm/mach-ep93xx/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
> v2.6.29-rc1:arch/arm/mach-integrator/include/mach/clkdev.h:static inline void __clk_put(struct clk *clk)
> v2.6.29-rc1:arch/arm/mach-pxa/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
> v2.6.29-rc1:arch/arm/mach-realview/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
> v2.6.29-rc1:arch/arm/mach-versatile/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
>
> Genesis seems to be 0318e693d3a56
>
> The clkdev API expected platforms to export a __clk_put method?
Along with __clk_get(), these were the interfaces from the cross-
platform clkdev code to the clk API implementation specific code.
__clk_get() no longer exists as its uses were eliminated, but
__clk_put() remains.
It's quite logical if you read the patch which your above commit ID
references.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 16:13 ` Russell King - ARM Linux admin
0 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 16:13 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette,
Dmitry Torokhov, linux-clk, LKML, Bjorn Andersson, Linux ARM,
Robin Murphy, Sudip Mukherjee, Guenter Roeck
On Thu, Dec 12, 2019 at 04:51:25PM +0100, Marc Gonzalez wrote:
> On 12/12/2019 15:46, Russell King - ARM Linux admin wrote:
>
> > However, please don't call this __clk_put().
> > git grep __clk_put will tell you why. Thanks.
>
> $ git grep __clk_put
> drivers/clk/clk-devres.c:static void __clk_put(struct device *dev, void *data)
> drivers/clk/clk-devres.c: if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> drivers/clk/clk.c:void __clk_put(struct clk *clk)
> drivers/clk/clk.h:void __clk_put(struct clk *clk);
> drivers/clk/clk.h:static inline void __clk_put(struct clk *clk) { }
> drivers/clk/clkdev.c: __clk_put(clk);
>
> I see. I will s/__clk_put/my_clk_put/ in my proposal.
>
> Out of curiosity...
>
> $ git grep __clk_put v2.6.29-rc1
> v2.6.29-rc1:arch/arm/common/clkdev.c: __clk_put(clk);
> v2.6.29-rc1:arch/arm/mach-ep93xx/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
> v2.6.29-rc1:arch/arm/mach-integrator/include/mach/clkdev.h:static inline void __clk_put(struct clk *clk)
> v2.6.29-rc1:arch/arm/mach-pxa/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
> v2.6.29-rc1:arch/arm/mach-realview/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
> v2.6.29-rc1:arch/arm/mach-versatile/include/mach/clkdev.h:#define __clk_put(clk) do { } while (0)
>
> Genesis seems to be 0318e693d3a56
>
> The clkdev API expected platforms to export a __clk_put method?
Along with __clk_get(), these were the interfaces from the cross-
platform clkdev code to the clk API implementation specific code.
__clk_get() no longer exists as its uses were eliminated, but
__clk_put() remains.
It's quite logical if you read the patch which your above commit ID
references.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 13:53 ` Marc Gonzalez
@ 2019-12-12 14:47 ` Robin Murphy
-1 siblings, 0 replies; 40+ messages in thread
From: Robin Murphy @ 2019-12-12 14:47 UTC (permalink / raw)
To: Marc Gonzalez, Dmitry Torokhov
Cc: Bjorn Andersson, Kuninori Morimoto, Stephen Boyd,
Michael Turquette, LKML, Sudip Mukherjee, Russell King,
Guenter Roeck, linux-clk, Linux ARM
On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>
>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>
>>> What is the rationale for the devm_add_action API?
>>
>> For one-off and maybe complex unwind actions in drivers that wish to use
>> devm API (as mixing devm and manual release is verboten). Also is often
>> used when some core subsystem does not provide enough devm APIs.
>
> Thanks for the insight, Dmitry. Thanks to Robin too.
>
> This is what I understand so far:
>
> devm_add_action() is nice because it hides/factorizes the complexity
> of the devres API, but it incurs a small storage overhead of one
> pointer per call, which makes it unfit for frequently used actions,
> such as clk_get.
>
> Is that correct?
>
> My question is: why not design the API without the small overhead?
Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
least as big as two pointers anyway, so this "overhead" should mostly be
free in practice. Plus the devres API is almost entirely about being
able to write simple robust code, rather than absolute efficiency - I
mean, struct devres itself is already 5 pointers large at the absolute
minimum ;)
In summary: the email client in which I'm writing this is currently
using 2.3GB of my workstation's 64GB of RAM; welcome to 21st century
software... :P
Robin.
> Proof of concept below:
>
>
> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> index 0bbb328bd17f..76392dd6273b 100644
> --- a/drivers/base/devres.c
> +++ b/drivers/base/devres.c
> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
> }
> EXPORT_SYMBOL_GPL(devres_release_group);
>
> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
> +{
> + void *data = devres_alloc(func, size, GFP_KERNEL);
> +
> + if (data) {
> + memcpy(data, arg, size);
> + devres_add(dev, data);
> + } else
> + func(dev, arg);
> +
> + return data;
> +}
> +EXPORT_SYMBOL_GPL(devm_add);
> +
> /*
> * Custom devres actions allow inserting a simple function call
> * into the teadown sequence.
> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
> index be160764911b..8db671823126 100644
> --- a/drivers/clk/clk-devres.c
> +++ b/drivers/clk/clk-devres.c
> @@ -4,6 +4,11 @@
> #include <linux/export.h>
> #include <linux/gfp.h>
>
> +static void __clk_put(struct device *dev, void *data)
> +{
> + clk_put(*(struct clk **)data);
> +}
> +
> static void devm_clk_release(struct device *dev, void *res)
> {
> clk_put(*(struct clk **)res);
> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
>
> struct clk *devm_clk_get(struct device *dev, const char *id)
> {
> - struct clk **ptr, *clk;
> -
> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
> - if (!ptr)
> - return ERR_PTR(-ENOMEM);
> + struct clk *clk = clk_get(dev, id);
>
> - clk = clk_get(dev, id);
> - if (!IS_ERR(clk)) {
> - *ptr = clk;
> - devres_add(dev, ptr);
> - } else {
> - devres_free(ptr);
> - }
> + if (!IS_ERR(clk))
> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> + clk = ERR_PTR(-ENOMEM);
>
> return clk;
> }
> diff --git a/include/linux/device.h b/include/linux/device.h
> index e226030c1df3..5acb61ec39ab 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -970,6 +970,7 @@ void __iomem *devm_of_iomap(struct device *dev,
> resource_size_t *size);
>
> /* allows to add/remove a custom action to devres stack */
> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size);
> int devm_add_action(struct device *dev, void (*action)(void *), void *data);
> void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
> void devm_release_action(struct device *dev, void (*action)(void *), void *data);
>
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 14:47 ` Robin Murphy
0 siblings, 0 replies; 40+ messages in thread
From: Robin Murphy @ 2019-12-12 14:47 UTC (permalink / raw)
To: Marc Gonzalez, Dmitry Torokhov
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette, linux-clk,
LKML, Bjorn Andersson, Russell King, Linux ARM, Sudip Mukherjee,
Guenter Roeck
On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>
>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>
>>> What is the rationale for the devm_add_action API?
>>
>> For one-off and maybe complex unwind actions in drivers that wish to use
>> devm API (as mixing devm and manual release is verboten). Also is often
>> used when some core subsystem does not provide enough devm APIs.
>
> Thanks for the insight, Dmitry. Thanks to Robin too.
>
> This is what I understand so far:
>
> devm_add_action() is nice because it hides/factorizes the complexity
> of the devres API, but it incurs a small storage overhead of one
> pointer per call, which makes it unfit for frequently used actions,
> such as clk_get.
>
> Is that correct?
>
> My question is: why not design the API without the small overhead?
Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
least as big as two pointers anyway, so this "overhead" should mostly be
free in practice. Plus the devres API is almost entirely about being
able to write simple robust code, rather than absolute efficiency - I
mean, struct devres itself is already 5 pointers large at the absolute
minimum ;)
In summary: the email client in which I'm writing this is currently
using 2.3GB of my workstation's 64GB of RAM; welcome to 21st century
software... :P
Robin.
> Proof of concept below:
>
>
> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> index 0bbb328bd17f..76392dd6273b 100644
> --- a/drivers/base/devres.c
> +++ b/drivers/base/devres.c
> @@ -685,6 +685,20 @@ int devres_release_group(struct device *dev, void *id)
> }
> EXPORT_SYMBOL_GPL(devres_release_group);
>
> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size)
> +{
> + void *data = devres_alloc(func, size, GFP_KERNEL);
> +
> + if (data) {
> + memcpy(data, arg, size);
> + devres_add(dev, data);
> + } else
> + func(dev, arg);
> +
> + return data;
> +}
> +EXPORT_SYMBOL_GPL(devm_add);
> +
> /*
> * Custom devres actions allow inserting a simple function call
> * into the teadown sequence.
> diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
> index be160764911b..8db671823126 100644
> --- a/drivers/clk/clk-devres.c
> +++ b/drivers/clk/clk-devres.c
> @@ -4,6 +4,11 @@
> #include <linux/export.h>
> #include <linux/gfp.h>
>
> +static void __clk_put(struct device *dev, void *data)
> +{
> + clk_put(*(struct clk **)data);
> +}
> +
> static void devm_clk_release(struct device *dev, void *res)
> {
> clk_put(*(struct clk **)res);
> @@ -11,19 +16,11 @@ static void devm_clk_release(struct device *dev, void *res)
>
> struct clk *devm_clk_get(struct device *dev, const char *id)
> {
> - struct clk **ptr, *clk;
> -
> - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
> - if (!ptr)
> - return ERR_PTR(-ENOMEM);
> + struct clk *clk = clk_get(dev, id);
>
> - clk = clk_get(dev, id);
> - if (!IS_ERR(clk)) {
> - *ptr = clk;
> - devres_add(dev, ptr);
> - } else {
> - devres_free(ptr);
> - }
> + if (!IS_ERR(clk))
> + if (!devm_add(dev, __clk_put, &clk, sizeof(clk)))
> + clk = ERR_PTR(-ENOMEM);
>
> return clk;
> }
> diff --git a/include/linux/device.h b/include/linux/device.h
> index e226030c1df3..5acb61ec39ab 100644
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -970,6 +970,7 @@ void __iomem *devm_of_iomap(struct device *dev,
> resource_size_t *size);
>
> /* allows to add/remove a custom action to devres stack */
> +void *devm_add(struct device *dev, dr_release_t func, void *arg, size_t size);
> int devm_add_action(struct device *dev, void (*action)(void *), void *data);
> void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
> void devm_release_action(struct device *dev, void (*action)(void *), void *data);
>
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 14:47 ` Robin Murphy
@ 2019-12-12 16:59 ` Marc Gonzalez
-1 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 16:59 UTC (permalink / raw)
To: Robin Murphy, Dmitry Torokhov
Cc: Bjorn Andersson, Kuninori Morimoto, Stephen Boyd,
Michael Turquette, LKML, Sudip Mukherjee, Russell King,
Guenter Roeck, linux-clk, Linux ARM, x86
On 12/12/2019 15:47, Robin Murphy wrote:
> On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
>
>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>
>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>
>>>> What is the rationale for the devm_add_action API?
>>>
>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>> devm API (as mixing devm and manual release is verboten). Also is often
>>> used when some core subsystem does not provide enough devm APIs.
>>
>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>
>> This is what I understand so far:
>>
>> devm_add_action() is nice because it hides/factorizes the complexity
>> of the devres API, but it incurs a small storage overhead of one
>> pointer per call, which makes it unfit for frequently used actions,
>> such as clk_get.
>>
>> Is that correct?
>>
>> My question is: why not design the API without the small overhead?
>
> Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> least as big as two pointers anyway, so this "overhead" should mostly be
> free in practice. Plus the devres API is almost entirely about being
> able to write simple robust code, rather than absolute efficiency - I
> mean, struct devres itself is already 5 pointers large at the absolute
> minimum ;)
(3 pointers: 1 list_head + 1 function pointer)
I'm confused. The first patch was criticized for potentially adding
an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
platform with 100 clocks).
Let's see. On arm64, ARCH_KMALLOC_MINALIGN is 128.
So basically, a struct devres looks like this on arm64:
list_head.next
list_head.prev
dr_release_t
.
.
.
104 bytes of padding
.
.
.
data (flexible array)
.
.
.
padding up to 256 bytes
Basically, on arm64, every struct devres occupies 256 bytes, most of it
(typically 104 + 112 = 216) wasted as padding.
Hmmm, given how many devm stuff goes on in a modern platform, there
might be large savings to be had...
Assuming 10,000 calls to devres_alloc_node(), we would be wasting ~2 MB
of RAM. Not sure it's worth trying to save that?
$ git grep '#define ARCH_DMA_MINALIGN'
arch/arc/include/asm/cache.h:#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
arch/arm/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/arm64/include/asm/cache.h:#define ARCH_DMA_MINALIGN (128)
arch/c6x/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/csky/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/hexagon/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/m68k/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/microblaze/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128
arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32
arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128
arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/nds32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/nios2/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/parisc/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/powerpc/include/asm/page_32.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/sh/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/unicore32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/xtensa/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
Hmmm, how does arch/x86 do it?
Regards.
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 16:59 ` Marc Gonzalez
0 siblings, 0 replies; 40+ messages in thread
From: Marc Gonzalez @ 2019-12-12 16:59 UTC (permalink / raw)
To: Robin Murphy, Dmitry Torokhov
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette, x86,
linux-clk, LKML, Bjorn Andersson, Russell King, Linux ARM,
Sudip Mukherjee, Guenter Roeck
On 12/12/2019 15:47, Robin Murphy wrote:
> On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
>
>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>
>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>
>>>> What is the rationale for the devm_add_action API?
>>>
>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>> devm API (as mixing devm and manual release is verboten). Also is often
>>> used when some core subsystem does not provide enough devm APIs.
>>
>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>
>> This is what I understand so far:
>>
>> devm_add_action() is nice because it hides/factorizes the complexity
>> of the devres API, but it incurs a small storage overhead of one
>> pointer per call, which makes it unfit for frequently used actions,
>> such as clk_get.
>>
>> Is that correct?
>>
>> My question is: why not design the API without the small overhead?
>
> Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> least as big as two pointers anyway, so this "overhead" should mostly be
> free in practice. Plus the devres API is almost entirely about being
> able to write simple robust code, rather than absolute efficiency - I
> mean, struct devres itself is already 5 pointers large at the absolute
> minimum ;)
(3 pointers: 1 list_head + 1 function pointer)
I'm confused. The first patch was criticized for potentially adding
an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
platform with 100 clocks).
Let's see. On arm64, ARCH_KMALLOC_MINALIGN is 128.
So basically, a struct devres looks like this on arm64:
list_head.next
list_head.prev
dr_release_t
.
.
.
104 bytes of padding
.
.
.
data (flexible array)
.
.
.
padding up to 256 bytes
Basically, on arm64, every struct devres occupies 256 bytes, most of it
(typically 104 + 112 = 216) wasted as padding.
Hmmm, given how many devm stuff goes on in a modern platform, there
might be large savings to be had...
Assuming 10,000 calls to devres_alloc_node(), we would be wasting ~2 MB
of RAM. Not sure it's worth trying to save that?
$ git grep '#define ARCH_DMA_MINALIGN'
arch/arc/include/asm/cache.h:#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
arch/arm/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/arm64/include/asm/cache.h:#define ARCH_DMA_MINALIGN (128)
arch/c6x/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/csky/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/hexagon/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/m68k/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/microblaze/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128
arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32
arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128
arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/nds32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/nios2/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/parisc/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/powerpc/include/asm/page_32.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/sh/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/unicore32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
arch/xtensa/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
Hmmm, how does arch/x86 do it?
Regards.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 16:59 ` Marc Gonzalez
@ 2019-12-12 17:05 ` Russell King - ARM Linux admin
-1 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 17:05 UTC (permalink / raw)
To: Marc Gonzalez
Cc: Robin Murphy, Dmitry Torokhov, Bjorn Andersson, Kuninori Morimoto,
Stephen Boyd, Michael Turquette, LKML, Sudip Mukherjee,
Guenter Roeck, linux-clk, Linux ARM, x86
On Thu, Dec 12, 2019 at 05:59:04PM +0100, Marc Gonzalez wrote:
> On 12/12/2019 15:47, Robin Murphy wrote:
>
> > On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> >
> >> On 11/12/2019 23:28, Dmitry Torokhov wrote:
> >>
> >>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> >>>
> >>>> What is the rationale for the devm_add_action API?
> >>>
> >>> For one-off and maybe complex unwind actions in drivers that wish to use
> >>> devm API (as mixing devm and manual release is verboten). Also is often
> >>> used when some core subsystem does not provide enough devm APIs.
> >>
> >> Thanks for the insight, Dmitry. Thanks to Robin too.
> >>
> >> This is what I understand so far:
> >>
> >> devm_add_action() is nice because it hides/factorizes the complexity
> >> of the devres API, but it incurs a small storage overhead of one
> >> pointer per call, which makes it unfit for frequently used actions,
> >> such as clk_get.
> >>
> >> Is that correct?
> >>
> >> My question is: why not design the API without the small overhead?
> >
> > Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> > least as big as two pointers anyway, so this "overhead" should mostly be
> > free in practice. Plus the devres API is almost entirely about being
> > able to write simple robust code, rather than absolute efficiency - I
> > mean, struct devres itself is already 5 pointers large at the absolute
> > minimum ;)
>
> (3 pointers: 1 list_head + 1 function pointer)
>
> I'm confused. The first patch was criticized for potentially adding
> an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> platform with 100 clocks).
>
> Let's see. On arm64, ARCH_KMALLOC_MINALIGN is 128.
>
> So basically, a struct devres looks like this on arm64:
>
> list_head.next
> list_head.prev
> dr_release_t
> .
> .
> .
> 104 bytes of padding
> .
> .
> .
> data (flexible array)
> .
> .
> .
> padding up to 256 bytes
>
>
> Basically, on arm64, every struct devres occupies 256 bytes, most of it
> (typically 104 + 112 = 216) wasted as padding.
>
> Hmmm, given how many devm stuff goes on in a modern platform, there
> might be large savings to be had...
>
> Assuming 10,000 calls to devres_alloc_node(), we would be wasting ~2 MB
> of RAM. Not sure it's worth trying to save that?
>
> $ git grep '#define ARCH_DMA_MINALIGN'
> arch/arc/include/asm/cache.h:#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
> arch/arm/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/arm64/include/asm/cache.h:#define ARCH_DMA_MINALIGN (128)
> arch/c6x/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/csky/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/hexagon/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/m68k/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/microblaze/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nds32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nios2/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/parisc/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/powerpc/include/asm/page_32.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/sh/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/unicore32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/xtensa/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
>
> Hmmm, how does arch/x86 do it?
As I understand it, x86 tends to be fully coherent, so has no there
is not much requirement for DMA to be aligned to cachelines.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 17:05 ` Russell King - ARM Linux admin
0 siblings, 0 replies; 40+ messages in thread
From: Russell King - ARM Linux admin @ 2019-12-12 17:05 UTC (permalink / raw)
To: Marc Gonzalez
Cc: x86, Kuninori Morimoto, Stephen Boyd, Michael Turquette,
Dmitry Torokhov, linux-clk, LKML, Bjorn Andersson, Linux ARM,
Robin Murphy, Sudip Mukherjee, Guenter Roeck
On Thu, Dec 12, 2019 at 05:59:04PM +0100, Marc Gonzalez wrote:
> On 12/12/2019 15:47, Robin Murphy wrote:
>
> > On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> >
> >> On 11/12/2019 23:28, Dmitry Torokhov wrote:
> >>
> >>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> >>>
> >>>> What is the rationale for the devm_add_action API?
> >>>
> >>> For one-off and maybe complex unwind actions in drivers that wish to use
> >>> devm API (as mixing devm and manual release is verboten). Also is often
> >>> used when some core subsystem does not provide enough devm APIs.
> >>
> >> Thanks for the insight, Dmitry. Thanks to Robin too.
> >>
> >> This is what I understand so far:
> >>
> >> devm_add_action() is nice because it hides/factorizes the complexity
> >> of the devres API, but it incurs a small storage overhead of one
> >> pointer per call, which makes it unfit for frequently used actions,
> >> such as clk_get.
> >>
> >> Is that correct?
> >>
> >> My question is: why not design the API without the small overhead?
> >
> > Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> > least as big as two pointers anyway, so this "overhead" should mostly be
> > free in practice. Plus the devres API is almost entirely about being
> > able to write simple robust code, rather than absolute efficiency - I
> > mean, struct devres itself is already 5 pointers large at the absolute
> > minimum ;)
>
> (3 pointers: 1 list_head + 1 function pointer)
>
> I'm confused. The first patch was criticized for potentially adding
> an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> platform with 100 clocks).
>
> Let's see. On arm64, ARCH_KMALLOC_MINALIGN is 128.
>
> So basically, a struct devres looks like this on arm64:
>
> list_head.next
> list_head.prev
> dr_release_t
> .
> .
> .
> 104 bytes of padding
> .
> .
> .
> data (flexible array)
> .
> .
> .
> padding up to 256 bytes
>
>
> Basically, on arm64, every struct devres occupies 256 bytes, most of it
> (typically 104 + 112 = 216) wasted as padding.
>
> Hmmm, given how many devm stuff goes on in a modern platform, there
> might be large savings to be had...
>
> Assuming 10,000 calls to devres_alloc_node(), we would be wasting ~2 MB
> of RAM. Not sure it's worth trying to save that?
>
> $ git grep '#define ARCH_DMA_MINALIGN'
> arch/arc/include/asm/cache.h:#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
> arch/arm/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/arm64/include/asm/cache.h:#define ARCH_DMA_MINALIGN (128)
> arch/c6x/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/csky/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/hexagon/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/m68k/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/microblaze/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nds32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nios2/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/parisc/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/powerpc/include/asm/page_32.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/sh/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/unicore32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/xtensa/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
>
> Hmmm, how does arch/x86 do it?
As I understand it, x86 tends to be fully coherent, so has no there
is not much requirement for DMA to be aligned to cachelines.
--
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 16:59 ` Marc Gonzalez
@ 2019-12-12 18:15 ` Robin Murphy
-1 siblings, 0 replies; 40+ messages in thread
From: Robin Murphy @ 2019-12-12 18:15 UTC (permalink / raw)
To: Marc Gonzalez, Dmitry Torokhov
Cc: Bjorn Andersson, Kuninori Morimoto, Stephen Boyd,
Michael Turquette, LKML, Sudip Mukherjee, Russell King,
Guenter Roeck, linux-clk, Linux ARM, x86
On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
> On 12/12/2019 15:47, Robin Murphy wrote:
>
>> On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
>>
>>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>>
>>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>>
>>>>> What is the rationale for the devm_add_action API?
>>>>
>>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>>> devm API (as mixing devm and manual release is verboten). Also is often
>>>> used when some core subsystem does not provide enough devm APIs.
>>>
>>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>>
>>> This is what I understand so far:
>>>
>>> devm_add_action() is nice because it hides/factorizes the complexity
>>> of the devres API, but it incurs a small storage overhead of one
>>> pointer per call, which makes it unfit for frequently used actions,
>>> such as clk_get.
>>>
>>> Is that correct?
>>>
>>> My question is: why not design the API without the small overhead?
>>
>> Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
>> least as big as two pointers anyway, so this "overhead" should mostly be
>> free in practice. Plus the devres API is almost entirely about being
>> able to write simple robust code, rather than absolute efficiency - I
>> mean, struct devres itself is already 5 pointers large at the absolute
>> minimum ;)
>
> (3 pointers: 1 list_head + 1 function pointer)
Ah yes, I failed to mentally preprocess the debug config :)
> I'm confused. The first patch was criticized for potentially adding
> an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> platform with 100 clocks).
I'm not sure it was a criticism so much as an observation of an aspect
that deserved consideration (certainly it was on my part, and I read
Dmitry's "It might still, ..." as implying the same). I'd say by this
point it has been thoroughly considered, and personally I'm now happy
with the conclusion that the kind of embedded platforms that will have
many dozens of clocks are also the kind that will tend to have enough
padding to make it moot, and thus the code simplification probably is
worthwhile overall.
Robin.
> Let's see. On arm64, ARCH_KMALLOC_MINALIGN is 128.
>
> So basically, a struct devres looks like this on arm64:
>
> list_head.next
> list_head.prev
> dr_release_t
> .
> .
> .
> 104 bytes of padding
> .
> .
> .
> data (flexible array)
> .
> .
> .
> padding up to 256 bytes
>
>
> Basically, on arm64, every struct devres occupies 256 bytes, most of it
> (typically 104 + 112 = 216) wasted as padding.
>
> Hmmm, given how many devm stuff goes on in a modern platform, there
> might be large savings to be had...
>
> Assuming 10,000 calls to devres_alloc_node(), we would be wasting ~2 MB
> of RAM. Not sure it's worth trying to save that?
>
> $ git grep '#define ARCH_DMA_MINALIGN'
> arch/arc/include/asm/cache.h:#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
> arch/arm/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/arm64/include/asm/cache.h:#define ARCH_DMA_MINALIGN (128)
> arch/c6x/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/csky/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/hexagon/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/m68k/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/microblaze/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nds32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nios2/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/parisc/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/powerpc/include/asm/page_32.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/sh/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/unicore32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/xtensa/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
>
> Hmmm, how does arch/x86 do it?
>
> Regards.
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 18:15 ` Robin Murphy
0 siblings, 0 replies; 40+ messages in thread
From: Robin Murphy @ 2019-12-12 18:15 UTC (permalink / raw)
To: Marc Gonzalez, Dmitry Torokhov
Cc: Kuninori Morimoto, Stephen Boyd, Michael Turquette, x86,
linux-clk, LKML, Bjorn Andersson, Russell King, Linux ARM,
Sudip Mukherjee, Guenter Roeck
On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
> On 12/12/2019 15:47, Robin Murphy wrote:
>
>> On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
>>
>>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>>
>>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>>
>>>>> What is the rationale for the devm_add_action API?
>>>>
>>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>>> devm API (as mixing devm and manual release is verboten). Also is often
>>>> used when some core subsystem does not provide enough devm APIs.
>>>
>>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>>
>>> This is what I understand so far:
>>>
>>> devm_add_action() is nice because it hides/factorizes the complexity
>>> of the devres API, but it incurs a small storage overhead of one
>>> pointer per call, which makes it unfit for frequently used actions,
>>> such as clk_get.
>>>
>>> Is that correct?
>>>
>>> My question is: why not design the API without the small overhead?
>>
>> Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
>> least as big as two pointers anyway, so this "overhead" should mostly be
>> free in practice. Plus the devres API is almost entirely about being
>> able to write simple robust code, rather than absolute efficiency - I
>> mean, struct devres itself is already 5 pointers large at the absolute
>> minimum ;)
>
> (3 pointers: 1 list_head + 1 function pointer)
Ah yes, I failed to mentally preprocess the debug config :)
> I'm confused. The first patch was criticized for potentially adding
> an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> platform with 100 clocks).
I'm not sure it was a criticism so much as an observation of an aspect
that deserved consideration (certainly it was on my part, and I read
Dmitry's "It might still, ..." as implying the same). I'd say by this
point it has been thoroughly considered, and personally I'm now happy
with the conclusion that the kind of embedded platforms that will have
many dozens of clocks are also the kind that will tend to have enough
padding to make it moot, and thus the code simplification probably is
worthwhile overall.
Robin.
> Let's see. On arm64, ARCH_KMALLOC_MINALIGN is 128.
>
> So basically, a struct devres looks like this on arm64:
>
> list_head.next
> list_head.prev
> dr_release_t
> .
> .
> .
> 104 bytes of padding
> .
> .
> .
> data (flexible array)
> .
> .
> .
> padding up to 256 bytes
>
>
> Basically, on arm64, every struct devres occupies 256 bytes, most of it
> (typically 104 + 112 = 216) wasted as padding.
>
> Hmmm, given how many devm stuff goes on in a modern platform, there
> might be large savings to be had...
>
> Assuming 10,000 calls to devres_alloc_node(), we would be wasting ~2 MB
> of RAM. Not sure it's worth trying to save that?
>
> $ git grep '#define ARCH_DMA_MINALIGN'
> arch/arc/include/asm/cache.h:#define ARCH_DMA_MINALIGN SMP_CACHE_BYTES
> arch/arm/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/arm64/include/asm/cache.h:#define ARCH_DMA_MINALIGN (128)
> arch/c6x/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/csky/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/hexagon/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/m68k/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/microblaze/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/mips/include/asm/mach-generic/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 32
> arch/mips/include/asm/mach-ip32/kmalloc.h:#define ARCH_DMA_MINALIGN 128
> arch/mips/include/asm/mach-tx49xx/kmalloc.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nds32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/nios2/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/parisc/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/powerpc/include/asm/page_32.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/sh/include/asm/page.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/unicore32/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
> arch/xtensa/include/asm/cache.h:#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
>
> Hmmm, how does arch/x86 do it?
>
> Regards.
>
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 18:15 ` Robin Murphy
@ 2019-12-12 19:10 ` Dmitry Torokhov
-1 siblings, 0 replies; 40+ messages in thread
From: Dmitry Torokhov @ 2019-12-12 19:10 UTC (permalink / raw)
To: Robin Murphy
Cc: Marc Gonzalez, Bjorn Andersson, Kuninori Morimoto, Stephen Boyd,
Michael Turquette, LKML, Sudip Mukherjee, Russell King,
Guenter Roeck, linux-clk, Linux ARM, x86
On Thu, Dec 12, 2019 at 06:15:16PM +0000, Robin Murphy wrote:
> On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
> > On 12/12/2019 15:47, Robin Murphy wrote:
> >
> > > On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> > >
> > > > On 11/12/2019 23:28, Dmitry Torokhov wrote:
> > > >
> > > > > On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> > > > >
> > > > > > What is the rationale for the devm_add_action API?
> > > > >
> > > > > For one-off and maybe complex unwind actions in drivers that wish to use
> > > > > devm API (as mixing devm and manual release is verboten). Also is often
> > > > > used when some core subsystem does not provide enough devm APIs.
> > > >
> > > > Thanks for the insight, Dmitry. Thanks to Robin too.
> > > >
> > > > This is what I understand so far:
> > > >
> > > > devm_add_action() is nice because it hides/factorizes the complexity
> > > > of the devres API, but it incurs a small storage overhead of one
> > > > pointer per call, which makes it unfit for frequently used actions,
> > > > such as clk_get.
> > > >
> > > > Is that correct?
> > > >
> > > > My question is: why not design the API without the small overhead?
> > >
> > > Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> > > least as big as two pointers anyway, so this "overhead" should mostly be
> > > free in practice. Plus the devres API is almost entirely about being
> > > able to write simple robust code, rather than absolute efficiency - I
> > > mean, struct devres itself is already 5 pointers large at the absolute
> > > minimum ;)
> >
> > (3 pointers: 1 list_head + 1 function pointer)
>
> Ah yes, I failed to mentally preprocess the debug config :)
>
> > I'm confused. The first patch was criticized for potentially adding
> > an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> > platform with 100 clocks).
>
> I'm not sure it was a criticism so much as an observation of an aspect that
> deserved consideration (certainly it was on my part, and I read Dmitry's "It
> might still, ..." as implying the same). I'd say by this point it has been
> thoroughly considered, and personally I'm now happy with the conclusion that
> the kind of embedded platforms that will have many dozens of clocks are also
> the kind that will tend to have enough padding to make it moot, and thus the
> code simplification probably is worthwhile overall.
I wonder if we could actually avoid allocating the data with
ARCH_KMALLOC_MINALIGN in all the cases. It is definitely needed for the
devm_k*alloc() group of functions as they are direct replacement for
k*alloc() APIs that give users aligned memory, but for other data
structures (clocks, regulators, etc, etc) it is not required.
Thanks.
--
Dmitry
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 19:10 ` Dmitry Torokhov
0 siblings, 0 replies; 40+ messages in thread
From: Dmitry Torokhov @ 2019-12-12 19:10 UTC (permalink / raw)
To: Robin Murphy
Cc: Kuninori Morimoto, Marc Gonzalez, Stephen Boyd, Michael Turquette,
x86, linux-clk, LKML, Bjorn Andersson, Russell King, Linux ARM,
Sudip Mukherjee, Guenter Roeck
On Thu, Dec 12, 2019 at 06:15:16PM +0000, Robin Murphy wrote:
> On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
> > On 12/12/2019 15:47, Robin Murphy wrote:
> >
> > > On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> > >
> > > > On 11/12/2019 23:28, Dmitry Torokhov wrote:
> > > >
> > > > > On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> > > > >
> > > > > > What is the rationale for the devm_add_action API?
> > > > >
> > > > > For one-off and maybe complex unwind actions in drivers that wish to use
> > > > > devm API (as mixing devm and manual release is verboten). Also is often
> > > > > used when some core subsystem does not provide enough devm APIs.
> > > >
> > > > Thanks for the insight, Dmitry. Thanks to Robin too.
> > > >
> > > > This is what I understand so far:
> > > >
> > > > devm_add_action() is nice because it hides/factorizes the complexity
> > > > of the devres API, but it incurs a small storage overhead of one
> > > > pointer per call, which makes it unfit for frequently used actions,
> > > > such as clk_get.
> > > >
> > > > Is that correct?
> > > >
> > > > My question is: why not design the API without the small overhead?
> > >
> > > Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> > > least as big as two pointers anyway, so this "overhead" should mostly be
> > > free in practice. Plus the devres API is almost entirely about being
> > > able to write simple robust code, rather than absolute efficiency - I
> > > mean, struct devres itself is already 5 pointers large at the absolute
> > > minimum ;)
> >
> > (3 pointers: 1 list_head + 1 function pointer)
>
> Ah yes, I failed to mentally preprocess the debug config :)
>
> > I'm confused. The first patch was criticized for potentially adding
> > an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> > platform with 100 clocks).
>
> I'm not sure it was a criticism so much as an observation of an aspect that
> deserved consideration (certainly it was on my part, and I read Dmitry's "It
> might still, ..." as implying the same). I'd say by this point it has been
> thoroughly considered, and personally I'm now happy with the conclusion that
> the kind of embedded platforms that will have many dozens of clocks are also
> the kind that will tend to have enough padding to make it moot, and thus the
> code simplification probably is worthwhile overall.
I wonder if we could actually avoid allocating the data with
ARCH_KMALLOC_MINALIGN in all the cases. It is definitely needed for the
devm_k*alloc() group of functions as they are direct replacement for
k*alloc() APIs that give users aligned memory, but for other data
structures (clocks, regulators, etc, etc) it is not required.
Thanks.
--
Dmitry
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 19:10 ` Dmitry Torokhov
@ 2019-12-12 21:08 ` Robin Murphy
-1 siblings, 0 replies; 40+ messages in thread
From: Robin Murphy @ 2019-12-12 21:08 UTC (permalink / raw)
To: Dmitry Torokhov
Cc: Marc Gonzalez, Bjorn Andersson, Kuninori Morimoto, Stephen Boyd,
Michael Turquette, LKML, Sudip Mukherjee, Russell King,
Guenter Roeck, linux-clk, Linux ARM, x86
On 2019-12-12 7:10 pm, Dmitry Torokhov wrote:
> On Thu, Dec 12, 2019 at 06:15:16PM +0000, Robin Murphy wrote:
>> On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
>>> On 12/12/2019 15:47, Robin Murphy wrote:
>>>
>>>> On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
>>>>
>>>>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>>>>
>>>>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>>>>
>>>>>>> What is the rationale for the devm_add_action API?
>>>>>>
>>>>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>>>>> devm API (as mixing devm and manual release is verboten). Also is often
>>>>>> used when some core subsystem does not provide enough devm APIs.
>>>>>
>>>>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>>>>
>>>>> This is what I understand so far:
>>>>>
>>>>> devm_add_action() is nice because it hides/factorizes the complexity
>>>>> of the devres API, but it incurs a small storage overhead of one
>>>>> pointer per call, which makes it unfit for frequently used actions,
>>>>> such as clk_get.
>>>>>
>>>>> Is that correct?
>>>>>
>>>>> My question is: why not design the API without the small overhead?
>>>>
>>>> Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
>>>> least as big as two pointers anyway, so this "overhead" should mostly be
>>>> free in practice. Plus the devres API is almost entirely about being
>>>> able to write simple robust code, rather than absolute efficiency - I
>>>> mean, struct devres itself is already 5 pointers large at the absolute
>>>> minimum ;)
>>>
>>> (3 pointers: 1 list_head + 1 function pointer)
>>
>> Ah yes, I failed to mentally preprocess the debug config :)
>>
>>> I'm confused. The first patch was criticized for potentially adding
>>> an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
>>> platform with 100 clocks).
>>
>> I'm not sure it was a criticism so much as an observation of an aspect that
>> deserved consideration (certainly it was on my part, and I read Dmitry's "It
>> might still, ..." as implying the same). I'd say by this point it has been
>> thoroughly considered, and personally I'm now happy with the conclusion that
>> the kind of embedded platforms that will have many dozens of clocks are also
>> the kind that will tend to have enough padding to make it moot, and thus the
>> code simplification probably is worthwhile overall.
>
> I wonder if we could actually avoid allocating the data with
> ARCH_KMALLOC_MINALIGN in all the cases. It is definitely needed for the
> devm_k*alloc() group of functions as they are direct replacement for
> k*alloc() APIs that give users aligned memory, but for other data
> structures (clocks, regulators, etc, etc) it is not required.
That's a very good point - perhaps something like this (only done properly)?
Robin.
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 0bbb328bd17f..2382f963abbe 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -26,14 +26,7 @@ struct devres_node {
struct devres {
struct devres_node node;
- /*
- * Some archs want to perform DMA into kmalloc caches
- * and need a guaranteed alignment larger than
- * the alignment of a 64-bit integer.
- * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
- * buffer alignment as if it was allocated by plain kmalloc().
- */
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
+ u8 data[];
};
struct devres_group {
@@ -810,6 +803,17 @@ static int devm_kmalloc_match(struct device *dev,
void *res, void *data)
void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
{
struct devres *dr;
+ size_t align;
+
+ /*
+ * Some archs want to perform DMA into kmalloc caches
+ * and need a guaranteed alignment larger than
+ * the alignment of a 64-bit integer.
+ * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+ * buffer alignment as if it was allocated by plain kmalloc().
+ */
+ align = (ARCH_KMALLOC_MINALIGN - sizeof(*dr)) %
ARCH_KMALLOC_MINALIGN;
+ size += align;
/* use raw alloc_dr for kmalloc caller tracing */
dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
@@ -822,7 +826,7 @@ void * devm_kmalloc(struct device *dev, size_t size,
gfp_t gfp)
*/
set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
devres_add(dev, dr->data);
- return dr->data;
+ return dr->data + align;
}
EXPORT_SYMBOL_GPL(devm_kmalloc);
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-12 21:08 ` Robin Murphy
0 siblings, 0 replies; 40+ messages in thread
From: Robin Murphy @ 2019-12-12 21:08 UTC (permalink / raw)
To: Dmitry Torokhov
Cc: Kuninori Morimoto, Marc Gonzalez, Stephen Boyd, Michael Turquette,
x86, linux-clk, LKML, Bjorn Andersson, Russell King, Linux ARM,
Sudip Mukherjee, Guenter Roeck
On 2019-12-12 7:10 pm, Dmitry Torokhov wrote:
> On Thu, Dec 12, 2019 at 06:15:16PM +0000, Robin Murphy wrote:
>> On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
>>> On 12/12/2019 15:47, Robin Murphy wrote:
>>>
>>>> On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
>>>>
>>>>> On 11/12/2019 23:28, Dmitry Torokhov wrote:
>>>>>
>>>>>> On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
>>>>>>
>>>>>>> What is the rationale for the devm_add_action API?
>>>>>>
>>>>>> For one-off and maybe complex unwind actions in drivers that wish to use
>>>>>> devm API (as mixing devm and manual release is verboten). Also is often
>>>>>> used when some core subsystem does not provide enough devm APIs.
>>>>>
>>>>> Thanks for the insight, Dmitry. Thanks to Robin too.
>>>>>
>>>>> This is what I understand so far:
>>>>>
>>>>> devm_add_action() is nice because it hides/factorizes the complexity
>>>>> of the devres API, but it incurs a small storage overhead of one
>>>>> pointer per call, which makes it unfit for frequently used actions,
>>>>> such as clk_get.
>>>>>
>>>>> Is that correct?
>>>>>
>>>>> My question is: why not design the API without the small overhead?
>>>>
>>>> Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
>>>> least as big as two pointers anyway, so this "overhead" should mostly be
>>>> free in practice. Plus the devres API is almost entirely about being
>>>> able to write simple robust code, rather than absolute efficiency - I
>>>> mean, struct devres itself is already 5 pointers large at the absolute
>>>> minimum ;)
>>>
>>> (3 pointers: 1 list_head + 1 function pointer)
>>
>> Ah yes, I failed to mentally preprocess the debug config :)
>>
>>> I'm confused. The first patch was criticized for potentially adding
>>> an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
>>> platform with 100 clocks).
>>
>> I'm not sure it was a criticism so much as an observation of an aspect that
>> deserved consideration (certainly it was on my part, and I read Dmitry's "It
>> might still, ..." as implying the same). I'd say by this point it has been
>> thoroughly considered, and personally I'm now happy with the conclusion that
>> the kind of embedded platforms that will have many dozens of clocks are also
>> the kind that will tend to have enough padding to make it moot, and thus the
>> code simplification probably is worthwhile overall.
>
> I wonder if we could actually avoid allocating the data with
> ARCH_KMALLOC_MINALIGN in all the cases. It is definitely needed for the
> devm_k*alloc() group of functions as they are direct replacement for
> k*alloc() APIs that give users aligned memory, but for other data
> structures (clocks, regulators, etc, etc) it is not required.
That's a very good point - perhaps something like this (only done properly)?
Robin.
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 0bbb328bd17f..2382f963abbe 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -26,14 +26,7 @@ struct devres_node {
struct devres {
struct devres_node node;
- /*
- * Some archs want to perform DMA into kmalloc caches
- * and need a guaranteed alignment larger than
- * the alignment of a 64-bit integer.
- * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
- * buffer alignment as if it was allocated by plain kmalloc().
- */
- u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
+ u8 data[];
};
struct devres_group {
@@ -810,6 +803,17 @@ static int devm_kmalloc_match(struct device *dev,
void *res, void *data)
void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
{
struct devres *dr;
+ size_t align;
+
+ /*
+ * Some archs want to perform DMA into kmalloc caches
+ * and need a guaranteed alignment larger than
+ * the alignment of a 64-bit integer.
+ * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+ * buffer alignment as if it was allocated by plain kmalloc().
+ */
+ align = (ARCH_KMALLOC_MINALIGN - sizeof(*dr)) %
ARCH_KMALLOC_MINALIGN;
+ size += align;
/* use raw alloc_dr for kmalloc caller tracing */
dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
@@ -822,7 +826,7 @@ void * devm_kmalloc(struct device *dev, size_t size,
gfp_t gfp)
*/
set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
devres_add(dev, dr->data);
- return dr->data;
+ return dr->data + align;
}
EXPORT_SYMBOL_GPL(devm_kmalloc);
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply related [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
2019-12-12 21:08 ` Robin Murphy
@ 2019-12-13 0:16 ` Dmitry Torokhov
-1 siblings, 0 replies; 40+ messages in thread
From: Dmitry Torokhov @ 2019-12-13 0:16 UTC (permalink / raw)
To: Robin Murphy
Cc: Marc Gonzalez, Bjorn Andersson, Kuninori Morimoto, Stephen Boyd,
Michael Turquette, LKML, Sudip Mukherjee, Russell King,
Guenter Roeck, linux-clk, Linux ARM, x86
On Thu, Dec 12, 2019 at 09:08:04PM +0000, Robin Murphy wrote:
> On 2019-12-12 7:10 pm, Dmitry Torokhov wrote:
> > On Thu, Dec 12, 2019 at 06:15:16PM +0000, Robin Murphy wrote:
> > > On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
> > > > On 12/12/2019 15:47, Robin Murphy wrote:
> > > >
> > > > > On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> > > > >
> > > > > > On 11/12/2019 23:28, Dmitry Torokhov wrote:
> > > > > >
> > > > > > > On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> > > > > > >
> > > > > > > > What is the rationale for the devm_add_action API?
> > > > > > >
> > > > > > > For one-off and maybe complex unwind actions in drivers that wish to use
> > > > > > > devm API (as mixing devm and manual release is verboten). Also is often
> > > > > > > used when some core subsystem does not provide enough devm APIs.
> > > > > >
> > > > > > Thanks for the insight, Dmitry. Thanks to Robin too.
> > > > > >
> > > > > > This is what I understand so far:
> > > > > >
> > > > > > devm_add_action() is nice because it hides/factorizes the complexity
> > > > > > of the devres API, but it incurs a small storage overhead of one
> > > > > > pointer per call, which makes it unfit for frequently used actions,
> > > > > > such as clk_get.
> > > > > >
> > > > > > Is that correct?
> > > > > >
> > > > > > My question is: why not design the API without the small overhead?
> > > > >
> > > > > Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> > > > > least as big as two pointers anyway, so this "overhead" should mostly be
> > > > > free in practice. Plus the devres API is almost entirely about being
> > > > > able to write simple robust code, rather than absolute efficiency - I
> > > > > mean, struct devres itself is already 5 pointers large at the absolute
> > > > > minimum ;)
> > > >
> > > > (3 pointers: 1 list_head + 1 function pointer)
> > >
> > > Ah yes, I failed to mentally preprocess the debug config :)
> > >
> > > > I'm confused. The first patch was criticized for potentially adding
> > > > an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> > > > platform with 100 clocks).
> > >
> > > I'm not sure it was a criticism so much as an observation of an aspect that
> > > deserved consideration (certainly it was on my part, and I read Dmitry's "It
> > > might still, ..." as implying the same). I'd say by this point it has been
> > > thoroughly considered, and personally I'm now happy with the conclusion that
> > > the kind of embedded platforms that will have many dozens of clocks are also
> > > the kind that will tend to have enough padding to make it moot, and thus the
> > > code simplification probably is worthwhile overall.
> >
> > I wonder if we could actually avoid allocating the data with
> > ARCH_KMALLOC_MINALIGN in all the cases. It is definitely needed for the
> > devm_k*alloc() group of functions as they are direct replacement for
> > k*alloc() APIs that give users aligned memory, but for other data
> > structures (clocks, regulators, etc, etc) it is not required.
>
> That's a very good point - perhaps something like this (only done properly)?
Yes, but it has to be done carefully.
>
> Robin.
>
> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> index 0bbb328bd17f..2382f963abbe 100644
> --- a/drivers/base/devres.c
> +++ b/drivers/base/devres.c
> @@ -26,14 +26,7 @@ struct devres_node {
>
> struct devres {
> struct devres_node node;
> - /*
> - * Some archs want to perform DMA into kmalloc caches
> - * and need a guaranteed alignment larger than
> - * the alignment of a 64-bit integer.
> - * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> - * buffer alignment as if it was allocated by plain kmalloc().
> - */
> - u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
> + u8 data[];
> };
>
> struct devres_group {
> @@ -810,6 +803,17 @@ static int devm_kmalloc_match(struct device *dev, void
> *res, void *data)
> void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
> {
> struct devres *dr;
> + size_t align;
> +
> + /*
> + * Some archs want to perform DMA into kmalloc caches
> + * and need a guaranteed alignment larger than
> + * the alignment of a 64-bit integer.
> + * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> + * buffer alignment as if it was allocated by plain kmalloc().
> + */
> + align = (ARCH_KMALLOC_MINALIGN - sizeof(*dr)) %
> ARCH_KMALLOC_MINALIGN;
> + size += align;
>
> /* use raw alloc_dr for kmalloc caller tracing */
> dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
> @@ -822,7 +826,7 @@ void * devm_kmalloc(struct device *dev, size_t size,
> gfp_t gfp)
> */
> set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
> devres_add(dev, dr->data);
I think it has to be "devres_add(dev, dr->data + align);" here, as match
function checks the pointer passed to devm_kfree() with one stored in
devres structure.
> - return dr->data;
> + return dr->data + align;
> }
> EXPORT_SYMBOL_GPL(devm_kmalloc);
Thanks.
--
Dmitry
^ permalink raw reply [flat|nested] 40+ messages in thread* Re: [PATCH v1] clk: Convert managed get functions to devm_add_action API
@ 2019-12-13 0:16 ` Dmitry Torokhov
0 siblings, 0 replies; 40+ messages in thread
From: Dmitry Torokhov @ 2019-12-13 0:16 UTC (permalink / raw)
To: Robin Murphy
Cc: Kuninori Morimoto, Marc Gonzalez, Stephen Boyd, Michael Turquette,
x86, linux-clk, LKML, Bjorn Andersson, Russell King, Linux ARM,
Sudip Mukherjee, Guenter Roeck
On Thu, Dec 12, 2019 at 09:08:04PM +0000, Robin Murphy wrote:
> On 2019-12-12 7:10 pm, Dmitry Torokhov wrote:
> > On Thu, Dec 12, 2019 at 06:15:16PM +0000, Robin Murphy wrote:
> > > On 12/12/2019 4:59 pm, Marc Gonzalez wrote:
> > > > On 12/12/2019 15:47, Robin Murphy wrote:
> > > >
> > > > > On 12/12/2019 1:53 pm, Marc Gonzalez wrote:
> > > > >
> > > > > > On 11/12/2019 23:28, Dmitry Torokhov wrote:
> > > > > >
> > > > > > > On Wed, Dec 11, 2019 at 05:17:28PM +0100, Marc Gonzalez wrote:
> > > > > > >
> > > > > > > > What is the rationale for the devm_add_action API?
> > > > > > >
> > > > > > > For one-off and maybe complex unwind actions in drivers that wish to use
> > > > > > > devm API (as mixing devm and manual release is verboten). Also is often
> > > > > > > used when some core subsystem does not provide enough devm APIs.
> > > > > >
> > > > > > Thanks for the insight, Dmitry. Thanks to Robin too.
> > > > > >
> > > > > > This is what I understand so far:
> > > > > >
> > > > > > devm_add_action() is nice because it hides/factorizes the complexity
> > > > > > of the devres API, but it incurs a small storage overhead of one
> > > > > > pointer per call, which makes it unfit for frequently used actions,
> > > > > > such as clk_get.
> > > > > >
> > > > > > Is that correct?
> > > > > >
> > > > > > My question is: why not design the API without the small overhead?
> > > > >
> > > > > Probably because on most architectures, ARCH_KMALLOC_MINALIGN is at
> > > > > least as big as two pointers anyway, so this "overhead" should mostly be
> > > > > free in practice. Plus the devres API is almost entirely about being
> > > > > able to write simple robust code, rather than absolute efficiency - I
> > > > > mean, struct devres itself is already 5 pointers large at the absolute
> > > > > minimum ;)
> > > >
> > > > (3 pointers: 1 list_head + 1 function pointer)
> > >
> > > Ah yes, I failed to mentally preprocess the debug config :)
> > >
> > > > I'm confused. The first patch was criticized for potentially adding
> > > > an extra pointer for every devm_clk_get (e.g. 800 bytes on a 64-bit
> > > > platform with 100 clocks).
> > >
> > > I'm not sure it was a criticism so much as an observation of an aspect that
> > > deserved consideration (certainly it was on my part, and I read Dmitry's "It
> > > might still, ..." as implying the same). I'd say by this point it has been
> > > thoroughly considered, and personally I'm now happy with the conclusion that
> > > the kind of embedded platforms that will have many dozens of clocks are also
> > > the kind that will tend to have enough padding to make it moot, and thus the
> > > code simplification probably is worthwhile overall.
> >
> > I wonder if we could actually avoid allocating the data with
> > ARCH_KMALLOC_MINALIGN in all the cases. It is definitely needed for the
> > devm_k*alloc() group of functions as they are direct replacement for
> > k*alloc() APIs that give users aligned memory, but for other data
> > structures (clocks, regulators, etc, etc) it is not required.
>
> That's a very good point - perhaps something like this (only done properly)?
Yes, but it has to be done carefully.
>
> Robin.
>
> diff --git a/drivers/base/devres.c b/drivers/base/devres.c
> index 0bbb328bd17f..2382f963abbe 100644
> --- a/drivers/base/devres.c
> +++ b/drivers/base/devres.c
> @@ -26,14 +26,7 @@ struct devres_node {
>
> struct devres {
> struct devres_node node;
> - /*
> - * Some archs want to perform DMA into kmalloc caches
> - * and need a guaranteed alignment larger than
> - * the alignment of a 64-bit integer.
> - * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> - * buffer alignment as if it was allocated by plain kmalloc().
> - */
> - u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
> + u8 data[];
> };
>
> struct devres_group {
> @@ -810,6 +803,17 @@ static int devm_kmalloc_match(struct device *dev, void
> *res, void *data)
> void * devm_kmalloc(struct device *dev, size_t size, gfp_t gfp)
> {
> struct devres *dr;
> + size_t align;
> +
> + /*
> + * Some archs want to perform DMA into kmalloc caches
> + * and need a guaranteed alignment larger than
> + * the alignment of a 64-bit integer.
> + * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
> + * buffer alignment as if it was allocated by plain kmalloc().
> + */
> + align = (ARCH_KMALLOC_MINALIGN - sizeof(*dr)) %
> ARCH_KMALLOC_MINALIGN;
> + size += align;
>
> /* use raw alloc_dr for kmalloc caller tracing */
> dr = alloc_dr(devm_kmalloc_release, size, gfp, dev_to_node(dev));
> @@ -822,7 +826,7 @@ void * devm_kmalloc(struct device *dev, size_t size,
> gfp_t gfp)
> */
> set_node_dbginfo(&dr->node, "devm_kzalloc_release", size);
> devres_add(dev, dr->data);
I think it has to be "devres_add(dev, dr->data + align);" here, as match
function checks the pointer passed to devm_kfree() with one stored in
devres structure.
> - return dr->data;
> + return dr->data + align;
> }
> EXPORT_SYMBOL_GPL(devm_kmalloc);
Thanks.
--
Dmitry
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 40+ messages in thread