Linux Power Management development
 help / color / mirror / Atom feed
* Re: [PATCH 3/9] cgroup: implement generic child / descendant walk macros
From: Tejun Heo @ 2012-11-06 20:31 UTC (permalink / raw)
  To: lizefan, mhocko, rjw
  Cc: containers, cgroups, linux-kernel, linux-pm, fweisbec
In-Reply-To: <1351931915-1701-4-git-send-email-tj@kernel.org>

On Sat, Nov 03, 2012 at 01:38:29AM -0700, Tejun Heo wrote:
> Currently, cgroup doesn't provide any generic helper for walking a
> given cgroup's children or descendants.  This patch adds the following
> three macros.
> 
> * cgroup_for_each_child() - walk immediate children of a cgroup.
> 
> * cgroup_for_each_descendant_pre() - visit all descendants of a cgroup
>   in pre-order tree traversal.
> 
> * cgroup_for_each_descendant_post() - visit all descendants of a
>   cgroup in post-order tree traversal.
> 
> All three only require the user to hold RCU read lock during
> traversal.  Verifying that each iterated cgroup is online is the
> responsibility of the user.  When used with proper synchronization,
> cgroup_for_each_descendant_pre() can be used to propagate config
> updates to descendants in reliable way.  See comments for details.

Michal, Li, how does this look to you?  Would this be okay for memcg
too?  Li, do you think the comment on cgroup_for_each_descendant_pre()
is correct?

Thanks.

-- 
tejun

^ permalink raw reply

* Re: [linux-next PATCH 6/7] PM / devfreq: allow sysfs governor node to switch governor
From: Nishanth Menon @ 2012-11-06 21:28 UTC (permalink / raw)
  To: MyungJoo Ham
  Cc: myungjoo.ham, LKML, Kevin Hilman, linux-pm, Rajagopal Venkat,
	박경민, Rafael J. Wysocki
In-Reply-To: <CAJ0PZbSe7Orhww-amHNRkbvUvJr6x9CtisHw2hoLpcJdyBSKCA@mail.gmail.com>

On 18:18-20121106, MyungJoo Ham wrote:
> 2012. 11. 6. 오후 6:02에 "MyungJoo Ham" <myungjoo.ham@samsung.com>님이 작성:
> >
> > > --- a/drivers/devfreq/devfreq.c
> > > +++ b/drivers/devfreq/devfreq.c
> > > @@ -629,6 +629,44 @@ static ssize_t show_governor(struct device *dev,
> > >       return sprintf(buf, "%s\n", to_devfreq(dev)->governor->name);
> > >  }
> > >
> > > +static ssize_t store_governor(struct device *dev, struct
> device_attribute *attr,
> > > +                           const char *buf, size_t count)
> > > +{
> > > +     struct devfreq *df = to_devfreq(dev);
> > > +     int ret = 0;
> > > +     struct devfreq_governor *governor;
> > > +
> > > +     mutex_lock(&devfreq_list_lock);
> >
> > Please remove the trailing \n from buf here.
> > When user enters "userspace", buf gets "userspace\n".
> >
> > With some printks in find_devfreq_governor (printing the buf and governor
> names):
> >
> > # echo userspace > /sys/class/devfreq/exynos4210-busfreq.0/governor
> > [   65.975000] [userspace
> > [   65.975000] ].[userspace]
> > [   65.980000] [userspace
> > [   65.980000] ].[powersave]
> > [   65.985000] [userspace
> > [   65.985000] ].[performance]
> > [   65.990000] [userspace
> > [   65.990000] ].[simple_ondemand]
> > [   65.995000] err no = -19
> > #
> >
> 
> Anyway this seems quite wierd. I will look more into this one in my target
> system.
Thanks for catching this.
:( I missed the \n (been using echo -n toooo often now grr..)
What do you think of the following change to the patch?

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 586b6fd..9a4e898 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -614,11 +614,16 @@ static ssize_t store_governor(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
 	struct devfreq *df = to_devfreq(dev);
-	int ret = 0;
+	int ret;
+	char str_governor[DEVFREQ_NAME_LEN + 1];
 	struct devfreq_governor *governor;
 
+	ret = sscanf(buf, "%" __stringify(DEVFREQ_NAME_LEN) "s", str_governor);
+	if (ret != 1)
+	        return -EINVAL;
+
 	mutex_lock(&devfreq_list_lock);
-	governor = find_devfreq_governor(buf);
+	governor = find_devfreq_governor(str_governor);
 	if (IS_ERR(governor)) {
 		ret = PTR_ERR(governor);
 		goto out;
-- 
Regards,
Nishanth Menon

output:
/sys/devices/platform/iva.0/devfreq/iva.0 # cat governor 
simple_ondemand
/sys/devices/platform/iva.0/devfreq/iva.0 # echo -n "userspace">governor 
/sys/devices/platform/iva.0/devfreq/iva.0 # echo -n "simple_ondeman">governor 
sh: write error: No such device
/sys/devices/platform/iva.0/devfreq/iva.0 # cat governor 
userspace
/sys/devices/platform/iva.0/devfreq/iva.0 # echo -n "simple_ondemand">governor 
/sys/devices/platform/iva.0/devfreq/iva.0 # cat governor 
simple_ondemand
/sys/devices/platform/iva.0/devfreq/iva.0 # echo "userspace">governor 
/sys/devices/platform/iva.0/devfreq/iva.0 # cat governor 
userspace
/sys/devices/platform/iva.0/devfreq/iva.0 # echo  "simple_ondemand">governor 
/sys/devices/platform/iva.0/devfreq/iva.0 # echo "userspace123123213123213123212
3123123131231">governor 
sh: write error: No such device
/sys/devices/platform/iva.0/devfreq/iva.0 # cat governor 
simple_ondemand


^ permalink raw reply related

* Re: [PATCH 3/6] ARM: OMAP: voltage: move voltdm_reset to platform_data header
From: Nishanth Menon @ 2012-11-06 21:48 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: linux-omap, Rafael, Kevin Hilman, Anton Vorontsov, linux-pm
In-Reply-To: <20121106184935.GL6801@atomide.com>

On 10:49-20121106, Tony Lindgren wrote:
> * Nishanth Menon <nm@ti.com> [121105 07:04]:
> > Move voltdm_reset to include/linux/platform_data/voltage-omap.h
> > 
> > Acked-by: Jean Pihet <j-pihet@ti.com>
> > Signed-off-by: Nishanth Menon <nm@ti.com>
> > ---
> >  arch/arm/mach-omap2/voltage.h              |    1 -
> >  include/linux/platform_data/voltage-omap.h |    1 +
> >  2 files changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/arch/arm/mach-omap2/voltage.h b/arch/arm/mach-omap2/voltage.h
> > index af9d469..0665f21 100644
> > --- a/arch/arm/mach-omap2/voltage.h
> > +++ b/arch/arm/mach-omap2/voltage.h
> > @@ -149,5 +149,4 @@ int voltdm_for_each(int (*fn)(struct voltagedomain *voltdm, void *user),
> >  int voltdm_for_each_pwrdm(struct voltagedomain *voltdm,
> >  			  int (*fn)(struct voltagedomain *voltdm,
> >  				    struct powerdomain *pwrdm));
> > -void voltdm_reset(struct voltagedomain *voltdm);
> >  #endif
> > diff --git a/include/linux/platform_data/voltage-omap.h b/include/linux/platform_data/voltage-omap.h
> > index 5be4d5d..4eb3d43 100644
> > --- a/include/linux/platform_data/voltage-omap.h
> > +++ b/include/linux/platform_data/voltage-omap.h
> > @@ -36,4 +36,5 @@ int voltdm_scale(struct voltagedomain *voltdm, unsigned long target_volt);
> >  unsigned long voltdm_get_voltage(struct voltagedomain *voltdm);
> >  struct omap_volt_data *omap_voltage_get_voltdata(struct voltagedomain *voltdm,
> >  		unsigned long volt);
> > +void voltdm_reset(struct voltagedomain *voltdm);
> >  #endif
> 
> The include/linux/platform_data/voltage-omap.h should only contain
> pure platform_data, these should internal defines to the driver.
considering the move took place as part of:
commit 2203747c97712975accc5e69bdaf1ad38a691635
(ARM: omap: move platform_data definitions)
I suppose we should clean up the following as well
include/linux/platform_data/dsp-omap.h - has function - reserve
include/linux/platform_data/mtd-nand-omap2.h - has function -init
include/linux/platform_data/mtd-onenand-omap2.h - has function -init
include/linux/platform_data/remoteproc-omap.h - has function - reserve
> 
> Looks like there are other things there too that's not platform data:
> 
> struct voltagedomain *voltdm_lookup(const char *name);
> int voltdm_scale(struct voltagedomain *voltdm, unsigned long target_volt);
> unsigned long voltdm_get_voltage(struct voltagedomain *voltdm);
> struct omap_volt_data *omap_voltage_get_voltdata(struct voltagedomain *voltdm,
> 		unsigned long volt);
> 
> Can you please add a patch fixing that ASAP?

Agreed include/linux/platform_data/voltage-omap.h has more functions as well.
Considering it did:
rename arch/arm/plat-omap/include/plat/voltage.h =>
include/linux/platform_data/voltage-omap.h

Where do we move these functions to?

drivers/power/avs/smartreflex.c needs:
omap_voltage_get_voltdata
and
drivers/power/avs/smartreflex-class3.c
will need voltdm_reset and voltdm_get_voltage

-- 
Regards,
Nishanth Menon

^ permalink raw reply

* Re: [RFC PATCH 6/8] mm: Demarcate and maintain pageblocks in region-order in the zones' freelists
From: Dave Hansen @ 2012-11-06 21:49 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: akpm, mgorman, mjg59, paulmck, maxime.coquelin, loic.pallardy,
	arjan, kmpark, kamezawa.hiroyu, lenb, rjw, gargankita,
	amit.kachhap, svaidy, thomas.abraham, santosh.shilimkar, linux-pm,
	linux-mm, linux-kernel
In-Reply-To: <20121106195342.6941.94892.stgit@srivatsabhat.in.ibm.com>

On 11/06/2012 11:53 AM, Srivatsa S. Bhat wrote:
> This is the main change - we keep the pageblocks in region-sorted order,
> where pageblocks belonging to region-0 come first, followed by those belonging
> to region-1 and so on. But the pageblocks within a given region need *not* be
> sorted, since we need them to be only region-sorted and not fully
> address-sorted.
> 
> This sorting is performed when adding pages back to the freelists, thus
> avoiding any region-related overhead in the critical page allocation
> paths.

It's probably _better_ to do it at free time than alloc, but it's still
pretty bad to be doing a linear walk over a potentially 256-entry array
holding the zone lock.  The overhead is going to show up somewhere.  How
does this do with a kernel compile?  Looks like exit() when a process
has a bunch of memory might get painful.


^ permalink raw reply

* Re: [RFC PATCH 1/8] mm: Introduce memory regions data-structure to capture region boundaries within node
From: Dave Hansen @ 2012-11-06 23:03 UTC (permalink / raw)
  To: Srivatsa S. Bhat
  Cc: akpm, mgorman, mjg59, paulmck, maxime.coquelin, loic.pallardy,
	arjan, kmpark, kamezawa.hiroyu, lenb, rjw, gargankita,
	amit.kachhap, svaidy, thomas.abraham, santosh.shilimkar, linux-pm,
	linux-mm, linux-kernel
In-Reply-To: <20121106195225.6941.2868.stgit@srivatsabhat.in.ibm.com>

On 11/06/2012 11:52 AM, Srivatsa S. Bhat wrote:
> But of course, memory regions are sub-divisions *within* a node, so it makes
> sense to keep the data-structures in the node's struct pglist_data. (Thus
> this placement makes memory regions parallel to zones in that node).

I think it's pretty silly to create *ANOTHER* subdivision of memory
separate from sparsemem.  One that doesn't handle large amounts of
memory or scale with memory hotplug.  As it stands, you can only support
256*512MB=128GB of address space, which seems pretty puny.

This node_regions[]:

> @@ -687,6 +698,8 @@ typedef struct pglist_data {
>  	struct zone node_zones[MAX_NR_ZONES];
>  	struct zonelist node_zonelists[MAX_ZONELISTS];
>  	int nr_zones;
> +	struct node_mem_region node_regions[MAX_NR_REGIONS];
> +	int nr_node_regions;
>  #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
>  	struct page *node_mem_map;
>  #ifdef CONFIG_MEMCG

looks like it's indexed the same way regardless of which node it is in.
 In other words, if there are two nodes, at least half of it is wasted,
and 3/4 if there are four nodes.  That seems a bit suboptimal.

Could you remind us of the logic for leaving sparsemem out of the
equation here?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v4 0/6] solve deadlock caused by memory allocation with I/O
From: Andrew Morton @ 2012-11-06 23:23 UTC (permalink / raw)
  To: Ming Lei
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm
In-Reply-To: <1351931714-11689-1-git-send-email-ming.lei@canonical.com>

On Sat,  3 Nov 2012 16:35:08 +0800
Ming Lei <ming.lei@canonical.com> wrote:

> This patchset try to solve one deadlock problem which might be caused
> by memory allocation with block I/O during runtime PM and block device
> error handling path. Traditionly, the problem is addressed by passing
> GFP_NOIO statically to mm, but that is not a effective solution, see
> detailed description in patch 1's commit log.

It generally looks OK to me.  I have a few comments and I expect to grab
v5.

Rafael, your thoughts?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v4 1/6] mm: teach mm by current context info to not do I/O during memory allocation
From: Andrew Morton @ 2012-11-06 23:23 UTC (permalink / raw)
  To: Ming Lei
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm,
	Jiri Kosina, Mel Gorman, KAMEZAWA Hiroyuki, Michal Hocko,
	Ingo Molnar, Peter Zijlstra
In-Reply-To: <1351931714-11689-2-git-send-email-ming.lei@canonical.com>

On Sat,  3 Nov 2012 16:35:09 +0800
Ming Lei <ming.lei@canonical.com> wrote:

> This patch introduces PF_MEMALLOC_NOIO on process flag('flags' field of
> 'struct task_struct'), so that the flag can be set by one task
> to avoid doing I/O inside memory allocation in the task's context.
> 
> The patch trys to solve one deadlock problem caused by block device,
> and the problem may happen at least in the below situations:
> 
> - during block device runtime resume, if memory allocation with
> GFP_KERNEL is called inside runtime resume callback of any one
> of its ancestors(or the block device itself), the deadlock may be
> triggered inside the memory allocation since it might not complete
> until the block device becomes active and the involed page I/O finishes.
> The situation is pointed out first by Alan Stern. It is not a good
> approach to convert all GFP_KERNEL[1] in the path into GFP_NOIO because
> several subsystems may be involved(for example, PCI, USB and SCSI may
> be involved for usb mass stoarage device, network devices involved too
> in the iSCSI case)
> 
> - during block device runtime suspend, because runtime resume need
> to wait for completion of concurrent runtime suspend.
> 
> - during error handling of usb mass storage deivce, USB bus reset
> will be put on the device, so there shouldn't have any
> memory allocation with GFP_KERNEL during USB bus reset, otherwise
> the deadlock similar with above may be triggered. Unfortunately, any
> usb device may include one mass storage interface in theory, so it
> requires all usb interface drivers to handle the situation. In fact,
> most usb drivers don't know how to handle bus reset on the device
> and don't provide .pre_set() and .post_reset() callback at all, so
> USB core has to unbind and bind driver for these devices. So it
> is still not practical to resort to GFP_NOIO for solving the problem.
> 
> Also the introduced solution can be used by block subsystem or block
> drivers too, for example, set the PF_MEMALLOC_NOIO flag before doing
> actual I/O transfer.
> 
> It is not a good idea to convert all these GFP_KERNEL in the
> affected path into GFP_NOIO because these functions doing that may be
> implemented as library and will be called in many other contexts.
> 
> In fact, memalloc_noio() can convert some of current static GFP_NOIO
> allocation into GFP_KERNEL back in other non-affected contexts, at least
> almost all GFP_NOIO in USB subsystem can be converted into GFP_KERNEL
> after applying the approach and make allocation with GFP_IO
> only happen in runtime resume/bus reset/block I/O transfer contexts
> generally.

It's unclear from the description why we're also clearing __GFP_FS in
this situation.

If we can avoid doing this then there will be a very small gain: there
are some situations in which a filesystem can clean pagecache without
performing I/O.


It doesn't appear that the patch will add overhead to the alloc/free
hotpaths, which is good.

> 
> ...
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1805,6 +1805,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
>  #define PF_FROZEN	0x00010000	/* frozen for system suspend */
>  #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
>  #define PF_KSWAPD	0x00040000	/* I am kswapd */
> +#define PF_MEMALLOC_NOIO 0x00080000	/* Allocating memory without IO involved */
>  #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
>  #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
>  #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
> @@ -1842,6 +1843,15 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
>  #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
>  #define used_math() tsk_used_math(current)
>  
> +#define memalloc_noio() (current->flags & PF_MEMALLOC_NOIO)
> +#define memalloc_noio_save(flag) do { \
> +	(flag) = current->flags & PF_MEMALLOC_NOIO; \
> +	current->flags |= PF_MEMALLOC_NOIO; \
> +} while (0)
> +#define memalloc_noio_restore(flag) do { \
> +	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flag; \
> +} while (0)
> +

Again with the ghastly macros.  Please, do this properly in regular old
C, as previously discussed.  It really doesn't matter what daft things
local_irq_save() did 20 years ago.  Just do it right!

Also, you can probably put the unlikely() inside memalloc_noio() and
avoid repeating it at all the callsites.

And it might be neater to do:

/*
 * Nice comment goes here
 */
static inline gfp_t memalloc_noio_flags(gfp_t flags)
{
	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
		flags &= ~GFP_IOFS;
	return flags;
}

>   * task->jobctl flags
>   */
>
> ...
>
> @@ -2304,6 +2304,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
>  		.gfp_mask = sc.gfp_mask,
>  	};
>  
> +	if (unlikely(memalloc_noio())) {
> +		gfp_mask &= ~GFP_IOFS;
> +		sc.gfp_mask = gfp_mask;
> +		shrink.gfp_mask = sc.gfp_mask;
> +	}

We can avoid writing to shrink.gfp_mask twice.  And maybe sc.gfp_mask
as well.  Unclear, I didn't think about it too hard ;)

>  	throttle_direct_reclaim(gfp_mask, zonelist, nodemask);
>  
>  	/*
>
> ...
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v4 2/6] PM / Runtime: introduce pm_runtime_set_memalloc_noio()
From: Andrew Morton @ 2012-11-06 23:24 UTC (permalink / raw)
  To: Ming Lei
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA, Alan Stern, Oliver Neukum,
	Minchan Kim, Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, linux-pm-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg
In-Reply-To: <1351931714-11689-3-git-send-email-ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

On Sat,  3 Nov 2012 16:35:10 +0800
Ming Lei <ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org> wrote:

> The patch introduces the flag of memalloc_noio in 'struct dev_pm_info'
> to help PM core to teach mm not allocating memory with GFP_KERNEL
> flag for avoiding probable deadlock.
> 
> As explained in the comment, any GFP_KERNEL allocation inside
> runtime_resume() or runtime_suspend() on any one of device in
> the path from one block or network device to the root device
> in the device tree may cause deadlock, the introduced
> pm_runtime_set_memalloc_noio() sets or clears the flag on
> device in the path recursively.
> 

checkpatch finds a number of problems with this patch, all of which
should be fixed.  Please always use checkpatch.

> index 3148b10..d477924 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -124,6 +124,63 @@ unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
>  
> +static int dev_memalloc_noio(struct device *dev, void *data)
> +{
> +	return dev->power.memalloc_noio;
> +}
> +
> +/*
> + * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag.
> + * @dev: Device to handle.
> + * @enable: True for setting the flag and False for clearing the flag.
> + *
> + * Set the flag for all devices in the path from the device to the
> + * root device in the device tree if @enable is true, otherwise clear
> + * the flag for devices in the path whose siblings don't set the flag.
> + *
> + * The function should only be called by block device, or network
> + * device driver for solving the deadlock problem during runtime
> + * resume/suspend:
> + * 	if memory allocation with GFP_KERNEL is called inside runtime
> + * 	resume/suspend callback of any one of its ancestors(or the
> + * 	block device itself), the deadlock may be triggered inside the
> + * 	memory allocation since it might not complete until the block
> + * 	device becomes active and the involed page I/O finishes. The
> + * 	situation is pointed out first by Alan Stern. Network device
> + * 	are involved in iSCSI kind of situation.
> + *
> + * The lock of dev_hotplug_mutex is held in the function for handling
> + * hotplug race because pm_runtime_set_memalloc_noio() may be called
> + * in async probe().
> + *
> + * The function should be called between device_add() and device_del()
> + * on the affected device(block/network device).
> + */
> +void pm_runtime_set_memalloc_noio(struct device *dev, bool enable)
> +{
> +	static DEFINE_MUTEX(dev_hotplug_mutex);
> +
> +	mutex_lock(&dev_hotplug_mutex);
> +	for(;;) {
> +		/* hold power lock since bitfield is not SMP-safe. */
> +		spin_lock_irq(&dev->power.lock);
> +		dev->power.memalloc_noio = enable;
> +		spin_unlock_irq(&dev->power.lock);
> +
> +		dev = dev->parent;
> +
> +		/* only clear the flag for one device if all
> +		 * children of the device don't set the flag.
> +		 */

Such a comment is usually laid out as

		/*
		 * Only ...

More significantly, the comment describes what the code is doing but
not why the code is doing it.  The former is (usually) obvious from
reading the C, and the latter is what good code comments address.

And it's needed in this case.  Why does the code do this?

Also, can a device have more than one child?  If so, the code doesn't
do what the comment says it does.

> +		if (!dev || (!enable &&
> +			     device_for_each_child(dev, NULL,
> +						   dev_memalloc_noio)))
> +			break;
> +	}
> +	mutex_unlock(&dev_hotplug_mutex);
> +}
> +EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio);
>
> ...
>
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v4 3/6] block/genhd.c: apply pm_runtime_set_memalloc_noio on block devices
From: Andrew Morton @ 2012-11-06 23:24 UTC (permalink / raw)
  To: Ming Lei
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm
In-Reply-To: <1351931714-11689-4-git-send-email-ming.lei@canonical.com>

On Sat,  3 Nov 2012 16:35:11 +0800
Ming Lei <ming.lei@canonical.com> wrote:

> This patch applyes the introduced pm_runtime_set_memalloc_noio on
> block device so that PM core will teach mm to not allocate memory with
> GFP_IOFS when calling the runtime_resume and runtime_suspend callback
> for block devices and its ancestors.
> 
> ...
>
> @@ -532,6 +533,13 @@ static void register_disk(struct gendisk *disk)
>  			return;
>  		}
>  	}
> +
> +	/* avoid probable deadlock caused by allocating memory with

Again, please fix the comment style.  Take a look at the rest of this file!

> +	 * GFP_KERNEL in runtime_resume callback of its all ancestor
> +	 * deivces

typo

> +	 */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [BUGFIX] PM: Fix active child counting when disabled and forbidden
From: Huang Ying @ 2012-11-07  0:26 UTC (permalink / raw)
  To: Alan Stern; +Cc: Rafael J. Wysocki, linux-kernel, linux-pm
In-Reply-To: <Pine.LNX.4.44L0.1211061010390.1430-100000@iolanthe.rowland.org>

On Tue, 2012-11-06 at 10:17 -0500, Alan Stern wrote:
> On Tue, 6 Nov 2012, Huang Ying wrote:
> 
> > On Sun, 2012-11-04 at 20:56 -0500, Alan Stern wrote:
> > > On Mon, 5 Nov 2012, Huang Ying wrote:
> > > 
> > > > In current runtime PM implementation, the active child count of the
> > > > parent device may be decreased if the runtime PM of the child device
> > > > is disabled and forbidden.  For example, to unbind a PCI driver with a
> > > > PCI device, the following code path is possible:
> > > > 
> > > >   pci_device_remove
> > > >     pm_runtime_set_suspended
> > > >       __pm_runtime_set_status
> > > >         atomic_add_unless(&parent->power.child_count, -1, 0)
> > > > 
> > > > That is, the parent device may be suspended, even if the runtime PM of
> > > > child device is forbidden to be suspended.  This violate the rule that
> > > > parent is allowed to be suspended only after all its children are
> > > > suspended, and may cause issue.
> > > 
> > > This doesn't sound like a correct description of the situation.  The 
> > > rule is not violated.  After pm_runtime_set_suspended runs, the child 
> > > _is_ suspended.  Thus there's no reason not to allow the parent to be 
> > > suspended.
> > > 
> > > The problem -- if there really is one -- is that a driver can put a 
> > > device into the suspended state by calling pm_runtime_disable followed 
> > > by pm_runtime_set_suspended, even if the usage count is > 0.
> > > 
> > > I'm not so sure this should count as a problem.  Generally devices 
> > > aren't disabled for runtime PM unless something is wrong.
> > 
> > Devices will be disabled if the PCI driver is unbound from the PCI
> > device.
> 
> Yes.  But without a PCI driver, nothing will call 
> pm_runtime_set_suspended.

pm_runtime_set_suspended will be called in pci_device_remove or error
path of local_pci_probe.

>   And even if something does call 
> pm_runtime_set_suspended, it's still not a problem -- the device can't 
> be used without a driver.

The VGA device can be used without a driver.

Best Regards,
Huang Ying


> > So I think the rule could be: even if the device is suspended, the
> > device can be put into suspended state only if its usage count == 0.  In
> > this way, we can solve the issue for PCI driver unbound and that looks
> > more reasonable.
> 
> You still have not shown that there really is a problem.  Do you have 
> any particular use case in mind?
> 
> Alan Stern
> 



^ permalink raw reply

* [PATCH -v2] ACPI/PCI: Make PCI devices notified when its power resource turned on
From: Huang Ying @ 2012-11-07  0:57 UTC (permalink / raw)
  To: Bjorn Helgaas, Rafael J. Wysocki
  Cc: linux-kernel, linux-pci, linux-pm, linux-acpi, Huang Ying

A set of power resources may be shared by multiple devices.  When all
devices share one set of power resources is put into D3_COLD state,
the power resources will be turned off.  When one of the device is
waked, the power resource will be turned on and all devices share it
will be powered on to D0uninitialized state.  These devices should be
resumed, so that they can get opportunity to go to low power state
later.

v2:

- Fix build error

Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_bind.c |    2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/acpi/pci_bind.c
+++ b/drivers/acpi/pci_bind.c
@@ -45,6 +45,7 @@ static int acpi_pci_unbind(struct acpi_d
 
 	device_set_run_wake(&dev->dev, false);
 	pci_acpi_remove_pm_notifier(device);
+	acpi_power_resource_unregister_device(&dev->dev, device->handle);
 
 	if (!dev->subordinate)
 		goto out;
@@ -71,6 +72,7 @@ static int acpi_pci_bind(struct acpi_dev
 		return 0;
 
 	pci_acpi_add_pm_notifier(device, dev);
+	acpi_power_resource_register_device(&dev->dev, device->handle);
 	if (device->wakeup.flags.run_wake)
 		device_set_run_wake(&dev->dev, true);
 

^ permalink raw reply

* Re: [PATCH 3/6] ARM: OMAP: voltage: move voltdm_reset to platform_data header
From: Tony Lindgren @ 2012-11-07  1:18 UTC (permalink / raw)
  To: Nishanth Menon
  Cc: linux-omap, Rafael, Kevin Hilman, Anton Vorontsov, linux-pm
In-Reply-To: <20121106214819.GA16246@kahuna>

* Nishanth Menon <nm@ti.com> [121106 13:50]:
> On 10:49-20121106, Tony Lindgren wrote:
> > 
> > Looks like there are other things there too that's not platform data:
> > 
> > struct voltagedomain *voltdm_lookup(const char *name);
> > int voltdm_scale(struct voltagedomain *voltdm, unsigned long target_volt);
> > unsigned long voltdm_get_voltage(struct voltagedomain *voltdm);
> > struct omap_volt_data *omap_voltage_get_voltdata(struct voltagedomain *voltdm,
> > 		unsigned long volt);
> > 
> > Can you please add a patch fixing that ASAP?
> 
> Agreed include/linux/platform_data/voltage-omap.h has more functions as well.
> Considering it did:
> rename arch/arm/plat-omap/include/plat/voltage.h =>
> include/linux/platform_data/voltage-omap.h
> 
> Where do we move these functions to?
> 
> drivers/power/avs/smartreflex.c needs:
> omap_voltage_get_voltdata
> and
> drivers/power/avs/smartreflex-class3.c
> will need voltdm_reset and voltdm_get_voltage

How about something local drivers/power/avs/smartreflex.h?

Regards,

Tony

^ permalink raw reply

* Re: [PATCH 3/6] ARM: OMAP: voltage: move voltdm_reset to platform_data header
From: Nishanth Menon @ 2012-11-07  1:28 UTC (permalink / raw)
  To: Tony Lindgren; +Cc: linux-omap, Rafael, Kevin Hilman, Anton Vorontsov, linux-pm
In-Reply-To: <20121107011808.GZ6801@atomide.com>

On 17:18-20121106, Tony Lindgren wrote:
> * Nishanth Menon <nm@ti.com> [121106 13:50]:
> > On 10:49-20121106, Tony Lindgren wrote:
> > > 
> > > Looks like there are other things there too that's not platform data:
> > > 
> > > struct voltagedomain *voltdm_lookup(const char *name);
> > > int voltdm_scale(struct voltagedomain *voltdm, unsigned long target_volt);
> > > unsigned long voltdm_get_voltage(struct voltagedomain *voltdm);
> > > struct omap_volt_data *omap_voltage_get_voltdata(struct voltagedomain *voltdm,
> > > 		unsigned long volt);
> > > 
> > > Can you please add a patch fixing that ASAP?
> > 
> > Agreed include/linux/platform_data/voltage-omap.h has more functions as well.
> > Considering it did:
> > rename arch/arm/plat-omap/include/plat/voltage.h =>
> > include/linux/platform_data/voltage-omap.h
> > 
> > Where do we move these functions to?
> > 
> > drivers/power/avs/smartreflex.c needs:
> > omap_voltage_get_voltdata
> > and
> > drivers/power/avs/smartreflex-class3.c
> > will need voltdm_reset and voltdm_get_voltage
> 
> How about something local drivers/power/avs/smartreflex.h?
These APIs are exposed by voltage layer, not smartreflex :(
stuff like voltdm_scale will have to be used by regulator logic
eventually, so moving to AVS driver header is probably not right.
-- 
Regards,
Nishanth Menon

^ permalink raw reply

* Re: [PATCH 3/6] ARM: OMAP: voltage: move voltdm_reset to platform_data header
From: Tony Lindgren @ 2012-11-07  1:46 UTC (permalink / raw)
  To: Nishanth Menon
  Cc: linux-omap, Rafael, Kevin Hilman, Anton Vorontsov, linux-pm
In-Reply-To: <20121107012838.GA13630@kahuna>

* Nishanth Menon <nm@ti.com> [121106 17:30]:
> On 17:18-20121106, Tony Lindgren wrote:
> > * Nishanth Menon <nm@ti.com> [121106 13:50]:
> > > On 10:49-20121106, Tony Lindgren wrote:
> > > > 
> > > > Looks like there are other things there too that's not platform data:
> > > > 
> > > > struct voltagedomain *voltdm_lookup(const char *name);
> > > > int voltdm_scale(struct voltagedomain *voltdm, unsigned long target_volt);
> > > > unsigned long voltdm_get_voltage(struct voltagedomain *voltdm);
> > > > struct omap_volt_data *omap_voltage_get_voltdata(struct voltagedomain *voltdm,
> > > > 		unsigned long volt);
> > > > 
> > > > Can you please add a patch fixing that ASAP?
> > > 
> > > Agreed include/linux/platform_data/voltage-omap.h has more functions as well.
> > > Considering it did:
> > > rename arch/arm/plat-omap/include/plat/voltage.h =>
> > > include/linux/platform_data/voltage-omap.h
> > > 
> > > Where do we move these functions to?
> > > 
> > > drivers/power/avs/smartreflex.c needs:
> > > omap_voltage_get_voltdata
> > > and
> > > drivers/power/avs/smartreflex-class3.c
> > > will need voltdm_reset and voltdm_get_voltage
> > 
> > How about something local drivers/power/avs/smartreflex.h?
> These APIs are exposed by voltage layer, not smartreflex :(
> stuff like voltdm_scale will have to be used by regulator logic
> eventually, so moving to AVS driver header is probably not right.

Well ideally you'd have some generic API doing it rather than
these omap specifc exported functions.

Meanwhile, I guess you need to find some suitable location
for the header file that works for Rafael.

Regards,

Tony

^ permalink raw reply

* [PATCH 0/4] cpufreq: OMAP: if available, scale the iva coprocessor
From: Joshua Emele @ 2012-11-07  1:47 UTC (permalink / raw)
  To: Kevin Hilman, Rafael J. Wysocki, linux-omap, cpufreq, linux-pm,
	linux-kernel
  Cc: Joshua Emele

The iva coprocessor, available on some omap platforms, shares a voltage domain
with the mpu. If cpufreq is active and the mpu processor is scaled down, the iva
coprocessor should also be scaled. The goal is to make sure we do not ramp down
the voltage on the domain and affect clocking on the iva coprocessor leading to
a dsp crash.

I only have access to an omap3evm-ish device, so I do not know what the iva
clock name is for omap24xx and omap44xx. This detail can be added later if the
general approach is approved.

I have tested a version of this patch against the linux-3.3 kernel, so this my
attempt at a forward port against the current mainline. I have based my patch
series against linux-omap-pm/pm-next.

Joshua Emele (4):
  cpufreq: OMAP: if an iva clock name is specified, load iva resources
  cpufreq: OMAP: for omap3 devices, specify the iva clock name
  cpufreq: OMAP: ensure the iva coprocessor is at the same opp as the
    mpu
  cpufreq: OMAP: scale the iva coprocessor if available

 drivers/cpufreq/omap-cpufreq.c |  113 +++++++++++++++++++++++++++++++++------
 1 files changed, 95 insertions(+), 18 deletions(-)

-- 
1.7.6.5


^ permalink raw reply

* [PATCH 1/4] cpufreq: OMAP: if an iva clock name is specified, load iva resources
From: Joshua Emele @ 2012-11-07  1:47 UTC (permalink / raw)
  To: Kevin Hilman, Rafael J. Wysocki, linux-omap, cpufreq, linux-pm,
	linux-kernel
  Cc: Joshua Emele
In-Reply-To: <1352252861-18384-1-git-send-email-jemele@gmail.com>


Signed-off-by: Joshua Emele <jemele@gmail.com>
---
 drivers/cpufreq/omap-cpufreq.c |   73 ++++++++++++++++++++++++++++++++-------
 1 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 17fa04d..1621208 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -50,11 +50,11 @@ static DEFINE_PER_CPU(struct lpj_info, lpj_ref);
 static struct lpj_info global_lpj_ref;
 #endif
 
-static struct cpufreq_frequency_table *freq_table;
+static struct cpufreq_frequency_table *freq_table, *iva_freq_table;
 static atomic_t freq_table_users = ATOMIC_INIT(0);
-static struct clk *mpu_clk;
-static char *mpu_clk_name;
-static struct device *mpu_dev;
+static struct clk *mpu_clk, *iva_clk;
+static char *mpu_clk_name, *iva_clk_name;
+static struct device *mpu_dev, *iva_dev;
 static struct regulator *mpu_reg;
 
 static int omap_verify_speed(struct cpufreq_policy *policy)
@@ -199,8 +199,49 @@ done:
 
 static inline void freq_table_free(void)
 {
-	if (atomic_dec_and_test(&freq_table_users))
+	if (atomic_dec_and_test(&freq_table_users)) {
 		opp_free_cpufreq_table(mpu_dev, &freq_table);
+		opp_free_cpufreq_table(iva_dev, &iva_freq_table);
+	}
+}
+
+static inline void clk_free(void)
+{
+	if (mpu_clk) {
+		clk_put(mpu_clk);
+		mpu_clk = NULL;
+	}
+	if (iva_clk) {
+		clk_put(iva_clk);
+		iva_clk = NULL;
+	}
+}
+
+static int __cpuinit omap_iva_init(struct cpufreq_policy *policy)
+{
+	int result;
+	if (!iva_clk_name) {
+		pr_info("%s: iva unavailable\n", __func__);
+		return 0;
+	}
+	iva_dev = omap_device_get_by_hwmod_name("iva");
+	if (!iva_dev) {
+		pr_err("%s: unable to get the iva device\n", __func__);
+		return -EINVAL;
+	}
+	iva_clk = clk_get(NULL, iva_clk_name);
+	if (IS_ERR(iva_clk)) {
+		dev_err(iva_dev, "%s: cpu%d: %s clock unavailable\n",
+			__func__, policy->cpu, iva_clk_name);
+		return PTR_ERR(iva_clk);
+	}
+	result = opp_init_cpufreq_table(iva_dev, &iva_freq_table);
+	if (result) {
+		dev_err(iva_dev, "%s: cpu%d: failed creating freq table[%d]\n",
+				__func__, policy->cpu, result);
+		return result;
+	}
+	return 0;
 }
 
 static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy)
@@ -218,13 +259,19 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy)
 
 	policy->cur = policy->min = policy->max = omap_getspeed(policy->cpu);
 
-	if (atomic_inc_return(&freq_table_users) == 1)
+	if (atomic_inc_return(&freq_table_users) == 1) {
 		result = opp_init_cpufreq_table(mpu_dev, &freq_table);
-
-	if (result) {
-		dev_err(mpu_dev, "%s: cpu%d: failed creating freq table[%d]\n",
-				__func__, policy->cpu, result);
-		goto fail_ck;
+		if (result) {
+			dev_err(mpu_dev, "%s: cpu%d: failed creating freq table[%d]\n",
+					__func__, policy->cpu, result);
+			goto fail_ck;
+		}
+		result = omap_iva_init(policy);
+		if (result) {
+			pr_err("%s: cpu%d: failed to initialize iva[%d]\n",
+					__func__, policy->cpu, result);
+			goto fail_table;
+		}
 	}
 
 	result = cpufreq_frequency_table_cpuinfo(policy, freq_table);
@@ -257,14 +304,14 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy)
 fail_table:
 	freq_table_free();
 fail_ck:
-	clk_put(mpu_clk);
+	clk_free();
 	return result;
 }
 
 static int omap_cpu_exit(struct cpufreq_policy *policy)
 {
 	freq_table_free();
-	clk_put(mpu_clk);
+	clk_free();
 	return 0;
 }
 
-- 
1.7.6.5

^ permalink raw reply related

* [PATCH 2/4] cpufreq: OMAP: for omap3 devices, specify the iva clock name
From: Joshua Emele @ 2012-11-07  1:47 UTC (permalink / raw)
  To: Kevin Hilman, Rafael J. Wysocki, linux-omap, cpufreq, linux-pm,
	linux-kernel
  Cc: Joshua Emele
In-Reply-To: <1352252861-18384-1-git-send-email-jemele@gmail.com>


Signed-off-by: Joshua Emele <jemele@gmail.com>
---
 drivers/cpufreq/omap-cpufreq.c |    5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 1621208..d8a751f 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -335,9 +335,10 @@ static int __init omap_cpufreq_init(void)
 {
 	if (cpu_is_omap24xx())
 		mpu_clk_name = "virt_prcm_set";
-	else if (cpu_is_omap34xx())
+	else if (cpu_is_omap34xx()) {
 		mpu_clk_name = "dpll1_ck";
-	else if (cpu_is_omap44xx())
+		iva_clk_name = "dpll2_ck";
+	} else if (cpu_is_omap44xx())
 		mpu_clk_name = "dpll_mpu_ck";
 
 	if (!mpu_clk_name) {
-- 
1.7.6.5

^ permalink raw reply related

* [PATCH 3/4] cpufreq: OMAP: ensure the iva coprocessor is at the same opp as the mpu
From: Joshua Emele @ 2012-11-07  1:47 UTC (permalink / raw)
  To: Kevin Hilman, Rafael J. Wysocki, linux-omap, cpufreq, linux-pm,
	linux-kernel
  Cc: Joshua Emele
In-Reply-To: <1352252861-18384-1-git-send-email-jemele@gmail.com>


Signed-off-by: Joshua Emele <jemele@gmail.com>
---
 drivers/cpufreq/omap-cpufreq.c |   18 ++++++++++++++++++
 1 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index d8a751f..e8bcad8 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -220,6 +220,9 @@ static inline void clk_free(void)
 static int __cpuinit omap_iva_init(struct cpufreq_policy *policy)
 {
 	int result;
+	unsigned long iva_rate;
+	unsigned int opp_index, mpu_freq = omap_getspeed(policy->cpu);
+
 	if (!iva_clk_name) {
 		pr_info("%s: iva unavailable\n", __func__);
 		return 0;
@@ -241,6 +244,21 @@ static int __cpuinit omap_iva_init(struct cpufreq_policy *policy)
 				__func__, policy->cpu, result);
 		return result;
 	}
+	result = cpufreq_frequency_table_target(policy, freq_table, mpu_freq,
+		CPUFREQ_RELATION_L, &opp_index);
+	if (result) {
+		dev_err(mpu_dev, "%s: cpu%d: no freq match for %u[%d]\n",
+				__func__, policy->cpu, mpu_freq, result);
+		return result;
+	}
+	iva_rate = iva_freq_table[opp_index].frequency * 1000;
+	result = clk_set_rate(iva_clk, iva_rate);
+	if (result) {
+		pr_err("%s: cpu%d: failed to set %s rate %lu[%d]\n",
+				__func__, policy->cpu, iva_clk->name, iva_rate,
+				result);
+		return result;
+	}
 	return 0;
 }
 
-- 
1.7.6.5

^ permalink raw reply related

* [PATCH 4/4] cpufreq: OMAP: scale the iva coprocessor if available
From: Joshua Emele @ 2012-11-07  1:47 UTC (permalink / raw)
  To: Kevin Hilman, Rafael J. Wysocki, linux-omap, cpufreq, linux-pm,
	linux-kernel
  Cc: Joshua Emele
In-Reply-To: <1352252861-18384-1-git-send-email-jemele@gmail.com>


Signed-off-by: Joshua Emele <jemele@gmail.com>
---
 drivers/cpufreq/omap-cpufreq.c |   17 ++++++++++++++---
 1 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index e8bcad8..103fa8b 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -79,7 +79,7 @@ static int omap_target(struct cpufreq_policy *policy,
 		       unsigned int target_freq,
 		       unsigned int relation)
 {
-	unsigned int i;
+	unsigned int i, opp_index;
 	int r, ret = 0;
 	struct cpufreq_freqs freqs;
 	struct opp *opp;
@@ -92,13 +92,13 @@ static int omap_target(struct cpufreq_policy *policy,
 	}
 
 	ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
-			relation, &i);
+			relation, &opp_index);
 	if (ret) {
 		dev_dbg(mpu_dev, "%s: cpu%d: no freq match for %d(ret=%d)\n",
 			__func__, policy->cpu, target_freq, ret);
 		return ret;
 	}
-	freqs.new = freq_table[i].frequency;
+	freqs.new = freq_table[opp_index].frequency;
 	if (!freqs.new) {
 		dev_err(mpu_dev, "%s: cpu%d: no match for freq %d\n", __func__,
 			policy->cpu, target_freq);
@@ -161,6 +161,17 @@ static int omap_target(struct cpufreq_policy *policy,
 	}
 
 	freqs.new = omap_getspeed(policy->cpu);
+
+	if (!ret && iva_freq_table && iva_clk) {
+		const unsigned long iva_rate =
+			iva_freq_table[opp_index].frequency * 1000;
+		ret = clk_set_rate(iva_clk, iva_rate);
+		if (ret) {
+			pr_err("%s: failed to set %s rate %lu[%d]\n",
+					__func__, iva_clk->name, iva_rate, ret);
+		}
+	}
+
 #ifdef CONFIG_SMP
 	/*
 	 * Note that loops_per_jiffy is not updated on SMP systems in
-- 
1.7.6.5

^ permalink raw reply related

* Re: [PATCH v4 1/6] mm: teach mm by current context info to not do I/O during memory allocation
From: Ming Lei @ 2012-11-07  3:11 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm,
	Jiri Kosina, Mel Gorman, KAMEZAWA Hiroyuki, Michal Hocko,
	Ingo Molnar, Peter Zijlstra
In-Reply-To: <20121106152354.90150a3b.akpm@linux-foundation.org>

On Wed, Nov 7, 2012 at 7:23 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
>
> It's unclear from the description why we're also clearing __GFP_FS in
> this situation.
>
> If we can avoid doing this then there will be a very small gain: there
> are some situations in which a filesystem can clean pagecache without
> performing I/O.

Firstly,  the patch follows the policy in the system suspend/resume situation,
in which the __GFP_FS is cleared, and basically the problem is very similar
with that in system PM path.

Secondly, inside shrink_page_list(), pageout() may be triggered on dirty anon
page if __GFP_FS is set.

IMO, if performing I/O can be completely avoided when __GFP_FS is set, the
flag can be kept, otherwise it is better to clear it in the situation.

>
> It doesn't appear that the patch will add overhead to the alloc/free
> hotpaths, which is good.

Thanks for previous Minchan's comment.

>
>>
>> ...
>>
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -1805,6 +1805,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
>>  #define PF_FROZEN    0x00010000      /* frozen for system suspend */
>>  #define PF_FSTRANS   0x00020000      /* inside a filesystem transaction */
>>  #define PF_KSWAPD    0x00040000      /* I am kswapd */
>> +#define PF_MEMALLOC_NOIO 0x00080000  /* Allocating memory without IO involved */
>>  #define PF_LESS_THROTTLE 0x00100000  /* Throttle me less: I clean memory */
>>  #define PF_KTHREAD   0x00200000      /* I am a kernel thread */
>>  #define PF_RANDOMIZE 0x00400000      /* randomize virtual address space */
>> @@ -1842,6 +1843,15 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
>>  #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
>>  #define used_math() tsk_used_math(current)
>>
>> +#define memalloc_noio() (current->flags & PF_MEMALLOC_NOIO)
>> +#define memalloc_noio_save(flag) do { \
>> +     (flag) = current->flags & PF_MEMALLOC_NOIO; \
>> +     current->flags |= PF_MEMALLOC_NOIO; \
>> +} while (0)
>> +#define memalloc_noio_restore(flag) do { \
>> +     current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flag; \
>> +} while (0)
>> +
>
> Again with the ghastly macros.  Please, do this properly in regular old
> C, as previously discussed.  It really doesn't matter what daft things
> local_irq_save() did 20 years ago.  Just do it right!

OK, I will take inline function in -v5.

>
> Also, you can probably put the unlikely() inside memalloc_noio() and
> avoid repeating it at all the callsites.
>
> And it might be neater to do:
>
> /*
>  * Nice comment goes here
>  */
> static inline gfp_t memalloc_noio_flags(gfp_t flags)
> {
>         if (unlikely(current->flags & PF_MEMALLOC_NOIO))
>                 flags &= ~GFP_IOFS;
>         return flags;
> }

But without the check in callsites, some local variables will be write
two times,
so it is better to not do it.

>
>>   * task->jobctl flags
>>   */
>>
>> ...
>>
>> @@ -2304,6 +2304,12 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
>>               .gfp_mask = sc.gfp_mask,
>>       };
>>
>> +     if (unlikely(memalloc_noio())) {
>> +             gfp_mask &= ~GFP_IOFS;
>> +             sc.gfp_mask = gfp_mask;
>> +             shrink.gfp_mask = sc.gfp_mask;
>> +     }
>
> We can avoid writing to shrink.gfp_mask twice.  And maybe sc.gfp_mask
> as well.  Unclear, I didn't think about it too hard ;)

Yes, we can do it by initializing 'shrink' local variable just after the branch,
so one writing is enough. Will do it in -v5.

Thanks,
--
Ming Lei

^ permalink raw reply

* Re: [PATCH v4 2/6] PM / Runtime: introduce pm_runtime_set_memalloc_noio()
From: Ming Lei @ 2012-11-07  3:32 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm
In-Reply-To: <20121106152419.9155a366.akpm@linux-foundation.org>

On Wed, Nov 7, 2012 at 7:24 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
>
> checkpatch finds a number of problems with this patch, all of which
> should be fixed.  Please always use checkpatch.

Sorry for missing the check.

>> +             /* only clear the flag for one device if all
>> +              * children of the device don't set the flag.
>> +              */
>
> Such a comment is usually laid out as
>
>                 /*
>                  * Only ...

Will do it in -v5.

> More significantly, the comment describes what the code is doing but
> not why the code is doing it.  The former is (usually) obvious from
> reading the C, and the latter is what good code comments address.
>
> And it's needed in this case.  Why does the code do this?

Suppose both two usb scsi disks which share the same usb
configuration(device) set the device memalloc_noio flag, and
its ancestors' memalloc_noio flag should be cleared only after
both the two usb scsi disk's flags have been cleared.

OK, we'll add comment on clearing flag.

>
> Also, can a device have more than one child?  If so, the code doesn't
> do what the comment says it does.

It should do that because device_for_each_child() returns true immediately
only if dev_memalloc_noio() for one child returns true.

>
>> +             if (!dev || (!enable &&
>> +                          device_for_each_child(dev, NULL,
>> +                                                dev_memalloc_noio)))
>> +                     break;
>> +     }
>> +     mutex_unlock(&dev_hotplug_mutex);
>> +}
>> +EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio);


Thanks,
--
Ming Lei

^ permalink raw reply

* Re: [PATCH v4 0/6] solve deadlock caused by memory allocation with I/O
From: Ming Lei @ 2012-11-07  3:37 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm
In-Reply-To: <20121106152303.b1e135ee.akpm@linux-foundation.org>

On Wed, Nov 7, 2012 at 7:23 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
>
> It generally looks OK to me.  I have a few comments and I expect to grab
> v5.

Andrew, thanks for your review, and I will prepare -v5 later.

Thanks,
--
Ming Lei

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v4 1/6] mm: teach mm by current context info to not do I/O during memory allocation
From: Andrew Morton @ 2012-11-07  3:48 UTC (permalink / raw)
  To: Ming Lei
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm,
	Jiri Kosina, Mel Gorman, KAMEZAWA Hiroyuki, Michal Hocko,
	Ingo Molnar, Peter Zijlstra
In-Reply-To: <CACVXFVNs2JtEYQ3Y2rA8L89sAaMJ7TO-PxG3h4w+ihcZrBLtpg@mail.gmail.com>

On Wed, 7 Nov 2012 11:11:24 +0800 Ming Lei <ming.lei@canonical.com> wrote:

> On Wed, Nov 7, 2012 at 7:23 AM, Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > It's unclear from the description why we're also clearing __GFP_FS in
> > this situation.
> >
> > If we can avoid doing this then there will be a very small gain: there
> > are some situations in which a filesystem can clean pagecache without
> > performing I/O.
> 
> Firstly,  the patch follows the policy in the system suspend/resume situation,
> in which the __GFP_FS is cleared, and basically the problem is very similar
> with that in system PM path.

I suspect that code is wrong.  Or at least, suboptimal.

> Secondly, inside shrink_page_list(), pageout() may be triggered on dirty anon
> page if __GFP_FS is set.

pageout() should be called if GFP_FS is set or if GFP_IO is set and the
IO is against swap.

And that's what we want to happen: we want to enter the fs to try to
turn dirty pagecache into clean pagecache without doing IO.  If we in
fact enter the device drivers when GFP_IO was not set then that's a bug
which we should fix.

> IMO, if performing I/O can be completely avoided when __GFP_FS is set, the
> flag can be kept, otherwise it is better to clear it in the situation.

yup.

> >
> > Also, you can probably put the unlikely() inside memalloc_noio() and
> > avoid repeating it at all the callsites.
> >
> > And it might be neater to do:
> >
> > /*
> >  * Nice comment goes here
> >  */
> > static inline gfp_t memalloc_noio_flags(gfp_t flags)
> > {
> >         if (unlikely(current->flags & PF_MEMALLOC_NOIO))
> >                 flags &= ~GFP_IOFS;
> >         return flags;
> > }
> 
> But without the check in callsites, some local variables will be write
> two times,
> so it is better to not do it.

I don't see why - we just modify the incoming gfp_t at the start of the
function, then use it.

It gets a bit tricky with those struct initialisations.  Things like

	struct foo bar {
		.a = a1,
		.b = b1,
	};

should not be turned into

	struct foo bar {
		.a = a1,
	};
	
	bar.b = b1;

and we don't want to do

	struct foo bar { };

	bar.a = a1;
	bar.b = b1;

either, because these are indeed a double-write.  But we can do

	struct foo bar {
		.flags = (flags = memalloc_noio_flags(flags)),
		.b = b1,
	};

which is a bit arcane but not toooo bad.  Have a think about it...


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH v4 1/6] mm: teach mm by current context info to not do I/O during memory allocation
From: Ming Lei @ 2012-11-07  4:35 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Alan Stern, Oliver Neukum, Minchan Kim,
	Greg Kroah-Hartman, Rafael J. Wysocki, Jens Axboe,
	David S. Miller, netdev, linux-usb, linux-pm, linux-mm,
	Jiri Kosina, Mel Gorman, KAMEZAWA Hiroyuki, Michal Hocko,
	Ingo Molnar, Peter Zijlstra
In-Reply-To: <20121106194859.8eec3043.akpm@linux-foundation.org>

On Wed, Nov 7, 2012 at 11:48 AM, Andrew Morton
<akpm@linux-foundation.org> wrote:
>>
>> Firstly,  the patch follows the policy in the system suspend/resume situation,
>> in which the __GFP_FS is cleared, and basically the problem is very similar
>> with that in system PM path.
>
> I suspect that code is wrong.  Or at least, suboptimal.
>
>> Secondly, inside shrink_page_list(), pageout() may be triggered on dirty anon
>> page if __GFP_FS is set.
>
> pageout() should be called if GFP_FS is set or if GFP_IO is set and the
> IO is against swap.
>
> And that's what we want to happen: we want to enter the fs to try to
> turn dirty pagecache into clean pagecache without doing IO.  If we in
> fact enter the device drivers when GFP_IO was not set then that's a bug
> which we should fix.

OK, I got it, and I'll not clear GFP_FS in -v5.

>
>> IMO, if performing I/O can be completely avoided when __GFP_FS is set, the
>> flag can be kept, otherwise it is better to clear it in the situation.
>
> yup.
>
>> >
>> > Also, you can probably put the unlikely() inside memalloc_noio() and
>> > avoid repeating it at all the callsites.
>> >
>> > And it might be neater to do:
>> >
>> > /*
>> >  * Nice comment goes here
>> >  */
>> > static inline gfp_t memalloc_noio_flags(gfp_t flags)
>> > {
>> >         if (unlikely(current->flags & PF_MEMALLOC_NOIO))
>> >                 flags &= ~GFP_IOFS;
>> >         return flags;
>> > }
>>
>> But without the check in callsites, some local variables will be write
>> two times,
>> so it is better to not do it.
>
> I don't see why - we just modify the incoming gfp_t at the start of the
> function, then use it.
>
> It gets a bit tricky with those struct initialisations.  Things like
>
>         struct foo bar {
>                 .a = a1,
>                 .b = b1,
>         };
>
> should not be turned into
>
>         struct foo bar {
>                 .a = a1,
>         };
>
>         bar.b = b1;
>
> and we don't want to do
>
>         struct foo bar { };
>
>         bar.a = a1;
>         bar.b = b1;
>
> either, because these are indeed a double-write.  But we can do
>
>         struct foo bar {
>                 .flags = (flags = memalloc_noio_flags(flags)),
>                 .b = b1,
>         };
>
> which is a bit arcane but not toooo bad.  Have a think about it...

Got it, looks memalloc_noio_flags() neater, and I will take it in v5.

Thanks,
--
Ming Lei

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* RE: [PATCH v4] Thermal: exynos: Add sysfs node supporting exynos's emulation mode.
From: Zhang Rui @ 2012-11-07  6:36 UTC (permalink / raw)
  To: R, Durgadoss
  Cc: Jonghwa Lee, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org, Brown, Len, Rafael J. Wysocki,
	Amit Dinel Kachhap, MyungJoo Ham, Kyungmin Park
In-Reply-To: <4D68720C2E767A4AA6A8796D42C8EB591FFA29@BGSMSX101.gar.corp.intel.com>

On Thu, 2012-11-01 at 23:13 -0600, R, Durgadoss wrote:
> Hi Lee,
> 
> > -----Original Message-----
> > From: Jonghwa Lee [mailto:jonghwa3.lee@samsung.com]
> > Sent: Friday, November 02, 2012 7:55 AM
> > To: linux-pm@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org; Brown, Len; R, Durgadoss; Rafael J.
> > Wysocki; Amit Dinel Kachhap; MyungJoo Ham; Kyungmin Park; Jonghwa Lee
> > Subject: [PATCH v4] Thermal: exynos: Add sysfs node supporting exynos's
> > emulation mode.
> > 
> > This patch supports exynos's emulation mode with newly created sysfs node.
> > Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal
> > management unit. Thermal emulation mode supports software debug for
> > TMU's
> > operation. User can set temperature manually with software code and TMU
> > will read current temperature from user value not from sensor's value.
> > This patch includes also documentary placed under
> > Documentation/thermal/.
> > 
> 
first of all, what would happen if overheat happens during emulation?

I just had a thought about if we can introduce this to the generic
thermal layer.
to do this, we only need to:
1) introduce tz->emulation
2) introduce thermal_get_temp()
      static int thermal_get_temp(tz) {
          if (tz->emulation)
              return tz->emulation;
          else
              return tz->ops->get_temp(tz);
      }
3) replace tz->ops->get_temp() with thermal_get_temp() in thermal layer
4) introduce /sys/class/thermal/thermal_zoneX/emulation
5) when setting /sys/class/thermal/thermal_zoneX/emulation,
   a) set tz->emulation
   b) invoke thermal_zone_device_update();
this is a pure software emulation solution but it would work on all
generic thermal layer users.

do you think this proposal would work properly?
if yes, I'd like to see if it is valuable for the other platform thermal
drivers.

thanks,
rui
> Thanks for fixing the comments. 
> Please CC linux-acpi, when you submit thermal patches, going forward.
> I am CCing Rui for now, for him to review/merge this patch.
> 
> Reviewed-by: Durgadoss R <durgadoss.r@intel.com>
> 
> Thanks,
> Durga
> 
> > Signed-off-by: Jonghwa Lee <jonghwa3.lee@samsung.com>
> > ---
> > v4
> >  - Fix Typo.
> >  - Remove unnecessary codes.
> >  - Add comments about feature of exynos emulation operation to the
> > document.
> > 
> > v3
> >  - Remove unnecessay variables.
> >  - Do some code clean in exynos_tmu_emulation_store().
> >  - Make wrapping function of sysfs node creation function to use
> >    #ifdefs in minimum.
> > 
> > v2
> >  exynos_thermal.c
> >  - Fix build error occured by wrong emulation control register name.
> >  - Remove exynos5410 dependent codes.
> >  exynos_thermal_emulation
> >  - Align indentation.
> > 
> >  Documentation/thermal/exynos_thermal_emulation |   56
> > +++++++++++++++
> >  drivers/thermal/Kconfig                        |    9 +++
> >  drivers/thermal/exynos_thermal.c               |   91
> > ++++++++++++++++++++++++
> >  3 files changed, 156 insertions(+), 0 deletions(-)
> >  create mode 100644 Documentation/thermal/exynos_thermal_emulation
> > 
> > diff --git a/Documentation/thermal/exynos_thermal_emulation
> > b/Documentation/thermal/exynos_thermal_emulation
> > new file mode 100644
> > index 0000000..a6ea06f
> > --- /dev/null
> > +++ b/Documentation/thermal/exynos_thermal_emulation
> > @@ -0,0 +1,56 @@
> > +EXYNOS EMULATION MODE
> > +========================
> > +
> > +Copyright (C) 2012 Samsung Electronics
> > +
> > +Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
> > +
> > +Description
> > +-----------
> > +
> > +Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal
> > management unit.
> > +Thermal emulation mode supports software debug for TMU's operation.
> > User can set temperature
> > +manually with software code and TMU will read current temperature from
> > user value not from
> > +sensor's value.
> > +
> > +Enabling CONFIG_EXYNOS_THERMAL_EMUL option will make this support
> > in available.
> > +When it's enabled, sysfs node will be created under
> > +/sys/bus/platform/devices/'exynos device name'/ with name of
> > 'emulation'.
> > +
> > +The sysfs node, 'emulation', will contain value 0 for the initial state. When
> > you input any
> > +temperature you want to update to sysfs node, it automatically enable
> > emulation mode and
> > +current temperature will be changed into it.
> > +(Exynos also supports user changable delay time which would be used to
> > delay of
> > + changing temperature. However, this node only uses same delay of real
> > sensing time, 938us.)
> > +
> > +Exynos emulation mode requires synchronous of value changing and
> > enabling. It means when you
> > +want to update the any value of delay or next temperature, then you have
> > to enable emulation
> > +mode at the same time. (Or you have to keep the mode enabling.) If you
> > don't, it fails to
> > +change the value to updated one and just use last succeessful value
> > repeatedly. That's why
> > +this node gives users the right to change termerpature only. Just one
> > interface makes it more
> > +simply to use.
> > +
> > +Disabling emulation mode only requires writing value 0 to sysfs node.
> > +
> > +
> > +TEMP	120 |
> > +	    |
> > +	100 |
> > +	    |
> > +	 80 |
> > +	    |		     	 	 +-----------
> > +	 60 |      		     	 |	    |
> > +	    |	           +-------------|          |
> > +	 40 |              |         	 |          |
> > +   	    |		   |	     	 |          |
> > +	 20 |		   |	     	 |          +----------
> > +	    |	 	   |	     	 |          |          |
> > +  	  0
> > |______________|_____________|__________|__________|_______
> > __
> > +		   A	    	 A	    A	   	       A     TIME
> > +		   |<----->|	 |<----->|  |<----->|	       |
> > +		   | 938us |  	 |	 |  |       |          |
> > +emulation    :  0  50	   |  	 70      |  20      |          0
> > +current temp :   sensor   50		 70         20	      sensor
> > +
> > +
> > +
> > diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
> > index e1cb6bd..c02a66c 100644
> > --- a/drivers/thermal/Kconfig
> > +++ b/drivers/thermal/Kconfig
> > @@ -55,3 +55,12 @@ config EXYNOS_THERMAL
> >  	help
> >  	  If you say yes here you get support for TMU (Thermal Managment
> >  	  Unit) on SAMSUNG EXYNOS series of SoC.
> > +
> > +config EXYNOS_THERMAL_EMUL
> > +	bool "EXYNOS TMU emulation mode support"
> > +	depends on !CPU_EXYNOS4210 && EXYNOS_THERMAL
> > +	help
> > +	  Exynos 4412 and 4414 and 5 series has emulation mode on TMU.
> > +	  Enable this option will be make sysfs node in exynos thermal
> > platform
> > +	  device directory to support emulation mode. With emulation mode
> > sysfs
> > +	  node, you can manually input temperature to TMU for simulation
> > purpose.
> > diff --git a/drivers/thermal/exynos_thermal.c
> > b/drivers/thermal/exynos_thermal.c
> > index fd03e85..eebd4e5 100644
> > --- a/drivers/thermal/exynos_thermal.c
> > +++ b/drivers/thermal/exynos_thermal.c
> > @@ -99,6 +99,14 @@
> >  #define IDLE_INTERVAL 10000
> >  #define MCELSIUS	1000
> > 
> > +#ifdef CONFIG_EXYNOS_THERMAL_EMUL
> > +#define EXYNOS_EMUL_TIME	0x57F0
> > +#define EXYNOS_EMUL_TIME_SHIFT	16
> > +#define EXYNOS_EMUL_DATA_SHIFT	8
> > +#define EXYNOS_EMUL_DATA_MASK	0xFF
> > +#define EXYNOS_EMUL_ENABLE	0x1
> > +#endif /* CONFIG_EXYNOS_THERMAL_EMUL */
> > +
> >  /* CPU Zone information */
> >  #define PANIC_ZONE      4
> >  #define WARN_ZONE       3
> > @@ -832,6 +840,82 @@ static inline struct  exynos_tmu_platform_data
> > *exynos_get_driver_data(
> >  	return (struct exynos_tmu_platform_data *)
> >  			platform_get_device_id(pdev)->driver_data;
> >  }
> > +
> > +#ifdef CONFIG_EXYNOS_THERMAL_EMUL
> > +static ssize_t exynos_tmu_emulation_show(struct device *dev,
> > +					 struct device_attribute *attr,
> > +					 char *buf)
> > +{
> > +	struct platform_device *pdev = container_of(dev,
> > +					struct platform_device, dev);
> > +	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
> > +	unsigned int reg;
> > +	u8 temp_code;
> > +	int temp = 0;
> > +
> > +	mutex_lock(&data->lock);
> > +	clk_enable(data->clk);
> > +	reg = readl(data->base + EXYNOS_EMUL_CON);
> > +	clk_disable(data->clk);
> > +	mutex_unlock(&data->lock);
> > +
> > +	if (reg & EXYNOS_EMUL_ENABLE) {
> > +		reg >>= EXYNOS_EMUL_DATA_SHIFT;
> > +		temp_code = reg & EXYNOS_EMUL_DATA_MASK;
> > +		temp = code_to_temp(data, temp_code);
> > +	}
> > +
> > +	return sprintf(buf, "%d\n", temp);
> > +}
> > +
> > +static ssize_t exynos_tmu_emulation_store(struct device *dev,
> > +					struct device_attribute *attr,
> > +					const char *buf, size_t count)
> > +{
> > +	struct platform_device *pdev = container_of(dev,
> > +					struct platform_device, dev);
> > +	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
> > +	unsigned int reg;
> > +	int temp;
> > +
> > +	if (!sscanf(buf, "%d\n", &temp) || temp < 0)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&data->lock);
> > +	clk_enable(data->clk);
> > +
> > +	reg = readl(data->base + EXYNOS_EMUL_CON);
> > +
> > +	if (temp)
> > +		reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT)
> > |
> > +			(temp_to_code(data, temp) <<
> > EXYNOS_EMUL_DATA_SHIFT) |
> > +			EXYNOS_EMUL_ENABLE;
> > +	else
> > +		reg &= ~EXYNOS_EMUL_ENABLE;
> > +
> > +	writel(reg, data->base + EXYNOS_EMUL_CON);
> > +
> > +	clk_disable(data->clk);
> > +	mutex_unlock(&data->lock);
> > +
> > +	return count;
> > +}
> > +
> > +static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show,
> > +					exynos_tmu_emulation_store);
> > +static int create_emulation_sysfs(struct device *dev)
> > +{
> > +	return device_create_file(dev, &dev_attr_emulation);
> > +}
> > +static void remove_emulation_sysfs(struct device *dev)
> > +{
> > +	device_remove_file(dev, &dev_attr_emulation);
> > +}
> > +#else
> > +static inline int create_emulation_sysfs(struct device *dev) {return 0;}
> > +static inline void remove_emulation_sysfs(struct device *dev){}
> > +#endif
> > +
> >  static int __devinit exynos_tmu_probe(struct platform_device *pdev)
> >  {
> >  	struct exynos_tmu_data *data;
> > @@ -930,6 +1014,11 @@ static int __devinit exynos_tmu_probe(struct
> > platform_device *pdev)
> >  		dev_err(&pdev->dev, "Failed to register thermal
> > interface\n");
> >  		goto err_clk;
> >  	}
> > +
> > +	ret = create_emulation_sysfs(&pdev->dev);
> > +	if (ret)
> > +		dev_err(&pdev->dev, "Failed to create emulation mode
> > sysfs node\n");
> > +
> >  	return 0;
> >  err_clk:
> >  	platform_set_drvdata(pdev, NULL);
> > @@ -941,6 +1030,8 @@ static int __devexit exynos_tmu_remove(struct
> > platform_device *pdev)
> >  {
> >  	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
> > 
> > +	remove_emulation_sysfs(&pdev->dev);
> > +
> >  	exynos_tmu_control(pdev, false);
> > 
> >  	exynos_unregister_thermal();
> > --
> > 1.7.4.1
> 



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox