* [PATCH v2 03/16] ARM: b.L: introduce the CPU/cluster power API
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
This is the basic API used to handle the powering up/down of individual
CPUs in a big.LITTLE system. The platform specific backend implementation
has the responsibility to also handle the cluster level power as well when
the first/last CPU in a cluster is brought up/down.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_entry.c | 88 +++++++++++++++++++++++++++++++++++++++
arch/arm/include/asm/bL_entry.h | 92 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 180 insertions(+)
diff --git a/arch/arm/common/bL_entry.c b/arch/arm/common/bL_entry.c
index 4e1044612d..54bf0e572f 100644
--- a/arch/arm/common/bL_entry.c
+++ b/arch/arm/common/bL_entry.c
@@ -11,11 +11,13 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/irqflags.h>
#include <asm/bL_entry.h>
#include <asm/barrier.h>
#include <asm/proc-fns.h>
#include <asm/cacheflush.h>
+#include <asm/idmap.h>
extern volatile unsigned long bL_entry_vectors[BL_MAX_CLUSTERS][BL_MAX_CPUS_PER_CLUSTER];
@@ -27,3 +29,89 @@ void bL_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
outer_clean_range(__pa(&bL_entry_vectors[cluster][cpu]),
__pa(&bL_entry_vectors[cluster][cpu + 1]));
}
+
+static const struct bL_platform_power_ops *platform_ops;
+
+int __init bL_platform_power_register(const struct bL_platform_power_ops *ops)
+{
+ if (platform_ops)
+ return -EBUSY;
+ platform_ops = ops;
+ return 0;
+}
+
+int bL_cpu_power_up(unsigned int cpu, unsigned int cluster)
+{
+ if (!platform_ops)
+ return -EUNATCH;
+ might_sleep();
+ return platform_ops->power_up(cpu, cluster);
+}
+
+typedef void (*phys_reset_t)(unsigned long);
+
+void bL_cpu_power_down(void)
+{
+ phys_reset_t phys_reset;
+
+ BUG_ON(!platform_ops);
+ BUG_ON(!irqs_disabled());
+
+ /*
+ * Do this before calling into the power_down method,
+ * as it might not always be safe to do afterwards.
+ */
+ setup_mm_for_reboot();
+
+ platform_ops->power_down();
+
+ /*
+ * It is possible for a power_up request to happen concurrently
+ * with a power_down request for the same CPU. In this case the
+ * power_down method might not be able to actually enter a
+ * powered down state with the WFI instruction if the power_up
+ * method has removed the required reset condition. The
+ * power_down method is then allowed to return. We must perform
+ * a re-entry in the kernel as if the power_up method just had
+ * deasserted reset on the CPU.
+ *
+ * To simplify race issues, the platform specific implementation
+ * must accommodate for the possibility of unordered calls to
+ * power_down and power_up with a usage count. Therefore, if a
+ * call to power_up is issued for a CPU that is not down, then
+ * the next call to power_down must not attempt a full shutdown
+ * but only do the minimum (normally disabling L1 cache and CPU
+ * coherency) and return just as if a concurrent power_up request
+ * had happened as described above.
+ */
+
+ phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+ phys_reset(virt_to_phys(bL_entry_point));
+
+ /* should never get here */
+ BUG();
+}
+
+void bL_cpu_suspend(u64 expected_residency)
+{
+ phys_reset_t phys_reset;
+
+ BUG_ON(!platform_ops);
+ BUG_ON(!irqs_disabled());
+
+ /* Very similar to bL_cpu_power_down() */
+ setup_mm_for_reboot();
+ platform_ops->suspend(expected_residency);
+ phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+ phys_reset(virt_to_phys(bL_entry_point));
+ BUG();
+}
+
+int bL_cpu_powered_up(void)
+{
+ if (!platform_ops)
+ return -EUNATCH;
+ if (platform_ops->powered_up)
+ platform_ops->powered_up();
+ return 0;
+}
diff --git a/arch/arm/include/asm/bL_entry.h b/arch/arm/include/asm/bL_entry.h
index 7525614243..adf8706c76 100644
--- a/arch/arm/include/asm/bL_entry.h
+++ b/arch/arm/include/asm/bL_entry.h
@@ -31,5 +31,97 @@ extern void bL_entry_point(void);
*/
void bL_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
+/*
+ * CPU/cluster power operations API for higher subsystems to use.
+ */
+
+/**
+ * bL_cpu_power_up - make given CPU in given cluster runable
+ *
+ * @cpu: CPU number within given cluster
+ * @cluster: cluster number for the CPU
+ *
+ * The identified CPU is brought out of reset. If the cluster was powered
+ * down then it is brought up as well, taking care not to let the other CPUs
+ * in the cluster run, and ensuring appropriate cluster setup.
+ *
+ * Caller must ensure the appropriate entry vector is initialized with
+ * bL_set_entry_vector() prior to calling this.
+ *
+ * This must be called in a sleepable context. However, the implementation
+ * is strongly encouraged to return early and let the operation happen
+ * asynchronously, especially when significant delays are expected.
+ *
+ * If the operation cannot be performed then an error code is returned.
+ */
+int bL_cpu_power_up(unsigned int cpu, unsigned int cluster);
+
+/**
+ * bL_cpu_power_down - power the calling CPU down
+ *
+ * The calling CPU is powered down.
+ *
+ * If this CPU is found to be the "last man standing" in the cluster
+ * then the cluster is prepared for power-down too.
+ *
+ * This must be called with interrupts disabled.
+ *
+ * This does not return. Re-entry in the kernel is expected via
+ * bL_entry_point.
+ */
+void bL_cpu_power_down(void);
+
+/**
+ * bL_cpu_suspend - bring the calling CPU in a suspended state
+ *
+ * @expected_residency: duration in microseconds the CPU is expected
+ * to remain suspended, or 0 if unknown/infinity.
+ *
+ * The calling CPU is suspended. The expected residency argument is used
+ * as a hint by the platform specific backend to implement the appropriate
+ * sleep state level according to the knowledge it has on wake-up latency
+ * for the given hardware.
+ *
+ * If this CPU is found to be the "last man standing" in the cluster
+ * then the cluster may be prepared for power-down too, if the expected
+ * residency makes it worthwhile.
+ *
+ * This must be called with interrupts disabled.
+ *
+ * This does not return. Re-entry in the kernel is expected via
+ * bL_entry_point.
+ */
+void bL_cpu_suspend(u64 expected_residency);
+
+/**
+ * bL_cpu_powered_up - housekeeping workafter a CPU has been powered up
+ *
+ * This lets the platform specific backend code perform needed housekeeping
+ * work. This must be called by the newly activated CPU as soon as it is
+ * fully operational in kernel space, before it enables interrupts.
+ *
+ * If the operation cannot be performed then an error code is returned.
+ */
+int bL_cpu_powered_up(void);
+
+/*
+ * Platform specific methods used in the implementation of the above API.
+ */
+struct bL_platform_power_ops {
+ int (*power_up)(unsigned int cpu, unsigned int cluster);
+ void (*power_down)(void);
+ void (*suspend)(u64);
+ void (*powered_up)(void);
+};
+
+/**
+ * bL_platform_power_register - register platform specific power methods
+ *
+ * @ops: bL_platform_power_ops structure to register
+ *
+ * An error is returned if the registration has been done previously.
+ */
+int __init bL_platform_power_register(const struct bL_platform_power_ops *ops);
+
#endif /* ! __ASSEMBLY__ */
#endif
--
1.8.0
^ permalink raw reply related
* [PATCH v2 04/16] ARM: b.L: introduce helpers for platform coherency exit/setup
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Dave Martin <dave.martin@linaro.org>
This provides helper methods to coordinate between CPUs coming down
and CPUs going up, as well as documentation on the used algorithms,
so that cluster teardown and setup
operations are not done for a cluster simultaneously.
For use in the power_down() implementation:
* __bL_cpu_going_down(unsigned int cluster, unsigned int cpu)
* __bL_outbound_enter_critical(unsigned int cluster)
* __bL_outbound_leave_critical(unsigned int cluster)
* __bL_cpu_down(unsigned int cluster, unsigned int cpu)
The power_up_setup() helper should do platform-specific setup in
preparation for turning the CPU on, such as invalidating local caches
or entering coherency. It must be assembler for now, since it must
run before the MMU can be switched on. It is passed the affinity level
which should be initialized.
Because the bL_cluster_sync_struct content is looked-up and modified
with the cache enabled or disabled depending on the code path, it is
crucial to always ensure proper cache maintenance to update main memory
right away. Therefore, any cached write must be followed by a cache
clean operation and any cached read must be preceded by a cache
invalidate operation (actually a cache flush i.e. clean+invalidate to
avoid discarding possible concurrent writes) on the accessed memory.
Also, in order to prevent a cached writer from interfering with an
adjacent non-cached writer, we ensure each state variable is located to
a separate cache line.
Thanks to Nicolas Pitre and Achin Gupta for the help with this
patch.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
---
.../arm/big.LITTLE/cluster-pm-race-avoidance.txt | 498 +++++++++++++++++++++
arch/arm/common/bL_entry.c | 197 ++++++++
arch/arm/common/bL_head.S | 106 ++++-
arch/arm/include/asm/bL_entry.h | 63 +++
4 files changed, 862 insertions(+), 2 deletions(-)
create mode 100644 Documentation/arm/big.LITTLE/cluster-pm-race-avoidance.txt
diff --git a/Documentation/arm/big.LITTLE/cluster-pm-race-avoidance.txt b/Documentation/arm/big.LITTLE/cluster-pm-race-avoidance.txt
new file mode 100644
index 0000000000..ba6dadb0d4
--- /dev/null
+++ b/Documentation/arm/big.LITTLE/cluster-pm-race-avoidance.txt
@@ -0,0 +1,498 @@
+Big.LITTLE cluster Power-up/power-down race avoidance algorithm
+===============================================================
+
+This file documents the algorithm which is used to coordinate CPU and
+cluster setup and teardown operations and to manage hardware coherency
+controls safely.
+
+The section "Rationale" explains what the algorithm is for and why it is
+needed. "Basic model" explains general concepts using a simplified view
+of the system. The other sections explain the actual details of the
+algorithm in use.
+
+
+Rationale
+---------
+
+In a system containing multiple CPUs, it is desirable to have the
+ability to turn off individual CPUs when the system is idle, reducing
+power consumption and thermal dissipation.
+
+In a system containing multiple clusters of CPUs, it is also desirable
+to have the ability to turn off entire clusters.
+
+Turning entire clusters off and on is a risky business, because it
+involves performing potentially destructive operations affecting a group
+of independently running CPUs, while the OS continues to run. This
+means that we need some coordination in order to ensure that critical
+cluster-level operations are only performed when it is truly safe to do
+so.
+
+Simple locking may not be sufficient to solve this problem, because
+mechanisms like Linux spinlocks may rely on coherency mechanisms which
+are not immediately enabled when a cluster powers up. Since enabling or
+disabling those mechanisms may itself be a non-atomic operation (such as
+writing some hardware registers and invalidating large caches), other
+methods of coordination are required in order to guarantee safe
+power-down and power-up at the cluster level.
+
+The mechanism presented in this document describes a coherent memory
+based protocol for performing the needed coordination. It aims to be as
+lightweight as possible, while providing the required safety properties.
+
+
+Basic model
+-----------
+
+Each cluster and CPU is assigned a state, as follows:
+
+ DOWN
+ COMING_UP
+ UP
+ GOING_DOWN
+
+ +---------> UP ----------+
+ | v
+
+ COMING_UP GOING_DOWN
+
+ ^ |
+ +--------- DOWN <--------+
+
+
+DOWN: The CPU or cluster is not coherent, and is either powered off or
+ suspended, or is ready to be powered off or suspended.
+
+COMING_UP: The CPU or cluster has committed to moving to the UP state.
+ It may be part way through the process of initialisation and
+ enabling coherency.
+
+UP: The CPU or cluster is active and coherent at the hardware
+ level. A CPU in this state is not necessarily being used
+ actively by the kernel.
+
+GOING_DOWN: The CPU or cluster has committed to moving to the DOWN
+ state. It may be part way through the process of teardown and
+ coherency exit.
+
+
+Each CPU has one of these states assigned to it at any point in time.
+The CPU states are described in the "CPU state" section, below.
+
+Each cluster is also assigned a state, but it is necessary to split the
+state value into two parts (the "cluster" state and "inbound" state) and
+to introduce additional states in order to avoid races between different
+CPUs in the cluster simultaneously modifying the state. The cluster-
+level states are described in the "Cluster state" section.
+
+To help distinguish the CPU states from cluster states in this
+discussion, the state names are given a CPU_ prefix for the CPU states,
+and a CLUSTER_ or INBOUND_ prefix for the cluster states.
+
+
+CPU state
+---------
+
+In this algorithm, each individual core in a multi-core processor is
+referred to as a "CPU". CPUs are assumed to be single-threaded:
+therefore, a CPU can only be doing one thing@a single point in time.
+
+This means that CPUs fit the basic model closely.
+
+The algorithm defines the following states for each CPU in the system:
+
+ CPU_DOWN
+ CPU_COMING_UP
+ CPU_UP
+ CPU_GOING_DOWN
+
+ cluster setup and
+ CPU setup complete policy decision
+ +-----------> CPU_UP ------------+
+ | v
+
+ CPU_COMING_UP CPU_GOING_DOWN
+
+ ^ |
+ +----------- CPU_DOWN <----------+
+ policy decision CPU teardown complete
+ or hardware event
+
+
+The definitions of the four states correspond closely to the states of
+the basic model.
+
+Transitions between states occur as follows.
+
+A trigger event (spontaneous) means that the CPU can transition to the
+next state as a result of making local progress only, with no
+requirement for any external event to happen.
+
+
+CPU_DOWN:
+
+ A CPU reaches the CPU_DOWN state when it is ready for
+ power-down. On reaching this state, the CPU will typically
+ power itself down or suspend itself, via a WFI instruction or a
+ firmware call.
+
+ Next state: CPU_COMING_UP
+ Conditions: none
+
+ Trigger events:
+
+ a) an explicit hardware power-up operation, resulting
+ from a policy decision on another CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CPU_COMING_UP:
+
+ A CPU cannot start participating in hardware coherency until the
+ cluster is set up and coherent. If the cluster is not ready,
+ then the CPU will wait in the CPU_COMING_UP state until the
+ cluster has been set up.
+
+ Next state: CPU_UP
+ Conditions: The CPU's parent cluster must be in CLUSTER_UP.
+ Trigger events: Transition of the parent cluster to CLUSTER_UP.
+
+ Refer to the "Cluster state" section for a description of the
+ CLUSTER_UP state.
+
+
+CPU_UP:
+ When a CPU reaches the CPU_UP state, it is safe for the CPU to
+ start participating in local coherency.
+
+ This is done by jumping to the kernel's CPU resume code.
+
+ Note that the definition of this state is slightly different
+ from the basic model definition: CPU_UP does not mean that the
+ CPU is coherent yet, but it does mean that it is safe to resume
+ the kernel. The kernel handles the rest of the resume
+ procedure, so the remaining steps are not visible as part of the
+ race avoidance algorithm.
+
+ The CPU remains in this state until an explicit policy decision
+ is made to shut down or suspend the CPU.
+
+ Next state: CPU_GOING_DOWN
+ Conditions: none
+ Trigger events: explicit policy decision
+
+
+CPU_GOING_DOWN:
+
+ While in this state, the CPU exits coherency, including any
+ operations required to achieve this (such as cleaning data
+ caches).
+
+ Next state: CPU_DOWN
+ Conditions: local CPU teardown complete
+ Trigger events: (spontaneous)
+
+
+Cluster state
+-------------
+
+A cluster is a group of connected CPUs with some common resources.
+Because a cluster contains multiple CPUs, it can be doing multiple
+things@the same time. This has some implications. In particular, a
+CPU can start up while another CPU is tearing the cluster down.
+
+In this discussion, the "outbound side" is the view of the cluster state
+as seen by a CPU tearing the cluster down. The "inbound side" is the
+view of the cluster state as seen by a CPU setting the CPU up.
+
+In order to enable safe coordination in such situations, it is important
+that a CPU which is setting up the cluster can advertise its state
+independently of the CPU which is tearing down the cluster. For this
+reason, the cluster state is split into two parts:
+
+ "cluster" state: The global state of the cluster; or the state
+ on the outbound side:
+
+ CLUSTER_DOWN
+ CLUSTER_UP
+ CLUSTER_GOING_DOWN
+
+ "inbound" state: The state of the cluster on the inbound side.
+
+ INBOUND_NOT_COMING_UP
+ INBOUND_COMING_UP
+
+
+ The different pairings of these states results in six possible
+ states for the cluster as a whole:
+
+ CLUSTER_UP
+ +==========> INBOUND_NOT_COMING_UP -------------+
+ # |
+ |
+ CLUSTER_UP <----+ |
+ INBOUND_COMING_UP | v
+
+ ^ CLUSTER_GOING_DOWN CLUSTER_GOING_DOWN
+ # INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP
+
+ CLUSTER_DOWN | |
+ INBOUND_COMING_UP <----+ |
+ |
+ ^ |
+ +=========== CLUSTER_DOWN <------------+
+ INBOUND_NOT_COMING_UP
+
+ Transitions -----> can only be made by the outbound CPU, and
+ only involve changes to the "cluster" state.
+
+ Transitions ===##> can only be made by the inbound CPU, and only
+ involve changes to the "inbound" state, except where there is no
+ further transition possible on the outbound side (i.e., the
+ outbound CPU has put the cluster into the CLUSTER_DOWN state).
+
+ The race avoidance algorithm does not provide a way to determine
+ which exact CPUs within the cluster play these roles. This must
+ be decided in advance by some other means. Refer to the section
+ "Last man and first man selection" for more explanation.
+
+
+ CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
+ cluster can actually be powered down.
+
+ The parallelism of the inbound and outbound CPUs is observed by
+ the existence of two different paths from CLUSTER_GOING_DOWN/
+ INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
+ model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
+ COMING_UP in the basic model). The second path avoids cluster
+ teardown completely.
+
+ CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
+ model. The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
+ is trivial and merely resets the state machine ready for the
+ next cycle.
+
+ Details of the allowable transitions follow.
+
+ The next state in each case is notated
+
+ <cluster state>/<inbound state> (<transitioner>)
+
+ where the <transitioner> is the side on which the transition
+ can occur; either the inbound or the outbound side.
+
+
+CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
+
+ Next state: CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions: none
+ Trigger events:
+
+ a) an explicit hardware power-up operation, resulting
+ from a policy decision on another CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CLUSTER_DOWN/INBOUND_COMING_UP:
+
+ In this state, an inbound CPU sets up the cluster, including
+ enabling of hardware coherency at the cluster level and any
+ other operations (such as cache invalidation) which are required
+ in order to achieve this.
+
+ The purpose of this state is to do sufficient cluster-level
+ setup to enable other CPUs in the cluster to enter coherency
+ safely.
+
+ Next state: CLUSTER_UP/INBOUND_COMING_UP (inbound)
+ Conditions: cluster-level setup and hardware coherency complete
+ Trigger events: (spontaneous)
+
+
+CLUSTER_UP/INBOUND_COMING_UP:
+
+ Cluster-level setup is complete and hardware coherency is
+ enabled for the cluster. Other CPUs in the cluster can safely
+ enter coherency.
+
+ This is a transient state, leading immediately to
+ CLUSTER_UP/INBOUND_NOT_COMING_UP. All other CPUs on the cluster
+ should consider treat these two states as equivalent.
+
+ Next state: CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
+ Conditions: none
+ Trigger events: (spontaneous)
+
+
+CLUSTER_UP/INBOUND_NOT_COMING_UP:
+
+ Cluster-level setup is complete and hardware coherency is
+ enabled for the cluster. Other CPUs in the cluster can safely
+ enter coherency.
+
+ The cluster will remain in this state until a policy decision is
+ made to power the cluster down.
+
+ Next state: CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions: none
+ Trigger events: policy decision to power down the cluster
+
+
+CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
+
+ An outbound CPU is tearing the cluster down. The selected CPU
+ must wait in this state until all CPUs in the cluster are in the
+ CPU_DOWN state.
+
+ When all CPUs are in the CPU_DOWN state, the cluster can be torn
+ down, for example by cleaning data caches and exiting
+ cluster-level coherency.
+
+ To avoid wasteful unnecessary teardown operations, the outbound
+ should check the inbound cluster state for asynchronous
+ transitions to INBOUND_COMING_UP. Alternatively, individual
+ CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
+
+
+ Next states:
+
+ CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions: cluster torn down and ready to power off
+ Trigger events: (spontaneous)
+
+ CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions: none
+ Trigger events:
+
+ a) an explicit hardware power-up operation,
+ resulting from a policy decision on another
+ CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
+
+ The cluster is (or was) being torn down, but another CPU has
+ come online in the meantime and is trying to set up the cluster
+ again.
+
+ If the outbound CPU observes this state, it has two choices:
+
+ a) back out of teardown, restoring the cluster to the
+ CLUSTER_UP state;
+
+ b) finish tearing the cluster down and put the cluster
+ in the CLUSTER_DOWN state; the inbound CPU will
+ set up the cluster again from there.
+
+ Choice (a) permits the removal of some latency by avoiding
+ unnecessary teardown and setup operations in situations where
+ the cluster is not really going to be powered down.
+
+
+ Next states:
+
+ CLUSTER_UP/INBOUND_COMING_UP (outbound)
+ Conditions: cluster-level setup and hardware
+ coherency complete
+ Trigger events: (spontaneous)
+
+ CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
+ Conditions: cluster torn down and ready to power off
+ Trigger events: (spontaneous)
+
+
+Last man and First man selection
+--------------------------------
+
+The CPU which performs cluster tear-down operations on the outbound side
+is commonly referred to as the "last man".
+
+The CPU which performs cluster setup on the inbound side is commonly
+referred to as the "first man".
+
+The race avoidance algorithm documented above does not provide a
+mechanism to choose which CPUs should play these roles.
+
+
+Last man:
+
+When shutting down the cluster, all the CPUs involved are initially
+executing Linux and hence coherent. Therefore, ordinary spinlocks can
+be used to select a last man safely, before the CPUs become
+non-coherent.
+
+
+First man:
+
+Because CPUs may power up asynchronously in response to external wake-up
+events, a dynamic mechanism is needed to make sure that only one CPU
+attempts to play the first man role and do the cluster-level
+initialisation: any other CPUs must wait for this to complete before
+proceeding.
+
+Cluster-level initialisation may involve actions such as configuring
+coherency controls in the bus fabric.
+
+The current implementation in bL_head.S uses a separate mutual exclusion
+mechanism to do this arbitration. This mechanism is documented in
+detail in vlocks.txt.
+
+
+Features and Limitations
+------------------------
+
+Implementation:
+
+ The current ARM-based implementation is split between
+ arch/arm/common/bL_head.S (low-level inbound CPU operations) and
+ arch/arm/common/bL_entry.c (everything else):
+
+ __bL_cpu_going_down() signals the transition of a CPU to the
+ CPU_GOING_DOWN state.
+
+ __bL_cpu_down() signals the transition of a CPU to the CPU_DOWN
+ state.
+
+ A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
+ low-level power-up code in bL_head.S. This could
+ involve CPU-specific setup code, but in the current
+ implementation it does not.
+
+ __bL_outbound_enter_critical() and __bL_outbound_leave_critical()
+ handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
+ and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
+ the case of an aborted cluster power-down).
+
+ These functions are more complex than the __bL_cpu_*()
+ functions due to the extra inter-CPU coordination which
+ is needed for safe transitions at the cluster level.
+
+ A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
+ the low-level power-up code in bL_head.S. This
+ typically involves platform-specific setup code,
+ provided by the platform-specific power_up_setup
+ function registered via bL_cluster_sync_init.
+
+Deep topologies:
+
+ As currently described and implemented, the algorithm does not
+ support CPU topologies involving more than two levels (i.e.,
+ clusters of clusters are not supported). The algorithm could be
+ extended by replicating the cluster-level states for the
+ additional topological levels, and modifying the transition
+ rules for the intermediate (non-outermost) cluster levels.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, in
+collaboration with Nicolas Pitre and Achin Gupta.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
diff --git a/arch/arm/common/bL_entry.c b/arch/arm/common/bL_entry.c
index 54bf0e572f..14d72b97ad 100644
--- a/arch/arm/common/bL_entry.c
+++ b/arch/arm/common/bL_entry.c
@@ -18,6 +18,7 @@
#include <asm/proc-fns.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
+#include <asm/cputype.h>
extern volatile unsigned long bL_entry_vectors[BL_MAX_CLUSTERS][BL_MAX_CPUS_PER_CLUSTER];
@@ -115,3 +116,199 @@ int bL_cpu_powered_up(void)
platform_ops->powered_up();
return 0;
}
+
+struct bL_sync_struct bL_sync;
+
+/*
+ * There is no __cpuc_clean_dcache_area but we use it anyway for
+ * code clarity, and alias it to __cpuc_flush_dcache_area.
+ */
+#define __cpuc_clean_dcache_area __cpuc_flush_dcache_area
+
+/*
+ * Ensure preceding writes to *p by this CPU are visible to
+ * subsequent reads by other CPUs:
+ */
+static void __sync_range_w(volatile void *p, size_t size)
+{
+ char *_p = (char *)p;
+
+ __cpuc_clean_dcache_area(_p, size);
+ outer_clean_range(__pa(_p), __pa(_p + size));
+}
+
+/*
+ * Ensure preceding writes to *p by other CPUs are visible to
+ * subsequent reads by this CPU. We must be careful not to
+ * discard data simultaneously written by another CPU, hence the
+ * usage of flush rather than invalidate operations.
+ */
+static void __sync_range_r(volatile void *p, size_t size)
+{
+ char *_p = (char *)p;
+
+#ifdef CONFIG_OUTER_CACHE
+ if (outer_cache.flush_range) {
+ /*
+ * Ensure dirty data migrated from other CPUs into our cache
+ * are cleaned out safely before the outer cache is cleaned:
+ */
+ __cpuc_clean_dcache_area(_p, size);
+
+ /* Clean and invalidate stale data for *p from outer ... */
+ outer_flush_range(__pa(_p), __pa(_p + size));
+ }
+#endif
+
+ /* ... and inner cache: */
+ __cpuc_flush_dcache_area(_p, size);
+}
+
+#define sync_w(ptr) __sync_range_w(ptr, sizeof *(ptr))
+#define sync_r(ptr) __sync_range_r(ptr, sizeof *(ptr))
+
+/*
+ * __bL_cpu_going_down: Indicates that the cpu is being torn down.
+ * This must be called at the point of committing to teardown of a CPU.
+ * The CPU cache (SCTRL.C bit) is expected to still be active.
+ */
+void __bL_cpu_going_down(unsigned int cpu, unsigned int cluster)
+{
+ bL_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN;
+ sync_w(&bL_sync.clusters[cluster].cpus[cpu].cpu);
+}
+
+/*
+ * __bL_cpu_down: Indicates that cpu teardown is complete and that the
+ * cluster can be torn down without disrupting this CPU.
+ * To avoid deadlocks, this must be called before a CPU is powered down.
+ * The CPU cache (SCTRL.C bit) is expected to be off.
+ */
+void __bL_cpu_down(unsigned int cpu, unsigned int cluster)
+{
+ dmb();
+ bL_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN;
+ sync_w(&bL_sync.clusters[cluster].cpus[cpu].cpu);
+ dsb_sev();
+}
+
+/*
+ * __bL_outbound_leave_critical: Leave the cluster teardown critical section.
+ * @state: the final state of the cluster:
+ * CLUSTER_UP: no destructive teardown was done and the cluster has been
+ * restored to the previous state (CPU cache still active); or
+ * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off
+ * (CPU cache disabled).
+ */
+void __bL_outbound_leave_critical(unsigned int cluster, int state)
+{
+ dmb();
+ bL_sync.clusters[cluster].cluster = state;
+ sync_w(&bL_sync.clusters[cluster].cluster);
+ dsb_sev();
+}
+
+/*
+ * __bL_outbound_enter_critical: Enter the cluster teardown critical section.
+ * This function should be called by the last man, after local CPU teardown
+ * is complete. CPU cache expected to be active.
+ *
+ * Returns:
+ * false: the critical section was not entered because an inbound CPU was
+ * observed, or the cluster is already being set up;
+ * true: the critical section was entered: it is now safe to tear down the
+ * cluster.
+ */
+bool __bL_outbound_enter_critical(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int i;
+ struct bL_cluster_sync_struct *c = &bL_sync.clusters[cluster];
+
+ /* Warn inbound CPUs that the cluster is being torn down: */
+ c->cluster = CLUSTER_GOING_DOWN;
+ sync_w(&c->cluster);
+
+ /* Back out if the inbound cluster is already in the critical region: */
+ sync_r(&c->inbound);
+ if (c->inbound == INBOUND_COMING_UP)
+ goto abort;
+
+ /*
+ * Wait for all CPUs to get out of the GOING_DOWN state, so that local
+ * teardown is complete on each CPU before tearing down the cluster.
+ *
+ * If any CPU has been woken up again from the DOWN state, then we
+ * shouldn't be taking the cluster down at all: abort in that case.
+ */
+ sync_r(&c->cpus);
+ for (i = 0; i < BL_MAX_CPUS_PER_CLUSTER; i++) {
+ int cpustate;
+
+ if (i == cpu)
+ continue;
+
+ while (1) {
+ cpustate = c->cpus[i].cpu;
+ if (cpustate != CPU_GOING_DOWN)
+ break;
+
+ wfe();
+ sync_r(&c->cpus[i].cpu);
+ }
+
+ switch (cpustate) {
+ case CPU_DOWN:
+ continue;
+
+ default:
+ goto abort;
+ }
+ }
+
+ return true;
+
+abort:
+ __bL_outbound_leave_critical(cluster, CLUSTER_UP);
+ return false;
+}
+
+int __bL_cluster_state(unsigned int cluster)
+{
+ sync_r(&bL_sync.clusters[cluster].cluster);
+ return bL_sync.clusters[cluster].cluster;
+}
+
+extern unsigned long bL_power_up_setup_phys;
+
+int __init bL_cluster_sync_init(
+ void (*power_up_setup)(unsigned int affinity_level))
+{
+ unsigned int i, j, mpidr, this_cluster;
+
+ BUILD_BUG_ON(BL_SYNC_CLUSTER_SIZE * BL_MAX_CLUSTERS != sizeof bL_sync);
+ BUG_ON((unsigned long)&bL_sync & (__CACHE_WRITEBACK_GRANULE - 1));
+
+ /*
+ * Set initial CPU and cluster states.
+ * Only one cluster is assumed to be active at this point.
+ */
+ for (i = 0; i < BL_MAX_CLUSTERS; i++) {
+ bL_sync.clusters[i].cluster = CLUSTER_DOWN;
+ bL_sync.clusters[i].inbound = INBOUND_NOT_COMING_UP;
+ for (j = 0; j < BL_MAX_CPUS_PER_CLUSTER; j++)
+ bL_sync.clusters[i].cpus[j].cpu = CPU_DOWN;
+ }
+ mpidr = read_cpuid_mpidr();
+ this_cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ for_each_online_cpu(i)
+ bL_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP;
+ bL_sync.clusters[this_cluster].cluster = CLUSTER_UP;
+ sync_w(&bL_sync);
+
+ if (power_up_setup) {
+ bL_power_up_setup_phys = virt_to_phys(power_up_setup);
+ sync_w(&bL_power_up_setup_phys);
+ }
+
+ return 0;
+}
diff --git a/arch/arm/common/bL_head.S b/arch/arm/common/bL_head.S
index 072a13da20..a226cdf4ce 100644
--- a/arch/arm/common/bL_head.S
+++ b/arch/arm/common/bL_head.S
@@ -7,11 +7,19 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
+ *
+ *
+ * Refer to Documentation/arm/big.LITTLE/cluster-pm-race-avoidance.txt
+ * for details of the synchronisation algorithms used here.
*/
#include <linux/linkage.h>
#include <asm/bL_entry.h>
+.if BL_SYNC_CLUSTER_CPUS
+.error "cpus must be the first member of struct bL_cluster_sync_struct"
+.endif
+
.macro pr_dbg cpu, string
#if defined(CONFIG_DEBUG_LL) && defined(DEBUG)
b 1901f
@@ -52,24 +60,114 @@ ENTRY(bL_entry_point)
2: pr_dbg r4, "kernel bL_entry_point\n"
/*
- * MMU is off so we need to get to bL_entry_vectors in a
+ * MMU is off so we need to get to various variables in a
* position independent way.
*/
adr r5, 3f
- ldr r6, [r5]
+ ldmia r5, {r6, r7, r8}
add r6, r5, r6 @ r6 = bL_entry_vectors
+ ldr r7, [r5, r7] @ r7 = bL_power_up_setup_phys
+ add r8, r5, r8 @ r8 = bL_sync
+
+ mov r0, #BL_SYNC_CLUSTER_SIZE
+ mla r8, r0, r10, r8 @ r8 = bL_sync cluster base
+
+ @ Signal that this CPU is coming UP:
+ mov r0, #CPU_COMING_UP
+ mov r5, #BL_SYNC_CPU_SIZE
+ mla r5, r9, r5, r8 @ r5 = bL_sync cpu address
+ strb r0, [r5]
+
+ @ At this point, the cluster cannot unexpectedly enter the GOING_DOWN
+ @ state, because there is at least one active CPU (this CPU).
+
+ @ Note: the following is racy as another CPU might be testing
+ @ the same flag at the same moment. That'll be fixed later.
+ ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
+ cmp r0, #CLUSTER_UP @ cluster already up?
+ bne cluster_setup @ if not, set up the cluster
+
+ @ Otherwise, skip setup:
+ b cluster_setup_complete
+
+cluster_setup:
+ @ Control dependency implies strb not observable before previous ldrb.
+
+ @ Signal that the cluster is being brought up:
+ mov r0, #INBOUND_COMING_UP
+ strb r0, [r8, #BL_SYNC_CLUSTER_INBOUND]
+ dmb
+
+ @ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this
+ @ point onwards will observe INBOUND_COMING_UP and abort.
+
+ @ Wait for any previously-pending cluster teardown operations to abort
+ @ or complete:
+cluster_teardown_wait:
+ ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
+ cmp r0, #CLUSTER_GOING_DOWN
+ bne first_man_setup
+ wfe
+ b cluster_teardown_wait
+
+first_man_setup:
+ dmb
+
+ @ If the outbound gave up before teardown started, skip cluster setup:
+
+ cmp r0, #CLUSTER_UP
+ beq cluster_setup_leave
+
+ @ power_up_setup is now responsible for setting up the cluster:
+
+ cmp r7, #0
+ mov r0, #1 @ second (cluster) affinity level
+ blxne r7 @ Call power_up_setup if defined
+ dmb
+
+ mov r0, #CLUSTER_UP
+ strb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
+ dmb
+
+cluster_setup_leave:
+ @ Leave the cluster setup critical section:
+
+ mov r0, #INBOUND_NOT_COMING_UP
+ strb r0, [r8, #BL_SYNC_CLUSTER_INBOUND]
+ dsb
+ sev
+
+cluster_setup_complete:
+ @ If a platform-specific CPU setup hook is needed, it is
+ @ called from here.
+
+ cmp r7, #0
+ mov r0, #0 @ first (CPU) affinity level
+ blxne r7 @ Call power_up_setup if defined
+ dmb
+
+ @ Mark the CPU as up:
+
+ mov r0, #CPU_UP
+ strb r0, [r5]
+
+ @ Observability order of CPU_UP and opening of the gate does not matter.
bL_entry_gated:
ldr r5, [r6, r4, lsl #2] @ r5 = CPU entry vector
cmp r5, #0
wfeeq
beq bL_entry_gated
+ dmb
+
pr_dbg r4, "released\n"
bx r5
.align 2
3: .word bL_entry_vectors - .
+ .word bL_power_up_setup_phys - 3b
+ .word bL_sync - 3b
ENDPROC(bL_entry_point)
@@ -79,3 +177,7 @@ ENDPROC(bL_entry_point)
.type bL_entry_vectors, #object
ENTRY(bL_entry_vectors)
.space 4 * BL_MAX_CLUSTERS * BL_MAX_CPUS_PER_CLUSTER
+
+ .type bL_power_up_setup_phys, #object
+ENTRY(bL_power_up_setup_phys)
+ .space 4 @ set by bL_cluster_sync_init()
diff --git a/arch/arm/include/asm/bL_entry.h b/arch/arm/include/asm/bL_entry.h
index adf8706c76..0736575783 100644
--- a/arch/arm/include/asm/bL_entry.h
+++ b/arch/arm/include/asm/bL_entry.h
@@ -15,8 +15,37 @@
#define BL_MAX_CPUS_PER_CLUSTER 4
#define BL_MAX_CLUSTERS 2
+/* Definitions for bL_cluster_sync_struct */
+#define CPU_DOWN 0x11
+#define CPU_COMING_UP 0x12
+#define CPU_UP 0x13
+#define CPU_GOING_DOWN 0x14
+
+#define CLUSTER_DOWN 0x21
+#define CLUSTER_UP 0x22
+#define CLUSTER_GOING_DOWN 0x23
+
+#define INBOUND_NOT_COMING_UP 0x31
+#define INBOUND_COMING_UP 0x32
+
+/* This is a complete guess. */
+#define __CACHE_WRITEBACK_ORDER 6
+#define __CACHE_WRITEBACK_GRANULE (1 << __CACHE_WRITEBACK_ORDER)
+
+/* Offsets for the bL_cluster_sync_struct members, for use in asm: */
+#define BL_SYNC_CLUSTER_CPUS 0
+#define BL_SYNC_CPU_SIZE __CACHE_WRITEBACK_GRANULE
+#define BL_SYNC_CLUSTER_CLUSTER \
+ (BL_SYNC_CLUSTER_CPUS + BL_SYNC_CPU_SIZE * BL_MAX_CPUS_PER_CLUSTER)
+#define BL_SYNC_CLUSTER_INBOUND \
+ (BL_SYNC_CLUSTER_CLUSTER + __CACHE_WRITEBACK_GRANULE)
+#define BL_SYNC_CLUSTER_SIZE \
+ (BL_SYNC_CLUSTER_INBOUND + __CACHE_WRITEBACK_GRANULE)
+
#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
/*
* Platform specific code should use this symbol to set up secondary
* entry location for processors to use when released from reset.
@@ -123,5 +152,39 @@ struct bL_platform_power_ops {
*/
int __init bL_platform_power_register(const struct bL_platform_power_ops *ops);
+/* Synchronisation structures for coordinating safe cluster setup/teardown: */
+
+/*
+ * When modifying this structure, make sure you update the BL_SYNC_ defines
+ * to match.
+ */
+struct bL_cluster_sync_struct {
+ /* individual CPU states */
+ struct {
+ volatile s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE);
+ } cpus[BL_MAX_CPUS_PER_CLUSTER];
+
+ /* cluster state */
+ volatile s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE);
+
+ /* inbound-side state */
+ volatile s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE);
+};
+
+struct bL_sync_struct {
+ struct bL_cluster_sync_struct clusters[BL_MAX_CLUSTERS];
+};
+
+extern unsigned long bL_sync_phys; /* physical address of *bL_sync */
+
+void __bL_cpu_going_down(unsigned int cpu, unsigned int cluster);
+void __bL_cpu_down(unsigned int cpu, unsigned int cluster);
+void __bL_outbound_leave_critical(unsigned int cluster, int state);
+bool __bL_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster);
+int __bL_cluster_state(unsigned int cluster);
+
+int __init bL_cluster_sync_init(
+ void (*power_up_setup)(unsigned int affinity_level));
+
#endif /* ! __ASSEMBLY__ */
#endif
--
1.8.0
^ permalink raw reply related
* [PATCH v2 05/16] ARM: b.L: Add baremetal voting mutexes
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Dave Martin <dave.martin@linaro.org>
This patch adds a simple low-level voting mutex implementation
to be used to arbitrate during first man selection when no load/store
exclusive instructions are usable.
For want of a better name, these are called "vlocks". (I was
tempted to call them ballot locks, but "block" is way too confusing
an abbreviation...)
There is no function to wait for the lock to be released, and no
vlock_lock() function since we don't need these at the moment.
These could straightforwardly be added if vlocks get used for other
purposes.
For architectural correctness even Strongly-Ordered memory accesses
require barriers in order to guarantee that multiple CPUs have a
coherent view of the ordering of memory accesses. Whether or not
this matters depends on hardware implementation details of the
memory system. Since the purpose of this code is to provide a clean,
generic locking mechanism with no platform-specific dependencies the
barriers should be present to avoid unpleasant surprises on future
platforms.
Note:
* When taking the lock, we don't care about implicit background
memory operations and other signalling which may be pending,
because those are not part of the critical section anyway.
A DMB is sufficient to ensure correctly observed ordering if
the explicit memory accesses in vlock_trylock.
* No barrier is required after checking the election result,
because the result is determined by the store to
VLOCK_OWNER_OFFSET and is already globally observed due to the
barriers in voting_end. This means that global agreement on
the winner is guaranteed, even before the winner is known
locally.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
Documentation/arm/big.LITTLE/vlocks.txt | 211 ++++++++++++++++++++++++++++++++
arch/arm/common/vlock.S | 108 ++++++++++++++++
arch/arm/common/vlock.h | 28 +++++
3 files changed, 347 insertions(+)
create mode 100644 Documentation/arm/big.LITTLE/vlocks.txt
create mode 100644 arch/arm/common/vlock.S
create mode 100644 arch/arm/common/vlock.h
diff --git a/Documentation/arm/big.LITTLE/vlocks.txt b/Documentation/arm/big.LITTLE/vlocks.txt
new file mode 100644
index 0000000000..415960a9ba
--- /dev/null
+++ b/Documentation/arm/big.LITTLE/vlocks.txt
@@ -0,0 +1,211 @@
+vlocks for Bare-Metal Mutual Exclusion
+======================================
+
+Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
+mechanism, with reasonable but minimal requirements on the memory
+system.
+
+These are intended to be used to coordinate critical activity among CPUs
+which are otherwise non-coherent, in situations where the hardware
+provides no other mechanism to support this and ordinary spinlocks
+cannot be used.
+
+
+vlocks make use of the atomicity provided by the memory system for
+writes to a single memory location. To arbitrate, every CPU "votes for
+itself", by storing a unique number to a common memory location. The
+final value seen in that memory location when all the votes have been
+cast identifies the winner.
+
+In order to make sure that the election produces an unambiguous result
+in finite time, a CPU will only enter the election in the first place if
+no winner has been chosen and the election does not appear to have
+started yet.
+
+
+Algorithm
+---------
+
+The easiest way to explain the vlocks algorithm is with some pseudo-code:
+
+
+ int currently_voting[NR_CPUS] = { 0, };
+ int last_vote = -1; /* no votes yet */
+
+ bool vlock_trylock(int this_cpu)
+ {
+ /* signal our desire to vote */
+ currently_voting[this_cpu] = 1;
+ if (last_vote != -1) {
+ /* someone already volunteered himself */
+ currently_voting[this_cpu] = 0;
+ return false; /* not ourself */
+ }
+
+ /* let's suggest ourself */
+ last_vote = this_cpu;
+ currently_voting[this_cpu] = 0;
+
+ /* then wait until everyone else is done voting */
+ for_each_cpu(i) {
+ while (currently_voting[i] != 0)
+ /* wait */;
+ }
+
+ /* result */
+ if (last_vote == this_cpu)
+ return true; /* we won */
+ return false;
+ }
+
+ bool vlock_unlock(void)
+ {
+ last_vote = -1;
+ }
+
+
+The currently_voting[] array provides a way for the CPUs to determine
+whether an election is in progress, and plays a role analogous to the
+"entering" array in Lamport's bakery algorithm [1].
+
+However, once the election has started, the underlying memory system
+atomicity is used to pick the winner. This avoids the need for a static
+priority rule to act as a tie-breaker, or any counters which could
+overflow.
+
+As long as the last_vote variable is globally visible to all CPUs, it
+will contain only one value that won't change once every CPU has cleared
+its currently_voting flag.
+
+
+Features and limitations
+------------------------
+
+ * vlocks are not intended to be fair. In the contended case, it is the
+ _last_ CPU which attempts to get the lock which will be most likely
+ to win.
+
+ vlocks are therefore best suited to situations where it is necessary
+ to pick a unique winner, but it does not matter which CPU actually
+ wins.
+
+ * Like other similar mechanisms, vlocks will not scale well to a large
+ number of CPUs.
+
+ vlocks can be cascaded in a voting hierarchy to permit better scaling
+ if necessary, as in the following hypothetical example for 4096 CPUs:
+
+ /* first level: local election */
+ my_town = towns[(this_cpu >> 4) & 0xf];
+ I_won = vlock_trylock(my_town, this_cpu & 0xf);
+ if (I_won) {
+ /* we won the town election, let's go for the state */
+ my_state = states[(this_cpu >> 8) & 0xf];
+ I_won = vlock_lock(my_state, this_cpu & 0xf));
+ if (I_won) {
+ /* and so on */
+ I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
+ if (I_won) {
+ /* ... */
+ }
+ vlock_unlock(the_whole_country);
+ }
+ vlock_unlock(my_state);
+ }
+ vlock_unlock(my_town);
+
+
+ARM implementation
+------------------
+
+The current ARM implementation [2] contains some optimisations beyond
+the basic algorithm:
+
+ * By packing the members of the currently_voting array close together,
+ we can read the whole array in one transaction (providing the number
+ of CPUs potentially contending the lock is small enough). This
+ reduces the number of round-trips required to external memory.
+
+ In the ARM implementation, this means that we can use a single load
+ and comparison:
+
+ LDR Rt, [Rn]
+ CMP Rt, #0
+
+ ...in place of code equivalent to:
+
+ LDRB Rt, [Rn]
+ CMP Rt, #0
+ LDRBEQ Rt, [Rn, #1]
+ CMPEQ Rt, #0
+ LDRBEQ Rt, [Rn, #2]
+ CMPEQ Rt, #0
+ LDRBEQ Rt, [Rn, #3]
+ CMPEQ Rt, #0
+
+ This cuts down on the fast-path latency, as well as potentially
+ reducing bus contention in contended cases.
+
+ The optimisation relies on the fact that the ARM memory system
+ guarantees coherency between overlapping memory accesses of
+ different sizes, similarly to many other architectures. Note that
+ we do not care which element of currently_voting appears in which
+ bits of Rt, so there is no need to worry about endianness in this
+ optimisation.
+
+ If there are too many CPUs to read the currently_voting array in
+ one transaction then multiple transations are still required. The
+ implementation uses a simple loop of word-sized loads for this
+ case. The number of transactions is still fewer than would be
+ required if bytes were loaded individually.
+
+
+ In principle, we could aggregate further by using LDRD or LDM, but
+ to keep the code simple this was not attempted in the initial
+ implementation.
+
+
+ * vlocks are currently only used to coordinate between CPUs which are
+ unable to enable their caches yet. This means that the
+ implementation removes many of the barriers which would be required
+ when executing the algorithm in cached memory.
+
+ packing of the currently_voting array does not work with cached
+ memory unless all CPUs contending the lock are cache-coherent, due
+ to cache writebacks from one CPU clobbering values written by other
+ CPUs. (Though if all the CPUs are cache-coherent, you should be
+ probably be using proper spinlocks instead anyway).
+
+
+ * The "no votes yet" value used for the last_vote variable is 0 (not
+ -1 as in the pseudocode). This allows statically-allocated vlocks
+ to be implicitly initialised to an unlocked state simply by putting
+ them in .bss.
+
+ An offset is added to each CPU's ID for the purpose of setting this
+ variable, so that no CPU uses the value 0 for its ID.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, for
+use in ARM-based big.LITTLE platforms, with review and input gratefully
+received from Nicolas Pitre and Achin Gupta. Thanks to Nicolas for
+grabbing most of this text out of the relevant mail thread and writing
+up the pseudocode.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
+
+
+References
+----------
+
+[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
+ Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
+
+ http://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
+
+[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S
new file mode 100644
index 0000000000..9109825606
--- /dev/null
+++ b/arch/arm/common/vlock.S
@@ -0,0 +1,108 @@
+/*
+ * vlock.S - simple voting lock implementation for ARM
+ *
+ * Created by: Dave Martin, 2012-08-16
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ *
+ * This algorithm is described in more detail in
+ * Documentation/arm/big.LITTLE/vlocks.txt.
+ */
+
+#include <linux/linkage.h>
+#include "vlock.h"
+
+/* Select different code if voting flags can fit in a single word. */
+#if VLOCK_VOTING_SIZE > 4
+#define FEW(x...)
+#define MANY(x...) x
+#else
+#define FEW(x...) x
+#define MANY(x...)
+#endif
+
+@ voting lock for first-man coordination
+
+.macro voting_begin rbase:req, rcpu:req, rscratch:req
+ mov \rscratch, #1
+ strb \rscratch, [\rbase, \rcpu]
+ dmb
+.endm
+
+.macro voting_end rbase:req, rcpu:req, rscratch:req
+ dmb
+ mov \rscratch, #0
+ strb \rscratch, [\rbase, \rcpu]
+ dsb
+ sev
+.endm
+
+/*
+ * The vlock structure must reside in Strongly-Ordered or Device memory.
+ * This implementation deliberately eliminates most of the barriers which
+ * would be required for other memory types, and assumes that independent
+ * writes to neighbouring locations within a cacheline do not interfere
+ * with one another.
+ */
+
+@ r0: lock structure base
+@ r1: CPU ID (0-based index within cluster)
+ENTRY(vlock_trylock)
+ add r1, r1, #VLOCK_VOTING_OFFSET
+
+ voting_begin r0, r1, r2
+
+ ldrb r2, [r0, #VLOCK_OWNER_OFFSET] @ check whether lock is held
+ cmp r2, #VLOCK_OWNER_NONE
+ bne trylock_fail @ fail if so
+
+ @ Control dependency implies strb not observable before previous ldrb.
+
+ strb r1, [r0, #VLOCK_OWNER_OFFSET] @ submit my vote
+
+ voting_end r0, r1, r2 @ implies DMB
+
+ @ Wait for the current round of voting to finish:
+
+ MANY( mov r3, #VLOCK_VOTING_OFFSET )
+0:
+ MANY( ldr r2, [r0, r3] )
+ FEW( ldr r2, [r0, #VLOCK_VOTING_OFFSET] )
+ cmp r2, #0
+ wfene
+ bne 0b
+ MANY( add r3, r3, #4 )
+ MANY( cmp r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE )
+ MANY( bne 0b )
+
+ @ Check who won:
+
+ dmb
+ ldrb r2, [r0, #VLOCK_OWNER_OFFSET]
+ eor r0, r1, r2 @ zero if I won, else nonzero
+ bx lr
+
+trylock_fail:
+ voting_end r0, r1, r2
+ mov r0, #1 @ nonzero indicates that I lost
+ bx lr
+ENDPROC(vlock_trylock)
+
+@ r0: lock structure base
+ENTRY(vlock_unlock)
+ dmb
+ mov r1, #VLOCK_OWNER_NONE
+ strb r1, [r0, #VLOCK_OWNER_OFFSET]
+ dsb
+ sev
+ bx lr
+ENDPROC(vlock_unlock)
diff --git a/arch/arm/common/vlock.h b/arch/arm/common/vlock.h
new file mode 100644
index 0000000000..bd4d649af2
--- /dev/null
+++ b/arch/arm/common/vlock.h
@@ -0,0 +1,28 @@
+/*
+ * vlock.h - simple voting lock implementation
+ *
+ * Created by: Dave Martin, 2012-08-16
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __VLOCK_H
+#define __VLOCK_H
+
+#include <asm/bL_entry.h>
+
+#define VLOCK_OWNER_OFFSET 0
+#define VLOCK_VOTING_OFFSET 4
+#define VLOCK_VOTING_SIZE ((BL_MAX_CPUS_PER_CLUSTER + 3) / 4 * 4)
+#define VLOCK_SIZE (VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE)
+#define VLOCK_OWNER_NONE 0
+
+#endif /* ! __VLOCK_H */
--
1.8.0
^ permalink raw reply related
* [PATCH v2 06/16] ARM: bL_head.S: vlock-based first man election
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Dave Martin <dave.martin@linaro.org>
Instead of requiring the first man to be elected in advance (which
can be suboptimal in some situations), this patch uses a per-
cluster mutex to co-ordinate selection of the first man.
This should also make it more feasible to reuse this code path for
asynchronous cluster resume (as in CPUidle scenarios).
We must ensure that the vlock data doesn't share a cacheline with
anything else, or dirty cache eviction could corrupt it.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nicolas.pitre@linaro.org>
---
arch/arm/common/Makefile | 2 +-
arch/arm/common/bL_head.S | 41 ++++++++++++++++++++++++++++++++++++-----
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 8025899a20..aa797237a7 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -13,4 +13,4 @@ obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o
obj-$(CONFIG_SHARP_SCOOP) += scoop.o
obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o
obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
-obj-$(CONFIG_BIG_LITTLE) += bL_head.o bL_entry.o
+obj-$(CONFIG_BIG_LITTLE) += bL_head.o bL_entry.o vlock.o
diff --git a/arch/arm/common/bL_head.S b/arch/arm/common/bL_head.S
index a226cdf4ce..86bcc8a003 100644
--- a/arch/arm/common/bL_head.S
+++ b/arch/arm/common/bL_head.S
@@ -16,6 +16,8 @@
#include <linux/linkage.h>
#include <asm/bL_entry.h>
+#include "vlock.h"
+
.if BL_SYNC_CLUSTER_CPUS
.error "cpus must be the first member of struct bL_cluster_sync_struct"
.endif
@@ -64,10 +66,11 @@ ENTRY(bL_entry_point)
* position independent way.
*/
adr r5, 3f
- ldmia r5, {r6, r7, r8}
+ ldmia r5, {r6, r7, r8, r11}
add r6, r5, r6 @ r6 = bL_entry_vectors
ldr r7, [r5, r7] @ r7 = bL_power_up_setup_phys
add r8, r5, r8 @ r8 = bL_sync
+ add r11, r5, r11 @ r11 = first_man_locks
mov r0, #BL_SYNC_CLUSTER_SIZE
mla r8, r0, r10, r8 @ r8 = bL_sync cluster base
@@ -81,13 +84,22 @@ ENTRY(bL_entry_point)
@ At this point, the cluster cannot unexpectedly enter the GOING_DOWN
@ state, because there is at least one active CPU (this CPU).
- @ Note: the following is racy as another CPU might be testing
- @ the same flag at the same moment. That'll be fixed later.
+ mov r0, #VLOCK_SIZE
+ mla r11, r0, r10, r11 @ r11 = cluster first man lock
+ mov r0, r11
+ mov r1, r9 @ cpu
+ bl vlock_trylock @ implies DMB
+
+ cmp r0, #0 @ failed to get the lock?
+ bne cluster_setup_wait @ wait for cluster setup if so
+
ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
cmp r0, #CLUSTER_UP @ cluster already up?
bne cluster_setup @ if not, set up the cluster
- @ Otherwise, skip setup:
+ @ Otherwise, release the first man lock and skip setup:
+ mov r0, r11
+ bl vlock_unlock
b cluster_setup_complete
cluster_setup:
@@ -137,6 +149,19 @@ cluster_setup_leave:
dsb
sev
+ mov r0, r11
+ bl vlock_unlock @ implies DMB
+ b cluster_setup_complete
+
+ @ In the contended case, non-first men wait here for cluster setup
+ @ to complete:
+cluster_setup_wait:
+ ldrb r0, [r8, #BL_SYNC_CLUSTER_CLUSTER]
+ cmp r0, #CLUSTER_UP
+ wfene
+ bne cluster_setup_wait
+ dmb
+
cluster_setup_complete:
@ If a platform-specific CPU setup hook is needed, it is
@ called from here.
@@ -168,11 +193,17 @@ bL_entry_gated:
3: .word bL_entry_vectors - .
.word bL_power_up_setup_phys - 3b
.word bL_sync - 3b
+ .word first_man_locks - 3b
ENDPROC(bL_entry_point)
.bss
- .align 5
+
+ .align __CACHE_WRITEBACK_ORDER
+ .type first_man_locks, #object
+first_man_locks:
+ .space VLOCK_SIZE * BL_MAX_CLUSTERS
+ .align __CACHE_WRITEBACK_ORDER
.type bL_entry_vectors, #object
ENTRY(bL_entry_vectors)
--
1.8.0
^ permalink raw reply related
* [PATCH v2 07/16] ARM: b.L: generic SMP secondary bringup and hotplug support
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
Now that the b.L power API is in place, we can use it for SMP secondary
bringup and CPU hotplug in a generic fashion.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/Makefile | 2 +-
arch/arm/common/bL_platsmp.c | 79 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+), 1 deletion(-)
create mode 100644 arch/arm/common/bL_platsmp.c
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index aa797237a7..d1eaae8eb7 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -13,4 +13,4 @@ obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o
obj-$(CONFIG_SHARP_SCOOP) += scoop.o
obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o
obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
-obj-$(CONFIG_BIG_LITTLE) += bL_head.o bL_entry.o vlock.o
+obj-$(CONFIG_BIG_LITTLE) += bL_head.o bL_entry.o bL_platsmp.o vlock.o
diff --git a/arch/arm/common/bL_platsmp.c b/arch/arm/common/bL_platsmp.c
new file mode 100644
index 0000000000..e28dd50640
--- /dev/null
+++ b/arch/arm/common/bL_platsmp.c
@@ -0,0 +1,79 @@
+/*
+ * linux/arch/arm/mach-vexpress/bL_platsmp.c
+ *
+ * Created by: Nicolas Pitre, November 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Code to handle secondary CPU bringup and hotplug for the bL power API.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/bL_entry.h>
+#include <asm/smp_plat.h>
+#include <asm/hardware/gic.h>
+
+static void __init simple_smp_init_cpus(void)
+{
+ set_smp_cross_call(gic_raise_softirq);
+}
+
+static int __cpuinit bL_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+ unsigned int pcpu, pcluster, ret;
+ extern void secondary_startup(void);
+
+ pcpu = cpu_logical_map(cpu) & 0xff;
+ pcluster = (cpu_logical_map(cpu) >> 8) & 0xff;
+ pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n",
+ __func__, cpu, pcpu, pcluster);
+
+ bL_set_entry_vector(pcpu, pcluster, NULL);
+ ret = bL_cpu_power_up(pcpu, pcluster);
+ if (ret)
+ return ret;
+ bL_set_entry_vector(pcpu, pcluster, secondary_startup);
+ gic_raise_softirq(cpumask_of(cpu), 0);
+ dsb_sev();
+ return 0;
+}
+
+static void __cpuinit bL_secondary_init(unsigned int cpu)
+{
+ bL_cpu_powered_up();
+ gic_secondary_init(0);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int bL_cpu_disable(unsigned int cpu)
+{
+ /*
+ * We assume all CPUs may be shut down.
+ * This would be the hook to use for eventual Secure
+ * OS migration requests.
+ */
+ return 0;
+}
+
+static void __ref bL_cpu_die(unsigned int cpu)
+{
+ bL_cpu_power_down();
+}
+
+#endif
+
+struct smp_operations __initdata bL_smp_ops = {
+ .smp_init_cpus = simple_smp_init_cpus,
+ .smp_boot_secondary = bL_boot_secondary,
+ .smp_secondary_init = bL_secondary_init,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_disable = bL_cpu_disable,
+ .cpu_die = bL_cpu_die,
+#endif
+};
--
1.8.0
^ permalink raw reply related
* [PATCH v2 08/16] ARM: bL_platsmp.c: close the kernel entry gate before hot-unplugging a CPU
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
If for whatever reason a CPU is unexpectedly awaken, it shouldn't
re-enter the kernel by using whatever entry vector that might have
been set by a previous operation.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/common/bL_platsmp.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/arm/common/bL_platsmp.c b/arch/arm/common/bL_platsmp.c
index e28dd50640..c3835b0e29 100644
--- a/arch/arm/common/bL_platsmp.c
+++ b/arch/arm/common/bL_platsmp.c
@@ -63,6 +63,11 @@ static int bL_cpu_disable(unsigned int cpu)
static void __ref bL_cpu_die(unsigned int cpu)
{
+ unsigned int mpidr, pcpu, pcluster;
+ mpidr = read_cpuid_mpidr();
+ pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ bL_set_entry_vector(pcpu, pcluster, NULL);
bL_cpu_power_down();
}
--
1.8.0
^ permalink raw reply related
* [PATCH v2 09/16] ARM: vexpress: Select the correct SMP operations at run-time
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Jon Medhurst <tixy@linaro.org>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
arch/arm/include/asm/mach/arch.h | 3 +++
arch/arm/kernel/setup.c | 5 ++++-
arch/arm/mach-vexpress/core.h | 2 ++
arch/arm/mach-vexpress/platsmp.c | 12 ++++++++++++
arch/arm/mach-vexpress/v2m.c | 2 +-
5 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 917d4fcfd9..3d01c6d6c3 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -17,8 +17,10 @@ struct pt_regs;
struct smp_operations;
#ifdef CONFIG_SMP
#define smp_ops(ops) (&(ops))
+#define smp_init_ops(ops) (&(ops))
#else
#define smp_ops(ops) (struct smp_operations *)NULL
+#define smp_init_ops(ops) (void (*)(void))NULL
#endif
struct machine_desc {
@@ -42,6 +44,7 @@ struct machine_desc {
unsigned char reserve_lp2 :1; /* never has lp2 */
char restart_mode; /* default restart mode */
struct smp_operations *smp; /* SMP operations */
+ void (*smp_init)(void);
void (*fixup)(struct tag *, char **,
struct meminfo *);
void (*reserve)(void);/* reserve mem blocks */
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 3f6cbb2e3e..41edca8582 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -768,7 +768,10 @@ void __init setup_arch(char **cmdline_p)
arm_dt_init_cpu_maps();
#ifdef CONFIG_SMP
if (is_smp()) {
- smp_set_ops(mdesc->smp);
+ if(mdesc->smp_init)
+ (*mdesc->smp_init)();
+ else
+ smp_set_ops(mdesc->smp);
smp_init_cpus();
}
#endif
diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index f134cd4a85..3a761fd76c 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -6,6 +6,8 @@
void vexpress_dt_smp_map_io(void);
+void vexpress_smp_init_ops(void);
+
extern struct smp_operations vexpress_smp_ops;
extern void vexpress_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index c5d70de9bb..e62a08b561 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/vexpress.h>
@@ -206,3 +207,14 @@ struct smp_operations __initdata vexpress_smp_ops = {
.cpu_die = vexpress_cpu_die,
#endif
};
+
+void __init vexpress_smp_init_ops(void)
+{
+ struct smp_operations *ops = &vexpress_smp_ops;
+#ifdef CONFIG_BIG_LITTLE
+ extern struct smp_operations bL_smp_ops;
+ if(of_find_compatible_node(NULL, NULL, "arm,cci"))
+ ops = &bL_smp_ops;
+#endif
+ smp_set_ops(ops);
+}
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 011661a6c5..34172bd504 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -494,7 +494,7 @@ static const char * const v2m_dt_match[] __initconst = {
DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
.dt_compat = v2m_dt_match,
- .smp = smp_ops(vexpress_smp_ops),
+ .smp_init = smp_init_ops(vexpress_smp_init_ops),
.map_io = v2m_dt_map_io,
.init_early = v2m_dt_init_early,
.init_irq = v2m_dt_init_irq,
--
1.8.0
^ permalink raw reply related
* [PATCH v2 10/16] ARM: vexpress: introduce DCSCB support
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
This adds basic CPU and cluster reset controls on RTSM for the
A15x4-A7x4 model configuration using the Dual Cluster System
Configuration Block (DCSCB).
The cache coherency interconnect (CCI) is not handled yet.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/mach-vexpress/Kconfig | 8 ++
arch/arm/mach-vexpress/Makefile | 1 +
arch/arm/mach-vexpress/dcscb.c | 159 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 168 insertions(+)
create mode 100644 arch/arm/mach-vexpress/dcscb.c
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 52d315b792..50d6587d0c 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -52,4 +52,12 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
config ARCH_VEXPRESS_CA9X4
bool "Versatile Express Cortex-A9x4 tile"
+config ARCH_VEXPRESS_DCSCB
+ bool "Dual Cluster System Control Block (DCSCB) support"
+ depends on BIG_LITTLE
+ help
+ Support for the Dual Cluster System Configuration Block (DCSCB).
+ This is needed to provide CPU and cluster power management
+ on RTSM.
+
endmenu
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 80b64971fb..2253644054 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,5 +6,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
obj-y := v2m.o reset.o
obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o
obj-$(CONFIG_SMP) += platsmp.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
new file mode 100644
index 0000000000..220b990edd
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -0,0 +1,159 @@
+/*
+ * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Control Block
+ *
+ * Created by: Nicolas Pitre, May 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/vexpress.h>
+
+#include <asm/bL_entry.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+
+#define DCSCB_PHYS_BASE 0x60000000
+
+#define RST_HOLD0 0x0
+#define RST_HOLD1 0x4
+#define SYS_SWRESET 0x8
+#define RST_STAT0 0xc
+#define RST_STAT1 0x10
+#define EAG_CFG_R 0x20
+#define EAG_CFG_W 0x24
+#define KFC_CFG_R 0x28
+#define KFC_CFG_W 0x2c
+#define DCS_CFG_R 0x30
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static void __iomem *dcscb_base;
+
+static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int rst_hold, cpumask = (1 << cpu);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ if (cpu >= 4 || cluster >= 2)
+ return -EINVAL;
+
+ /*
+ * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+ * variant exists, we need to disable IRQs manually here.
+ */
+ local_irq_disable();
+ arch_spin_lock(&dcscb_lock);
+
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ if (rst_hold & (1 << 8)) {
+ /* remove cluster reset and add individual CPU's reset */
+ rst_hold &= ~(1 << 8);
+ rst_hold |= 0xf;
+ }
+ rst_hold &= ~(cpumask | (cpumask << 4));
+ writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+
+ arch_spin_unlock(&dcscb_lock);
+ local_irq_enable();
+
+ return 0;
+}
+
+static void dcscb_power_down(void)
+{
+ unsigned int mpidr, cpu, cluster, rst_hold, cpumask, last_man;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ cpumask = (1 << cpu);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cpu >= 4 || cluster >= 2);
+
+ arch_spin_lock(&dcscb_lock);
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ rst_hold |= cpumask;
+ if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf)
+ rst_hold |= (1 << 8);
+ writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ arch_spin_unlock(&dcscb_lock);
+ last_man = (rst_hold & (1 << 8));
+
+ /*
+ * Now let's clean our L1 cache and shut ourself down.
+ * If we're the last CPU in this cluster then clean L2 too.
+ */
+
+ /*
+ * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+ * a preliminary flush here for those CPUs. At least, that's
+ * the theory -- without the extra flush, Linux explodes on
+ * RTSM (maybe not needed anymore, to be investigated)..
+ */
+ flush_cache_louis();
+ cpu_proc_fin();
+
+ if (!last_man) {
+ flush_cache_louis();
+ } else {
+ flush_cache_all();
+ outer_flush_all();
+ }
+
+ /* Disable local coherency by clearing the ACTLR "SMP" bit: */
+ set_auxcr(get_auxcr() & ~(1 << 6));
+
+ /* Now we are prepared for power-down, do it: */
+ dsb();
+ wfi();
+
+ /* Not dead@this point? Let our caller cope. */
+}
+
+static const struct bL_platform_power_ops dcscb_power_ops = {
+ .power_up = dcscb_power_up,
+ .power_down = dcscb_power_down,
+};
+
+static int __init dcscb_init(void)
+{
+ int ret;
+
+ dcscb_base = ioremap(DCSCB_PHYS_BASE, 0x1000);
+ if (!dcscb_base)
+ return -ENOMEM;
+
+ ret = bL_platform_power_register(&dcscb_power_ops);
+ if (ret) {
+ iounmap(dcscb_base);
+ return ret;
+ }
+
+ /*
+ * Future entries into the kernel can now go
+ * through the b.L entry vectors.
+ */
+ vexpress_flags_set(virt_to_phys(bL_entry_point));
+
+ return 0;
+}
+
+early_initcall(dcscb_init);
--
1.8.0
^ permalink raw reply related
* [PATCH v2 11/16] ARM: vexpress/dcscb: add CPU use counts to the power up/down API implementation
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
It is possible for a CPU to be told to power up before it managed
to power itself down. Solve this race with a usage count as mandated
by the API definition.
Signed-off-by: nicolas Pitre <nico@linaro.org>
---
arch/arm/mach-vexpress/dcscb.c | 77 +++++++++++++++++++++++++++++++++---------
1 file changed, 61 insertions(+), 16 deletions(-)
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index 220b990edd..cde9d3a8d2 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -45,6 +45,7 @@
static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static void __iomem *dcscb_base;
+static int dcscb_use_count[4][2];
static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
{
@@ -61,14 +62,27 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
local_irq_disable();
arch_spin_lock(&dcscb_lock);
- rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
- if (rst_hold & (1 << 8)) {
- /* remove cluster reset and add individual CPU's reset */
- rst_hold &= ~(1 << 8);
- rst_hold |= 0xf;
+ dcscb_use_count[cpu][cluster]++;
+ if (dcscb_use_count[cpu][cluster] == 1) {
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ if (rst_hold & (1 << 8)) {
+ /* remove cluster reset and add individual CPU's reset */
+ rst_hold &= ~(1 << 8);
+ rst_hold |= 0xf;
+ }
+ rst_hold &= ~(cpumask | (cpumask << 4));
+ writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ } else if (dcscb_use_count[cpu][cluster] != 2) {
+ /*
+ * The only possible values are:
+ * 0 = CPU down
+ * 1 = CPU (still) up
+ * 2 = CPU requested to be up before it had a chance
+ * to actually make itself down.
+ * Any other value is a bug.
+ */
+ BUG();
}
- rst_hold &= ~(cpumask | (cpumask << 4));
- writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
arch_spin_unlock(&dcscb_lock);
local_irq_enable();
@@ -78,7 +92,8 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
static void dcscb_power_down(void)
{
- unsigned int mpidr, cpu, cluster, rst_hold, cpumask, last_man;
+ unsigned int mpidr, cpu, cluster, rst_hold, cpumask;
+ bool last_man = false, skip_wfi = false;
mpidr = read_cpuid_mpidr();
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
@@ -89,13 +104,26 @@ static void dcscb_power_down(void)
BUG_ON(cpu >= 4 || cluster >= 2);
arch_spin_lock(&dcscb_lock);
- rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
- rst_hold |= cpumask;
- if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf)
- rst_hold |= (1 << 8);
- writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ dcscb_use_count[cpu][cluster]--;
+ if (dcscb_use_count[cpu][cluster] == 0) {
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ rst_hold |= cpumask;
+ if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf) {
+ rst_hold |= (1 << 8);
+ last_man = true;
+ }
+ writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ } else if (dcscb_use_count[cpu][cluster] == 1) {
+ /*
+ * A power_up request went ahead of us.
+ * Even if we do not want to shut this CPU down,
+ * the caller expects a certain state as if the WFI
+ * was aborted. So let's continue with cache cleaning.
+ */
+ skip_wfi = true;
+ } else
+ BUG();
arch_spin_unlock(&dcscb_lock);
- last_man = (rst_hold & (1 << 8));
/*
* Now let's clean our L1 cache and shut ourself down.
@@ -122,8 +150,10 @@ static void dcscb_power_down(void)
set_auxcr(get_auxcr() & ~(1 << 6));
/* Now we are prepared for power-down, do it: */
- dsb();
- wfi();
+ if (!skip_wfi) {
+ dsb();
+ wfi();
+ }
/* Not dead@this point? Let our caller cope. */
}
@@ -133,6 +163,19 @@ static const struct bL_platform_power_ops dcscb_power_ops = {
.power_down = dcscb_power_down,
};
+static void __init dcscb_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cpu >= 4 || cluster >= 2);
+ dcscb_use_count[cpu][cluster] = 1;
+}
+
static int __init dcscb_init(void)
{
int ret;
@@ -141,6 +184,8 @@ static int __init dcscb_init(void)
if (!dcscb_base)
return -ENOMEM;
+ dcscb_usage_count_init();
+
ret = bL_platform_power_register(&dcscb_power_ops);
if (ret) {
iounmap(dcscb_base);
--
1.8.0
^ permalink raw reply related
* [PATCH v2 12/16] ARM: vexpress/dcscb: do not hardcode number of CPUs per cluster
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
If 4 CPUs are assumed, the A15x1-A7x1 model configuration would never
shut down the initial cluster as the 0xf reset bit mask will never be
observed. Let's construct this mask based on the provided information
in the DCSCB config register for the number of CPUs per cluster.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/mach-vexpress/dcscb.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index cde9d3a8d2..e61ccce3c5 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -46,10 +46,12 @@ static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static void __iomem *dcscb_base;
static int dcscb_use_count[4][2];
+static int dcscb_cluster_cpu_mask[2];
static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
{
unsigned int rst_hold, cpumask = (1 << cpu);
+ unsigned int cluster_mask = dcscb_cluster_cpu_mask[cluster];
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
if (cpu >= 4 || cluster >= 2)
@@ -68,7 +70,7 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
if (rst_hold & (1 << 8)) {
/* remove cluster reset and add individual CPU's reset */
rst_hold &= ~(1 << 8);
- rst_hold |= 0xf;
+ rst_hold |= cluster_mask;
}
rst_hold &= ~(cpumask | (cpumask << 4));
writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
@@ -92,13 +94,14 @@ static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
static void dcscb_power_down(void)
{
- unsigned int mpidr, cpu, cluster, rst_hold, cpumask;
+ unsigned int mpidr, cpu, cluster, rst_hold, cpumask, cluster_mask;
bool last_man = false, skip_wfi = false;
mpidr = read_cpuid_mpidr();
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
cpumask = (1 << cpu);
+ cluster_mask = dcscb_cluster_cpu_mask[cluster];
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
BUG_ON(cpu >= 4 || cluster >= 2);
@@ -108,7 +111,7 @@ static void dcscb_power_down(void)
if (dcscb_use_count[cpu][cluster] == 0) {
rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
rst_hold |= cpumask;
- if (((rst_hold | (rst_hold >> 4)) & 0xf) == 0xf) {
+ if (((rst_hold | (rst_hold >> 4)) & cluster_mask) == cluster_mask) {
rst_hold |= (1 << 8);
last_man = true;
}
@@ -178,12 +181,15 @@ static void __init dcscb_usage_count_init(void)
static int __init dcscb_init(void)
{
+ unsigned int cfg;
int ret;
dcscb_base = ioremap(DCSCB_PHYS_BASE, 0x1000);
if (!dcscb_base)
return -ENOMEM;
-
+ cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+ dcscb_cluster_cpu_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
+ dcscb_cluster_cpu_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
dcscb_usage_count_init();
ret = bL_platform_power_register(&dcscb_power_ops);
--
1.8.0
^ permalink raw reply related
* [PATCH v2 13/16] drivers/bus: add ARM CCI support
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
On ARM multi-cluster systems coherency between cores running on
different clusters is managed by the cache-coherent interconnect (CCI).
It allows broadcasting of TLB invalidates and memory barriers and it
guarantees cache coherency at system level.
This patch enables the basic infrastructure required in Linux to
handle and programme the CCI component. The first implementation is
based on a platform device, its relative DT compatible property and
a simple programming interface.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
drivers/bus/Kconfig | 4 ++
drivers/bus/Makefile | 2 +
drivers/bus/arm-cci.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/arm-cci.h | 30 ++++++++++++++
4 files changed, 143 insertions(+)
create mode 100644 drivers/bus/arm-cci.c
create mode 100644 include/linux/arm-cci.h
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 0f51ed687d..d032f74ff2 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -19,4 +19,8 @@ config OMAP_INTERCONNECT
help
Driver to enable OMAP interconnect error handling driver.
+
+config ARM_CCI
+ bool "ARM CCI driver support"
+
endmenu
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 45d997c854..55aac809e5 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -6,3 +6,5 @@ obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o
# Interconnect bus driver for OMAP SoCs.
obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o
+
+obj-$(CONFIG_ARM_CCI) += arm-cci.o
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
new file mode 100644
index 0000000000..5de3aa3d1f
--- /dev/null
+++ b/drivers/bus/arm-cci.c
@@ -0,0 +1,107 @@
+/*
+ * ARM Cache Coherency Interconnect (CCI400) support
+ *
+ * Copyright (C) 2012-2013 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/arm-cci.h>
+
+#define CCI400_EAG_OFFSET 0x4000
+#define CCI400_KF_OFFSET 0x5000
+
+#define DRIVER_NAME "CCI"
+struct cci_drvdata {
+ void __iomem *baseaddr;
+ spinlock_t lock;
+};
+
+static struct cci_drvdata *info;
+
+void disable_cci(int cluster)
+{
+ u32 cci_reg = cluster ? CCI400_KF_OFFSET : CCI400_EAG_OFFSET;
+ writel_relaxed(0x0, info->baseaddr + cci_reg);
+
+ while (readl_relaxed(info->baseaddr + 0xc) & 0x1)
+ ;
+}
+EXPORT_SYMBOL_GPL(disable_cci);
+
+static int cci_driver_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int ret = 0;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ dev_err(&pdev->dev, "unable to allocate mem\n");
+ return -ENOMEM;
+ }
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "No memory resource\n");
+ ret = -EINVAL;
+ goto mem_free;
+ }
+
+ if (!request_mem_region(res->start, resource_size(res),
+ dev_name(&pdev->dev))) {
+ dev_err(&pdev->dev, "address 0x%x in use\n", (u32) res->start);
+ ret = -EBUSY;
+ goto mem_free;
+ }
+
+ info->baseaddr = ioremap(res->start, resource_size(res));
+ if (!info->baseaddr) {
+ ret = -ENXIO;
+ goto ioremap_err;
+ }
+
+ platform_set_drvdata(pdev, info);
+
+ pr_info("CCI loaded at %p\n", info->baseaddr);
+ return ret;
+
+ioremap_err:
+ release_region(res->start, resource_size(res));
+mem_free:
+ kfree(info);
+
+ return ret;
+}
+
+static const struct of_device_id arm_cci_matches[] = {
+ {.compatible = "arm,cci"},
+ {},
+};
+
+static struct platform_driver cci_platform_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = arm_cci_matches,
+ },
+ .probe = cci_driver_probe,
+};
+
+static int __init cci_init(void)
+{
+ return platform_driver_register(&cci_platform_driver);
+}
+
+core_initcall(cci_init);
diff --git a/include/linux/arm-cci.h b/include/linux/arm-cci.h
new file mode 100644
index 0000000000..86ae587817
--- /dev/null
+++ b/include/linux/arm-cci.h
@@ -0,0 +1,30 @@
+/*
+ * CCI support
+ *
+ * Copyright (C) 2012-2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_ARM_CCI_H
+#define __LINUX_ARM_CCI_H
+
+#ifdef CONFIG_ARM_CCI
+extern void disable_cci(int cluster);
+#else
+static inline void disable_cci(int cluster) { }
+#endif
+
+#endif
--
1.8.0
^ permalink raw reply related
* [PATCH v5 9/9] ARM: davinci: da850: Added dsp clock definition
From: Sekhar Nori @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <13514BD7FAEBA745BBD7D8A672905C1431206095@DFLE08.ent.ti.com>
On 1/23/2013 7:07 AM, Tivy, Robert wrote:
>> -----Original Message-----
>> From: Nori, Sekhar
>> Sent: Tuesday, January 22, 2013 4:04 AM
>>
>> Rob,
>>
>> On 1/11/2013 5:53 AM, Robert Tivy wrote:
>>> Added dsp clock definition, keyed to "davinci-rproc.0".
>>>
>>> Signed-off-by: Robert Tivy <rtivy@ti.com>
>>> ---
>>> arch/arm/mach-davinci/da850.c | 9 +++++++++
>>> 1 file changed, 9 insertions(+)
>>>
>>> diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-
>> davinci/da850.c
>>> index 097fcb2..50107c5 100644
>>> --- a/arch/arm/mach-davinci/da850.c
>>> +++ b/arch/arm/mach-davinci/da850.c
>>> @@ -375,6 +375,14 @@ static struct clk sata_clk = {
>>> .flags = PSC_FORCE,
>>> };
>>>
>>> +static struct clk dsp_clk = {
>>> + .name = "dsp",
>>> + .parent = &pll0_sysclk1,
>>> + .domain = DAVINCI_GPSC_DSPDOMAIN,
>>> + .lpsc = DA8XX_LPSC0_GEM,
>>> + .flags = PSC_LRST,
>>> +};
>>> +
>>> static struct clk_lookup da850_clks[] = {
>>> CLK(NULL, "ref", &ref_clk),
>>> CLK(NULL, "pll0", &pll0_clk),
>>> @@ -421,6 +429,7 @@ static struct clk_lookup da850_clks[] = {
>>> CLK("spi_davinci.1", NULL, &spi1_clk),
>>> CLK("vpif", NULL, &vpif_clk),
>>> CLK("ahci", NULL, &sata_clk),
>>> + CLK("davinci-rproc.0", NULL, &dsp_clk),
>>
>> Adding this clock node without the having the driver probed leads to
>> kernel hang while booting. With CONFIG_DAVINCI_RESET_CLOCKS=n, the
>> kernel boot fine. It looks like there is some trouble disabling the
>> clock if it is not used. Can you please check this issue?
>>
>> Thanks,
>> Sekhar
>
> Sekhar,
>
> Thankyou for trying that out.
>
> I discovered that the kernel boot hang is caused when trying to disable this clk during init, from within the function davinci_clk_disable_unused(). That function calls davinci_psc_config() which attempts to disable the DSP clock. Since this clk is enabled after reset, disabling it involves a state transition, and therefore the function must wait for GOSTAT[1] to be 0. For most peripherals this happens automatically, but for the DSP (and ARM, too), the DSP must execute the IDLE instruction to allow a clock enable->disable transition. Since this is bootup and there is no real program on the DSP yet, this doesn't happen.
>
> I was able to overcome this by adding PSC_FORCE to the clk->flags for dsp_clk. In fact, without PSC_FORCE I was not able to "rmmod davinci_remoteproc" since the firmware file that I'm loading did not execute IDLE. It feels like for davinci we should have a way to control the clk->flags' PSC_FORCE setting at run-time. The PSC_FORCE would be set initially (during boot) and the user (or system integrator) would have a way to unset it dynamically. That way, when the DSP firmware does actually have a structured way to enter IDLE, the user can say "I'd like a structured shutdown" and unset PSC_FORCE (via a clean API). But for now I'm just going to turn on PSC_FORCE:
> .flags = PSC_LRST | PSC_FORCE,
Thanks for the debug. It works for me after I added PSC_FORCE. Here is
the patch I am finally committing.
Thanks,
Sekhar
commit 09810a853b9a7920ba8c250d18815ef236effc47
Author: Robert Tivy <rtivy@ti.com>
AuthorDate: Thu Jan 10 16:23:22 2013 -0800
Commit: Sekhar Nori <nsekhar@ti.com>
CommitDate: Thu Jan 24 10:54:08 2013 +0530
ARM: davinci: da850: add dsp clock definition
Added dsp clock definition, keyed to "davinci-rproc.0".
DSP clocks is derived from pll0 sysclk1. Add a clock tree
node for that too.
Signed-off-by: Robert Tivy <rtivy@ti.com>
[nsekhar at ti.com: merge addition of pll0 sysclk1 and dsp clock
into one commit. Add PSC_FORCE to dsp clock node to handle the
case where DSP does not go into IDLE and its clock needs to
be disabled.]
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 6b9154e..0c4a26d 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -76,6 +76,13 @@ static struct clk pll0_aux_clk = {
.flags = CLK_PLL | PRE_PLL,
};
+static struct clk pll0_sysclk1 = {
+ .name = "pll0_sysclk1",
+ .parent = &pll0_clk,
+ .flags = CLK_PLL,
+ .div_reg = PLLDIV1,
+};
+
static struct clk pll0_sysclk2 = {
.name = "pll0_sysclk2",
.parent = &pll0_clk,
@@ -368,10 +375,19 @@ static struct clk sata_clk = {
.flags = PSC_FORCE,
};
+static struct clk dsp_clk = {
+ .name = "dsp",
+ .parent = &pll0_sysclk1,
+ .domain = DAVINCI_GPSC_DSPDOMAIN,
+ .lpsc = DA8XX_LPSC0_GEM,
+ .flags = PSC_LRST | PSC_FORCE,
+};
+
static struct clk_lookup da850_clks[] = {
CLK(NULL, "ref", &ref_clk),
CLK(NULL, "pll0", &pll0_clk),
CLK(NULL, "pll0_aux", &pll0_aux_clk),
+ CLK(NULL, "pll0_sysclk1", &pll0_sysclk1),
CLK(NULL, "pll0_sysclk2", &pll0_sysclk2),
CLK(NULL, "pll0_sysclk3", &pll0_sysclk3),
CLK(NULL, "pll0_sysclk4", &pll0_sysclk4),
@@ -413,6 +429,7 @@ static struct clk_lookup da850_clks[] = {
CLK("spi_davinci.1", NULL, &spi1_clk),
CLK("vpif", NULL, &vpif_clk),
CLK("ahci", NULL, &sata_clk),
+ CLK("davinci-rproc.0", NULL, &dsp_clk),
CLK(NULL, NULL, NULL),
};
^ permalink raw reply related
* [PATCH v2 14/16] ARM: CCI: ensure powerdown-time data is flushed from cache
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Dave Martin <dave.martin@linaro.org>
Non-local variables used by the CCI management function called after
disabling the cache must be flushed out to main memory in advance,
otherwise incoherency of those values may occur if they are sitting
in the cache of some other CPU when cci_disable() executes.
This patch adds the appropriate flushing to the CCI driver to ensure
that the relevant data is available in RAM ahead of time.
Because this creates a dependency on arch-specific cacheflushing
functions, this patch also makes ARM_CCI depend on ARM.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
drivers/bus/Kconfig | 1 +
drivers/bus/arm-cci.c | 21 +++++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index d032f74ff2..cd4ac9f001 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -22,5 +22,6 @@ config OMAP_INTERCONNECT
config ARM_CCI
bool "ARM CCI driver support"
+ depends on ARM
endmenu
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 5de3aa3d1f..11689f166d 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -21,8 +21,16 @@
#include <linux/slab.h>
#include <linux/arm-cci.h>
-#define CCI400_EAG_OFFSET 0x4000
-#define CCI400_KF_OFFSET 0x5000
+#include <asm/cacheflush.h>
+#include <asm/memory.h>
+#include <asm/outercache.h>
+
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+
+#define CCI400_PMCR 0x0100
+#define CCI400_EAG_OFFSET 0x4000
+#define CCI400_KF_OFFSET 0x5000
#define DRIVER_NAME "CCI"
struct cci_drvdata {
@@ -73,6 +81,15 @@ static int cci_driver_probe(struct platform_device *pdev)
goto ioremap_err;
}
+ /*
+ * Multi-cluster systems may need this data when non-coherent, during
+ * cluster power-up/power-down. Make sure it reaches main memory:
+ */
+ __cpuc_flush_dcache_area(info, sizeof *info);
+ __cpuc_flush_dcache_area(&info, sizeof info);
+ outer_clean_range(virt_to_phys(info), virt_to_phys(info + 1));
+ outer_clean_range(virt_to_phys(&info), virt_to_phys(&info + 1));
+
platform_set_drvdata(pdev, info);
pr_info("CCI loaded at %p\n", info->baseaddr);
--
1.8.0
^ permalink raw reply related
* [PATCH v2 15/16] ARM: vexpress/dcscb: handle platform coherency exit/setup and CCI
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
From: Dave Martin <dave.martin@linaro.org>
Add the required code to properly handle race free platform coherency exit
to the DCSCB power down method.
The power_up_setup callback is used to enable the CCI interface for
the cluster being brought up. This must be done in assembly before
the kernel environment is entered.
Thanks to Achin Gupta and Nicolas Pitre for their help and
contributions.
Signed-off-by: Dave Martin <dave.martin@linaro.org>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/mach-vexpress/Kconfig | 1 +
arch/arm/mach-vexpress/Makefile | 2 +-
arch/arm/mach-vexpress/dcscb.c | 74 ++++++++++++++++++++++++---------
arch/arm/mach-vexpress/dcscb_setup.S | 80 ++++++++++++++++++++++++++++++++++++
4 files changed, 137 insertions(+), 20 deletions(-)
create mode 100644 arch/arm/mach-vexpress/dcscb_setup.S
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 50d6587d0c..2feee6dfa4 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -55,6 +55,7 @@ config ARCH_VEXPRESS_CA9X4
config ARCH_VEXPRESS_DCSCB
bool "Dual Cluster System Control Block (DCSCB) support"
depends on BIG_LITTLE
+ select ARM_CCI
help
Support for the Dual Cluster System Configuration Block (DCSCB).
This is needed to provide CPU and cluster power management
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 2253644054..f6e90f3272 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,6 +6,6 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
obj-y := v2m.o reset.o
obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o
-obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o
obj-$(CONFIG_SMP) += platsmp.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index e61ccce3c5..575c489a4c 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
#include <asm/bL_entry.h>
#include <asm/proc-fns.h>
@@ -106,6 +107,8 @@ static void dcscb_power_down(void)
pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
BUG_ON(cpu >= 4 || cluster >= 2);
+ __bL_cpu_going_down(cpu, cluster);
+
arch_spin_lock(&dcscb_lock);
dcscb_use_count[cpu][cluster]--;
if (dcscb_use_count[cpu][cluster] == 0) {
@@ -113,6 +116,7 @@ static void dcscb_power_down(void)
rst_hold |= cpumask;
if (((rst_hold | (rst_hold >> 4)) & cluster_mask) == cluster_mask) {
rst_hold |= (1 << 8);
+ BUG_ON(__bL_cluster_state(cluster) != CLUSTER_UP);
last_man = true;
}
writel(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
@@ -126,31 +130,59 @@ static void dcscb_power_down(void)
skip_wfi = true;
} else
BUG();
- arch_spin_unlock(&dcscb_lock);
- /*
- * Now let's clean our L1 cache and shut ourself down.
- * If we're the last CPU in this cluster then clean L2 too.
- */
-
- /*
- * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
- * a preliminary flush here for those CPUs. At least, that's
- * the theory -- without the extra flush, Linux explodes on
- * RTSM (maybe not needed anymore, to be investigated)..
- */
- flush_cache_louis();
- cpu_proc_fin();
+ if (last_man && __bL_outbound_enter_critical(cpu, cluster)) {
+ arch_spin_unlock(&dcscb_lock);
- if (!last_man) {
- flush_cache_louis();
- } else {
+ /*
+ * Flush all cache levels for this cluster.
+ *
+ * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+ * a preliminary flush here for those CPUs. At least, that's
+ * the theory -- without the extra flush, Linux explodes on
+ * RTSM (maybe not needed anymore, to be investigated).
+ */
flush_cache_all();
+ cpu_proc_fin(); /* disable allocation into internal caches*/
+ flush_cache_all();
+
+ /*
+ * This is a harmless no-op. On platforms with a real
+ * outer cache this might either be needed or not,
+ * depending on where the outer cache sits.
+ */
outer_flush_all();
+
+ /* Disable local coherency by clearing the ACTLR "SMP" bit: */
+ set_auxcr(get_auxcr() & ~(1 << 6));
+
+ /*
+ * Disable cluster-level coherency by masking
+ * incoming snoops and DVM messages:
+ */
+ disable_cci(cluster);
+
+ __bL_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ } else {
+ arch_spin_unlock(&dcscb_lock);
+
+ /*
+ * Flush the local CPU cache.
+ *
+ * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need
+ * a preliminary flush here for those CPUs. At least, that's
+ * the theory -- without the extra flush, Linux explodes on
+ * RTSM (maybe not needed anymore, to be investigated).
+ */
+ flush_cache_louis();
+ cpu_proc_fin(); /* disable allocation into internal caches*/
+ flush_cache_louis();
+
+ /* Disable local coherency by clearing the ACTLR "SMP" bit: */
+ set_auxcr(get_auxcr() & ~(1 << 6));
}
- /* Disable local coherency by clearing the ACTLR "SMP" bit: */
- set_auxcr(get_auxcr() & ~(1 << 6));
+ __bL_cpu_down(cpu, cluster);
/* Now we are prepared for power-down, do it: */
if (!skip_wfi) {
@@ -179,6 +211,8 @@ static void __init dcscb_usage_count_init(void)
dcscb_use_count[cpu][cluster] = 1;
}
+extern void dcscb_power_up_setup(unsigned int affinity_level);
+
static int __init dcscb_init(void)
{
unsigned int cfg;
@@ -193,6 +227,8 @@ static int __init dcscb_init(void)
dcscb_usage_count_init();
ret = bL_platform_power_register(&dcscb_power_ops);
+ if (!ret)
+ ret = bL_cluster_sync_init(dcscb_power_up_setup);
if (ret) {
iounmap(dcscb_base);
return ret;
diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S
new file mode 100644
index 0000000000..d61a2f3552
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb_setup.S
@@ -0,0 +1,80 @@
+/*
+ * arch/arm/include/asm/dcscb_setup.S
+ *
+ * Created by: Dave Martin, 2012-06-22
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/bL_entry.h>
+
+
+#define SLAVE_SNOOPCTL_OFFSET 0
+#define SNOOPCTL_SNOOP_ENABLE (1 << 0)
+#define SNOOPCTL_DVM_ENABLE (1 << 1)
+
+#define CCI_STATUS_OFFSET 0xc
+#define STATUS_CHANGE_PENDING (1 << 0)
+
+#define CCI_SLAVE_OFFSET(n) (0x1000 + 0x1000 * (n))
+
+#define RTSM_CCI_PHYS_BASE 0x2c090000
+#define RTSM_CCI_SLAVE_A15 3
+#define RTSM_CCI_SLAVE_A7 4
+
+#define RTSM_CCI_A15_OFFSET CCI_SLAVE_OFFSET(RTSM_CCI_SLAVE_A15)
+#define RTSM_CCI_A7_OFFSET CCI_SLAVE_OFFSET(RTSM_CCI_SLAVE_A7)
+
+
+ENTRY(dcscb_power_up_setup)
+
+ cmp r0, #0 @ check affinity level
+ beq 2f
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ */
+
+ mrc p15, 0, r0, c0, c0, 5 @ MPIDR
+ ubfx r0, r0, #8, #4 @ cluster
+
+ @ A15/A7 may not require explicit L2 invalidation on reset, dependent
+ @ on hardware integration desicions.
+ @ For now, this code assumes that L2 is either already invalidated, or
+ @ invalidation is not required.
+
+ ldr r3, =RTSM_CCI_PHYS_BASE + RTSM_CCI_A15_OFFSET
+ cmp r0, #0 @ A15 cluster?
+ addne r3, r3, #RTSM_CCI_A7_OFFSET - RTSM_CCI_A15_OFFSET
+
+ @ r3 now points to the correct CCI slave register block
+
+ ldr r0, [r3, #SLAVE_SNOOPCTL_OFFSET]
+ orr r0, r0, #SNOOPCTL_SNOOP_ENABLE | SNOOPCTL_DVM_ENABLE
+ str r0, [r3, #SLAVE_SNOOPCTL_OFFSET] @ enable CCI snoops
+
+ @ Wait for snoop control change to complete:
+
+ ldr r3, =RTSM_CCI_PHYS_BASE
+
+1: ldr r0, [r3, #CCI_STATUS_OFFSET]
+ tst r0, #STATUS_CHANGE_PENDING
+ bne 1b
+
+ dsb @ Synchronise side-effects of enabling CCI
+
+ bx lr
+
+2: @ Implementation-specific local CPU setup operations should go here,
+ @ if any. In this case, there is nothing to do.
+
+ bx lr
+
+ENDPROC(dcscb_power_up_setup)
--
1.8.0
^ permalink raw reply related
* [PATCH v2 16/16] ARM: vexpress/dcscb: probe via device tree
From: Nicolas Pitre @ 2013-01-24 6:27 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359008879-9015-1-git-send-email-nicolas.pitre@linaro.org>
This allows for the DCSCB support to be compiled in and selected
at run time.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/mach-vexpress/dcscb.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
index 575c489a4c..17e410e4fe 100644
--- a/arch/arm/mach-vexpress/dcscb.c
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -14,6 +14,7 @@
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
+#include <linux/of_address.h>
#include <linux/vexpress.h>
#include <linux/arm-cci.h>
@@ -24,8 +25,6 @@
#include <asm/cp15.h>
-#define DCSCB_PHYS_BASE 0x60000000
-
#define RST_HOLD0 0x0
#define RST_HOLD1 0x4
#define SYS_SWRESET 0x8
@@ -215,12 +214,16 @@ extern void dcscb_power_up_setup(unsigned int affinity_level);
static int __init dcscb_init(void)
{
+ struct device_node *node;
unsigned int cfg;
int ret;
- dcscb_base = ioremap(DCSCB_PHYS_BASE, 0x1000);
+ node = of_find_compatible_node(NULL, NULL, "arm,dcscb");
+ if (!node)
+ return -ENODEV;
+ dcscb_base= of_iomap(node, 0);
if (!dcscb_base)
- return -ENOMEM;
+ return -EINVAL;
cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
dcscb_cluster_cpu_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
dcscb_cluster_cpu_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
--
1.8.0
^ permalink raw reply related
* [V5 PATCH 00/26] mv-usb fix and enhancement patches
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
The patches are divied into 4 parts
1. bug fixes
usb: gadget: mv_udc: use udc_start and udc_stop functions
usb: gadget: mv_udc: use devm_xxx for probe
usb: gadget: mv_udc: fix the warning of mv_udc_remove
usb: otg: mv_otg: use devm_xxx for probe
usb: host: ehci-mv: remove unused variable
usb: gadget: mv_udc: fix the value of tranceiver
usb: gadget: mv_udc: make mv_udc depends on ARCH_MMP or ARCH_PXA
Above patches are bug fixes.
2. PHY driver
To remove the callbacks in the platform data, a usb PHY driver
for marvell udc/otg/ehci is written.
For device tree support, it is not good to pass the callback
pointers by platform data. The PHY driver also removes the
block.
usb: phy: mv_usb2: add PHY driver for marvell usb2 controller
usb: gadget: mv_udc: use PHY driver for udc
usb: ehci: ehci-mv: use PHY driver for ehci
usb: otg: mv_otg: use PHY driver for otg
Above patches are marvell usb PHY driver support.
arm: mmp2: change the defintion of usb devices
arm: pxa910: change the defintion of usb devices
arm: brownstone: add usb support for the board
arm: ttc_dkb: add usb support
arm: mmp: remove the usb phy setting
arm: mmp: remove usb devices from pxa168
Above patches are for SOC/board support for marvell usb PHY
driver.
3. external chip support
The marvell usb controller can detect the vbus/idpin, but it
need PHY and usb clocks to be enabled.
Based on measurement it will import 15mA current, and increase
the power when the usb is not used.
Using a external chip to detect vbus/idpin changes will save
the power.
In fact the marvell PMIC 88pm860x and 88pm80x can do it. The
drivers are located at drivers/mfd.
So add a middle layer in the marvell usb PHY driver.
PMIC call the APIs in middle driver to registers the callback
for vbus/idpin detection/query
udc/otg/ehci driver will call the APIs to get vbus/idpin changes
and query the states of the vbus/idpin.
usb: phy: mv_usb2_phy: add externel chip support
usb: gadget: mv_udc: add extern chip support
usb: ehci: ehci-mv: add extern chip support
usb: otg: mv_otg: add extern chip support
Above patches are the middle layer suppor for udc/otg/ehci
arm: mmp: add extern chip support for brownstone
arm: mmp: add extern chip support for ttc_dkb
Above patches are corresponding board file changes
4. device tree support
After removing the callbacks in platform data, and the not
constant variables in platform data. All the information needed
by udc/otg/ehci driver are constant.
usb: gadget: mv_udc: add device tree support
usb: otg: mv_otg: add device tree support
usb: ehci: ehci-mv: add device tree support
Above patches are device tree support for udc/otg/ehci driver.
V2->V1:
Change the Signed-off-by to be right email address
v3->v2
re-format the patches to new code base
v4->v3
1. make mv udc gadget driver depend on ARCH_PXA and ARCH_MMP
2. remove __devinit and __devexit
3. make the driver compiled successful if CONFIG_MV_USB2_PHY is not defined.
The modified patches are
usb: gadget: mv_udc: make mv_udc depends on ARCH_MMP or ARCH_PXA
usb: phy: mv_usb2: add PHY driver for marvell usb2 controller
v5->v4
make the struct mv_usb2_extern_chip member ->head to be
struct atomic_notifier_head instead of struct atomic_notifier_head *.
Chao Xie (26):
usb: gadget: mv_udc: use udc_start and udc_stop functions
usb: gadget: mv_udc: use devm_xxx for probe
usb: gadget: mv_udc: fix the warning of mv_udc_remove
usb: otg: mv_otg: use devm_xxx for probe
usb: host: ehci-mv: remove unused variable
usb: gadget: mv_udc: fix the value of tranceiver
usb: gadget: mv_udc: make mv_udc depends on ARCH_MMP or ARCH_PXA
usb: phy: mv_usb2: add PHY driver for marvell usb2 controller
usb: gadget: mv_udc: use PHY driver for udc
usb: ehci: ehci-mv: use PHY driver for ehci
usb: otg: mv_otg: use PHY driver for otg
arm: mmp2: change the defintion of usb devices
arm: pxa910: change the defintion of usb devices
arm: brownstone: add usb support for the board
arm: ttc_dkb: add usb support
arm: mmp: remove the usb phy setting
arm: mmp: remove usb devices from pxa168
usb: phy: mv_usb2_phy: add externel chip support
usb: gadget: mv_udc: add extern chip support
usb: ehci: ehci-mv: add extern chip support
usb: otg: mv_otg: add extern chip support
arm: mmp: add extern chip support for brownstone
arm: mmp: add extern chip support for ttc_dkb
usb: gadget: mv_udc: add device tree support
usb: otg: mv_otg: add device tree support
usb: ehci: ehci-mv: add device tree support
arch/arm/mach-mmp/brownstone.c | 45 +++
arch/arm/mach-mmp/devices.c | 278 ----------------
arch/arm/mach-mmp/include/mach/mmp2.h | 4 +
arch/arm/mach-mmp/include/mach/pxa910.h | 7 +-
arch/arm/mach-mmp/include/mach/regs-usb.h | 253 ---------------
arch/arm/mach-mmp/mmp2.c | 4 +
arch/arm/mach-mmp/pxa168.c | 42 ---
arch/arm/mach-mmp/pxa910.c | 4 +
arch/arm/mach-mmp/ttc_dkb.c | 32 ++-
drivers/usb/gadget/Kconfig | 1 +
drivers/usb/gadget/mv_udc.h | 10 +-
drivers/usb/gadget/mv_udc_core.c | 384 ++++++++++++-----------
drivers/usb/host/ehci-mv.c | 155 ++++++----
drivers/usb/otg/mv_otg.c | 280 +++++++++-------
drivers/usb/otg/mv_otg.h | 11 +-
drivers/usb/phy/Kconfig | 7 +
drivers/usb/phy/Makefile | 1 +
drivers/usb/phy/mv_usb2_phy.c | 503 +++++++++++++++++++++++++++++
include/linux/platform_data/mv_usb.h | 22 +-
include/linux/usb/mv_usb2.h | 128 ++++++++
20 files changed, 1196 insertions(+), 975 deletions(-)
delete mode 100644 arch/arm/mach-mmp/include/mach/regs-usb.h
create mode 100644 drivers/usb/phy/mv_usb2_phy.c
create mode 100644 include/linux/usb/mv_usb2.h
--
1.7.4.1
^ permalink raw reply
* [V5 PATCH 01/26] usb: gadget: mv_udc: use udc_start and udc_stop functions
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
This patches converts the driver into the new style start/stop
interface. As a result the driver no longer uses the static
global the_conroller variable.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/gadget/mv_udc_core.c | 79 +++++++++++++++++---------------------
1 files changed, 35 insertions(+), 44 deletions(-)
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index 6e8b127..32a9972 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -61,9 +61,6 @@ static DECLARE_COMPLETION(release_done);
static const char driver_name[] = "mv_udc";
static const char driver_desc[] = DRIVER_DESC;
-/* controller device global variable */
-static struct mv_udc *the_controller;
-
static void nuke(struct mv_ep *ep, int status);
static void stop_activity(struct mv_udc *udc, struct usb_gadget_driver *driver);
@@ -1268,9 +1265,8 @@ static int mv_udc_pullup(struct usb_gadget *gadget, int is_on)
return retval;
}
-static int mv_udc_start(struct usb_gadget_driver *driver,
- int (*bind)(struct usb_gadget *, struct usb_gadget_driver *));
-static int mv_udc_stop(struct usb_gadget_driver *driver);
+static int mv_udc_start(struct usb_gadget *, struct usb_gadget_driver *);
+static int mv_udc_stop(struct usb_gadget *, struct usb_gadget_driver *);
/* device controller usb_gadget_ops structure */
static const struct usb_gadget_ops mv_ops = {
@@ -1285,8 +1281,8 @@ static const struct usb_gadget_ops mv_ops = {
/* D+ pullup, software-controlled connect/disconnect to USB host */
.pullup = mv_udc_pullup,
- .start = mv_udc_start,
- .stop = mv_udc_stop,
+ .udc_start = mv_udc_start,
+ .udc_stop = mv_udc_stop,
};
static int eps_init(struct mv_udc *udc)
@@ -1373,15 +1369,14 @@ static void stop_activity(struct mv_udc *udc, struct usb_gadget_driver *driver)
}
}
-static int mv_udc_start(struct usb_gadget_driver *driver,
- int (*bind)(struct usb_gadget *, struct usb_gadget_driver *))
+static int mv_udc_start(struct usb_gadget *gadget,
+ struct usb_gadget_driver *driver)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
int retval = 0;
unsigned long flags;
- if (!udc)
- return -ENODEV;
+ udc = container_of(gadget, struct mv_udc, gadget);
if (udc->driver)
return -EBUSY;
@@ -1399,26 +1394,14 @@ static int mv_udc_start(struct usb_gadget_driver *driver,
spin_unlock_irqrestore(&udc->lock, flags);
- retval = bind(&udc->gadget, driver);
- if (retval) {
- dev_err(&udc->dev->dev, "bind to driver %s --> %d\n",
- driver->driver.name, retval);
- udc->driver = NULL;
- udc->gadget.dev.driver = NULL;
- return retval;
- }
-
if (!IS_ERR_OR_NULL(udc->transceiver)) {
retval = otg_set_peripheral(udc->transceiver->otg,
&udc->gadget);
if (retval) {
dev_err(&udc->dev->dev,
"unable to register peripheral to otg\n");
- if (driver->unbind) {
- driver->unbind(&udc->gadget);
- udc->gadget.dev.driver = NULL;
- udc->driver = NULL;
- }
+ udc->driver = NULL;
+ udc->gadget.dev.driver = NULL;
return retval;
}
}
@@ -1433,13 +1416,13 @@ static int mv_udc_start(struct usb_gadget_driver *driver,
return 0;
}
-static int mv_udc_stop(struct usb_gadget_driver *driver)
+static int mv_udc_stop(struct usb_gadget *gadget,
+ struct usb_gadget_driver *driver)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
unsigned long flags;
- if (!udc)
- return -ENODEV;
+ udc = container_of(gadget, struct mv_udc, gadget);
spin_lock_irqsave(&udc->lock, flags);
@@ -1454,7 +1437,6 @@ static int mv_udc_stop(struct usb_gadget_driver *driver)
spin_unlock_irqrestore(&udc->lock, flags);
/* unbind gadget driver */
- driver->unbind(&udc->gadget);
udc->gadget.dev.driver = NULL;
udc->driver = NULL;
@@ -1472,10 +1454,13 @@ static void mv_set_ptc(struct mv_udc *udc, u32 mode)
static void prime_status_complete(struct usb_ep *ep, struct usb_request *_req)
{
- struct mv_udc *udc = the_controller;
+ struct mv_ep *mvep = container_of(ep, struct mv_ep, ep);
struct mv_req *req = container_of(_req, struct mv_req, req);
+ struct mv_udc *udc;
unsigned long flags;
+ udc = mvep->udc;
+
dev_info(&udc->dev->dev, "switch to test mode %d\n", req->test_mode);
spin_lock_irqsave(&udc->lock, flags);
@@ -2123,16 +2108,20 @@ static void mv_udc_vbus_work(struct work_struct *work)
/* release device structure */
static void gadget_release(struct device *_dev)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
+
+ udc = dev_get_drvdata(_dev);
complete(udc->done);
}
static int mv_udc_remove(struct platform_device *dev)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
int clk_i;
+ udc = platform_get_drvdata(dev);
+
usb_del_gadget_udc(&udc->gadget);
if (udc->qwork) {
@@ -2183,8 +2172,6 @@ static int mv_udc_remove(struct platform_device *dev)
wait_for_completion(udc->done);
kfree(udc);
- the_controller = NULL;
-
return 0;
}
@@ -2209,7 +2196,6 @@ static int mv_udc_probe(struct platform_device *dev)
return -ENOMEM;
}
- the_controller = udc;
udc->done = &release_done;
udc->pdata = dev->dev.platform_data;
spin_lock_init(&udc->lock);
@@ -2400,6 +2386,7 @@ static int mv_udc_probe(struct platform_device *dev)
if (retval)
goto err_unregister;
+ platform_set_drvdata(dev, udc);
dev_info(&dev->dev, "successful probe UDC device %s clock gating.\n",
udc->clock_gating ? "with" : "without");
@@ -2431,15 +2418,16 @@ err_iounmap_capreg:
err_put_clk:
for (clk_i--; clk_i >= 0; clk_i--)
clk_put(udc->clk[clk_i]);
- the_controller = NULL;
kfree(udc);
return retval;
}
#ifdef CONFIG_PM
-static int mv_udc_suspend(struct device *_dev)
+static int mv_udc_suspend(struct device *dev)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
+
+ udc = dev_get_drvdata(dev);
/* if OTG is enabled, the following will be done in OTG driver*/
if (!IS_ERR_OR_NULL(udc->transceiver))
@@ -2469,11 +2457,13 @@ static int mv_udc_suspend(struct device *_dev)
return 0;
}
-static int mv_udc_resume(struct device *_dev)
+static int mv_udc_resume(struct device *dev)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
int retval;
+ udc = dev_get_drvdata(dev);
+
/* if OTG is enabled, the following will be done in OTG driver*/
if (!IS_ERR_OR_NULL(udc->transceiver))
return 0;
@@ -2501,9 +2491,10 @@ static const struct dev_pm_ops mv_udc_pm_ops = {
static void mv_udc_shutdown(struct platform_device *dev)
{
- struct mv_udc *udc = the_controller;
+ struct mv_udc *udc;
u32 mode;
+ udc = platform_get_drvdata(dev);
/* reset controller mode to IDLE */
mv_udc_enable(udc);
mode = readl(&udc->op_regs->usbmode);
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 02/26] usb: gadget: mv_udc: use devm_xxx for probe
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
use devm_xxx for udc driver probe. So we do need care about
the resources release in driver remove or failure handling
in driver probe.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/gadget/mv_udc_core.c | 156 ++++++++++++++------------------------
1 files changed, 56 insertions(+), 100 deletions(-)
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index 32a9972..e2fdc8e 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -2115,12 +2115,11 @@ static void gadget_release(struct device *_dev)
complete(udc->done);
}
-static int mv_udc_remove(struct platform_device *dev)
+static int mv_udc_remove(struct platform_device *pdev)
{
struct mv_udc *udc;
- int clk_i;
- udc = platform_get_drvdata(dev);
+ udc = platform_get_drvdata(pdev);
usb_del_gadget_udc(&udc->gadget);
@@ -2129,55 +2128,27 @@ static int mv_udc_remove(struct platform_device *dev)
destroy_workqueue(udc->qwork);
}
- /*
- * If we have transceiver inited,
- * then vbus irq will not be requested in udc driver.
- */
- if (udc->pdata && udc->pdata->vbus
- && udc->clock_gating && IS_ERR_OR_NULL(udc->transceiver))
- free_irq(udc->pdata->vbus->irq, &dev->dev);
-
/* free memory allocated in probe */
if (udc->dtd_pool)
dma_pool_destroy(udc->dtd_pool);
if (udc->ep_dqh)
- dma_free_coherent(&dev->dev, udc->ep_dqh_size,
+ dma_free_coherent(&pdev->dev, udc->ep_dqh_size,
udc->ep_dqh, udc->ep_dqh_dma);
- kfree(udc->eps);
-
- if (udc->irq)
- free_irq(udc->irq, &dev->dev);
-
mv_udc_disable(udc);
- if (udc->cap_regs)
- iounmap(udc->cap_regs);
-
- if (udc->phy_regs)
- iounmap(udc->phy_regs);
-
- if (udc->status_req) {
- kfree(udc->status_req->req.buf);
- kfree(udc->status_req);
- }
-
- for (clk_i = 0; clk_i <= udc->clknum; clk_i++)
- clk_put(udc->clk[clk_i]);
-
device_unregister(&udc->gadget.dev);
/* free dev, wait for the release() finished */
wait_for_completion(udc->done);
- kfree(udc);
return 0;
}
-static int mv_udc_probe(struct platform_device *dev)
+static int mv_udc_probe(struct platform_device *pdev)
{
- struct mv_usb_platform_data *pdata = dev->dev.platform_data;
+ struct mv_usb_platform_data *pdata = pdev->dev.platform_data;
struct mv_udc *udc;
int retval = 0;
int clk_i = 0;
@@ -2185,70 +2156,68 @@ static int mv_udc_probe(struct platform_device *dev)
size_t size;
if (pdata == NULL) {
- dev_err(&dev->dev, "missing platform_data\n");
+ dev_err(&pdev->dev, "missing platform_data\n");
return -ENODEV;
}
size = sizeof(*udc) + sizeof(struct clk *) * pdata->clknum;
- udc = kzalloc(size, GFP_KERNEL);
+ udc = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
if (udc == NULL) {
- dev_err(&dev->dev, "failed to allocate memory for udc\n");
+ dev_err(&pdev->dev, "failed to allocate memory for udc\n");
return -ENOMEM;
}
udc->done = &release_done;
- udc->pdata = dev->dev.platform_data;
+ udc->pdata = pdev->dev.platform_data;
spin_lock_init(&udc->lock);
- udc->dev = dev;
+ udc->dev = pdev;
#ifdef CONFIG_USB_OTG_UTILS
if (pdata->mode == MV_USB_MODE_OTG)
- udc->transceiver = usb_get_phy(USB_PHY_TYPE_USB2);
+ udc->transceiver = devm_usb_get_phy(&pdev->dev,
+ USB_PHY_TYPE_USB2);
#endif
udc->clknum = pdata->clknum;
for (clk_i = 0; clk_i < udc->clknum; clk_i++) {
- udc->clk[clk_i] = clk_get(&dev->dev, pdata->clkname[clk_i]);
+ udc->clk[clk_i] = devm_clk_get(&pdev->dev,
+ pdata->clkname[clk_i]);
if (IS_ERR(udc->clk[clk_i])) {
retval = PTR_ERR(udc->clk[clk_i]);
- goto err_put_clk;
+ return retval;
}
}
r = platform_get_resource_byname(udc->dev, IORESOURCE_MEM, "capregs");
if (r == NULL) {
- dev_err(&dev->dev, "no I/O memory resource defined\n");
- retval = -ENODEV;
- goto err_put_clk;
+ dev_err(&pdev->dev, "no I/O memory resource defined\n");
+ return -ENODEV;
}
udc->cap_regs = (struct mv_cap_regs __iomem *)
- ioremap(r->start, resource_size(r));
+ devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (udc->cap_regs == NULL) {
- dev_err(&dev->dev, "failed to map I/O memory\n");
- retval = -EBUSY;
- goto err_put_clk;
+ dev_err(&pdev->dev, "failed to map I/O memory\n");
+ return -EBUSY;
}
r = platform_get_resource_byname(udc->dev, IORESOURCE_MEM, "phyregs");
if (r == NULL) {
- dev_err(&dev->dev, "no phy I/O memory resource defined\n");
- retval = -ENODEV;
- goto err_iounmap_capreg;
+ dev_err(&pdev->dev, "no phy I/O memory resource defined\n");
+ return -ENODEV;
}
udc->phy_regs = ioremap(r->start, resource_size(r));
if (udc->phy_regs == NULL) {
- dev_err(&dev->dev, "failed to map phy I/O memory\n");
- retval = -EBUSY;
- goto err_iounmap_capreg;
+ dev_err(&pdev->dev, "failed to map phy I/O memory\n");
+ return -EBUSY;
}
/* we will acces controller register, so enable the clk */
retval = mv_udc_enable_internal(udc);
if (retval)
- goto err_iounmap_phyreg;
+ return retval;
udc->op_regs =
(struct mv_op_regs __iomem *)((unsigned long)udc->cap_regs
@@ -2265,11 +2234,11 @@ static int mv_udc_probe(struct platform_device *dev)
size = udc->max_eps * sizeof(struct mv_dqh) *2;
size = (size + DQH_ALIGNMENT - 1) & ~(DQH_ALIGNMENT - 1);
- udc->ep_dqh = dma_alloc_coherent(&dev->dev, size,
+ udc->ep_dqh = dma_alloc_coherent(&pdev->dev, size,
&udc->ep_dqh_dma, GFP_KERNEL);
if (udc->ep_dqh == NULL) {
- dev_err(&dev->dev, "allocate dQH memory failed\n");
+ dev_err(&pdev->dev, "allocate dQH memory failed\n");
retval = -ENOMEM;
goto err_disable_clock;
}
@@ -2277,7 +2246,7 @@ static int mv_udc_probe(struct platform_device *dev)
/* create dTD dma_pool resource */
udc->dtd_pool = dma_pool_create("mv_dtd",
- &dev->dev,
+ &pdev->dev,
sizeof(struct mv_dtd),
DTD_ALIGNMENT,
DMA_BOUNDARY);
@@ -2288,19 +2257,20 @@ static int mv_udc_probe(struct platform_device *dev)
}
size = udc->max_eps * sizeof(struct mv_ep) *2;
- udc->eps = kzalloc(size, GFP_KERNEL);
+ udc->eps = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
if (udc->eps == NULL) {
- dev_err(&dev->dev, "allocate ep memory failed\n");
+ dev_err(&pdev->dev, "allocate ep memory failed\n");
retval = -ENOMEM;
goto err_destroy_dma;
}
/* initialize ep0 status request structure */
- udc->status_req = kzalloc(sizeof(struct mv_req), GFP_KERNEL);
+ udc->status_req = devm_kzalloc(&pdev->dev, sizeof(struct mv_req),
+ GFP_KERNEL);
if (!udc->status_req) {
- dev_err(&dev->dev, "allocate status_req memory failed\n");
+ dev_err(&pdev->dev, "allocate status_req memory failed\n");
retval = -ENOMEM;
- goto err_free_eps;
+ goto err_destroy_dma;
}
INIT_LIST_HEAD(&udc->status_req->queue);
@@ -2315,17 +2285,17 @@ static int mv_udc_probe(struct platform_device *dev)
r = platform_get_resource(udc->dev, IORESOURCE_IRQ, 0);
if (r == NULL) {
- dev_err(&dev->dev, "no IRQ resource defined\n");
+ dev_err(&pdev->dev, "no IRQ resource defined\n");
retval = -ENODEV;
- goto err_free_status_req;
+ goto err_destroy_dma;
}
udc->irq = r->start;
- if (request_irq(udc->irq, mv_udc_irq,
+ if (devm_request_irq(&pdev->dev, udc->irq, mv_udc_irq,
IRQF_SHARED, driver_name, udc)) {
- dev_err(&dev->dev, "Request irq %d for UDC failed\n",
+ dev_err(&pdev->dev, "Request irq %d for UDC failed\n",
udc->irq);
retval = -ENODEV;
- goto err_free_status_req;
+ goto err_destroy_dma;
}
/* initialize gadget structure */
@@ -2337,14 +2307,14 @@ static int mv_udc_probe(struct platform_device *dev)
/* the "gadget" abstracts/virtualizes the controller */
dev_set_name(&udc->gadget.dev, "gadget");
- udc->gadget.dev.parent = &dev->dev;
- udc->gadget.dev.dma_mask = dev->dev.dma_mask;
+ udc->gadget.dev.parent = &pdev->dev;
+ udc->gadget.dev.dma_mask = pdev->dev.dma_mask;
udc->gadget.dev.release = gadget_release;
udc->gadget.name = driver_name; /* gadget name */
retval = device_register(&udc->gadget.dev);
if (retval)
- goto err_free_irq;
+ goto err_destroy_dma;
eps_init(udc);
@@ -2353,10 +2323,11 @@ static int mv_udc_probe(struct platform_device *dev)
udc->clock_gating = 1;
else if (pdata->vbus) {
udc->clock_gating = 1;
- retval = request_threaded_irq(pdata->vbus->irq, NULL,
+ retval = devm_request_threaded_irq(&pdev->dev,
+ pdata->vbus->irq, NULL,
mv_udc_vbus_irq, IRQF_ONESHOT, "vbus", udc);
if (retval) {
- dev_info(&dev->dev,
+ dev_info(&pdev->dev,
"Can not request irq for VBUS, "
"disable clock gating\n");
udc->clock_gating = 0;
@@ -2364,7 +2335,7 @@ static int mv_udc_probe(struct platform_device *dev)
udc->qwork = create_singlethread_workqueue("mv_udc_queue");
if (!udc->qwork) {
- dev_err(&dev->dev, "cannot create workqueue\n");
+ dev_err(&pdev->dev, "cannot create workqueue\n");
retval = -ENOMEM;
goto err_unregister;
}
@@ -2382,43 +2353,28 @@ static int mv_udc_probe(struct platform_device *dev)
else
udc->vbus_active = 1;
- retval = usb_add_gadget_udc(&dev->dev, &udc->gadget);
+ retval = usb_add_gadget_udc(&pdev->dev, &udc->gadget);
if (retval)
- goto err_unregister;
+ goto err_create_workqueue;
- platform_set_drvdata(dev, udc);
- dev_info(&dev->dev, "successful probe UDC device %s clock gating.\n",
+ platform_set_drvdata(pdev, udc);
+ dev_info(&pdev->dev, "successful probe UDC device %s clock gating.\n",
udc->clock_gating ? "with" : "without");
return 0;
+err_create_workqueue:
+ destroy_workqueue(udc->qwork);
err_unregister:
- if (udc->pdata && udc->pdata->vbus
- && udc->clock_gating && IS_ERR_OR_NULL(udc->transceiver))
- free_irq(pdata->vbus->irq, &dev->dev);
device_unregister(&udc->gadget.dev);
-err_free_irq:
- free_irq(udc->irq, &dev->dev);
-err_free_status_req:
- kfree(udc->status_req->req.buf);
- kfree(udc->status_req);
-err_free_eps:
- kfree(udc->eps);
err_destroy_dma:
dma_pool_destroy(udc->dtd_pool);
err_free_dma:
- dma_free_coherent(&dev->dev, udc->ep_dqh_size,
+ dma_free_coherent(&pdev->dev, udc->ep_dqh_size,
udc->ep_dqh, udc->ep_dqh_dma);
err_disable_clock:
mv_udc_disable_internal(udc);
-err_iounmap_phyreg:
- iounmap(udc->phy_regs);
-err_iounmap_capreg:
- iounmap(udc->cap_regs);
-err_put_clk:
- for (clk_i--; clk_i >= 0; clk_i--)
- clk_put(udc->clk[clk_i]);
- kfree(udc);
+
return retval;
}
@@ -2489,12 +2445,12 @@ static const struct dev_pm_ops mv_udc_pm_ops = {
};
#endif
-static void mv_udc_shutdown(struct platform_device *dev)
+static void mv_udc_shutdown(struct platform_device *pdev)
{
struct mv_udc *udc;
u32 mode;
- udc = platform_get_drvdata(dev);
+ udc = platform_get_drvdata(pdev);
/* reset controller mode to IDLE */
mv_udc_enable(udc);
mode = readl(&udc->op_regs->usbmode);
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 03/26] usb: gadget: mv_udc: fix the warning of mv_udc_remove
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
The __exit_p() will be NULL if MODULE is no defined.
It will cause the warning. Removing __exit_p to remove
the warning.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/gadget/mv_udc_core.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index e2fdc8e..910c4b5 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -2461,7 +2461,7 @@ static void mv_udc_shutdown(struct platform_device *pdev)
static struct platform_driver udc_driver = {
.probe = mv_udc_probe,
- .remove = __exit_p(mv_udc_remove),
+ .remove = mv_udc_remove,
.shutdown = mv_udc_shutdown,
.driver = {
.owner = THIS_MODULE,
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 04/26] usb: otg: mv_otg: use devm_xxx for probe
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
use devm_xxx for otg driver probe. So we do need care about
the resources release in driver remove or failure handling
in driver probe.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/otg/mv_otg.c | 82 ++++++++++++---------------------------------
1 files changed, 22 insertions(+), 60 deletions(-)
diff --git a/drivers/usb/otg/mv_otg.c b/drivers/usb/otg/mv_otg.c
index eace975..da2d60c 100644
--- a/drivers/usb/otg/mv_otg.c
+++ b/drivers/usb/otg/mv_otg.c
@@ -662,18 +662,9 @@ static struct attribute_group inputs_attr_group = {
int mv_otg_remove(struct platform_device *pdev)
{
struct mv_otg *mvotg = platform_get_drvdata(pdev);
- int clk_i;
sysfs_remove_group(&mvotg->pdev->dev.kobj, &inputs_attr_group);
- if (mvotg->irq)
- free_irq(mvotg->irq, mvotg);
-
- if (mvotg->pdata->vbus)
- free_irq(mvotg->pdata->vbus->irq, mvotg);
- if (mvotg->pdata->id)
- free_irq(mvotg->pdata->id->irq, mvotg);
-
if (mvotg->qwork) {
flush_workqueue(mvotg->qwork);
destroy_workqueue(mvotg->qwork);
@@ -681,21 +672,9 @@ int mv_otg_remove(struct platform_device *pdev)
mv_otg_disable(mvotg);
- if (mvotg->cap_regs)
- iounmap(mvotg->cap_regs);
-
- if (mvotg->phy_regs)
- iounmap(mvotg->phy_regs);
-
- for (clk_i = 0; clk_i <= mvotg->clknum; clk_i++)
- clk_put(mvotg->clk[clk_i]);
-
usb_remove_phy(&mvotg->phy);
platform_set_drvdata(pdev, NULL);
- kfree(mvotg->phy.otg);
- kfree(mvotg);
-
return 0;
}
@@ -714,17 +693,15 @@ static int mv_otg_probe(struct platform_device *pdev)
}
size = sizeof(*mvotg) + sizeof(struct clk *) * pdata->clknum;
- mvotg = kzalloc(size, GFP_KERNEL);
+ mvotg = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
if (!mvotg) {
dev_err(&pdev->dev, "failed to allocate memory!\n");
return -ENOMEM;
}
- otg = kzalloc(sizeof *otg, GFP_KERNEL);
- if (!otg) {
- kfree(mvotg);
+ otg = devm_kzalloc(&pdev->dev, sizeof(*otg), GFP_KERNEL);
+ if (!otg)
return -ENOMEM;
- }
platform_set_drvdata(pdev, mvotg);
@@ -733,18 +710,18 @@ static int mv_otg_probe(struct platform_device *pdev)
mvotg->clknum = pdata->clknum;
for (clk_i = 0; clk_i < mvotg->clknum; clk_i++) {
- mvotg->clk[clk_i] = clk_get(&pdev->dev, pdata->clkname[clk_i]);
+ mvotg->clk[clk_i] = devm_clk_get(&pdev->dev,
+ pdata->clkname[clk_i]);
if (IS_ERR(mvotg->clk[clk_i])) {
retval = PTR_ERR(mvotg->clk[clk_i]);
- goto err_put_clk;
+ return retval;
}
}
mvotg->qwork = create_singlethread_workqueue("mv_otg_queue");
if (!mvotg->qwork) {
dev_dbg(&pdev->dev, "cannot create workqueue for OTG\n");
- retval = -ENOMEM;
- goto err_put_clk;
+ return -ENOMEM;
}
INIT_DELAYED_WORK(&mvotg->work, mv_otg_work);
@@ -772,7 +749,7 @@ static int mv_otg_probe(struct platform_device *pdev)
goto err_destroy_workqueue;
}
- mvotg->phy_regs = ioremap(r->start, resource_size(r));
+ mvotg->phy_regs = devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (mvotg->phy_regs == NULL) {
dev_err(&pdev->dev, "failed to map phy I/O memory\n");
retval = -EFAULT;
@@ -784,21 +761,21 @@ static int mv_otg_probe(struct platform_device *pdev)
if (r == NULL) {
dev_err(&pdev->dev, "no I/O memory resource defined\n");
retval = -ENODEV;
- goto err_unmap_phyreg;
+ goto err_destroy_workqueue;
}
- mvotg->cap_regs = ioremap(r->start, resource_size(r));
+ mvotg->cap_regs = devm_ioremap(&pdev->dev, r->start, resource_size(r));
if (mvotg->cap_regs == NULL) {
dev_err(&pdev->dev, "failed to map I/O memory\n");
retval = -EFAULT;
- goto err_unmap_phyreg;
+ goto err_destroy_workqueue;
}
/* we will acces controller register, so enable the udc controller */
retval = mv_otg_enable_internal(mvotg);
if (retval) {
dev_err(&pdev->dev, "mv otg enable error %d\n", retval);
- goto err_unmap_capreg;
+ goto err_destroy_workqueue;
}
mvotg->op_regs =
@@ -806,9 +783,9 @@ static int mv_otg_probe(struct platform_device *pdev)
+ (readl(mvotg->cap_regs) & CAPLENGTH_MASK));
if (pdata->id) {
- retval = request_threaded_irq(pdata->id->irq, NULL,
- mv_otg_inputs_irq,
- IRQF_ONESHOT, "id", mvotg);
+ retval = devm_request_threaded_irq(&pdev->dev, pdata->id->irq,
+ NULL, mv_otg_inputs_irq,
+ IRQF_ONESHOT, "id", mvotg);
if (retval) {
dev_info(&pdev->dev,
"Failed to request irq for ID\n");
@@ -818,9 +795,9 @@ static int mv_otg_probe(struct platform_device *pdev)
if (pdata->vbus) {
mvotg->clock_gating = 1;
- retval = request_threaded_irq(pdata->vbus->irq, NULL,
- mv_otg_inputs_irq,
- IRQF_ONESHOT, "vbus", mvotg);
+ retval = devm_request_threaded_irq(&pdev->dev, pdata->vbus->irq,
+ NULL, mv_otg_inputs_irq,
+ IRQF_ONESHOT, "vbus", mvotg);
if (retval) {
dev_info(&pdev->dev,
"Failed to request irq for VBUS, "
@@ -844,7 +821,7 @@ static int mv_otg_probe(struct platform_device *pdev)
}
mvotg->irq = r->start;
- if (request_irq(mvotg->irq, mv_otg_irq, IRQF_SHARED,
+ if (devm_request_irq(&pdev->dev, mvotg->irq, mv_otg_irq, IRQF_SHARED,
driver_name, mvotg)) {
dev_err(&pdev->dev, "Request irq %d for OTG failed\n",
mvotg->irq);
@@ -857,14 +834,14 @@ static int mv_otg_probe(struct platform_device *pdev)
if (retval < 0) {
dev_err(&pdev->dev, "can't register transceiver, %d\n",
retval);
- goto err_free_irq;
+ goto err_disable_clk;
}
retval = sysfs_create_group(&pdev->dev.kobj, &inputs_attr_group);
if (retval < 0) {
dev_dbg(&pdev->dev,
"Can't register sysfs attr group: %d\n", retval);
- goto err_set_transceiver;
+ goto err_remove_phy;
}
spin_lock_init(&mvotg->wq_lock);
@@ -879,30 +856,15 @@ static int mv_otg_probe(struct platform_device *pdev)
return 0;
-err_set_transceiver:
+err_remove_phy:
usb_remove_phy(&mvotg->phy);
-err_free_irq:
- free_irq(mvotg->irq, mvotg);
err_disable_clk:
- if (pdata->vbus)
- free_irq(pdata->vbus->irq, mvotg);
- if (pdata->id)
- free_irq(pdata->id->irq, mvotg);
mv_otg_disable_internal(mvotg);
-err_unmap_capreg:
- iounmap(mvotg->cap_regs);
-err_unmap_phyreg:
- iounmap(mvotg->phy_regs);
err_destroy_workqueue:
flush_workqueue(mvotg->qwork);
destroy_workqueue(mvotg->qwork);
-err_put_clk:
- for (clk_i--; clk_i >= 0; clk_i--)
- clk_put(mvotg->clk[clk_i]);
platform_set_drvdata(pdev, NULL);
- kfree(otg);
- kfree(mvotg);
return retval;
}
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 05/26] usb: host: ehci-mv: remove unused variable
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
Signed-off-by: Chao Xie <chao.xie@marvell.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
---
drivers/usb/host/ehci-mv.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/drivers/usb/host/ehci-mv.c b/drivers/usb/host/ehci-mv.c
index 6c56297..3065809 100644
--- a/drivers/usb/host/ehci-mv.c
+++ b/drivers/usb/host/ehci-mv.c
@@ -302,7 +302,6 @@ static int mv_ehci_remove(struct platform_device *pdev)
{
struct ehci_hcd_mv *ehci_mv = platform_get_drvdata(pdev);
struct usb_hcd *hcd = ehci_mv->hcd;
- int clk_i;
if (hcd->rh_registered)
usb_remove_hcd(hcd);
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 06/26] usb: gadget: mv_udc: fix the value of tranceiver
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
usally we will use udc->tranceiver == NULL or
udc->tranceiver != NULL.
So when failed to get the udc->tranceiver by usb_get_phy(), we
directly set udc->tranceiver to be NULL.
Then the source code will not need macro IS_ERR_OR_NULL() for
udc->tranceiver judgement. It can reduce the line size and make
the judgement simple.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/gadget/mv_udc_core.c | 15 ++++++++++-----
1 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index 910c4b5..c8cf959 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -1394,7 +1394,7 @@ static int mv_udc_start(struct usb_gadget *gadget,
spin_unlock_irqrestore(&udc->lock, flags);
- if (!IS_ERR_OR_NULL(udc->transceiver)) {
+ if (udc->transceiver) {
retval = otg_set_peripheral(udc->transceiver->otg,
&udc->gadget);
if (retval) {
@@ -2174,9 +2174,14 @@ static int mv_udc_probe(struct platform_device *pdev)
udc->dev = pdev;
#ifdef CONFIG_USB_OTG_UTILS
- if (pdata->mode == MV_USB_MODE_OTG)
+ if (pdata->mode == MV_USB_MODE_OTG) {
udc->transceiver = devm_usb_get_phy(&pdev->dev,
USB_PHY_TYPE_USB2);
+ if (IS_ERR_OR_NULL(udc->transceiver)) {
+ udc->transceiver = NULL;
+ return -ENODEV;
+ }
+ }
#endif
udc->clknum = pdata->clknum;
@@ -2319,7 +2324,7 @@ static int mv_udc_probe(struct platform_device *pdev)
eps_init(udc);
/* VBUS detect: we can disable/enable clock on demand.*/
- if (!IS_ERR_OR_NULL(udc->transceiver))
+ if (udc->transceiver)
udc->clock_gating = 1;
else if (pdata->vbus) {
udc->clock_gating = 1;
@@ -2386,7 +2391,7 @@ static int mv_udc_suspend(struct device *dev)
udc = dev_get_drvdata(dev);
/* if OTG is enabled, the following will be done in OTG driver*/
- if (!IS_ERR_OR_NULL(udc->transceiver))
+ if (udc->transceiver)
return 0;
if (udc->pdata->vbus && udc->pdata->vbus->poll)
@@ -2421,7 +2426,7 @@ static int mv_udc_resume(struct device *dev)
udc = dev_get_drvdata(dev);
/* if OTG is enabled, the following will be done in OTG driver*/
- if (!IS_ERR_OR_NULL(udc->transceiver))
+ if (udc->transceiver)
return 0;
if (!udc->clock_gating) {
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 07/26] usb: gadget: mv_udc: make mv_udc depends on ARCH_MMP or ARCH_PXA
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
Only ARCH_PXA and ARCH_MMP will use mv_udc.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/gadget/Kconfig | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 14625fd..53943f7 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -319,6 +319,7 @@ config USB_S3C_HSUDC
config USB_MV_UDC
tristate "Marvell USB2.0 Device Controller"
+ depends on ARCH_PXA || ARCH_MMP
help
Marvell Socs (including PXA and MMP series) include a high speed
USB2.0 OTG controller, which can be configured as high speed or
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 08/26] usb: phy: mv_usb2: add PHY driver for marvell usb2 controller
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
The PHY is seperated from usb controller.
The usb controller used in marvell pxa168/pxa910/mmp2 are same,
but PHY initialization may be different.
the usb controller can support udc/otg/ehci, and for each of
the mode, it need PHY to initialized before use the controller.
Direclty writing the phy driver will make the usb controller
driver to be simple and portable.
The PHY driver will be used by marvell udc/otg/ehci.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/phy/Kconfig | 7 +
drivers/usb/phy/Makefile | 1 +
drivers/usb/phy/mv_usb2_phy.c | 454 ++++++++++++++++++++++++++++++++++
include/linux/platform_data/mv_usb.h | 9 +-
include/linux/usb/mv_usb2.h | 43 ++++
5 files changed, 511 insertions(+), 3 deletions(-)
create mode 100644 drivers/usb/phy/mv_usb2_phy.c
create mode 100644 include/linux/usb/mv_usb2.h
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 5de6e7f..7bfce04 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -45,3 +45,10 @@ config USB_RCAR_PHY
To compile this driver as a module, choose M here: the
module will be called rcar-phy.
+
+config MV_USB2_PHY
+ tristate "Marvell USB 2.0 PHY Driver"
+ depends on ARCH_PXA || ARCH_MMP
+ help
+ Enable this to support Marvell USB 2.0 phy driver for Marvell
+ SoC.
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 1a579a8..e33e09c 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_USB_ISP1301) += isp1301.o
obj-$(CONFIG_MV_U3D_PHY) += mv_u3d_phy.o
obj-$(CONFIG_USB_EHCI_TEGRA) += tegra_usb_phy.o
obj-$(CONFIG_USB_RCAR_PHY) += rcar-phy.o
+obj-$(CONFIG_MV_USB2_PHY) += mv_usb2_phy.o
diff --git a/drivers/usb/phy/mv_usb2_phy.c b/drivers/usb/phy/mv_usb2_phy.c
new file mode 100644
index 0000000..c2bccae
--- /dev/null
+++ b/drivers/usb/phy/mv_usb2_phy.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * Author:
+ * Chao Xie <xiechao.mail@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/resource.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/mv_usb.h>
+#include <linux/usb/mv_usb2.h>
+
+/* phy regs */
+#define UTMI_REVISION 0x0
+#define UTMI_CTRL 0x4
+#define UTMI_PLL 0x8
+#define UTMI_TX 0xc
+#define UTMI_RX 0x10
+#define UTMI_IVREF 0x14
+#define UTMI_T0 0x18
+#define UTMI_T1 0x1c
+#define UTMI_T2 0x20
+#define UTMI_T3 0x24
+#define UTMI_T4 0x28
+#define UTMI_T5 0x2c
+#define UTMI_RESERVE 0x30
+#define UTMI_USB_INT 0x34
+#define UTMI_DBG_CTL 0x38
+#define UTMI_OTG_ADDON 0x3c
+
+/* For UTMICTRL Register */
+#define UTMI_CTRL_USB_CLK_EN (1 << 31)
+/* pxa168 */
+#define UTMI_CTRL_SUSPEND_SET1 (1 << 30)
+#define UTMI_CTRL_SUSPEND_SET2 (1 << 29)
+#define UTMI_CTRL_RXBUF_PDWN (1 << 24)
+#define UTMI_CTRL_TXBUF_PDWN (1 << 11)
+
+#define UTMI_CTRL_INPKT_DELAY_SHIFT 30
+#define UTMI_CTRL_INPKT_DELAY_SOF_SHIFT 28
+#define UTMI_CTRL_PU_REF_SHIFT 20
+#define UTMI_CTRL_ARC_PULLDN_SHIFT 12
+#define UTMI_CTRL_PLL_PWR_UP_SHIFT 1
+#define UTMI_CTRL_PWR_UP_SHIFT 0
+
+/* For UTMI_PLL Register */
+#define UTMI_PLL_PLLCALI12_SHIFT 29
+#define UTMI_PLL_PLLCALI12_MASK (0x3 << 29)
+
+#define UTMI_PLL_PLLVDD18_SHIFT 27
+#define UTMI_PLL_PLLVDD18_MASK (0x3 << 27)
+
+#define UTMI_PLL_PLLVDD12_SHIFT 25
+#define UTMI_PLL_PLLVDD12_MASK (0x3 << 25)
+
+#define UTMI_PLL_CLK_BLK_EN_SHIFT 24
+#define CLK_BLK_EN (0x1 << 24)
+#define PLL_READY (0x1 << 23)
+#define KVCO_EXT (0x1 << 22)
+#define VCOCAL_START (0x1 << 21)
+
+#define UTMI_PLL_KVCO_SHIFT 15
+#define UTMI_PLL_KVCO_MASK (0x7 << 15)
+
+#define UTMI_PLL_ICP_SHIFT 12
+#define UTMI_PLL_ICP_MASK (0x7 << 12)
+
+#define UTMI_PLL_FBDIV_SHIFT 4
+#define UTMI_PLL_FBDIV_MASK (0xFF << 4)
+
+#define UTMI_PLL_REFDIV_SHIFT 0
+#define UTMI_PLL_REFDIV_MASK (0xF << 0)
+
+/* For UTMI_TX Register */
+#define UTMI_TX_REG_EXT_FS_RCAL_SHIFT 27
+#define UTMI_TX_REG_EXT_FS_RCAL_MASK (0xf << 27)
+
+#define UTMI_TX_REG_EXT_FS_RCAL_EN_SHIFT 26
+#define UTMI_TX_REG_EXT_FS_RCAL_EN_MASK (0x1 << 26)
+
+#define UTMI_TX_TXVDD12_SHIFT 22
+#define UTMI_TX_TXVDD12_MASK (0x3 << 22)
+
+#define UTMI_TX_CK60_PHSEL_SHIFT 17
+#define UTMI_TX_CK60_PHSEL_MASK (0xf << 17)
+
+#define UTMI_TX_IMPCAL_VTH_SHIFT 14
+#define UTMI_TX_IMPCAL_VTH_MASK (0x7 << 14)
+
+#define REG_RCAL_START (0x1 << 12)
+
+#define UTMI_TX_LOW_VDD_EN_SHIFT 11
+
+#define UTMI_TX_AMP_SHIFT 0
+#define UTMI_TX_AMP_MASK (0x7 << 0)
+
+/* For UTMI_RX Register */
+#define UTMI_REG_SQ_LENGTH_SHIFT 15
+#define UTMI_REG_SQ_LENGTH_MASK (0x3 << 15)
+
+#define UTMI_RX_SQ_THRESH_SHIFT 4
+#define UTMI_RX_SQ_THRESH_MASK (0xf << 4)
+
+#define UTMI_OTG_ADDON_OTG_ON (1 << 0)
+
+enum mv_usb2_phy_type {
+ PXA168_USB,
+ PXA910_USB,
+ MMP2_USB,
+};
+
+static struct mv_usb2_phy *the_phy;
+
+struct mv_usb2_phy *mv_usb2_get_phy(void)
+{
+ return the_phy;
+}
+EXPORT_SYMBOL(mv_usb2_get_phy);
+
+static unsigned int u2o_get(void __iomem *base, unsigned int offset)
+{
+ return readl(base + offset);
+}
+
+static void u2o_set(void __iomem *base, unsigned int offset,
+ unsigned int value)
+{
+ u32 reg;
+
+ reg = readl(base + offset);
+ reg |= value;
+ writel(reg, base + offset);
+ readl(base + offset);
+}
+
+static void u2o_clear(void __iomem *base, unsigned int offset,
+ unsigned int value)
+{
+ u32 reg;
+
+ reg = readl(base + offset);
+ reg &= ~value;
+ writel(reg, base + offset);
+ readl(base + offset);
+}
+
+static void u2o_write(void __iomem *base, unsigned int offset,
+ unsigned int value)
+{
+ writel(value, base + offset);
+ readl(base + offset);
+}
+
+static int _usb_phy_init(struct mv_usb2_phy *mv_phy)
+{
+ struct platform_device *pdev = mv_phy->pdev;
+ unsigned int loops = 0;
+ void __iomem *base = mv_phy->base;
+
+ dev_dbg(&pdev->dev, "phy init\n");
+
+ /* Initialize the USB PHY power */
+ if (mv_phy->type == PXA910_USB) {
+ u2o_set(base, UTMI_CTRL, (1<<UTMI_CTRL_INPKT_DELAY_SOF_SHIFT)
+ | (1<<UTMI_CTRL_PU_REF_SHIFT));
+ }
+
+ u2o_set(base, UTMI_CTRL, 1<<UTMI_CTRL_PLL_PWR_UP_SHIFT);
+ u2o_set(base, UTMI_CTRL, 1<<UTMI_CTRL_PWR_UP_SHIFT);
+
+ /* UTMI_PLL settings */
+ u2o_clear(base, UTMI_PLL, UTMI_PLL_PLLVDD18_MASK
+ | UTMI_PLL_PLLVDD12_MASK | UTMI_PLL_PLLCALI12_MASK
+ | UTMI_PLL_FBDIV_MASK | UTMI_PLL_REFDIV_MASK
+ | UTMI_PLL_ICP_MASK | UTMI_PLL_KVCO_MASK);
+
+ u2o_set(base, UTMI_PLL, 0xee<<UTMI_PLL_FBDIV_SHIFT
+ | 0xb<<UTMI_PLL_REFDIV_SHIFT | 3<<UTMI_PLL_PLLVDD18_SHIFT
+ | 3<<UTMI_PLL_PLLVDD12_SHIFT | 3<<UTMI_PLL_PLLCALI12_SHIFT
+ | 1<<UTMI_PLL_ICP_SHIFT | 3<<UTMI_PLL_KVCO_SHIFT);
+
+ /* UTMI_TX */
+ u2o_clear(base, UTMI_TX, UTMI_TX_REG_EXT_FS_RCAL_EN_MASK
+ | UTMI_TX_TXVDD12_MASK | UTMI_TX_CK60_PHSEL_MASK
+ | UTMI_TX_IMPCAL_VTH_MASK | UTMI_TX_REG_EXT_FS_RCAL_MASK
+ | UTMI_TX_AMP_MASK);
+ u2o_set(base, UTMI_TX, 3<<UTMI_TX_TXVDD12_SHIFT
+ | 4<<UTMI_TX_CK60_PHSEL_SHIFT | 4<<UTMI_TX_IMPCAL_VTH_SHIFT
+ | 8<<UTMI_TX_REG_EXT_FS_RCAL_SHIFT | 3<<UTMI_TX_AMP_SHIFT);
+
+ /* UTMI_RX */
+ u2o_clear(base, UTMI_RX, UTMI_RX_SQ_THRESH_MASK
+ | UTMI_REG_SQ_LENGTH_MASK);
+ u2o_set(base, UTMI_RX, 7<<UTMI_RX_SQ_THRESH_SHIFT
+ | 2<<UTMI_REG_SQ_LENGTH_SHIFT);
+
+ /* UTMI_IVREF */
+ if (mv_phy->type == PXA168_USB)
+ /* fixing Microsoft Altair board interface with NEC hub issue -
+ * Set UTMI_IVREF from 0x4a3 to 0x4bf */
+ u2o_write(base, UTMI_IVREF, 0x4bf);
+
+ /* toggle VCOCAL_START bit of UTMI_PLL */
+ udelay(200);
+ u2o_set(base, UTMI_PLL, VCOCAL_START);
+ udelay(40);
+ u2o_clear(base, UTMI_PLL, VCOCAL_START);
+
+ /* toggle REG_RCAL_START bit of UTMI_TX */
+ udelay(400);
+ u2o_set(base, UTMI_TX, REG_RCAL_START);
+ udelay(40);
+ u2o_clear(base, UTMI_TX, REG_RCAL_START);
+ udelay(400);
+
+ /* Make sure PHY PLL is ready */
+ loops = 0;
+ while ((u2o_get(base, UTMI_PLL) & PLL_READY) == 0) {
+ mdelay(1);
+ loops++;
+ if (loops > 100) {
+ dev_warn(&pdev->dev, "calibrate timeout, UTMI_PLL %x\n",
+ u2o_get(base, UTMI_PLL));
+ break;
+ }
+ }
+
+ if (mv_phy->type == PXA168_USB) {
+ u2o_set(base, UTMI_RESERVE, 1 << 5);
+ /* Turn on UTMI PHY OTG extension */
+ u2o_write(base, UTMI_OTG_ADDON, 1);
+ }
+
+ return 0;
+}
+
+static int _usb_phy_shutdown(struct mv_usb2_phy *mv_phy)
+{
+ void __iomem *base = mv_phy->base;
+
+ if (mv_phy->type == PXA168_USB)
+ u2o_clear(base, UTMI_OTG_ADDON, UTMI_OTG_ADDON_OTG_ON);
+
+ u2o_clear(base, UTMI_CTRL, UTMI_CTRL_RXBUF_PDWN);
+ u2o_clear(base, UTMI_CTRL, UTMI_CTRL_TXBUF_PDWN);
+ u2o_clear(base, UTMI_CTRL, UTMI_CTRL_USB_CLK_EN);
+ u2o_clear(base, UTMI_CTRL, 1<<UTMI_CTRL_PWR_UP_SHIFT);
+ u2o_clear(base, UTMI_CTRL, 1<<UTMI_CTRL_PLL_PWR_UP_SHIFT);
+
+ return 0;
+}
+
+static int usb_phy_init(struct mv_usb2_phy *mv_phy)
+{
+ int i = 0;
+
+ mutex_lock(&mv_phy->phy_lock);
+ if (mv_phy->refcount++ == 0) {
+ for (i = 0; i < mv_phy->clks_num; i++)
+ clk_prepare_enable(mv_phy->clks[i]);
+ _usb_phy_init(mv_phy);
+ }
+ mutex_unlock(&mv_phy->phy_lock);
+ return 0;
+}
+
+static void usb_phy_shutdown(struct mv_usb2_phy *mv_phy)
+{
+ int i = 0;
+
+ mutex_lock(&mv_phy->phy_lock);
+ if (mv_phy->refcount++ == 0) {
+ _usb_phy_shutdown(mv_phy);
+ for (i = 0; i < mv_phy->clks_num; i++)
+ clk_disable_unprepare(mv_phy->clks[i]);
+ }
+ mutex_unlock(&mv_phy->phy_lock);
+}
+
+static struct of_device_id usb_phy_dt_ids[] = {
+ { .compatible = "mrvl,pxa168-usb-phy", .data = (void *)PXA168_USB},
+ { .compatible = "mrvl,pxa910-usb-phy", .data = (void *)PXA910_USB},
+ { .compatible = "mrvl,mmp2-usb-phy", .data = (void *)MMP2_USB},
+ {}
+};
+MODULE_DEVICE_TABLE(of, usb_phy_dt_ids);
+
+static int usb_phy_parse_dt(struct platform_device *pdev,
+ struct mv_usb2_phy *mv_phy)
+{
+ struct device_node *np = pdev->dev.of_node;
+ const struct of_device_id *of_id =
+ of_match_device(usb_phy_dt_ids, &pdev->dev);
+ unsigned int clks_num;
+ int i, ret;
+ const char *clk_name;
+
+ if (!np)
+ return 1;
+
+ clks_num = of_property_count_strings(np, "clocks");
+ if (clks_num < 0) {
+ dev_err(&pdev->dev, "failed to get clock number\n");
+ return clks_num;
+ }
+
+ mv_phy->clks = devm_kzalloc(&pdev->dev,
+ sizeof(struct clk *) * clks_num, GFP_KERNEL);
+ if (mv_phy->clks == NULL) {
+ dev_err(&pdev->dev,
+ "failed to allocate mempory for clocks");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < clks_num; i++) {
+ ret = of_property_read_string_index(np, "clocks", i,
+ &clk_name);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to read clocks\n");
+ return ret;
+ }
+ mv_phy->clks[i] = devm_clk_get(&pdev->dev, clk_name);
+ if (IS_ERR(mv_phy->clks[i])) {
+ dev_err(&pdev->dev, "failed to get clock %s\n",
+ clk_name);
+ return PTR_ERR(mv_phy->clks[i]);
+ }
+ }
+
+ mv_phy->clks_num = clks_num;
+ mv_phy->type = (enum mv_usb2_phy_type)(of_id->data);
+
+ return 0;
+}
+
+static int usb_phy_probe(struct platform_device *pdev)
+{
+ struct mv_usb2_phy *mv_phy;
+ struct resource *r;
+ int ret, i;
+
+ mv_phy = devm_kzalloc(&pdev->dev, sizeof(*mv_phy), GFP_KERNEL);
+ if (mv_phy == NULL) {
+ dev_err(&pdev->dev, "failed to allocate memory\n");
+ return -ENOMEM;
+ }
+ mutex_init(&mv_phy->phy_lock);
+
+ ret = usb_phy_parse_dt(pdev, mv_phy);
+ /* no CONFIG_OF */
+ if (ret > 0) {
+ struct mv_usb_phy_platform_data *pdata
+ = pdev->dev.platform_data;
+ const struct platform_device_id *id
+ = platform_get_device_id(pdev);
+
+ if (pdata == NULL || id == NULL) {
+ dev_err(&pdev->dev,
+ "missing platform_data or id_entry\n");
+ return -ENODEV;
+ }
+ mv_phy->type = (unsigned int)(id->driver_data);
+ mv_phy->clks_num = pdata->clknum;
+ mv_phy->clks = devm_kzalloc(&pdev->dev,
+ sizeof(struct clk *) * mv_phy->clks_num, GFP_KERNEL);
+ if (mv_phy->clks == NULL) {
+ dev_err(&pdev->dev,
+ "failed to allocate mempory for clocks");
+ return -ENOMEM;
+ }
+ for (i = 0; i < mv_phy->clks_num; i++)
+ mv_phy->clks[i] = devm_clk_get(&pdev->dev,
+ pdata->clkname[i]);
+ if (IS_ERR(mv_phy->clks[i])) {
+ dev_err(&pdev->dev, "failed to get clock %s\n",
+ pdata->clkname[i]);
+ return PTR_ERR(mv_phy->clks[i]);
+ }
+ } else if (ret < 0) {
+ dev_err(&pdev->dev, "error parse dt\n");
+ return ret;
+ }
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (r == NULL) {
+ dev_err(&pdev->dev, "no phy I/O memory resource defined\n");
+ return -ENODEV;
+ }
+ mv_phy->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+ if (mv_phy->base == NULL) {
+ dev_err(&pdev->dev, "error map register base\n");
+ return -EBUSY;
+ }
+ platform_set_drvdata(pdev, mv_phy);
+ mv_phy->pdev = pdev;
+ mv_phy->init = usb_phy_init;
+ mv_phy->shutdown = usb_phy_shutdown;
+
+ platform_set_drvdata(pdev, mv_phy);
+
+ the_phy = mv_phy;
+
+ dev_info(&pdev->dev, "mv usb2 phy initialized\n");
+
+ return 0;
+}
+
+static int usb_phy_remove(struct platform_device *pdev)
+{
+ the_phy = NULL;
+
+ return 0;
+}
+
+static struct platform_device_id usb_phy_ids[] = {
+ { .name = "pxa168-usb-phy", .driver_data = PXA168_USB },
+ { .name = "pxa910-usb-phy", .driver_data = PXA910_USB },
+ { .name = "mmp2-usb-phy", .driver_data = MMP2_USB },
+ {}
+};
+
+static struct platform_driver usb_phy_driver = {
+ .probe = usb_phy_probe,
+ .remove = usb_phy_remove,
+ .driver = {
+ .name = "pxa168-usb-phy",
+ .of_match_table = usb_phy_dt_ids,
+ },
+ .id_table = usb_phy_ids,
+};
+
+static int __init mv_usb2_phy_init(void)
+{
+ return platform_driver_register(&usb_phy_driver);
+}
+arch_initcall(mv_usb2_phy_init);
diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h
index 944b01d..fd3d1b4 100644
--- a/include/linux/platform_data/mv_usb.h
+++ b/include/linux/platform_data/mv_usb.h
@@ -47,9 +47,12 @@ struct mv_usb_platform_data {
/* Force a_bus_req to be asserted */
unsigned int otg_force_a_bus_req:1;
- int (*phy_init)(void __iomem *regbase);
- void (*phy_deinit)(void __iomem *regbase);
int (*set_vbus)(unsigned int vbus);
- int (*private_init)(void __iomem *opregs, void __iomem *phyregs);
};
+
+struct mv_usb_phy_platform_data {
+ unsigned int clknum;
+ char **clkname;
+};
+
#endif
diff --git a/include/linux/usb/mv_usb2.h b/include/linux/usb/mv_usb2.h
new file mode 100644
index 0000000..9744a97
--- /dev/null
+++ b/include/linux/usb/mv_usb2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2012 Marvell Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __MV_USB2_H
+#define __MV_USB2_H
+
+#include <linux/clk.h>
+
+struct mv_usb2_phy {
+ struct platform_device *pdev;
+ struct mutex phy_lock;
+ unsigned int refcount;
+ unsigned int type;
+ void __iomem *base;
+ struct clk **clks;
+ unsigned int clks_num;
+
+ int (*init)(struct mv_usb2_phy *mv_phy);
+ void (*shutdown)(struct mv_usb2_phy *mv_phy);
+};
+
+#if defined(CONFIG_MV_USB2_PHY) || defined(CONFIG_MV_USB2_PHY_MODULE)
+
+extern struct mv_usb2_phy *mv_usb2_get_phy(void);
+
+#else
+
+struct mv_usb2_phy *mv_usb2_get_phy(void) { return NULL; }
+
+#endif
+
+#endif
--
1.7.4.1
^ permalink raw reply related
* [V5 PATCH 09/26] usb: gadget: mv_udc: use PHY driver for udc
From: Chao Xie @ 2013-01-24 6:38 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1359009530-28182-1-git-send-email-chao.xie@marvell.com>
Originaly, udc driver will call the callbacks in platform data
for PHY initialization and shut down.
With PHY driver, it will call the APIs provided by PHY driver
for PHY initialization and shut down. It removes the callbacks
in platform data, and at same time it removes one block in the
way of enabling device tree for udc driver.
Signed-off-by: Chao Xie <chao.xie@marvell.com>
---
drivers/usb/gadget/mv_udc.h | 2 +-
drivers/usb/gadget/mv_udc_core.c | 23 ++++++++---------------
2 files changed, 9 insertions(+), 16 deletions(-)
diff --git a/drivers/usb/gadget/mv_udc.h b/drivers/usb/gadget/mv_udc.h
index 9073436..4851b2b 100644
--- a/drivers/usb/gadget/mv_udc.h
+++ b/drivers/usb/gadget/mv_udc.h
@@ -180,7 +180,6 @@ struct mv_udc {
struct mv_cap_regs __iomem *cap_regs;
struct mv_op_regs __iomem *op_regs;
- void __iomem *phy_regs;
unsigned int max_eps;
struct mv_dqh *ep_dqh;
size_t ep_dqh_size;
@@ -217,6 +216,7 @@ struct mv_udc {
struct work_struct vbus_work;
struct workqueue_struct *qwork;
+ struct mv_usb2_phy *phy;
struct usb_phy *transceiver;
struct mv_usb_platform_data *pdata;
diff --git a/drivers/usb/gadget/mv_udc_core.c b/drivers/usb/gadget/mv_udc_core.c
index c8cf959..76fcfc7 100644
--- a/drivers/usb/gadget/mv_udc_core.c
+++ b/drivers/usb/gadget/mv_udc_core.c
@@ -35,6 +35,7 @@
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/platform_data/mv_usb.h>
+#include <linux/usb/mv_usb2.h>
#include <asm/unaligned.h>
#include "mv_udc.h"
@@ -1121,8 +1122,8 @@ static int mv_udc_enable_internal(struct mv_udc *udc)
dev_dbg(&udc->dev->dev, "enable udc\n");
udc_clock_enable(udc);
- if (udc->pdata->phy_init) {
- retval = udc->pdata->phy_init(udc->phy_regs);
+ if (udc->phy->init) {
+ retval = udc->phy->init(udc->phy);
if (retval) {
dev_err(&udc->dev->dev,
"init phy error %d\n", retval);
@@ -1147,8 +1148,8 @@ static void mv_udc_disable_internal(struct mv_udc *udc)
{
if (udc->active) {
dev_dbg(&udc->dev->dev, "disable udc\n");
- if (udc->pdata->phy_deinit)
- udc->pdata->phy_deinit(udc->phy_regs);
+ if (udc->phy->shutdown)
+ udc->phy->shutdown(udc->phy);
udc_clock_disable(udc);
udc->active = 0;
}
@@ -2194,7 +2195,7 @@ static int mv_udc_probe(struct platform_device *pdev)
}
}
- r = platform_get_resource_byname(udc->dev, IORESOURCE_MEM, "capregs");
+ r = platform_get_resource(udc->dev, IORESOURCE_MEM, 0);
if (r == NULL) {
dev_err(&pdev->dev, "no I/O memory resource defined\n");
return -ENODEV;
@@ -2207,17 +2208,9 @@ static int mv_udc_probe(struct platform_device *pdev)
return -EBUSY;
}
- r = platform_get_resource_byname(udc->dev, IORESOURCE_MEM, "phyregs");
- if (r == NULL) {
- dev_err(&pdev->dev, "no phy I/O memory resource defined\n");
+ udc->phy = mv_usb2_get_phy();
+ if (udc->phy == NULL)
return -ENODEV;
- }
-
- udc->phy_regs = ioremap(r->start, resource_size(r));
- if (udc->phy_regs == NULL) {
- dev_err(&pdev->dev, "failed to map phy I/O memory\n");
- return -EBUSY;
- }
/* we will acces controller register, so enable the clk */
retval = mv_udc_enable_internal(udc);
--
1.7.4.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox