Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 4/8] coresight: platform: Cleanup coresight connection handling
From: Suzuki K Poulose @ 2018-06-01 13:16 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527858967-16047-1-git-send-email-suzuki.poulose@arm.com>

The platform code parses the component connections and populates
a platform-description of the output connections in arrays of fields
(which is never freed). This is later copied in the coresight_register
to a newly allocated area, represented by coresight_connection(s).

This patch cleans up the code dealing with connections by making
use of the "coresight_connection" structure right at the platform
code and lets the generic driver simply re-use information provided
by the platform.

Thus making it reader friendly as well as avoiding the wastage of
unused memory.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 drivers/hwtracing/coresight/coresight.c    | 21 +---------------
 drivers/hwtracing/coresight/of_coresight.c | 40 +++++++++++-------------------
 include/linux/coresight.h                  |  9 ++-----
 3 files changed, 17 insertions(+), 53 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 389c4ba..3b3756e 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -961,13 +961,11 @@ postcore_initcall(coresight_init);
 
 struct coresight_device *coresight_register(struct coresight_desc *desc)
 {
-	int i;
 	int ret;
 	int link_subtype;
 	int nr_refcnts = 1;
 	atomic_t *refcnts = NULL;
 	struct coresight_device *csdev;
-	struct coresight_connection *conns = NULL;
 
 	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
 	if (!csdev) {
@@ -996,22 +994,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	csdev->nr_inport = desc->pdata->nr_inport;
 	csdev->nr_outport = desc->pdata->nr_outport;
 
-	/* Initialise connections if there is at least one outport */
-	if (csdev->nr_outport) {
-		conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
-		if (!conns) {
-			ret = -ENOMEM;
-			goto err_kzalloc_conns;
-		}
-
-		for (i = 0; i < csdev->nr_outport; i++) {
-			conns[i].outport = desc->pdata->outports[i];
-			conns[i].child_name = desc->pdata->child_names[i];
-			conns[i].child_port = desc->pdata->child_ports[i];
-		}
-	}
-
-	csdev->conns = conns;
+	csdev->conns = desc->pdata->conns;
 
 	csdev->type = desc->type;
 	csdev->subtype = desc->subtype;
@@ -1039,8 +1022,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	return csdev;
 
 err_device_register:
-	kfree(conns);
-err_kzalloc_conns:
 	kfree(refcnts);
 err_kzalloc_refcnts:
 	kfree(csdev);
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index e0deab0..a3f3416 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -77,26 +77,13 @@ static void of_coresight_get_ports(const struct device_node *node,
 static int of_coresight_alloc_memory(struct device *dev,
 			struct coresight_platform_data *pdata)
 {
-	/* List of output port on this component */
-	pdata->outports = devm_kzalloc(dev, pdata->nr_outport *
-				       sizeof(*pdata->outports),
-				       GFP_KERNEL);
-	if (!pdata->outports)
-		return -ENOMEM;
-
-	/* Children connected to this component via @outports */
-	pdata->child_names = devm_kzalloc(dev, pdata->nr_outport *
-					  sizeof(*pdata->child_names),
-					  GFP_KERNEL);
-	if (!pdata->child_names)
-		return -ENOMEM;
-
-	/* Port number on the child this component is connected to */
-	pdata->child_ports = devm_kzalloc(dev, pdata->nr_outport *
-					  sizeof(*pdata->child_ports),
-					  GFP_KERNEL);
-	if (!pdata->child_ports)
-		return -ENOMEM;
+	if (pdata->nr_outport) {
+		pdata->conns = devm_kzalloc(dev, pdata->nr_outport *
+					    sizeof(*pdata->conns),
+					    GFP_KERNEL);
+		if (!pdata->conns)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -122,8 +109,9 @@ struct coresight_platform_data *
 of_get_coresight_platform_data(struct device *dev,
 			       const struct device_node *node)
 {
-	int i = 0, ret = 0;
+	int ret = 0;
 	struct coresight_platform_data *pdata;
+	struct coresight_connection *conn;
 	struct of_endpoint endpoint, rendpoint;
 	struct device *rdev;
 	struct device_node *ep = NULL;
@@ -145,6 +133,7 @@ of_get_coresight_platform_data(struct device *dev,
 		if (ret)
 			return ERR_PTR(ret);
 
+		conn = pdata->conns;
 		/* Iterate through each port to discover topology */
 		do {
 			/* Get a handle on a port */
@@ -166,7 +155,7 @@ of_get_coresight_platform_data(struct device *dev,
 				continue;
 
 			/* The local out port number */
-			pdata->outports[i] = endpoint.port;
+			conn->outport = endpoint.port;
 
 			/*
 			 * Get a handle on the remote endpoint and the device
@@ -186,10 +175,9 @@ of_get_coresight_platform_data(struct device *dev,
 			if (!rdev)
 				return ERR_PTR(-EPROBE_DEFER);
 
-			pdata->child_names[i] = dev_name(rdev);
-			pdata->child_ports[i] = rendpoint.port;
-
-			i++;
+			conn->child_name = dev_name(rdev);
+			conn->child_port = rendpoint.port;
+			conn++;
 		} while (ep);
 	}
 
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 32aaa1c..c2363bb 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -89,20 +89,15 @@ struct coresight_dev_subtype {
  * @cpu:	the CPU a source belongs to. Only applicable for ETM/PTMs.
  * @name:	name of the component as shown under sysfs.
  * @nr_inport:	number of input ports for this component.
- * @outports:	list of remote endpoint port number.
- * @child_names:name of all child components connected to this device.
- * @child_ports:child component port number the current component is
-		connected  to.
  * @nr_outport:	number of output ports for this component.
+ * @conns:	Array of nr_outport connections from this component
  */
 struct coresight_platform_data {
 	int cpu;
 	const char *name;
 	int nr_inport;
-	int *outports;
-	const char **child_names;
-	int *child_ports;
 	int nr_outport;
+	struct coresight_connection *conns;
 };
 
 /**
-- 
2.7.4

^ permalink raw reply related

* [RFC PATCH 3/8] coresight: Cleanup platform description data
From: Suzuki K Poulose @ 2018-06-01 13:16 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527858967-16047-1-git-send-email-suzuki.poulose@arm.com>

Nobody uses the "clk" field in struct coresight_platform_data.
Remove it.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 include/linux/coresight.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index d950dad..32aaa1c 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -94,7 +94,6 @@ struct coresight_dev_subtype {
  * @child_ports:child component port number the current component is
 		connected  to.
  * @nr_outport:	number of output ports for this component.
- * @clk:	The clock this component is associated to.
  */
 struct coresight_platform_data {
 	int cpu;
@@ -104,7 +103,6 @@ struct coresight_platform_data {
 	const char **child_names;
 	int *child_ports;
 	int nr_outport;
-	struct clk *clk;
 };
 
 /**
-- 
2.7.4

^ permalink raw reply related

* [RFC PATCH 2/8] coresight: Fix remote endpoint parsing
From: Suzuki K Poulose @ 2018-06-01 13:16 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527858967-16047-1-git-send-email-suzuki.poulose@arm.com>

When parsing the remote endpoint of an output port, we do :
     rport = of_graph_get_remote_port(ep);
     rparent = of_graph_get_remote_port_parent(ep);

and then parse the "remote_port" as if it was the remote endpoint,
which is wrong. The code worked fine because we used endpoint number
as the port number. Let us fix it and optimise a bit as:

     remote_ep = of_graph_get_remote_endpoint(ep);
     if (remote_ep)
        remote_parent = of_graph_get_port_parent(remote_ep);

and then, parse the remote_ep for the port/endpoint details.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 drivers/hwtracing/coresight/of_coresight.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 7c37544..e0deab0 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -128,7 +128,7 @@ of_get_coresight_platform_data(struct device *dev,
 	struct device *rdev;
 	struct device_node *ep = NULL;
 	struct device_node *rparent = NULL;
-	struct device_node *rport = NULL;
+	struct device_node *rep = NULL;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -169,16 +169,17 @@ of_get_coresight_platform_data(struct device *dev,
 			pdata->outports[i] = endpoint.port;
 
 			/*
-			 * Get a handle on the remote port and parent
-			 * attached to it.
+			 * Get a handle on the remote endpoint and the device
+			 * it is attached to.
 			 */
-			rparent = of_graph_get_remote_port_parent(ep);
-			rport = of_graph_get_remote_port(ep);
-
-			if (!rparent || !rport)
+			rep = of_graph_get_remote_endpoint(ep);
+			if (!rep)
+				continue;
+			rparent = of_graph_get_port_parent(rep);
+			if (!rparent)
 				continue;
 
-			if (of_graph_parse_endpoint(rport, &rendpoint))
+			if (of_graph_parse_endpoint(rep, &rendpoint))
 				continue;
 
 			rdev = of_coresight_get_endpoint_device(rparent);
@@ -186,7 +187,7 @@ of_get_coresight_platform_data(struct device *dev,
 				return ERR_PTR(-EPROBE_DEFER);
 
 			pdata->child_names[i] = dev_name(rdev);
-			pdata->child_ports[i] = rendpoint.id;
+			pdata->child_ports[i] = rendpoint.port;
 
 			i++;
 		} while (ep);
-- 
2.7.4

^ permalink raw reply related

* [RFC PATCH 1/8] dts: binding: coresight: Document graph bindings
From: Suzuki K Poulose @ 2018-06-01 13:16 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527858967-16047-1-git-send-email-suzuki.poulose@arm.com>

Before we updat the bindings, document the current graph bindings
and usage of additional properties.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
---
 .../devicetree/bindings/arm/coresight.txt          | 28 ++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt
index 15ac8e8..bd36e40 100644
--- a/Documentation/devicetree/bindings/arm/coresight.txt
+++ b/Documentation/devicetree/bindings/arm/coresight.txt
@@ -52,9 +52,7 @@ its hardware characteristcs.
 	  clocks the core of that coresight component. The latter clock
 	  is optional.
 
-	* port or ports: The representation of the component's port
-	  layout using the generic DT graph presentation found in
-	  "bindings/graph.txt".
+	* port or ports: see "Graph bindings for Coresight" below
 
 * Additional required properties for System Trace Macrocells (STM):
 	* reg: along with the physical base address and length of the register
@@ -71,7 +69,7 @@ its hardware characteristcs.
 	  AMBA markee):
 		- "arm,coresight-replicator"
 
-	* port or ports: same as above.
+	* port or ports: see "Graph bindings for Coresight" below.
 
 * Optional properties for ETM/PTMs:
 
@@ -86,6 +84,28 @@ its hardware characteristcs.
 	* arm,buffer-size: size of contiguous buffer space for TMC ETR
 	 (embedded trace router)
 
+Graph bindings for Coresight
+-------------------------------
+
+Coresight components are interconnected to create a data path for the flow of
+trace data generated from the "sources" to their collection points "sink". Each
+coresight component must describe the "input" and "output" connections.
+The connections must be described via generic DT graph bindings as described
+by the "bindings/graph.txt", where each "port" along with an "endpoint" component
+represents a hardware port and the connection.
+
+Since it is possible to have multiple connections for any coresight component with
+a specific direction of data flow, each connection must define the following
+properties to uniquely identify the connection details.
+
+ * Direction of the data flow w.r.t the component :
+   Each input port must have the following property defined at the "endpoint"
+   for the port.
+	"slave-mode"
+
+ * Hardware Port number at the component:
+     -  The hardware port number is assumed to be the address of the "port" component.
+
 
 Example:
 
-- 
2.7.4

^ permalink raw reply related

* [RFC PATCH 0/8] coresight: Update device tree bindings
From: Suzuki K Poulose @ 2018-06-01 13:15 UTC (permalink / raw)
  To: linux-arm-kernel

Coresight uses DT graph bindings to describe the connections of the
components. However we have some undocumented usage of the bindings
to describe some of the properties of the connections.

The coresight driver needs to know the hardware ports invovled
in the connection and the direction of data flow to effectively
manage the trace sessions. So far we have relied on the "port"
address (as described by the generic graph bindings) to represent
the hardware port of the component for a connection.

The hardware uses separate numbering scheme for input and output
ports, which implies, we could have two different (input and output)
ports with the same port number. This could create problems in the
graph bindings where the label of the port wouldn't match the address.

e.g, with the existing bindings we get :

	port at 0{				// Output port 0
		reg = <0>;
		...
	};

	port at 1{
		reg = <0>;		// Input port 0
		endpoint {
			slave-mode;
			...
		};
	};

With the new enforcement in the DT rules, mismatches in label and address
are not allowed (as see in the case for port at 1). So, we need a new mechanism
to describe the hardware port number reliably.

Also, we relied on an undocumented "slave-mode" property (see the above
example) to indicate if the port is an input port. Let us formalise and
switch to a new property to describe the direction of data flow.

There were three options considered for the hardware port number scheme:

 1) Use natural ordering in the DT to infer the hardware port number.
  i.e, Mandate that the all ports are listed in the DT and in the ascending
  order for each class (input and output respectively).
   Pros :
      - We don't need new properties and if the existing DTS list them in
        order (which most of them do), they work out of the box.
   Cons :
      - We must list all the ports even if the system cannot/shouldn't use
        it.
      - It is prone to human errors (if the order is not kept).

 2) Use an explicit property to list both the direction and the hw port
    number and direction. Define "coresight,hwid" as 2 member array of u32,
    where the members are port number and the direction respectively.
	e.g

	port at 0{
		reg = <0>;
		endpoint {
			coresight,hwid = <0 1>;	// Port # 0, Output
		}
	};

	port at 1{
		reg = <1>;
		endpoint {
			coresight,hwid = <0 0>;	// Port # 0, Input
		};
	};

	Pros:
	  - The bindings are formal but not so reader friendly and could potentially
	    lead to human errors.
	Cons:
	  - Backward compatiblity is lost.
 3) Use explicit properties (implemented in the series) for the hardware
    port id and direction. We define a new property "coresight,hwid" for
    each endpoint in coresight devices to specify the hardware port number
    explicitly. Also use a separate property "direction" to specify the
    direction of the data flow.

	e.g,

	port at 0{
		reg = <0>;
		endpoint {
			direction = <1>;	// Output
			coresight,hwid = <0>;	// Port # 0
		}
	};

	port at 1{
		reg = <1>;
		endpoint {
			direction = <0>;	// Input
			coresight,hwid = <0>;	// Port # 0
		};
	};

    Pros:
       - The bindings are formal and reader friendly, and less prone to errors.
    Cons:
       - Backward compatibility is lost.


This series achieves implements Option (3) listed above while still retaining
the backward compatibility. The driver now issues a warning (once) when it
encounters the old bindings.
It also cleans up the platform parsing code to reduce the memory usage by
reusing the platform description. The series also includes the
changes for Juno platform as an example. If there are no objections
to the approach, I could post the series, converting all the
in-kernel DTS to the new binding.

Suzuki K Poulose (8):
  dts: binding: coresight: Document graph bindings
  coresight: Fix remote endpoint parsing
  coresight: Cleanup platform description data
  coresight: platform: Cleanup coresight connection handling
  coresight: Handle errors in finding input/output ports
  dts: coresight: Clean up the device tree graph bindings
  dts: coresight: Define new bindings for direction of data flow
  dts: juno: Update coresight bindings for hw port

 .../devicetree/bindings/arm/coresight.txt          |  52 ++++++++--
 arch/arm64/boot/dts/arm/juno-base.dtsi             |  82 +++++++++++----
 arch/arm64/boot/dts/arm/juno.dts                   |   5 +-
 drivers/hwtracing/coresight/coresight.c            |  28 ++----
 drivers/hwtracing/coresight/of_coresight.c         | 111 ++++++++++++---------
 include/linux/coresight.h                          |  11 +-
 6 files changed, 181 insertions(+), 108 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH v2]irqchip/irq-gic-v3:Avoid a waste of LPI resource
From: Marc Zyngier @ 2018-06-01 12:56 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <8898674D84E3B24BA3A2D289B872026A69F3004A@G01JPEXMBKW03>

Hi Lei,

On 01/06/18 13:44, Zhang, Lei wrote:
> Hi Marc
> 
> I have reviewed your patch.	 
> I think the approach is same between your patch and mine.
> Your patch is simpler and more beautiful, and match our bus's requirement.
> 
> I have only one question.
> According your patch, if there are no enough lpis, amount of lpis required
> will be divide by 2. it means someone want 16 lpis, maybe they can only get 8?
> I don?t' understand why we need it.

That's the way the MSI allocation works in the kernel.

A driver asks a number of MSIs (let's imagine, for example, one MSI per
CPU in the system), but the underlying HW can only provide a smaller number.

Instead of failing and just returning an error, we reduce the allocation
in order to provide the driver with something. If that's not enough,
well, the driver itself will have the opportunity to give up.

See pci_alloc_irq_vectors_affinity(), which takes a min and a max number
of vectors, for example.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply

* [PATCH] media: atmel-isi: drop unnecessary while loop
From: Nicholas Mc Guire @ 2018-06-01 12:46 UTC (permalink / raw)
  To: linux-arm-kernel

From: Nicholas Mc Guire <hofrat@osadl.org>

As there is no way this can loop it actually makes no sense to have
a while(1){} around the body - all three possible paths end in a return
statement. 

Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
Fixes: commit c1d82b895380 "[media] atmel-isi: move out of soc_camera to atmel"
---

The diff output is unfortunately not that readable - essentially only
the outer while(1){ } was removed. 

Patch was compile tested with: x86_64_defconfig + CONFIG_MEDIA_SUPPORT=y
MEDIA_CAMERA_SUPPORT=y, CONFIG_MEDIA_CONTROLLER=y, V4L_PLATFORM_DRIVERS=y
OF=y, CONFIG_COMPILE_TEST=y, CONFIG_VIDEO_ATMEL_ISI=y

Compile testing atmel-isi.c shows some sparse warnings. Seems to be due to
sizeof operator being applied to a union (not related to the function being
changed though).

Patch is against 4.17-rc7 (localversion-next is next-20180531)

 drivers/media/platform/atmel/atmel-isi.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c
index e5be21a..85fc7b9 100644
--- a/drivers/media/platform/atmel/atmel-isi.c
+++ b/drivers/media/platform/atmel/atmel-isi.c
@@ -1106,23 +1106,21 @@ static int isi_graph_parse(struct atmel_isi *isi, struct device_node *node)
 	struct device_node *ep = NULL;
 	struct device_node *remote;
 
-	while (1) {
-		ep = of_graph_get_next_endpoint(node, ep);
-		if (!ep)
-			return -EINVAL;
-
-		remote = of_graph_get_remote_port_parent(ep);
-		if (!remote) {
-			of_node_put(ep);
-			return -EINVAL;
-		}
+	ep = of_graph_get_next_endpoint(node, ep);
+	if (!ep)
+		return -EINVAL;
 
-		/* Remote node to connect */
-		isi->entity.node = remote;
-		isi->entity.asd.match_type = V4L2_ASYNC_MATCH_FWNODE;
-		isi->entity.asd.match.fwnode = of_fwnode_handle(remote);
-		return 0;
+	remote = of_graph_get_remote_port_parent(ep);
+	if (!remote) {
+		of_node_put(ep);
+		return -EINVAL;
 	}
+
+	/* Remote node to connect */
+	isi->entity.node = remote;
+	isi->entity.asd.match_type = V4L2_ASYNC_MATCH_FWNODE;
+	isi->entity.asd.match.fwnode = of_fwnode_handle(remote);
+	return 0;
 }
 
 static int isi_graph_init(struct atmel_isi *isi)
-- 
2.1.4

^ permalink raw reply related

* [PATCH v2]irqchip/irq-gic-v3:Avoid a waste of LPI resource
From: Zhang, Lei @ 2018-06-01 12:44 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <86d0xhaucm.wl-marc.zyngier@arm.com>

Hi Marc

I have reviewed your patch.	 
I think the approach is same between your patch and mine.
Your patch is simpler and more beautiful, and match our bus's requirement.

I have only one question.
According your patch, if there are no enough lpis, amount of lpis required
will be divide by 2. it means someone want 16 lpis, maybe they can only get 8?
I don?t' understand why we need it.

 static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids)
 {
         unsigned long *bitmap = NULL;
         int err = 0;

         do {
                 err = alloc_lpi_range(nr_irqs, base);
                 if (!err)
                         break;

                 nr_irqs /= 2;
         } while (nr_irqs > 0);


Best Regards,
Lei Zhang zhang.lei at jp.fujitsu.com
FUJITSU LIMITED

> -----Original Message-----
> From: linux-arm-kernel
> [mailto:linux-arm-kernel-bounces at lists.infradead.org] On Behalf Of
> Marc Zyngier
> Sent: Monday, May 28, 2018 2:20 AM
> To: Zhang, Lei/? ?
> Cc: 'linux-arm-kernel at lists.infradead.org'
> Subject: Re: [PATCH v2]irqchip/irq-gic-v3:Avoid a waste of LPI resource
> 
> On Fri, 25 May 2018 13:45:24 +0100,
> Zhang, Lei wrote:
> 
 
> I've just prototyped what I had in mind for this at [1] (see the first
> patch for the allocator itself). As you can see, the allocator is a
> lot simpler, yet it has all the flexibility you would need, The
> drawback is of course that freeing LPIs is quite expensive, but that
> almost never happens. And if it does, we can switch to a tree or some
> more efficient data structure.
> 
> Thanks,
> 
> 	M.
> 
> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.gi
> t/log/?h=irq/lpi-allocator
> 

^ permalink raw reply

* [PATCH v12 5/5] arm64: Allow huge io mappings again
From: Chintan Pandya @ 2018-06-01 12:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527856758-27169-1-git-send-email-cpandya@codeaurora.org>

Huge mappings have had stability issues due to stale
TLB entry and memory leak issues. Since, those are
addressed in this series of patches, it is now safe
to allow huge mappings.

Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
---
 arch/arm64/mm/mmu.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6e7e16c..c65abc4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -934,15 +934,8 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 {
 	pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
 					pgprot_val(mk_sect_prot(prot)));
-	pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
-
-	/* Only allow permission changes for now */
-	if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
-				   pud_val(new_pud)))
-		return 0;
-
 	BUG_ON(phys & ~PUD_MASK);
-	set_pud(pudp, new_pud);
+	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
@@ -950,15 +943,8 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 {
 	pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
 					pgprot_val(mk_sect_prot(prot)));
-	pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
-
-	/* Only allow permission changes for now */
-	if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
-				   pmd_val(new_pmd)))
-		return 0;
-
 	BUG_ON(phys & ~PMD_MASK);
-	set_pmd(pmdp, new_pmd);
+	set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

^ permalink raw reply related

* [PATCH v12 4/5] arm64: Implement page table free interfaces
From: Chintan Pandya @ 2018-06-01 12:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527856758-27169-1-git-send-email-cpandya@codeaurora.org>

Implement pud_free_pmd_page() and pmd_free_pte_page().

Implementation requires,
 1) Clearing off the current pud/pmd entry
 2) Invalidate TLB which could have previously
    valid but not stale entry
 3) Freeing of the un-used next level page tables

Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
---
 arch/arm64/mm/mmu.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8ae5d7a..6e7e16c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -45,6 +45,7 @@
 #include <asm/memblock.h>
 #include <asm/mmu_context.h>
 #include <asm/ptdump.h>
+#include <asm/tlbflush.h>
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
@@ -977,12 +978,41 @@ int pmd_clear_huge(pmd_t *pmdp)
 	return 1;
 }
 
-int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
 {
-	return pud_none(*pud);
+	pte_t *table;
+	pmd_t pmd;
+
+	pmd = READ_ONCE(*pmdp);
+	if (pmd_present(pmd)) {
+		table = pmd_page_vaddr(pmd);
+		pmd_clear(pmdp);
+		__flush_tlb_kernel_pgtable(addr);
+		pte_free_kernel(NULL, table);
+	}
+	return 1;
 }
 
-int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
 {
-	return pmd_none(*pmd);
+	pmd_t *table;
+	pmd_t *entry;
+	pud_t pud;
+	unsigned long next, end;
+
+	pud = READ_ONCE(*pudp);
+	if (pud_present(pud)) {
+		table = pud_page_vaddr(pud);
+		entry = table;
+		next = addr;
+		end = addr + PUD_SIZE;
+		do {
+			pmd_free_pte_page(entry, next);
+		} while (entry++, next += PMD_SIZE, next != end);
+
+		pud_clear(pudp);
+		__flush_tlb_kernel_pgtable(addr);
+		pmd_free(NULL, table);
+	}
+	return 1;
 }
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

^ permalink raw reply related

* [PATCH v12 3/5] arm64: pgtable: Add p*d_page_vaddr helper macros
From: Chintan Pandya @ 2018-06-01 12:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527856758-27169-1-git-send-email-cpandya@codeaurora.org>

Add helper macros to give virtual references to page
tables. These will be used while freeing dangling
page tables.

Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
---
 arch/arm64/include/asm/pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7c4c8f3..ef4047f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -580,6 +580,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
+#define pmd_page_vaddr(pmd) __va(pmd_page_paddr(pmd))
+#define pud_page_vaddr(pud) __va(pud_page_paddr(pud))
+
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
 
 /* to find an entry in a page-table-directory */
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

^ permalink raw reply related

* [PATCH v12 2/5] arm64: tlbflush: Introduce __flush_tlb_kernel_pgtable
From: Chintan Pandya @ 2018-06-01 12:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527856758-27169-1-git-send-email-cpandya@codeaurora.org>

Add an interface to invalidate intermediate page tables
from TLB for kernel.

Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
---
 arch/arm64/include/asm/tlbflush.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index dfc61d7..a4a1901 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -218,6 +218,13 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
 	dsb(ish);
 }
 
+static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+{
+	unsigned long addr = __TLBI_VADDR(kaddr, 0);
+
+	__tlbi(vaae1is, addr);
+	dsb(ish);
+}
 #endif
 
 #endif
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

^ permalink raw reply related

* [PATCH v12 1/5] ioremap: Update pgtable free interfaces with addr
From: Chintan Pandya @ 2018-06-01 12:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1527856758-27169-1-git-send-email-cpandya@codeaurora.org>

From: Chintan Pandya <cpandya@codeaurora.org>

The following kernel panic was observed on ARM64 platform due to a stale
TLB entry.

 1. ioremap with 4K size, a valid pte page table is set.
 2. iounmap it, its pte entry is set to 0.
 3. ioremap the same address with 2M size, update its pmd entry with
    a new value.
 4. CPU may hit an exception because the old pmd entry is still in TLB,
    which leads to a kernel panic.

Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
table") has addressed this panic by falling to pte mappings in the above
case on ARM64.

To support pmd mappings in all cases, TLB purge needs to be performed
in this case on ARM64.

Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
so that TLB purge can be added later in seprate patches.

[toshi at hpe.com: merge changes, rewrite patch description]
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: <stable@vger.kernel.org>

---
 arch/arm64/mm/mmu.c           | 4 ++--
 arch/x86/mm/pgtable.c         | 8 +++++---
 include/asm-generic/pgtable.h | 8 ++++----
 lib/ioremap.c                 | 4 ++--
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 493ff75..8ae5d7a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp)
 	return 1;
 }
 
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
 
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ffc8c13..37e3cba 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -718,11 +718,12 @@ int pmd_clear_huge(pmd_t *pmd)
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
  *
  * Context: The pud range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	pmd_t *pmd;
 	int i;
@@ -733,7 +734,7 @@ int pud_free_pmd_page(pud_t *pud)
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
 
 	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i]))
+		if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
 			return 0;
 
 	pud_clear(pud);
@@ -745,11 +746,12 @@ int pud_free_pmd_page(pud_t *pud)
 /**
  * pmd_free_pte_page - Clear pmd entry and free pte page.
  * @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
  *
  * Context: The pmd range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
 
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639a..b081794 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1019,8 +1019,8 @@ static inline int p4d_clear_huge(p4d_t *p4d)
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
 {
@@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
 	return 0;
 }
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return 0;
 }
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return 0;
 }
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bba..517f585 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
-		    pmd_free_pte_page(pmd)) {
+		    pmd_free_pte_page(pmd, addr)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
-		    pud_free_pmd_page(pud)) {
+		    pud_free_pmd_page(pud, addr)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

^ permalink raw reply related

* [PATCH v12 0/5] Fix issues with huge mapping in ioremap for ARM64
From: Chintan Pandya @ 2018-06-01 12:39 UTC (permalink / raw)
  To: linux-arm-kernel

This series of patches re-bring huge vmap back for arm64.

Patch 1/4 has been taken by Toshi in his series of patches
by name "[PATCH v3 0/3] fix free pmd/pte page handlings on x86"
to avoid merge conflict with this series.

These patches are tested on 4.16 kernel with Cortex-A75 based SoC.

The test used for verifying these patches is a stress test on
ioremap/unmap which tries to re-use same io-address but changes
size of mapping randomly i.e. 4K to 2M to 1G etc. The same test
used to reproduce 3rd level translation fault without these fixes
(and also of course with Revert "arm64: Enforce BBM for huge IO/VMAP
mappings" being part of the tree).

These patches can also go into '-stable' branch (if accepted)
for 4.6 onwards.

>From V11->V12:
 - Introduced p*d_page_vaddr helper macros and using them
 - Rebased over current tip

>From V10->V11:
 - Updated pud_free_pmd_page & pmd_free_pte_page to use consistent
   conding style
 - Fixed few bugs by using pmd_page_paddr & pud_page_paddr

>From V9->V10:
 - Updated commit log for patch 1/4 by Toshi
 - Addressed review comments by Will on patch 3/4

>From V8->V9:
 - Used __TLBI_VADDR macros in new TLB flush API

>From V7->V8:
 - Properly fixed compilation issue in x86 file

>From V6->V7:
 - Fixed compilation issue in x86 case
 - V6 patches were not properly enumarated

>From V5->V6:
 - Use __flush_tlb_kernel_pgtable() for both PUD and PMD. Remove
   "bool tlb_inv" based variance as it is not need now
 - Re-naming for consistency

>From V4->V5:
 - Add new API __flush_tlb_kernel_pgtable(unsigned long addr)
   for kernel addresses

>From V3->V4:
 - Add header for 'addr' in x86 implementation
 - Re-order pmd/pud clear and table free
 - Avoid redundant TLB invalidatation in one perticular case

>From V2->V3:
 - Use the exisiting page table free interface to do arm64
   specific things

>From V1->V2:
 - Rebased my patches on top of "[PATCH v2 1/2] mm/vmalloc:
   Add interfaces to free unmapped page table"
 - Honored BBM for ARM64

Chintan Pandya (5):
  ioremap: Update pgtable free interfaces with addr
  arm64: tlbflush: Introduce __flush_tlb_kernel_pgtable
  arm64: pgtable: Add p*d_page_vaddr helper macros
  arm64: Implement page table free interfaces
  arm64: Re-enable huge io mappings

 arch/arm64/include/asm/pgtable.h  |  3 +++
 arch/arm64/include/asm/tlbflush.h |  7 +++++
 arch/arm64/mm/mmu.c               | 56 +++++++++++++++++++++++++--------------
 arch/x86/mm/pgtable.c             |  8 +++---
 include/asm-generic/pgtable.h     |  8 +++---
 lib/ioremap.c                     |  4 +--
 6 files changed, 57 insertions(+), 29 deletions(-)

-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation
Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
Collaborative Project

^ permalink raw reply

* [PATCH 1/4] arm64/mm: pass swapper_pg_dir as an argument to __enable_mmu()
From: James Morse @ 2018-06-01 12:35 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601080833.11762-1-yaojun8558363@gmail.com>

Hi Jun Yao,

On 01/06/18 09:08, Jun Yao wrote:
> Introduce __pa_swapper_pg_dir to save physical address of
> swapper_pg_dir. And pass it as an argument to __enable_mmu().


> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index b0853069702f..e3bb44b4b6c6 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -706,6 +706,8 @@ secondary_startup:
>  	 * Common entry point for secondary CPUs.
>  	 */
>  	bl	__cpu_setup			// initialise processor
> +	adrp    x25, idmap_pg_dir
> +	ldr_l   x26, __pa_swapper_pg_dir
>  	bl	__enable_mmu
>  	ldr	x8, =__secondary_switched
>  	br	x8

'__pa_swapper_pg_dir' here is read with the MMU off, but you write it with the
MMU on. To make this safe, you need to clean this value to the 'PoC' each time
you write it, so that secondaries here will read the new value.

Please put __pa_swapper_pg_dir in the mmuoff.data.read section, this ensures
hibernate will re-clean this value to the PoC once it has restored the memory.


> @@ -761,10 +763,8 @@ ENTRY(__enable_mmu)
>  	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
>  	b.ne	__no_granule_support
>  	update_early_cpu_boot_status 0, x1, x2
> -	adrp	x1, idmap_pg_dir
> -	adrp	x2, swapper_pg_dir
> -	phys_to_ttbr x3, x1
> -	phys_to_ttbr x4, x2
> +	phys_to_ttbr x3, x25
> +	phys_to_ttbr x4, x26
>  	msr	ttbr0_el1, x3			// load TTBR0
>  	msr	ttbr1_el1, x4			// load TTBR1
>  	isb

__enable_mmu() is now taking arguments in x25 and x26. Please update the comment
above it that describes the 'x0' argument.

Why do you pass the idmap ttbr value in too? Its always the same.


> @@ -823,6 +823,8 @@ __primary_switch:
>  	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
>  #endif
>  
> +	adrp    x25, idmap_pg_dir
> +	adrp    x26, swapper_pg_dir
>  	bl	__enable_mmu
>  #ifdef CONFIG_RELOCATABLE
>  	bl	__relocate_kernel

> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 2dbb2c9f1ec1..41eee333f91a 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -55,6 +55,8 @@ u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
>  u64 kimage_voffset __ro_after_init;
>  EXPORT_SYMBOL(kimage_voffset);
>  
> +phys_addr_t __pa_swapper_pg_dir;

See the definition of 'secondary_holding_pen_release' for an example of how to
put this in the mmuoff.data.read section.


> +
>  /*
>   * Empty_zero_page is a special page that is used for zero-initialized data
>   * and COW.
> @@ -631,6 +633,8 @@ void __init paging_init(void)
>  	phys_addr_t pgd_phys = early_pgtable_alloc();
>  	pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
>  
> +	__pa_swapper_pg_dir = __pa_symbol(swapper_pg_dir);

This write needs to be cleaned to the PoC, see write_pen_release() for an example.


>  	map_kernel(pgdp);
>  	map_mem(pgdp);
>  


Thanks,

James

^ permalink raw reply

* [PATCH 0/4] lib/vsprintf: Remove atomic-unsafe support for printk format %pCr
From: Petr Mladek @ 2018-06-01 11:47 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CA+55aFxHHSRnDbbNGq6SwNAwEt0EhX15n_pNqon3MzGYJgMJmA@mail.gmail.com>

On Fri 2018-06-01 06:00:47, Linus Torvalds wrote:
> On Fri, Jun 1, 2018 at 4:29 AM Geert Uytterhoeven
> <geert+renesas@glider.be> wrote:
> >
> > This patch series:
> >   - Changes all existing users of "%pCr" to print the result of
> >     clk_get_rate() directly, which is safe as they all do this in task
> >     context only,
> >   - Removes support for the "%pCr" printk format.
> 
> Looks good to me.
> 
> What tree will this go through? The normal printk one? Just checking
> that this doesn't end up falling through the cracks because nobody
> knows who would take it...

I will take it via printk.git. There already is bunch of vsprintf
changes for-4.18.

Best Regards,
Petr

^ permalink raw reply

* [PATCHv2 19/19] arm64: implement syscall wrappers
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

To minimize the risk of userspace-controlled values being used under
speculation, this patch adds pt_regs based syscall wrappers for arm64,
which pass the minimum set of required userspace values to syscall
implementations. For each syscall, a wrapper which takes a pt_regs
argument is automatically generated, and this extracts the arguments
before calling the "real" syscall implementation.

Each syscall has three functions generated:

* __do_<compat_>sys_<name> is the "real" syscall implementation, with
  the expected prototype.

* __se_<compat_>sys_<name> is the sign-extension/narrowing wrapper,
  inherited from common code. This takes a series of long parameters,
  casting each to the requisite types required by the "real" syscall
  implementation in __do_<compat_>sys_<name>.

  This wrapper *may* not be necessary on arm64 given the AAPCS rules on
  unused register bits, but it seemed safer to keep the wrapper for now.

* __arm64_<compat_>_sys_<name> takes a struct pt_regs pointer, and
  extracts *only* the relevant register values, passing these on to the
  __se_<compat_>sys_<name> wrapper.

The syscall invocation code is updated to handle the calling convention
required by __arm64_<compat_>_sys_<name>, and passes a single struct
pt_regs pointer.

The compiler can fold the syscall implementation and its wrappers, such
that the overhead of this approach is minimized.

Note that we play games with sys_ni_syscall(). It can't be defined with
SYSCALL_DEFINE0() because we must avoid the possibility of error
injection. Additionally, there are a couple of locations where we need
to call it from C code, and we don't (currently) have a
ksys_ni_syscall().  While it has no wrapper, passing in a redundant
pt_regs pointer is benign per the AAPCS.

When ARCH_HAS_SYSCALL_WRAPPER is selected, no prototype is define for
sys_ni_syscall(). Since we need to treat it differently for in-kernel
calls and the syscall tables, the prototype is defined as-required.

The wrappers are largely the same as their x86 counterparts, but
simplified as we don't have a variety of compat calling conventions that
require separate stubs. Unlike x86, we have some zero-argument compat
syscalls, and must define COMPAT_SYSCALL_DEFINE0() to ensure that these
are also given an __arm64_compat_sys_ prefix.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                       |  1 +
 arch/arm64/include/asm/syscall_wrapper.h | 80 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/sys.c                  | 10 +++-
 arch/arm64/kernel/sys32.c                |  9 +++-
 arch/arm64/kernel/syscall.c              |  8 +---
 arch/arm64/kernel/traps.c                |  2 +
 6 files changed, 101 insertions(+), 9 deletions(-)
 create mode 100644 arch/arm64/include/asm/syscall_wrapper.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0b98a6c42454..4c18c6b3c915 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -23,6 +23,7 @@ config ARM64
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
+	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK if !PREEMPT
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..a4477e515b79
--- /dev/null
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - arm64 specific wrappers to syscall definitions
+ *
+ * Based on arch/x86/include/asm_syscall_wrapper.h
+ */
+
+#ifndef __ASM_SYSCALL_WRAPPER_H
+#define __ASM_SYSCALL_WRAPPER_H
+
+#define SC_ARM64_REGS_TO_ARGS(x, ...)				\
+	__MAP(x,__SC_ARGS					\
+	      ,,regs->regs[0],,regs->regs[1],,regs->regs[2]	\
+	      ,,regs->regs[3],,regs->regs[4],,regs->regs[5])
+
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)						\
+	asmlinkage long __arm64_compat_sys##name(const struct pt_regs *regs);		\
+	ALLOW_ERROR_INJECTION(__arm64_compat_sys##name, ERRNO);				\
+	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long __arm64_compat_sys##name(const struct pt_regs *regs)		\
+	{										\
+		return __se_compat_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}										\
+	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	{										\
+		return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
+	}										\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define COMPAT_SYSCALL_DEFINE0(sname)					\
+	asmlinkage long __arm64_compat_sys_##sname(void);		\
+	ALLOW_ERROR_INJECTION(__arm64_compat_sys_##sname, ERRNO);	\
+	asmlinkage long __arm64_compat_sys_##sname(void)
+
+#define COND_SYSCALL_COMPAT(name) \
+	cond_syscall(__arm64_compat_sys_##name);
+
+#define COMPAT_SYS_NI(name) \
+	SYSCALL_ALIAS(__arm64_compat_sys_##name, sys_ni_posix_timers);
+
+#endif /* CONFIG_COMPAT */
+
+#define __SYSCALL_DEFINEx(x, name, ...)						\
+	asmlinkage long __arm64_sys##name(const struct pt_regs *regs);		\
+	ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO);			\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long __arm64_sys##name(const struct pt_regs *regs)		\
+	{									\
+		return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}									\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	{									\
+		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__MAP(x,__SC_TEST,__VA_ARGS__);					\
+		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
+		return ret;							\
+	}									\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#ifndef SYSCALL_DEFINE0
+#define SYSCALL_DEFINE0(sname)					\
+	SYSCALL_METADATA(_##sname, 0);				\
+	asmlinkage long __arm64_sys_##sname(void);		\
+	ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO);	\
+	asmlinkage long __arm64_sys_##sname(void)
+#endif
+
+#ifndef COND_SYSCALL
+#define COND_SYSCALL(name) cond_syscall(__arm64_sys_##name)
+#endif
+
+#ifndef SYS_NI
+#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
+#endif
+
+#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 2ad1497a184e..ee93bf789f0a 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -48,11 +48,17 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
 /*
  * Wrappers to pass the pt_regs argument.
  */
-asmlinkage long sys_rt_sigreturn(void);
 #define sys_personality		sys_arm64_personality
 
+asmlinkage long sys_ni_syscall(const struct pt_regs *);
+#define __arm64_sys_ni_syscall	sys_ni_syscall
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym)	asmlinkage long __arm64_##sym(const struct pt_regs *);
+#include <asm/unistd.h>
+
 #undef __SYSCALL
-#define __SYSCALL(nr, sym)	[nr] = sym,
+#define __SYSCALL(nr, sym)	[nr] = __arm64_##sym,
 
 /*
  * The sys_call_table array must be 4K aligned to be accessible from
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 793bd0952ee0..533c97c5c232 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -131,8 +131,15 @@ COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode,
 	return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
 }
 
+asmlinkage long sys_ni_syscall(const struct pt_regs *);
+#define __arm64_sys_ni_syscall	sys_ni_syscall
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym)	asmlinkage long __arm64_##sym(const struct pt_regs *);
+#include <asm/unistd32.h>
+
 #undef __SYSCALL
-#define __SYSCALL(nr, sym)	[nr] = sym,
+#define __SYSCALL(nr, sym)	[nr] = __arm64_##sym,
 
 /*
  * The sys_call_table array must be 4K aligned to be accessible from
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 6a31bb2a382b..e5985924f57e 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -12,15 +12,11 @@
 
 long do_ni_syscall(struct pt_regs *regs);
 
-typedef long (*syscall_fn_t)(unsigned long, unsigned long,
-			     unsigned long, unsigned long,
-			     unsigned long, unsigned long);
+typedef long (*syscall_fn_t)(struct pt_regs *regs);
 
 static void __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn)
 {
-	regs->regs[0] = syscall_fn(regs->regs[0], regs->regs[1],
-				   regs->regs[2], regs->regs[3],
-				   regs->regs[4], regs->regs[5]);
+	regs->regs[0] = syscall_fn(regs);
 }
 
 static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index b8d3e0d8c616..8bfcc37cbc65 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -549,6 +549,8 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
 
 long compat_arm_syscall(struct pt_regs *regs);
 
+long sys_ni_syscall(void);
+
 asmlinkage long do_ni_syscall(struct pt_regs *regs)
 {
 #ifdef CONFIG_COMPAT
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 18/19] arm64: convert compat wrappers to C
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

In preparation for converting to pt_regs syscall wrappers, convert our
existing compat wrappers to C. This will allow the pt_regs wrappers to
be automatically generated, and will allow for the compat register
manipulation to be folded in with the pt_regs accesses.

To avoid confusion with the upcoming pt_regs wrappers and existing
compat wrappers provided by core code, the C wrappers are renamed to
compat_sys_aarch32_<syscall>.

With the assembly wrappers gone, we can get rid of entry32.S and the
associated boilerplate.

Note that these must call the ksys_* syscall entry points, as the usual
sys_* entry points will be modified to take a single pt_regs pointer
argument.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/unistd32.h |  22 ++++----
 arch/arm64/kernel/Makefile        |   2 +-
 arch/arm64/kernel/entry32.S       | 111 -------------------------------------
 arch/arm64/kernel/sys32.c         | 114 ++++++++++++++++++++++++++++++++++----
 4 files changed, 115 insertions(+), 134 deletions(-)
 delete mode 100644 arch/arm64/kernel/entry32.S

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index ab95554b1734..0e3dd3265993 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -382,9 +382,9 @@ __SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo)
 #define __NR_rt_sigsuspend 179
 __SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend)
 #define __NR_pread64 180
-__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper)
+__SYSCALL(__NR_pread64, compat_sys_aarch32_pread64)
 #define __NR_pwrite64 181
-__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper)
+__SYSCALL(__NR_pwrite64, compat_sys_aarch32_pwrite64)
 #define __NR_chown 182
 __SYSCALL(__NR_chown, sys_chown16)
 #define __NR_getcwd 183
@@ -406,11 +406,11 @@ __SYSCALL(__NR_vfork, sys_vfork)
 #define __NR_ugetrlimit 191	/* SuS compliant getrlimit */
 __SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit)		/* SuS compliant getrlimit */
 #define __NR_mmap2 192
-__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper)
+__SYSCALL(__NR_mmap2, compat_sys_aarch32_mmap2)
 #define __NR_truncate64 193
-__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
+__SYSCALL(__NR_truncate64, compat_sys_aarch32_truncate64)
 #define __NR_ftruncate64 194
-__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper)
+__SYSCALL(__NR_ftruncate64, compat_sys_aarch32_ftruncate64)
 #define __NR_stat64 195
 __SYSCALL(__NR_stat64, sys_stat64)
 #define __NR_lstat64 196
@@ -472,7 +472,7 @@ __SYSCALL(223, sys_ni_syscall)
 #define __NR_gettid 224
 __SYSCALL(__NR_gettid, sys_gettid)
 #define __NR_readahead 225
-__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper)
+__SYSCALL(__NR_readahead, compat_sys_aarch32_readahead)
 #define __NR_setxattr 226
 __SYSCALL(__NR_setxattr, sys_setxattr)
 #define __NR_lsetxattr 227
@@ -554,15 +554,15 @@ __SYSCALL(__NR_clock_getres, compat_sys_clock_getres)
 #define __NR_clock_nanosleep 265
 __SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep)
 #define __NR_statfs64 266
-__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper)
+__SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64)
 #define __NR_fstatfs64 267
-__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper)
+__SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64)
 #define __NR_tgkill 268
 __SYSCALL(__NR_tgkill, sys_tgkill)
 #define __NR_utimes 269
 __SYSCALL(__NR_utimes, compat_sys_utimes)
 #define __NR_arm_fadvise64_64 270
-__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper)
+__SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64)
 #define __NR_pciconfig_iobase 271
 __SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase)
 #define __NR_pciconfig_read 272
@@ -704,7 +704,7 @@ __SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list)
 #define __NR_splice 340
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_sync_file_range2 341
-__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper)
+__SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2)
 #define __NR_tee 342
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
@@ -726,7 +726,7 @@ __SYSCALL(__NR_timerfd_create, sys_timerfd_create)
 #define __NR_eventfd 351
 __SYSCALL(__NR_eventfd, sys_eventfd)
 #define __NR_fallocate 352
-__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper)
+__SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate)
 #define __NR_timerfd_settime 353
 __SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime)
 #define __NR_timerfd_gettime 354
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e24d2244bd1..95ac7374d723 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
 	$(call if_changed,objcopy)
 
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
-					   sys_compat.o entry32.o
+					   sys_compat.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
deleted file mode 100644
index f9461696dde4..000000000000
--- a/arch/arm64/kernel/entry32.S
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Compat system call wrappers
- *
- * Copyright (C) 2012 ARM Ltd.
- * Authors: Will Deacon <will.deacon@arm.com>
- *	    Catalin Marinas <catalin.marinas@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/const.h>
-
-#include <asm/assembler.h>
-#include <asm/asm-offsets.h>
-#include <asm/errno.h>
-#include <asm/page.h>
-
-/*
- * System call wrappers for the AArch32 compatibility layer.
- */
-
-ENTRY(compat_sys_statfs64_wrapper)
-	mov	w3, #84
-	cmp	w1, #88
-	csel	w1, w3, w1, eq
-	b	compat_sys_statfs64
-ENDPROC(compat_sys_statfs64_wrapper)
-
-ENTRY(compat_sys_fstatfs64_wrapper)
-	mov	w3, #84
-	cmp	w1, #88
-	csel	w1, w3, w1, eq
-	b	compat_sys_fstatfs64
-ENDPROC(compat_sys_fstatfs64_wrapper)
-
-/*
- * Note: off_4k (w5) is always in units of 4K. If we can't do the
- * requested offset because it is not page-aligned, we return -EINVAL.
- */
-ENTRY(compat_sys_mmap2_wrapper)
-#if PAGE_SHIFT > 12
-	tst	w5, #~PAGE_MASK >> 12
-	b.ne	1f
-	lsr	w5, w5, #PAGE_SHIFT - 12
-#endif
-	b	sys_mmap_pgoff
-1:	mov	x0, #-EINVAL
-	ret
-ENDPROC(compat_sys_mmap2_wrapper)
-
-/*
- * Wrappers for AArch32 syscalls that either take 64-bit parameters
- * in registers or that take 32-bit parameters which require sign
- * extension.
- */
-ENTRY(compat_sys_pread64_wrapper)
-	regs_to_64	x3, x4, x5
-	b	sys_pread64
-ENDPROC(compat_sys_pread64_wrapper)
-
-ENTRY(compat_sys_pwrite64_wrapper)
-	regs_to_64	x3, x4, x5
-	b	sys_pwrite64
-ENDPROC(compat_sys_pwrite64_wrapper)
-
-ENTRY(compat_sys_truncate64_wrapper)
-	regs_to_64	x1, x2, x3
-	b	sys_truncate
-ENDPROC(compat_sys_truncate64_wrapper)
-
-ENTRY(compat_sys_ftruncate64_wrapper)
-	regs_to_64	x1, x2, x3
-	b	sys_ftruncate
-ENDPROC(compat_sys_ftruncate64_wrapper)
-
-ENTRY(compat_sys_readahead_wrapper)
-	regs_to_64	x1, x2, x3
-	mov	w2, w4
-	b	sys_readahead
-ENDPROC(compat_sys_readahead_wrapper)
-
-ENTRY(compat_sys_fadvise64_64_wrapper)
-	mov	w6, w1
-	regs_to_64	x1, x2, x3
-	regs_to_64	x2, x4, x5
-	mov	w3, w6
-	b	sys_fadvise64_64
-ENDPROC(compat_sys_fadvise64_64_wrapper)
-
-ENTRY(compat_sys_sync_file_range2_wrapper)
-	regs_to_64	x2, x2, x3
-	regs_to_64	x3, x4, x5
-	b	sys_sync_file_range2
-ENDPROC(compat_sys_sync_file_range2_wrapper)
-
-ENTRY(compat_sys_fallocate_wrapper)
-	regs_to_64	x2, x2, x3
-	regs_to_64	x3, x4, x5
-	b	sys_fallocate
-ENDPROC(compat_sys_fallocate_wrapper)
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 1ef103c95410..793bd0952ee0 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -22,22 +22,114 @@
  */
 #define __COMPAT_SYSCALL_NR
 
+#include <linux/compat.h>
 #include <linux/compiler.h>
 #include <linux/syscalls.h>
 
 asmlinkage long compat_sys_sigreturn(void);
 asmlinkage long compat_sys_rt_sigreturn(void);
-asmlinkage long compat_sys_statfs64_wrapper(void);
-asmlinkage long compat_sys_fstatfs64_wrapper(void);
-asmlinkage long compat_sys_pread64_wrapper(void);
-asmlinkage long compat_sys_pwrite64_wrapper(void);
-asmlinkage long compat_sys_truncate64_wrapper(void);
-asmlinkage long compat_sys_ftruncate64_wrapper(void);
-asmlinkage long compat_sys_readahead_wrapper(void);
-asmlinkage long compat_sys_fadvise64_64_wrapper(void);
-asmlinkage long compat_sys_sync_file_range2_wrapper(void);
-asmlinkage long compat_sys_fallocate_wrapper(void);
-asmlinkage long compat_sys_mmap2_wrapper(void);
+
+COMPAT_SYSCALL_DEFINE3(aarch32_statfs64, const char __user *, pathname,
+		       compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	/*
+	 * 32-bit ARM applies an OABI compatibility fixup to statfs64 and
+	 * fstatfs64 regardless of whether OABI is in use, and therefore
+	 * arbitrary binaries may rely upon it, so we must do the same.
+	 * For more details, see commit:
+	 *
+	 * 713c481519f19df9 ("[ARM] 3108/2: old ABI compat: statfs64 and
+	 * fstatfs64")
+	 */
+	if (sz == 88)
+		sz = 84;
+
+	return kcompat_sys_statfs64(pathname, sz, buf);
+}
+
+COMPAT_SYSCALL_DEFINE3(aarch32_fstatfs64, unsigned int, fd, compat_size_t, sz,
+		       struct compat_statfs64 __user *, buf)
+{
+	/* see aarch32_statfs64 */
+	if (sz == 88)
+		sz = 84;
+
+	return kcompat_sys_fstatfs64(fd, sz, buf);
+}
+
+/*
+ * Note: off_4k is always in units of 4K. If we can't do the
+ * requested offset because it is not page-aligned, we return -EINVAL.
+ */
+COMPAT_SYSCALL_DEFINE6(aarch32_mmap2, unsigned long, addr, unsigned long, len,
+		       unsigned long, prot, unsigned long, flags,
+		       unsigned long, fd, unsigned long, off_4k)
+{
+	if (off_4k & (~PAGE_MASK >> 12))
+		return -EINVAL;
+
+	off_4k >>= (PAGE_SHIFT - 12);
+
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off_4k);
+}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define arg_u32p(name)	u32, name##_hi, u32, name##_lo
+#else
+#define arg_u32p(name)	u32, name##_lo, u32, name##_hi
+#endif
+
+#define arg_u64(name)	(((u64)name##_hi << 32) | name##_lo)
+
+COMPAT_SYSCALL_DEFINE6(aarch32_pread64, unsigned int, fd, char __user *, buf,
+		       size_t, count, u32, __pad, arg_u32p(pos))
+{
+	return ksys_pread64(fd, buf, count, arg_u64(pos));
+}
+
+COMPAT_SYSCALL_DEFINE6(aarch32_pwrite64, unsigned int, fd,
+		       const char __user *, buf, size_t, count, u32, __pad,
+		       arg_u32p(pos))
+{
+	return ksys_pwrite64(fd, buf, count, arg_u64(pos));
+}
+
+COMPAT_SYSCALL_DEFINE4(aarch32_truncate64, const char __user *, pathname,
+		       u32, __pad, arg_u32p(length))
+{
+	return ksys_truncate(pathname, arg_u64(length));
+}
+
+COMPAT_SYSCALL_DEFINE4(aarch32_ftruncate64, unsigned int, fd, u32, __pad,
+		       arg_u32p(length))
+{
+	return ksys_ftruncate(fd, arg_u64(length));
+}
+
+COMPAT_SYSCALL_DEFINE5(aarch32_readahead, int, fd, u32, __pad,
+		       arg_u32p(offset), size_t, count)
+{
+	return ksys_readahead(fd, arg_u64(offset), count);
+}
+
+COMPAT_SYSCALL_DEFINE6(aarch32_fadvise64_64, int, fd, int, advice,
+		       arg_u32p(offset), arg_u32p(len))
+{
+	return ksys_fadvise64_64(fd, arg_u64(offset), arg_u64(len), advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(aarch32_sync_file_range2, int, fd, unsigned int, flags,
+		       arg_u32p(offset), arg_u32p(nbytes))
+{
+	return ksys_sync_file_range(fd, arg_u64(offset), arg_u64(nbytes),
+				    flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(aarch32_fallocate, int, fd, int, mode,
+		       arg_u32p(offset), arg_u32p(len))
+{
+	return ksys_fallocate(fd, mode, arg_u64(offset), arg_u64(len));
+}
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym)	[nr] = sym,
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 17/19] arm64: use SYSCALL_DEFINE6() for mmap
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

We don't currently annotate our mmap implementation as a syscall, as we
need to do to use pt_regs syscall wrappers.

Let's mark it as a real syscall.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/sys.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index a82c3f7a9a90..2ad1497a184e 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -27,9 +27,9 @@
 #include <linux/syscalls.h>
 #include <asm/cpufeature.h>
 
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
-			 unsigned long prot, unsigned long flags,
-			 unsigned long fd, off_t off)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, off_t, off)
 {
 	if (offset_in_page(off) != 0)
 		return -EINVAL;
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 16/19] arm64: use {COMPAT,}SYSCALL_DEFINE0 for sigreturn
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

We don't currently annotate our various sigreturn functions as syscalls,
as we need to do to use pt_regs syscall wrappers.

Let's mark them as real syscalls.

For compat_sys_sigreturn and compat_sys_rt_sigreturn, this changes the
return type from int to long, matching the prototypes in sys32.c.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/signal.c   | 2 +-
 arch/arm64/kernel/signal32.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index caa7a68cf2d2..04a26dcbea32 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -538,7 +538,7 @@ static int restore_sigframe(struct pt_regs *regs,
 	return err;
 }
 
-asmlinkage long sys_rt_sigreturn(void)
+SYSCALL_DEFINE0(rt_sigreturn)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index cb10588a7cb2..1948566dcccf 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -282,7 +282,7 @@ static int compat_restore_sigframe(struct pt_regs *regs,
 	return err;
 }
 
-asmlinkage int compat_sys_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(sigreturn)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct compat_sigframe __user *frame;
@@ -313,7 +313,7 @@ asmlinkage int compat_sys_sigreturn(void)
 	return 0;
 }
 
-asmlinkage int compat_sys_rt_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
 {
 	struct pt_regs *regs = current_pt_regs();
 	struct compat_rt_sigframe __user *frame;
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 15/19] arm64: remove in-kernel call to sys_personality()
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

With pt_regs syscall wrappers, the calling convention for
sys_personality() will change. Use ksys_personality(), which is
functionally equivalent.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 31045f3fed92..a82c3f7a9a90 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -42,7 +42,7 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
 	if (personality(personality) == PER_LINUX32 &&
 		!system_supports_32bit_el0())
 		return -EINVAL;
-	return sys_personality(personality);
+	return ksys_personality(personality);
 }
 
 /*
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 14/19] kernel: add kcompat_sys_{f,}statfs64()
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

Using this helper allows us to avoid the in-kernel calls to the
compat_sys_{f,}statfs64() sycalls, as are necessary for parameter
mangling in arm64's compat handling.

Following the example of ksys_* functions, kcompat_sys_* functions are
intended to be a drop-in replacement for their compat_sys_*
counterparts, with the same calling convention.

This is necessary to enable conversion of arm64's syscall handling to
use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel at vger.kernel.org
---
 fs/statfs.c            | 14 ++++++++++++--
 include/linux/compat.h | 11 +++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/statfs.c b/fs/statfs.c
index 5b2a24f0f263..f0216629621d 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -335,7 +335,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -349,7 +349,12 @@ COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, s
 	return error;
 }
 
-COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_statfs64(pathname, sz, buf);
+}
+
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -363,6 +368,11 @@ COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct co
 	return error;
 }
 
+COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_fstatfs64(fd, sz, buf);
+}
+
 /*
  * This is a copy of sys_ustat, just dealing with a structure layout.
  * Given how simple this syscall is that apporach is more maintainable
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 081281ad5772..6cecf9f11d6b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1013,6 +1013,17 @@ static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
 	return ctv;
 }
 
+/*
+ * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz())
+ * directly.  Instead, use one of the functions which work equivalently, such
+ * as the kcompat_sys_xyzyyz() functions prototyped below.
+ */
+
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz,
+		     struct compat_statfs64 __user * buf);
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
+			  struct compat_statfs64 __user * buf);
+
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 13/19] kernel: add ksys_personality()
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

Using this helper allows us to avoid the in-kernel call to the
sys_personality() syscall. The ksys_ prefix denotes that this function
is meant as a drop-in replacement for the syscall. In particular, it
uses the same calling convention as sys_personality().

Since ksys_personality is trivial, it is implemented directly in
<linux/syscalls.h>, as we do for ksys_close() and friends.

This helper is necessary to enable conversion of arm64's syscall
handling to use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
---
 include/linux/syscalls.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 70fcda1a9049..652e166deb64 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -79,6 +79,7 @@ union bpf_attr;
 #include <linux/unistd.h>
 #include <linux/quota.h>
 #include <linux/key.h>
+#include <linux/personality.h>
 #include <trace/syscall.h>
 
 #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
@@ -1268,4 +1269,14 @@ static inline long ksys_truncate(const char __user *pathname, loff_t length)
 	return do_sys_truncate(pathname, length);
 }
 
+static inline unsigned int ksys_personality(unsigned int personality)
+{
+	unsigned int old = current->personality;
+
+	if (personality != 0xffffffff)
+		set_personality(personality);
+
+	return old;
+}
+
 #endif
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 12/19] arm64: zero GPRs upon entry from EL0
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

We can zero GPRs x0 - x29 upon entry from EL0 to make it harder for
userspace to control values consumed by speculative gadgets.

We don't blat x30, since this is stashed much later, and we'll blat it
before invoking C code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/entry.S | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 22c58e7dfc0f..39440c2ee66d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -63,6 +63,12 @@
 #endif
 	.endm
 
+	.macro	clear_gp_regs
+	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
+	mov	x\n, xzr
+	.endr
+	.endm
+
 /*
  * Bad Abort numbers
  *-----------------
@@ -179,6 +185,7 @@ skip_apply_ssbd\@:
 	stp	x28, x29, [sp, #16 * 14]
 
 	.if	\el == 0
+	clear_gp_regs
 	mrs	x21, sp_el0
 	ldr_this_cpu	tsk, __entry_task, x20	// Ensure MDSCR_EL1.SS is clear,
 	ldr	x19, [tsk, #TSK_TI_FLAGS]	// since we can unmask debug
@@ -186,7 +193,6 @@ skip_apply_ssbd\@:
 
 	apply_ssbd 1, x22, x23
 
-	mov	x29, xzr			// fp pointed to user-space
 	.else
 	add	x21, sp, #S_FRAME_SIZE
 	get_thread_info tsk
-- 
2.11.0

^ permalink raw reply related

* [PATCHv2 11/19] arm64: don't reload GPRs after apply_ssbd
From: Mark Rutland @ 2018-06-01 11:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20180601112441.37810-1-mark.rutland@arm.com>

Now that all of the syscall logic works on the saved pt_regs, apply_ssbd
can safely corrupt x0-x3 in the entry paths, and we no longer need to
restore them. So let's remotve the logic doing so.

With that logic gone, we can fold the branch target into the macro, so
that callers need not deal with this. GAS provides \@, which provides a
unique value per macro invocation, which we can use to create a unique
label.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/entry.S | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 156c4e3fd1a4..22c58e7dfc0f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -140,20 +140,21 @@ alternative_else_nop_endif
 
 	// This macro corrupts x0-x3. It is the caller's duty
 	// to save/restore them if required.
-	.macro	apply_ssbd, state, targ, tmp1, tmp2
+	.macro	apply_ssbd, state, tmp1, tmp2
 #ifdef CONFIG_ARM64_SSBD
 alternative_cb	arm64_enable_wa2_handling
-	b	\targ
+	b	skip_apply_ssbd\@
 alternative_cb_end
 	ldr_this_cpu	\tmp2, arm64_ssbd_callback_required, \tmp1
-	cbz	\tmp2, \targ
+	cbz	\tmp2, skip_apply_ssbd\@
 	ldr	\tmp2, [tsk, #TSK_TI_FLAGS]
-	tbnz	\tmp2, #TIF_SSBD, \targ
+	tbnz	\tmp2, #TIF_SSBD, skip_apply_ssbd\@
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 	mov	w1, #\state
 alternative_cb	arm64_update_smccc_conduit
 	nop					// Patched to SMC/HVC #0
 alternative_cb_end
+skip_apply_ssbd\@:
 #endif
 	.endm
 
@@ -183,13 +184,7 @@ alternative_cb_end
 	ldr	x19, [tsk, #TSK_TI_FLAGS]	// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
 
-	apply_ssbd 1, 1f, x22, x23
-
-#ifdef CONFIG_ARM64_SSBD
-	ldp	x0, x1, [sp, #16 * 0]
-	ldp	x2, x3, [sp, #16 * 1]
-#endif
-1:
+	apply_ssbd 1, x22, x23
 
 	mov	x29, xzr			// fp pointed to user-space
 	.else
@@ -331,8 +326,7 @@ alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
-	apply_ssbd 0, 5f, x0, x1
-5:
+	apply_ssbd 0, x0, x1
 	.endif
 
 	msr	elr_el1, x21			// set up the return data
-- 
2.11.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox