Linux userland API discussions

Linux userland API discussions
 help / color / mirror / Atom feed

* [PATCH v3 3/5] arm64: dts: Add support for Spreadtrum SC9836 SoC in dts and Makefile
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
  To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
	artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
	broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
	orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
	wei.qiao
  Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
	sprdlinux, linux-arm-kernel
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>

From: Zhizhou Zhang <zhizhou.zhang@spreadtrum.com>

Add device tree support for the Spreadtrum SC9836 SoC, which is based on
the Sharkl64 platform.

Sharkl64 platform contains the common nodes of Spreadtrum's arm64-based SoCs.

Signed-off-by: Zhizhou Zhang <zhizhou.zhang@spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Signed-off-by: Orson Zhai <orson.zhai@spreadtrum.com>
---
 arch/arm64/boot/dts/Makefile                  |    1 +
 arch/arm64/boot/dts/sprd-sc9836-openphone.dts |   85 ++++++++++++++++++++
 arch/arm64/boot/dts/sprd-sc9836.dtsi          |  103 ++++++++++++++++++++++++
 arch/arm64/boot/dts/sprd-sharkl64.dtsi        |  105 +++++++++++++++++++++++++
 4 files changed, 294 insertions(+)
 create mode 100644 arch/arm64/boot/dts/sprd-sc9836-openphone.dts
 create mode 100644 arch/arm64/boot/dts/sprd-sc9836.dtsi
 create mode 100644 arch/arm64/boot/dts/sprd-sharkl64.dtsi

diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index f8001a6..d0aff8a 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,4 +1,5 @@
 dtb-$(CONFIG_ARCH_THUNDER) += thunder-88xx.dtb
+dtb-$(CONFIG_ARCH_SHARKL64) += sprd-sc9836-openphone.dtb
 dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
 dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb
 
diff --git a/arch/arm64/boot/dts/sprd-sc9836-openphone.dts b/arch/arm64/boot/dts/sprd-sc9836-openphone.dts
new file mode 100644
index 0000000..484d714
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd-sc9836-openphone.dts
@@ -0,0 +1,85 @@
+/*
+ * Spreadtrum SC9836 openphone board DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/dts-v1/;
+
+#include "sprd-sc9836.dtsi"
+
+/ {
+	model = "Spreadtrum,SC9836 Openphone Board";
+
+	compatible = "sprd,sc9836-openphone", "sprd,sc9836";
+
+	aliases {
+		serial0 = &uart0;
+		serial1 = &uart1;
+		serial2 = &uart2;
+		serial3 = &uart3;
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0 0x80000000 0 0x20000000>;
+	};
+
+	chosen {
+		stdout-path = &uart0;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&uart1 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
+
+&uart3 {
+	status = "okay";
+};
diff --git a/arch/arm64/boot/dts/sprd-sc9836.dtsi b/arch/arm64/boot/dts/sprd-sc9836.dtsi
new file mode 100644
index 0000000..d5fe552
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd-sc9836.dtsi
@@ -0,0 +1,103 @@
+/*
+ * Spreadtrum SC9836 SoC DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sprd-sharkl64.dtsi"
+
+/ {
+	compatible = "sprd,sc9836";
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+		};
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+		};
+		cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "psci";
+		};
+		cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53", "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "psci";
+		};
+	};
+
+	gic: interrupt-controller@12001000 {
+		compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+		#interrupt-cells = <3>;
+		interrupt-controller;
+		reg = <0 0x12001000 0 0x1000>,
+		      <0 0x12002000 0 0x1000>,
+		      <0 0x12004000 0 0x2000>,
+		      <0 0x12006000 0 0x2000>;
+	};
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		interrupts = <1 13 0xff01>,
+			     <1 14 0xff01>,
+			     <1 11 0xff01>,
+			     <1 10 0xff01>;
+		clock-frequency = <26000000>;
+	};
+};
diff --git a/arch/arm64/boot/dts/sprd-sharkl64.dtsi b/arch/arm64/boot/dts/sprd-sharkl64.dtsi
new file mode 100644
index 0000000..d9ecfe9
--- /dev/null
+++ b/arch/arm64/boot/dts/sprd-sharkl64.dtsi
@@ -0,0 +1,105 @@
+/*
+ * Spreadtrum Sharkl64 platform DTS file
+ *
+ * Copyright (C) 2014, Spreadtrum Communications Inc.
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This library is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This library is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/ {
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	soc {
+		compatible = "simple-bus";
+		reg = <0x0 0x0 0x0 0x80000000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+
+		ap_apb: apb@70000000 {
+			compatible = "simple-bus";
+			reg = <0x0 0x70000000 0x0 0x10000000>;
+			#address-cells = <2>;
+			#size-cells = <2>;
+			ranges;
+
+			uart0: serial@70000000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70000000 0 0x100>;
+				interrupts = <0 2 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+
+			uart1: serial@70100000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70100000 0 0x100>;
+				interrupts = <0 3 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+
+			uart2: serial@70200000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70200000 0 0x100>;
+				/*
+				 * NOTE(review): this is the same SPI number as
+				 * uart0 (serial@70000000). Confirm against the
+				 * SC9836 interrupt map that the two ports really
+				 * share one interrupt line.
+				 */
+				interrupts = <0 2 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+
+			uart3: serial@70300000 {
+				compatible = "sprd,sc9836-uart";
+				reg = <0 0x70300000 0 0x100>;
+				/*
+				 * NOTE(review): this is the same SPI number as
+				 * uart1 (serial@70100000). Confirm against the
+				 * SC9836 interrupt map that the two ports really
+				 * share one interrupt line.
+				 */
+				interrupts = <0 3 0xf04>;
+				clocks = <&clk26mhz>;
+				status = "disabled";
+			};
+		};
+	};
+
+	clocks {
+		clk26mhz: clk26mhz {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <26000000>;
+		};
+	};
+};
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v3 4/5] arm64: Add support for Spreadtrum's Sharkl64 Platform in Kconfig and defconfig
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
  To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
	artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
	broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
	orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
	wei.qiao
  Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
	sprdlinux, linux-arm-kernel
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>

From: Zhizhou Zhang <zhizhou.zhang@spreadtrum.com>

Adds support for Spreadtrum's SoC Platform and its subset Sharkl64
in the arm64 Kconfig and defconfig files.

Signed-off-by: Zhizhou Zhang <zhizhou.zhang@spreadtrum.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Signed-off-by: Orson Zhai <orson.zhai@spreadtrum.com>
---
 arch/arm64/Kconfig           |   17 +++++++++++++++++
 arch/arm64/configs/defconfig |    2 ++
 2 files changed, 19 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9532f8d..a63ec45 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -147,6 +147,23 @@ config ARCH_THUNDER
 	help
 	  This enables support for Cavium's Thunder Family of SoCs.
 
+menuconfig ARCH_SPRD
+	bool "Spreadtrum SoC platform"
+	depends on ARM64
+	help
+	  Support for Spreadtrum ARM based SoCs
+
+if ARCH_SPRD
+
+config ARCH_SHARKL64
+	bool "Sharkl64 SoC Platform"
+	help
+	  Sharkl64 is a Spreadtrum SoC platform based on ARM 64-bit
+	  processor cores. It includes the following SoCs:
+	    sc9836
+
+endif #ARCH_SPRD
+
 config ARCH_VEXPRESS
 	bool "ARMv8 software model (Versatile Express)"
 	select ARCH_REQUIRE_GPIOLIB
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index dd301be..d7934a8 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -33,6 +33,8 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 CONFIG_ARCH_THUNDER=y
+CONFIG_ARCH_SPRD=y
+CONFIG_ARCH_SHARKL64=y
 CONFIG_ARCH_VEXPRESS=y
 CONFIG_ARCH_XGENE=y
 CONFIG_PCI=y
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v3 5/5] tty/serial: Add Spreadtrum sc9836-uart driver support
From: Chunyan Zhang @ 2014-11-25 12:16 UTC (permalink / raw)
  To: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
	artagnon, rrichter, will.deacon, arnd, gnomes, corbet, jason,
	broonie, heiko, shawn.guo, florian.vaussard, andrew, hytszk,
	orsonzhai, geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra,
	wei.qiao
  Cc: devicetree, linux-doc, linux-api, linux-kernel, linux-serial,
	sprdlinux, linux-arm-kernel
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>

Add a full sc9836-uart driver for SC9836 SoC which is based on the
spreadtrum sharkl64 platform.
This driver also supports earlycon.

Signed-off-by: Chunyan Zhang <chunyan.zhang@spreadtrum.com>
Signed-off-by: Orson Zhai <orson.zhai@spreadtrum.com>
Originally-by: Lanqing Liu <lanqing.liu@spreadtrum.com>
---
 Documentation/devices.txt        |    3 +
 drivers/tty/serial/Kconfig       |   23 ++
 drivers/tty/serial/Makefile      |    1 +
 drivers/tty/serial/sprd_serial.c |  752 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h |    3 +
 5 files changed, 782 insertions(+)
 create mode 100644 drivers/tty/serial/sprd_serial.c

diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 87b4c5e..1da0432 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -2816,6 +2816,9 @@ Your cooperation is appreciated.
 		 210 = /dev/ttyMAX1		MAX3100 serial port 1
 		 211 = /dev/ttyMAX2		MAX3100 serial port 2
 		 212 = /dev/ttyMAX3		MAX3100 serial port 3
+		 213 = /dev/ttySPX0		SPRD serial port 0
+		    ...
+		 216 = /dev/ttySPX3		SPRD serial port 3
 
 205 char	Low-density serial ports (alternate device)
 		  0 = /dev/culu0		Callout device for ttyLU0
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 649b784..2c2cf60 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1573,6 +1573,29 @@ config SERIAL_MEN_Z135
 	  This driver can also be build as a module. If so, the module will be called
 	  men_z135_uart.ko
 
+config SERIAL_SPRD
+	tristate "Support for SPRD serial"
+	depends on ARCH_SPRD
+	select SERIAL_CORE
+	help
+	  This enables the driver for the Spreadtrum's serial.
+
+config SERIAL_SPRD_NR
+	int "Maximum number of sprd serial ports"
+	depends on SERIAL_SPRD
+	default "4"
+
+config SERIAL_SPRD_CONSOLE
+	bool "SPRD UART console support"
+	depends on SERIAL_SPRD=y
+	select SERIAL_CORE_CONSOLE
+	select SERIAL_EARLYCON
+	help
+	  Support for early debug console using Spreadtrum's serial. This enables
+	  the console before standard serial driver is probed. This is enabled
+	  with "earlycon" on the kernel command line. The console is
+	  enabled when early_param is processed.
+
+
 endmenu
 
 config SERIAL_MCTRL_GPIO
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 9a548ac..4801aca 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_SERIAL_ARC)	+= arc_uart.o
 obj-$(CONFIG_SERIAL_RP2)	+= rp2.o
 obj-$(CONFIG_SERIAL_FSL_LPUART)	+= fsl_lpuart.o
 obj-$(CONFIG_SERIAL_MEN_Z135)	+= men_z135_uart.o
+obj-$(CONFIG_SERIAL_SPRD) += sprd_serial.o
 
 # GPIOLIB helpers for modem control lines
 obj-$(CONFIG_SERIAL_MCTRL_GPIO)	+= serial_mctrl_gpio.o
diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c
new file mode 100644
index 0000000..58214c8
--- /dev/null
+++ b/drivers/tty/serial/sprd_serial.c
@@ -0,0 +1,752 @@
+/*
+ * Copyright (C) 2012 Spreadtrum Communications Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/platform_device.h>
+#include <linux/tty_flip.h>
+#include <linux/serial_core.h>
+#include <linux/serial.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <asm/irq.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+
+/* device name */
+#define UART_NR_MAX		CONFIG_SERIAL_SPRD_NR
+#define SPRD_TTY_NAME		"ttySPX"
+#define SPRD_TTY_MAJOR		204
+#define SPRD_TTY_MINOR_START	213
+#define SPRD_FIFO_SIZE		128
+#define SPRD_DEF_RATE		26000000
+
+/* the offset of serial registers and BITs for them */
+/* data registers */
+#define SPRD_TXD		0x0000
+#define SPRD_RXD		0x0004
+
+/* line status register and its BITs  */
+#define SPRD_LSR		0x0008
+#define SPRD_LSR_OE		BIT(4)
+#define SPRD_LSR_FE		BIT(3)
+#define SPRD_LSR_PE		BIT(2)
+#define SPRD_LSR_BI		BIT(7)
+#define SPRD_LSR_TX_OVER	BIT(15)
+
+/* data number in TX and RX fifo */
+#define SPRD_STS1		0x000C
+
+/* interrupt enable register and its BITs */
+#define SPRD_IEN		0x0010
+#define SPRD_IEN_RX_FULL	BIT(0)
+#define SPRD_IEN_TX_EMPTY	BIT(1)
+#define SPRD_IEN_BREAK_DETECT	BIT(7)
+#define SPRD_IEN_TIMEOUT	BIT(13)
+
+/* interrupt clear register */
+#define SPRD_ICLR		0x0014
+
+/* line control register */
+#define SPRD_LCR		0x0018
+#define SPRD_LCR_STOP_1BIT	0x10
+#define SPRD_LCR_STOP_2BIT	0x30
+#define SPRD_LCR_DATA_LEN	(BIT(2) | BIT(3))
+#define SPRD_LCR_DATA_LEN5	0x0
+#define SPRD_LCR_DATA_LEN6	0x4
+#define SPRD_LCR_DATA_LEN7	0x8
+#define SPRD_LCR_DATA_LEN8	0xc
+#define SPRD_LCR_PARITY		(BIT(0) | BIT(1))
+#define SPRD_LCR_PARITY_EN	0x2
+#define SPRD_LCR_EVEN_PAR	0x0
+#define SPRD_LCR_ODD_PAR	0x1
+
+/* control register 1 */
+#define SPRD_CTL1		0x001C
+#define RX_HW_FLOW_CTL_THLD	BIT(6)
+#define RX_HW_FLOW_CTL_EN	BIT(7)
+#define TX_HW_FLOW_CTL_EN	BIT(8)
+
+/* fifo threshold register */
+#define SPRD_CTL2		0x0020
+#define THLD_TX_EMPTY		0x40
+#define THLD_RX_FULL		0x40
+
+/* config baud rate register */
+#define SPRD_CLKD0		0x0024
+#define SPRD_CLKD1		0x0028
+
+/* interrupt mask status register */
+#define SPRD_IMSR		0x002C
+#define SPRD_IMSR_RX_FIFO_FULL	BIT(0)
+#define SPRD_IMSR_TX_FIFO_EMPTY	BIT(1)
+#define SPRD_IMSR_BREAK_DETECT	BIT(7)
+#define SPRD_IMSR_TIMEOUT	BIT(13)
+
+/*
+ * Snapshot of UART register values, one 32-bit field per register.
+ *
+ * ien, ctrl1, ctrl2, clkd0 and clkd1 match the SPRD_IEN, SPRD_CTL1,
+ * SPRD_CTL2, SPRD_CLKD0 and SPRD_CLKD1 register names defined above.
+ * NOTE(review): nothing in the part of the file shown here reads or
+ * writes this structure; presumably it is used for suspend/resume -
+ * confirm against the rest of the driver.
+ */
+struct reg_backup {
+	uint32_t ien;
+	uint32_t ctrl0;
+	uint32_t ctrl1;
+	uint32_t ctrl2;
+	uint32_t clkd0;
+	uint32_t clkd1;
+	uint32_t dspwait;
+};
+/*
+ * Per-port driver state.
+ *
+ * @port must stay the first member: several places in this file cast a
+ * struct sprd_uart_port pointer directly to struct uart_port.
+ * @name holds the string passed as the IRQ name in sprd_startup().
+ */
+struct sprd_uart_port {
+	struct uart_port port;
+	struct reg_backup reg_bak;
+	char name[16];
+};
+/* Port table, indexed by platform device id / console index. */
+static struct sprd_uart_port *sprd_port[UART_NR_MAX] = { NULL };
+
+/* Read the UART register located @offset bytes past the mapped base. */
+static inline unsigned int serial_in(struct uart_port *port, int offset)
+{
+	void __iomem *reg = port->membase + offset;
+
+	return readl_relaxed(reg);
+}
+
+/* Write @value to the UART register located @offset bytes past the base. */
+static inline void serial_out(struct uart_port *port, int offset, int value)
+{
+	void __iomem *reg = port->membase + offset;
+
+	writel_relaxed(value, reg);
+}
+
+/*
+ * Report whether the transmitter is idle.
+ *
+ * Returns TIOCSER_TEMT when bits 15:8 of STS1 read as zero, 0 otherwise.
+ */
+static unsigned int sprd_tx_empty(struct uart_port *port)
+{
+	unsigned int tx_pending = serial_in(port, SPRD_STS1) & 0xff00;
+
+	return tx_pending ? 0 : TIOCSER_TEMT;
+}
+
+/*
+ * Modem status query: no register is read, DSR and CTS are always
+ * reported as asserted.
+ */
+static unsigned int sprd_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_DSR | TIOCM_CTS;
+}
+
+/* Modem control update: intentionally empty, @mctrl is ignored. */
+static void sprd_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	/* nothing to do */
+}
+
+/*
+ * Stop transmitting: acknowledge the TX-empty interrupt, then mask it.
+ * The register access order (read ICLR, read IEN, write ICLR, write IEN)
+ * is kept as is.
+ */
+static void sprd_stop_tx(struct uart_port *port)
+{
+	unsigned int clr = serial_in(port, SPRD_ICLR);
+	unsigned int mask = serial_in(port, SPRD_IEN);
+
+	serial_out(port, SPRD_ICLR, clr | SPRD_IEN_TX_EMPTY);
+	serial_out(port, SPRD_IEN, mask & ~SPRD_IEN_TX_EMPTY);
+}
+
+/*
+ * Start transmitting by unmasking the TX-empty interrupt; the register
+ * is only written when the bit is not already set.
+ */
+static void sprd_start_tx(struct uart_port *port)
+{
+	unsigned int mask = serial_in(port, SPRD_IEN);
+
+	if (mask & SPRD_IEN_TX_EMPTY)
+		return;
+
+	serial_out(port, SPRD_IEN, mask | SPRD_IEN_TX_EMPTY);
+}
+
+/*
+ * Stop receiving: mask the RX-full and break-detect interrupts, then
+ * acknowledge them. The register access order (read ICLR, read IEN,
+ * write IEN, write ICLR) is kept as is.
+ */
+static void sprd_stop_rx(struct uart_port *port)
+{
+	const unsigned int rx_bits = SPRD_IEN_RX_FULL | SPRD_IEN_BREAK_DETECT;
+	unsigned int clr = serial_in(port, SPRD_ICLR);
+	unsigned int mask = serial_in(port, SPRD_IEN);
+
+	serial_out(port, SPRD_IEN, mask & ~rx_bits);
+	serial_out(port, SPRD_ICLR, clr | rx_bits);
+}
+
+/*
+ * The Sprd serial does not support this function, so the callback is
+ * an empty stub and @break_state is ignored.
+ */
+static void sprd_break_ctl(struct uart_port *port, int break_state)
+{
+	/* nothing to do */
+}
+
+/*
+ * Account for and classify the receive errors reported in *lsr.
+ *
+ * @port: port whose icount error counters are updated
+ * @flag: out - set to TTY_BREAK, TTY_PARITY or TTY_FRAME when the matching
+ *        condition survives masking with port->read_status_mask; left
+ *        untouched otherwise
+ * @lsr:  in/out - line status bits; masked with port->read_status_mask
+ *        before returning on the normal path
+ *
+ * Note the argument order: @flag comes before @lsr.
+ *
+ * Returns the value of uart_handle_break() when it is non-zero (in that
+ * case *flag is not set and *lsr is not masked), otherwise 0.
+ */
+static inline int handle_lsr_errors(struct uart_port *port,
+	unsigned int *flag, unsigned int *lsr)
+{
+	int ret = 0;
+
+	/* statistics */
+	if (*lsr & SPRD_LSR_BI) {
+		/* a break takes precedence: drop framing/parity for this char */
+		*lsr &= ~(SPRD_LSR_FE | SPRD_LSR_PE);
+		port->icount.brk++;
+		ret = uart_handle_break(port);
+		if (ret)
+			return ret;
+	} else if (*lsr & SPRD_LSR_PE)
+		port->icount.parity++;
+	else if (*lsr & SPRD_LSR_FE)
+		port->icount.frame++;
+	if (*lsr & SPRD_LSR_OE)
+		port->icount.overrun++;
+
+	/* mask off conditions which should be ignored */
+	*lsr &= port->read_status_mask;
+	if (*lsr & SPRD_LSR_BI)
+		*flag = TTY_BREAK;
+	else if (*lsr & SPRD_LSR_PE)
+		*flag = TTY_PARITY;
+	else if (*lsr & SPRD_LSR_FE)
+		*flag = TTY_FRAME;
+
+	return ret;
+}
+
+/*
+ * Receive path: move characters from the RX FIFO to the tty layer.
+ *
+ * @irq:    interrupt number (unused)
+ * @dev_id: the struct uart_port the interrupt was registered for
+ *
+ * Characters are read while bits 7:0 of STS1 are non-zero, with at most
+ * 2048 characters handled per call. Each character is paired with the
+ * line status read just before it.
+ */
+static inline void sprd_rx(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	struct tty_port *tty = &port->state->port;
+	unsigned int ch, flag, lsr, max_count = 2048;
+
+	while ((serial_in(port, SPRD_STS1) & 0x00ff) && max_count--) {
+		lsr = serial_in(port, SPRD_LSR);
+		ch = serial_in(port, SPRD_RXD);
+		flag = TTY_NORMAL;
+		port->icount.rx++;
+
+		if (unlikely(lsr & (SPRD_LSR_BI | SPRD_LSR_PE |
+				    SPRD_LSR_FE | SPRD_LSR_OE))) {
+			/*
+			 * handle_lsr_errors() takes (port, flag, lsr); passing
+			 * the two pointers the other way round makes it test
+			 * the TTY flag for error bits and never see the real
+			 * line status.
+			 */
+			if (handle_lsr_errors(port, &flag, &lsr))
+				continue;
+		}
+		if (uart_handle_sysrq_char(port, ch))
+			continue;
+
+		uart_insert_char(port, lsr, SPRD_LSR_OE, ch, flag);
+	}
+
+	tty_flip_buffer_push(tty);
+}
+
+/*
+ * Transmit path: feed the TX FIFO from the port's circular buffer.
+ *
+ * @irq:    interrupt number (unused)
+ * @dev_id: the struct uart_port the interrupt was registered for
+ *
+ * A pending x_char is sent on its own and ends the call. Otherwise up to
+ * THLD_TX_EMPTY characters are written; the TX interrupt is turned off
+ * through sprd_stop_tx() once there is nothing left to send or the tty
+ * is stopped.
+ */
+static inline void sprd_tx(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	struct circ_buf *ring = &port->state->xmit;
+	int budget;
+
+	if (port->x_char) {
+		serial_out(port, SPRD_TXD, port->x_char);
+		port->icount.tx++;
+		port->x_char = 0;
+		return;
+	}
+
+	if (uart_circ_empty(ring) || uart_tx_stopped(port)) {
+		sprd_stop_tx(port);
+		return;
+	}
+
+	for (budget = THLD_TX_EMPTY; budget > 0; budget--) {
+		serial_out(port, SPRD_TXD, ring->buf[ring->tail]);
+		ring->tail = (ring->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+		if (uart_circ_empty(ring))
+			break;
+	}
+
+	/* let writers queue more data once the buffer has drained enough */
+	if (uart_circ_chars_pending(ring) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_empty(ring))
+		sprd_stop_tx(port);
+}
+
+/*
+ * Interrupt handler for one port.
+ *
+ * @irq:    interrupt number
+ * @dev_id: the struct uart_port passed to devm_request_irq()
+ *
+ * Reads the masked interrupt status, acknowledges everything, then runs
+ * the receive and/or transmit path depending on the status bits.
+ *
+ * Returns IRQ_NONE when no interrupt is pending on this port (the line is
+ * requested with IRQF_SHARED, so the interrupt may belong to another
+ * device), IRQ_HANDLED otherwise.
+ */
+static irqreturn_t sprd_handle_irq(int irq, void *dev_id)
+{
+	struct uart_port *port = dev_id;
+	u32 ims;
+
+	ims = serial_in(port, SPRD_IMSR);
+	if (!ims)
+		return IRQ_NONE;
+
+	serial_out(port, SPRD_ICLR, ~0);
+
+	if (ims & (SPRD_IMSR_RX_FIFO_FULL |
+		SPRD_IMSR_BREAK_DETECT | SPRD_IMSR_TIMEOUT)) {
+		sprd_rx(irq, port);
+	}
+	if (ims & SPRD_IMSR_TX_FIFO_EMPTY)
+		sprd_tx(irq, port);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Bring the port up: program the FIFO thresholds, empty both FIFOs,
+ * request the interrupt and enable the receive-side interrupts.
+ *
+ * Returns 0 on success or the error returned by devm_request_irq().
+ */
+static int sprd_startup(struct uart_port *port)
+{
+	int ret = 0;
+	unsigned int ien, ctrl1;
+	unsigned long flags;
+	struct sprd_uart_port *sp;
+
+	serial_out(port, SPRD_CTL2, ((THLD_TX_EMPTY << 8) | THLD_RX_FULL));
+
+	/* clear rx fifo */
+	while (serial_in(port, SPRD_STS1) & 0x00ff)
+		serial_in(port, SPRD_RXD);
+
+	/* clear tx fifo */
+	while (serial_in(port, SPRD_STS1) & 0xff00)
+		;
+
+	/* clear interrupt */
+	serial_out(port, SPRD_IEN, 0x0);
+	serial_out(port, SPRD_ICLR, ~0);
+
+	/* allocate irq */
+	sp = container_of(port, struct sprd_uart_port, port);
+	snprintf(sp->name, sizeof(sp->name), "sprd_serial%d", port->line);
+	ret = devm_request_irq(port->dev, port->irq, sprd_handle_irq,
+			IRQF_SHARED, sp->name, port);
+	if (ret) {
+		dev_err(port->dev, "fail to request serial irq %d\n",
+			port->irq);
+		return ret;
+	}
+	ctrl1 = serial_in(port, SPRD_CTL1);
+	ctrl1 |= 0x3e00 | THLD_RX_FULL;
+	serial_out(port, SPRD_CTL1, ctrl1);
+
+	/*
+	 * enable interrupt
+	 *
+	 * port->lock is also taken by the console write path, which can be
+	 * entered from interrupt context via printk(). Interrupts must be
+	 * off while the lock is held here, otherwise such an interrupt on
+	 * this CPU would spin on the lock forever.
+	 */
+	spin_lock_irqsave(&port->lock, flags);
+	ien = serial_in(port, SPRD_IEN);
+	ien |= SPRD_IEN_RX_FULL | SPRD_IEN_BREAK_DETECT | SPRD_IEN_TIMEOUT;
+	serial_out(port, SPRD_IEN, ien);
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Take the port down: mask and acknowledge every interrupt, then release
+ * the IRQ that sprd_startup() requested.
+ */
+static void sprd_shutdown(struct uart_port *port)
+{
+	serial_out(port, SPRD_IEN, 0x0);
+	serial_out(port, SPRD_ICLR, ~0);
+	devm_free_irq(port->dev, port->irq, port);
+}
+
+/*
+ * Apply new line settings to the port.
+ *
+ * @port:    port being configured
+ * @termios: requested settings (baud rate, character size, stop bits,
+ *           parity, hardware flow control, input error handling)
+ * @old:     previous settings, only passed on to uart_get_baud_rate()
+ *
+ * Computes the clock divider and the LCR/CTL1 values, updates the port's
+ * read/ignore status masks, then writes CLKD0, CLKD1, LCR and CTL1 in
+ * that order.
+ */
+static void sprd_set_termios(struct uart_port *port,
+				    struct ktermios *termios,
+				    struct ktermios *old)
+{
+	unsigned int baud, quot;
+	unsigned int lcr, fc;
+
+	/* ask the core for the requested baud rate, limited to 1200..3000000 */
+	baud = uart_get_baud_rate(port, termios, old, 1200, 3000000);
+
+	/* divider = uartclk / baud, rounded to the nearest integer */
+	quot = (unsigned int)((port->uartclk + baud / 2) / baud);
+
+	/* set data length */
+	lcr = serial_in(port, SPRD_LCR);
+	lcr &= ~SPRD_LCR_DATA_LEN;
+	switch (termios->c_cflag & CSIZE) {
+	case CS5:
+		lcr |= SPRD_LCR_DATA_LEN5;
+		break;
+	case CS6:
+		lcr |= SPRD_LCR_DATA_LEN6;
+		break;
+	case CS7:
+		lcr |= SPRD_LCR_DATA_LEN7;
+		break;
+	case CS8:
+	default:
+		lcr |= SPRD_LCR_DATA_LEN8;
+		break;
+	}
+
+	/* calculate stop bits */
+	lcr &= ~(SPRD_LCR_STOP_1BIT | SPRD_LCR_STOP_2BIT);
+	if (termios->c_cflag & CSTOPB)
+		lcr |= SPRD_LCR_STOP_2BIT;
+	else
+		lcr |= SPRD_LCR_STOP_1BIT;
+
+	/* calculate parity */
+	lcr &= ~SPRD_LCR_PARITY;
+	if (termios->c_cflag & PARENB) {
+		lcr |= SPRD_LCR_PARITY_EN;
+		if (termios->c_cflag & PARODD)
+			lcr |= SPRD_LCR_ODD_PAR;
+		else
+			lcr |= SPRD_LCR_EVEN_PAR;
+	}
+
+	/* change the port state. */
+	/* update the per-port timeout */
+	uart_update_timeout(port, termios->c_cflag, baud);
+
+	/* error conditions the receive path should report */
+	port->read_status_mask = SPRD_LSR_OE;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= SPRD_LSR_FE | SPRD_LSR_PE;
+	if (termios->c_iflag & (BRKINT | PARMRK))
+		port->read_status_mask |= SPRD_LSR_BI;
+
+	/* characters to ignore */
+	port->ignore_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->ignore_status_mask |= SPRD_LSR_PE | SPRD_LSR_FE;
+	if (termios->c_iflag & IGNBRK) {
+		port->ignore_status_mask |= SPRD_LSR_BI;
+		/*
+		 * If we're ignoring parity and break indicators,
+		 * ignore overruns too (for real raw support).
+		 */
+		if (termios->c_iflag & IGNPAR)
+			port->ignore_status_mask |= SPRD_LSR_OE;
+	}
+
+	/* flow control */
+	fc = serial_in(port, SPRD_CTL1);
+	fc &= ~(RX_HW_FLOW_CTL_THLD | RX_HW_FLOW_CTL_EN | TX_HW_FLOW_CTL_EN);
+	if (termios->c_cflag & CRTSCTS) {
+		fc |= RX_HW_FLOW_CTL_THLD;
+		fc |= RX_HW_FLOW_CTL_EN;
+		fc |= TX_HW_FLOW_CTL_EN;
+	}
+
+	/* clock divider bit0~bit15 */
+	serial_out(port, SPRD_CLKD0, quot & 0xffff);
+
+	/* clock divider bit16~bit20 */
+	serial_out(port, SPRD_CLKD1, (quot & 0x1f0000) >> 16);
+	serial_out(port, SPRD_LCR, lcr);
+	/*
+	 * Same constant that sprd_startup() ORs into CTL1.
+	 * NOTE(review): the meaning of 0x3e00 is not documented in this
+	 * file - confirm against the register manual.
+	 */
+	fc |= 0x3e00 | THLD_RX_FULL;
+	serial_out(port, SPRD_CTL1, fc);
+}
+
+/* Return the fixed type string for this port, "SPX". */
+static const char *sprd_type(struct uart_port *port)
+{
+	return "SPX";
+}
+
+/* Port resource release callback: intentionally empty. */
+static void sprd_release_port(struct uart_port *port)
+{
+	/* nothing to do */
+}
+
+/* Port resource request callback: claims nothing and always returns 0. */
+static int sprd_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+/* Set the port type to PORT_SPRD when @flags contains UART_CONFIG_TYPE. */
+static void sprd_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_SPRD;
+}
+
+/*
+ * Validate user supplied serial settings.
+ *
+ * Returns 0 when @ser names the PORT_SPRD type and the same IRQ as @port,
+ * -EINVAL otherwise.
+ */
+static int sprd_verify_port(struct uart_port *port,
+				   struct serial_struct *ser)
+{
+	bool type_ok = ser->type == PORT_SPRD;
+	bool irq_ok = port->irq == ser->irq;
+
+	return (type_ok && irq_ok) ? 0 : -EINVAL;
+}
+
+/* Callback table that wires the functions above into struct uart_ops. */
+static struct uart_ops serial_sprd_ops = {
+	.tx_empty = sprd_tx_empty,
+	.get_mctrl = sprd_get_mctrl,
+	.set_mctrl = sprd_set_mctrl,
+	.stop_tx = sprd_stop_tx,
+	.start_tx = sprd_start_tx,
+	.stop_rx = sprd_stop_rx,
+	.break_ctl = sprd_break_ctl,
+	.startup = sprd_startup,
+	.shutdown = sprd_shutdown,
+	.set_termios = sprd_set_termios,
+	.type = sprd_type,
+	.release_port = sprd_release_port,
+	.request_port = sprd_request_port,
+	.config_port = sprd_config_port,
+	.verify_port = sprd_verify_port,
+};
+
+#ifdef CONFIG_SERIAL_SPRD_CONSOLE
+/*
+ * Poll until bits 15:8 of STS1 read as zero, giving up after 10000 polls
+ * spaced 1us apart (about 10ms). At least one 1us delay is always taken,
+ * even when the first read already shows zero.
+ */
+static inline void wait_for_xmitr(struct uart_port *port)
+{
+	unsigned int sts;
+	unsigned int budget = 10000;
+
+	for (;;) {
+		sts = serial_in(port, SPRD_STS1);
+		if (--budget == 0)
+			break;
+		udelay(1);
+		if (!(sts & 0xff00))
+			break;
+	}
+}
+
+/*
+ * Console output of one character: wait (bounded) for pending transmit
+ * data to go out, then write @ch to the TX data register.
+ */
+static void sprd_console_putchar(struct uart_port *port, int ch)
+{
+	wait_for_xmitr(port);
+	serial_out(port, SPRD_TXD, ch);
+}
+
+/*
+ * Console write callback.
+ *
+ * @co:    console whose index selects the port in sprd_port[]
+ * @s:     characters to print
+ * @count: number of characters in @s
+ *
+ * All UART interrupts are masked while the string is pushed out by
+ * polling and restored afterwards. port->lock is held with local
+ * interrupts disabled, because this function can be entered from
+ * interrupt context through printk(). During an oops the lock is only
+ * tried, so a lock already held by the crashing context cannot block
+ * the output.
+ */
+static void sprd_console_write(struct console *co, const char *s,
+				      unsigned int count)
+{
+	struct uart_port *port = &sprd_port[co->index]->port;
+	unsigned int ien;
+	unsigned long flags;
+	int locked = 1;
+
+	if (oops_in_progress)
+		locked = spin_trylock_irqsave(&port->lock, flags);
+	else
+		spin_lock_irqsave(&port->lock, flags);
+
+	/* save the IEN then disable the interrupts */
+	ien = serial_in(port, SPRD_IEN);
+	serial_out(port, SPRD_IEN, 0x0);
+
+	uart_console_write(port, s, count, sprd_console_putchar);
+
+	/* wait for transmitter to become empty and restore the IEN */
+	wait_for_xmitr(port);
+	serial_out(port, SPRD_IEN, ien);
+
+	if (locked)
+		spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/*
+ * Console setup callback.
+ *
+ * @co:      console being set up; an out-of-range index falls back to 0
+ * @options: optional "baud/parity/bits/flow" string; the defaults are
+ *           115200, no parity, 8 bits, no flow control
+ *
+ * Not marked __init: the function is referenced from the sprd_console
+ * structure, which is not init data, and it is called when a port is
+ * registered, which can happen after init memory has been released.
+ *
+ * Returns -ENODEV when the selected port has not been allocated yet,
+ * otherwise the result of uart_set_options().
+ */
+static int sprd_console_setup(struct console *co, char *options)
+{
+	struct sprd_uart_port *sp;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (unlikely(co->index >= UART_NR_MAX || co->index < 0))
+		co->index = 0;
+
+	sp = sprd_port[co->index];
+	if (sp == NULL) {
+		pr_info("serial port %d not yet initialized\n", co->index);
+		return -ENODEV;
+	}
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(&sp->port, co, baud, parity, bits, flow);
+}
+
+/* Forward declaration: the console descriptor points back at the driver. */
+static struct uart_driver sprd_uart_driver;
+
+/* Console descriptor for the SPRD UART. */
+static struct console sprd_console = {
+	/* identity */
+	.name = SPRD_TTY_NAME,
+	.index = -1,
+	.flags = CON_PRINTBUFFER,
+	.data = &sprd_uart_driver,
+	/* callbacks */
+	.setup = sprd_console_setup,
+	.write = sprd_console_write,
+	.device = uart_console_device,
+};
+
+#define SPRD_CONSOLE	(&sprd_console)
+
+/* Support for earlycon */
+/*
+ * Early-console character output: spin until SPRD_LSR reports
+ * SPRD_LSR_TX_OVER, then write the byte to SPRD_TXD. The wait is unbounded.
+ */
+static void sprd_putc(struct uart_port *port, int c)
+{
+	for (;;) {
+		if (readl(port->membase + SPRD_LSR) & SPRD_LSR_TX_OVER)
+			break;
+	}
+
+	writeb(c, port->membase + SPRD_TXD);
+}
+
+/*
+ * Early-console write hook: con->data carries the earlycon_device whose
+ * embedded uart_port is used to emit the @n bytes of @s via sprd_putc().
+ */
+static void sprd_early_write(struct console *con, const char *s,
+				    unsigned n)
+{
+	struct earlycon_device *early = con->data;
+	struct uart_port *early_port = &early->port;
+
+	uart_console_write(early_port, s, n, sprd_putc);
+}
+
+/*
+ * Earlycon setup hook: install sprd_early_write() as the console writer.
+ * Returns -ENODEV when the early device has no mapped register base.
+ * @opt is not used.
+ */
+static int __init sprd_early_console_setup(
+				struct earlycon_device *device,
+				const char *opt)
+{
+	if (device->port.membase) {
+		device->con->write = sprd_early_write;
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+EARLYCON_DECLARE(sprd_serial, sprd_early_console_setup);
+OF_EARLYCON_DECLARE(sprd_serial, "sprd,sc9836-uart",
+		    sprd_early_console_setup);
+
+#else /* !CONFIG_SERIAL_SPRD_CONSOLE */
+#define SPRD_CONSOLE		NULL
+#endif
+
+/*
+ * serial-core driver descriptor shared by all SPRD UART ports.
+ * SPRD_CONSOLE is NULL when CONFIG_SERIAL_SPRD_CONSOLE is not set.
+ */
+static struct uart_driver sprd_uart_driver = {
+	.owner = THIS_MODULE,
+	/* naming */
+	.driver_name = "sprd_serial",
+	.dev_name = SPRD_TTY_NAME,
+	/* tty numbering and port count */
+	.major = SPRD_TTY_MAJOR,
+	.minor = SPRD_TTY_MINOR_START,
+	.nr = UART_NR_MAX,
+	/* optional console */
+	.cons = SPRD_CONSOLE,
+};
+
+/*
+ * sprd_probe() - set up one UART instance and register it with serial core.
+ * @pdev: platform device; when it has a device-tree node, pdev->id is taken
+ *        from the node's "serial" alias
+ *
+ * The port structure and the register mapping are device-managed, so they
+ * are released automatically on probe failure and on unbind. On every
+ * failure after allocation the sprd_port[] slot is cleared again so that no
+ * other code can follow a pointer to memory that is about to be freed.
+ *
+ * Returns 0 on success or a negative errno: -ENXIO for an unsupported id,
+ * -ENOMEM on allocation or mapping failure, -ENODEV when no memory resource
+ * is provided, the platform_get_irq() error when no interrupt is available,
+ * or the error returned by uart_add_one_port().
+ */
+static int sprd_probe(struct platform_device *pdev)
+{
+	struct resource *mem;
+	struct device_node *np = pdev->dev.of_node;
+	struct uart_port *up;
+	struct clk *clk;
+	int irq;
+	int ret;
+
+	if (np)
+		pdev->id = of_alias_get_id(np, "serial");
+
+	if (unlikely(pdev->id < 0 || pdev->id >= UART_NR_MAX)) {
+		dev_err(&pdev->dev, "does not support id %d\n", pdev->id);
+		return -ENXIO;
+	}
+
+	sprd_port[pdev->id] = devm_kzalloc(&pdev->dev,
+		sizeof(*sprd_port[pdev->id]), GFP_KERNEL);
+	if (!sprd_port[pdev->id])
+		return -ENOMEM;
+
+	up = (struct uart_port *)sprd_port[pdev->id];
+	up->dev = &pdev->dev;
+	up->line = pdev->id;
+	up->type = PORT_SPRD;
+	/* registers are reached through the ioremap()ed membase below */
+	up->iotype = UPIO_MEM;
+	up->uartclk = SPRD_DEF_RATE;
+	up->fifosize = SPRD_FIFO_SIZE;
+	up->ops = &serial_sprd_ops;
+	up->flags = UPF_BOOT_AUTOCONF;
+
+	/* the clock is optional: without it the default rate is kept */
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (!IS_ERR(clk))
+		up->uartclk = clk_get_rate(clk);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (unlikely(!mem)) {
+		dev_err(&pdev->dev, "not provide mem resource\n");
+		ret = -ENODEV;
+		goto err_clear_port;
+	}
+	up->mapbase = mem->start;
+	up->membase = devm_ioremap(&pdev->dev, mem->start,
+				   resource_size(mem));
+	if (!up->membase) {
+		dev_err(&pdev->dev, "failed to map registers\n");
+		ret = -ENOMEM;
+		goto err_clear_port;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (unlikely(irq < 0)) {
+		dev_err(&pdev->dev, "not provide irq resource\n");
+		/* propagate the real error code (e.g. -EPROBE_DEFER) */
+		ret = irq;
+		goto err_clear_port;
+	}
+	up->irq = irq;
+
+	platform_set_drvdata(pdev, up);
+
+	ret = uart_add_one_port(&sprd_uart_driver, up);
+	if (ret)
+		goto err_clear_port;
+
+	return 0;
+
+err_clear_port:
+	sprd_port[pdev->id] = NULL;
+	return ret;
+}
+
+/*
+ * Unbind: hand the port stored as driver data back to serial core and
+ * return the result of uart_remove_one_port().
+ */
+static int sprd_remove(struct platform_device *dev)
+{
+	return uart_remove_one_port(&sprd_uart_driver,
+				    platform_get_drvdata(dev));
+}
+
+/*
+ * sprd_suspend() - snapshot the UART configuration registers.
+ *
+ * Copies IEN, LCR, CTL1, CTL2, CLKD0 and CLKD1 of the port selected by
+ * dev->id into that port's reg_bak so sprd_resume() can write them back.
+ * @state is not used. Always returns 0.
+ */
+static int sprd_suspend(struct platform_device *dev, pm_message_t state)
+{
+	int id = dev->id;
+	struct uart_port *port = (struct uart_port *)sprd_port[id];
+	struct reg_backup *reg_bak = &(sprd_port[id]->reg_bak);
+
+	reg_bak->ien = serial_in(port, SPRD_IEN);
+	reg_bak->ctrl0 = serial_in(port, SPRD_LCR);
+	reg_bak->ctrl1 = serial_in(port, SPRD_CTL1);
+	reg_bak->ctrl2 = serial_in(port, SPRD_CTL2);
+	reg_bak->clkd0 = serial_in(port, SPRD_CLKD0);
+	reg_bak->clkd1 = serial_in(port, SPRD_CLKD1);
+
+	return 0;
+}
+
+/*
+ * sprd_resume() - write the register snapshot in reg_bak back to the UART.
+ *
+ * The line/control and clock-divider registers are written first and IEN
+ * last. NOTE(review): presumably so that UART interrupts are only
+ * re-enabled once the rest of the configuration is in place - confirm.
+ * Always returns 0.
+ */
+static int sprd_resume(struct platform_device *dev)
+{
+	int id = dev->id;
+	struct uart_port *port = (struct uart_port *)sprd_port[id];
+	struct reg_backup *reg_bak = &(sprd_port[id]->reg_bak);
+
+	serial_out(port, SPRD_LCR, reg_bak->ctrl0);
+	serial_out(port, SPRD_CTL1, reg_bak->ctrl1);
+	serial_out(port, SPRD_CTL2, reg_bak->ctrl2);
+	serial_out(port, SPRD_CLKD0, reg_bak->clkd0);
+	serial_out(port, SPRD_CLKD1, reg_bak->clkd1);
+	serial_out(port, SPRD_IEN, reg_bak->ien);
+
+	return 0;
+}
+
+/* Device-tree compatible strings handled by this driver. */
+static const struct of_device_id serial_ids[] = {
+	{.compatible = "sprd,sc9836-uart",},
+	{}
+};
+/* export the match table so a modular build can be autoloaded */
+MODULE_DEVICE_TABLE(of, serial_ids);
+
+/* Platform-bus glue: probe/remove and suspend/resume entry points. */
+static struct platform_driver sprd_platform_driver = {
+	.driver = {
+		.name = "sprd_serial",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(serial_ids),
+	},
+	.probe = sprd_probe,
+	.remove = sprd_remove,
+	.suspend = sprd_suspend,
+	.resume = sprd_resume,
+};
+
+/*
+ * Module init: register the uart driver, then the platform driver. If the
+ * platform driver cannot be registered the uart driver is unregistered
+ * again. Returns 0 or the error of the step that failed.
+ */
+static int __init sprd_serial_init(void)
+{
+	int err;
+
+	err = uart_register_driver(&sprd_uart_driver);
+	if (unlikely(err))
+		return err;
+
+	err = platform_driver_register(&sprd_platform_driver);
+	if (likely(!err))
+		return 0;
+
+	uart_unregister_driver(&sprd_uart_driver);
+	return err;
+}
+
+/*
+ * Module exit: unregister the platform driver first, then the uart driver
+ * (the reverse of the registration order in sprd_serial_init()).
+ */
+static void __exit sprd_serial_exit(void)
+{
+	platform_driver_unregister(&sprd_platform_driver);
+	uart_unregister_driver(&sprd_uart_driver);
+}
+
+module_init(sprd_serial_init);
+module_exit(sprd_serial_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Spreadtrum SoC serial driver series");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 16ad852..d9a8c88 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -247,4 +247,7 @@
 /* MESON */
 #define PORT_MESON	109
 
+/* SPRD SERIAL  */
+#define PORT_SPRD   110
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
1.7.9.5

^ permalink raw reply related

* Re: [PATCH v3 2/5] Documentation: DT: Add bindings for Spreadtrum SoC Platform
From: Arnd Bergmann @ 2014-11-25 12:52 UTC (permalink / raw)
  To: Chunyan Zhang
  Cc: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
	artagnon, rrichter, will.deacon, gnomes, corbet, jason, broonie,
	heiko, shawn.guo, florian.vaussard, andrew, hytszk, orsonzhai,
	geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra, wei.qiao,
	devicetree, linux-arm-kernel, linux-kernel, sprdlinux, linux-doc,
	linux-serial, linux-api
In-Reply-To: <1416917818-10506-3-git-send-email-chunyan.zhang@spreadtrum.com>

On Tuesday 25 November 2014 20:16:55 Chunyan Zhang wrote:
> diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.txt b/Documentation/devicetree/bindings/serial/sprd-uart.txt
> new file mode 100644
> index 0000000..54e532f
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/serial/sprd-uart.txt
> @@ -0,0 +1,6 @@
> +* Spreadtrum serial UART
> +
> +Required properties:
> +- compatible: must be "sprd,sc9836-uart"
> +- reg: offset and length of the register set for the device
> +- interrupts: exactly one interrupt specifier
> 

The driver uses a clock, and the dts file lists a clock
property, so it should be documented here, either as optional
or mandatory, along with a description of what this clock refers to.

	Arnd

^ permalink raw reply

* Re: [PATCH v3 0/5] Add Spreadtrum Sharkl64 Platform support
From: Mark Brown @ 2014-11-25 12:57 UTC (permalink / raw)
  To: Chunyan Zhang
  Cc: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, mark.rutland, m-karicheri2, pawel.moll, artagnon,
	rrichter, will.deacon, arnd, gnomes, corbet, jason, heiko,
	shawn.guo, florian.vaussard, andrew, hytszk, orsonzhai, geng.ren,
	zhizhou.zhang, lanqing.liu, zhang.lyra, wei.qiao, devicetree,
	linux-arm-kernel, linux-kernel, sprdlinux, linux-doc,
	linux-serial, linux-api
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>

[-- Attachment #1: Type: text/plain, Size: 522 bytes --]

On Tue, Nov 25, 2014 at 08:16:53PM +0800, Chunyan Zhang wrote:
> Spreadtrum is a rapid growing chip vendor providing smart phone total solutions.
> 
> Sharkl64 Platform is nominated as a SoC infrastructure that supports 4G/3G/2G
> standards based on ARMv8 multiple core architecture.Now we have only one
> SoC(SC9836) based on this Platform in developing.

This series is being sent to both my work and upstream addresses -
please don't do that, send it to just one (normally my upstream one for
upstream work).

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply

* Re: [PATCH v3 4/5] arm64: Add support for Spreadtrum's Sharkl64 Platform in Kconfig and defconfig
From: Arnd Bergmann @ 2014-11-25 12:57 UTC (permalink / raw)
  To: Chunyan Zhang
  Cc: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
	artagnon, rrichter, will.deacon, gnomes, corbet, jason, broonie,
	heiko, shawn.guo, florian.vaussard, andrew, hytszk, orsonzhai,
	geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra, wei.qiao,
	devicetree, linux-arm-kernel, linux-kernel, sprdlinux, linux-doc,
	linux-serial, linux-api
In-Reply-To: <1416917818-10506-5-git-send-email-chunyan.zhang@spreadtrum.com>

On Tuesday 25 November 2014 20:16:57 Chunyan Zhang wrote:
> 
> +menuconfig ARCH_SPRD
> +       bool "Spreadtrum SoC platform"
> +       depends on ARM64
> +       help
> +         Support for Spreadtrum ARM based SoCs
> +
> +if ARCH_SPRD
> +
> +config ARCH_SHARKL64
> +       bool "Sharkl64 SoC Platform"
> +       help
> +         Sharkl64 is a Spreadtrum's SoC Platform which is based
> +         on ARM 64-bit processor core including
> +           sc9836
> +
> +endif #ARCH_SPRD
> +

I don't think we need multiple levels here, it should be enough to
have either ARCH_SPRD or ARCH_SHARKL64, because all device drivers
are going to be optional anyway. Typically a Kconfig symbol covers
all SoCs that are related, so if you at Spreadtrum are doing both
phone and server chips and these are designed independently, you
would have two symbols, but if you only expect to see phone chips
here that are all derived from the same product line, using ARCH_SPRD
to refer to all of them should be enough.

	Arnd

^ permalink raw reply

* Re: [PATCH v3 0/5] Add Spreadtrum Sharkl64 Platform support
From: Arnd Bergmann @ 2014-11-25 12:59 UTC (permalink / raw)
  To: Chunyan Zhang
  Cc: grant.likely, robh+dt, catalin.marinas, gregkh, ijc+devicetree,
	jslaby, galak, broonie, mark.rutland, m-karicheri2, pawel.moll,
	artagnon, rrichter, will.deacon, gnomes, corbet, jason, broonie,
	heiko, shawn.guo, florian.vaussard, andrew, hytszk, orsonzhai,
	geng.ren, zhizhou.zhang, lanqing.liu, zhang.lyra, wei.qiao,
	devicetree, linux-arm-kernel, linux-kernel, sprdlinux, linux-doc,
	linux-serial, linux-api
In-Reply-To: <1416917818-10506-1-git-send-email-chunyan.zhang@spreadtrum.com>

On Tuesday 25 November 2014 20:16:53 Chunyan Zhang wrote:
> Spreadtrum is a rapid growing chip vendor providing smart phone total solutions.
> 
> Sharkl64 Platform is nominated as a SoC infrastructure that supports 4G/3G/2G
> standards based on ARMv8 multiple core architecture.Now we have only one
> SoC(SC9836) based on this Platform in developing.
> 
> This patchset adds Sharkl64 support in arm64 device tree and the serial driver
> of SC9836-UART.
> 
> This patchset also has patches which address "sprd" prefix and DT compatible
> strings for nodes which appear un-documented.
> 
> This version code was tesed on Fast Mode.
> We use boot-wrapper-aarch64 as the bootloader.

Looks very good overall. I have two small comments to individual patches,
but I think we can still merge them for 3.19. Please send the serial
driver to Greg for inclusion, unless you get further comments, and send
the updated version of the other patches to arm@kernel.org and ask
for including them in the cover letter.

	Arnd

^ permalink raw reply

* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice (was: Pre-emption control for userspace)
From: Rik van Riel @ 2014-11-25 13:38 UTC (permalink / raw)
  To: Davidlohr Bueso
  Cc: Khalid Aziz, tglx, corbet, mingo, hpa, peterz, akpm, rientjes, ak,
	mgorman, liwanp, raistlin, kirill.shutemov, atomlin, avagin,
	gorcunov, serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn,
	keescook, yangds.fnst, sbauer, vishnu.ps, axboe, paulmck,
	linux-kernel, linux-doc, linux-api
In-Reply-To: <1416897050.20575.18.camel@linux-t7sj.site>

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 11/25/2014 01:30 AM, Davidlohr Bueso wrote:
> On Mon, 2014-11-24 at 21:03 -0500, Rik van Riel wrote:
>> I can see this "solution" help mostly with userspace spinlocks, 
>> which are relics of a past era that need to die. There is no way
>> userspace spinlocks will not fail miserably on virtual machines,
>> and it is time to get rid of them.
> 
> No, not really. Spinlocks are still very useful on bare metal. 
> Virtualization is not the only thing out there.

How many people are going to build two different binaries,
one for bare metal, and one for virtualized environments?

I suspect the vast majority of applications will only be
built once, and it would be nice if it wasn't using a
locking scheme that broke horribly on a significant part
of the deployments...

- -- 
All rights reversed
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAEBAgAGBQJUdIY6AAoJEM553pKExN6DaisH+wWzTc+onHTsPUXs6EU/s+sa
lp3KFWmRQACPjiWSyIfg7aWFxakiS8BQ4ypbXdC/55lHuX/KMm/1k3zZF/lHiyYA
vIwfPUX7TnZxgYGVGk++nrCTffQImAc5RXlCBU6Hp6dHxV5Pead6S9afO8dfOeVu
80cpsqCyUqX+jhMDKq6NkIE0mCMb/U4L0cqo7m67h7PTlWmj8V64PKJjvkDu48O1
tPd+6jj4xDoEl8dde00EMaYETA6Utngt8+LslV1hMB1nxn82aNIGJnEqQco4WGXH
gE8Pkn+iToBe1hPF63MVZFJHRzXPUOAoaBCTgu7+l9LLDkfBgv2A/ckh5NLUrd8=
=41Jl
-----END PGP SIGNATURE-----

^ permalink raw reply

* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice
From: Khalid Aziz @ 2014-11-25 14:45 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: corbet, mingo, hpa, peterz, riel, akpm, rientjes, ak, mgorman,
	liwanp, raistlin, kirill.shutemov, atomlin, avagin, gorcunov,
	serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn, keescook,
	yangds.fnst, sbauer, vishnu.ps, axboe, paulmck, linux-kernel,
	linux-doc, linux-api
In-Reply-To: <alpine.DEB.2.11.1411242317380.6439@nanos>

Thanks for the review. I appreciate your comments. Please see my 
response inline.

On 11/24/2014 04:35 PM, Thomas Gleixner wrote:
> On Mon, 24 Nov 2014, Khalid Aziz wrote:
>> sched/fair: Add advisory flag for borrowing a timeslice
>>
>> This patch adds a way for a task to request to borrow one timeslice
>> from future if it is about to be preempted, so it could delay
>> preemption and complete any critical task it is in the middle of.
>>
>> This feature helps with performance on databases and has been
>> used for many years on other OSs by the databases. This feature
>> helps in situation where a task acquires a lock before performing a
>> critical operation on the database and happens to get preempted before
>> it completes its task.  This lock being held causes all other tasks
>> that also acquire the same lock to perform their critical operation
>> on the database, to start queueing up and causing large number of
>> context switches.  This queueing problem can be avoided if the task
>> that acquires lock first could request scheduler to let it borrow one
>> timeslice once it enters its critical section and hence allow it to
>> complete its critical section without causing queueing problem. If
>
> While you are niftily avoiding to talk about the nature of the lock, I
> can take it for granted that you are talking about user space
> spinlocks, right?

Sorry, it was certainly not my intention to avoid talking about the 
nature of the locks. I could have done a better job of explaining the 
locks in use. Yes, these are userspace spinlocks implemented in database 
library used by the database.

>
> Simply if you would talk about futexes and pthread_mutexes then it
> would have occured to you while implementing that feature, that the
> kernel already has a mechanism to record a reference to a user space
> data structure (robust_list_head) which is updated when a futex is
> acquired in user space, i.e. when a critical section is entered. It's
> not the same as you need, but it would be relatively simple to convey
> that information there.
>
> So what are the actual lock types and use cases and why can't you
> combine that with the existing robust list mechanism?

When I was asked to solve this queueing problem, I started working on a 
solution built around futexes and then I found out futex based solution 
is a no-go. Two primary users of this feature are database and Java. 
Neither uses POSIX locks or futex I am told by the folks that maintain 
these two. Hence a solution that allows them to ask for amnesty from 
preemption for just one more timeslice by letting them borrow a 
timeslice from future IF they are going to be preempted BEFORE their 
critical section is done. Userspace app is expected to yield the 
processor as soon as it is done with critical section which means the 
app may never use the extra timeslice it had asked for.

This solution has been used by both database and java on other OSs and 
has shown performance improvement. Andrew had asked for performance 
numbers on Linux with this patch last time I sent this out and it took 
me a while to get performance folks to run a full TPC-C workload. They 
did see a 3% improvement in tpcc as I noted in commit log and that is a 
significant improvement.

>
>> critical section completes before the task is due for preemption,
>> the task can simply desassert its request. A task sends the
>
> And that deassertion has which consequences before the next preempt
> check happens?
>
>> +config SCHED_PREEMPT_DELAY
>> +	def_bool n
>> +	prompt "Scheduler preemption delay support"
>> +	depends on PROC_FS
>
> Why so?

I assume you are asking about "depends on PROC_FS"? This patch uses proc 
to publish statistics but it really is publishing through existing proc 
file, so this is a weak dependency at best. I will remove this.

>
>> @@ -1324,6 +1325,13 @@ struct task_struct {
>>   	/* Revert to default priority/policy when forking */
>>   	unsigned sched_reset_on_fork:1;
>>   	unsigned sched_contributes_to_load:1;
>> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
>> +	struct preempt_delay {
>> +		u32 __user *delay_req;		/* delay request flag pointer */
>> +		unsigned char delay_granted;	/* currently in delay */
>> +		unsigned char yield_penalty;	/* failure to yield penalty */
>> +	} sched_preempt_delay;
>
> No. First of all this wants to be a proper struct declaration outside
> of task_struct.
>
> Aside of that your user space side is actually a structure and not a
> opaque u32 pointer, so this should be an explicit data type and not
> something randomly defined in the guts of task_struct.

That is a good point. I will fix it.

>
>> +#if defined(CONFIG_SCHED_PREEMPT_DELAY) && defined(CONFIG_PROC_FS)
>> +extern void sched_preempt_delay_show(struct seq_file *m,
>> +					struct task_struct *task);
>> +extern void sched_preempt_delay_set(struct task_struct *task,
>> +					unsigned char *val);
>> +#endif
>
> Can you please get rid of the leftovers of your previous patches
> yourself and before posting? It's annoying as hell to review patches
> which contain stale code.

Sorry, my screw up. I missed that one when reviewing my own patch.

>
>> diff --git a/kernel/fork.c b/kernel/fork.c
>> index 9b7d746..7f0d843 100644
>> --- a/kernel/fork.c
>> +++ b/kernel/fork.c
>> @@ -1671,6 +1671,11 @@ long do_fork(unsigned long clone_flags,
>>   			init_completion(&vfork);
>>   			get_task_struct(p);
>>   		}
>> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
>> +		p->sched_preempt_delay.delay_req = NULL;
>> +		p->sched_preempt_delay.delay_granted = 0;
>> +		p->sched_preempt_delay.yield_penalty = 0;
>> +#endif
>
> Sigh. We do not sprinkle that kind of #ifdef crap all over the
> place. That's what inline functions in header files are for.

Sure. I will change that.

>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 240157c..38cb515 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -4230,6 +4230,14 @@ SYSCALL_DEFINE0(sched_yield)
>>   {
>>   	struct rq *rq = this_rq_lock();
>>
>> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
>> +	/*
>> +	 * Clear the penalty flag for current task to reward it for
>> +	 * palying by the rules
>
> Looking at that mess makes me palying^Wpale.

:) will clean it up.

>
>> +	 */
>> +	current->sched_preempt_delay.yield_penalty = 0;
>> +#endif
>> +
>>   	schedstat_inc(rq, yld_count);
>>   	current->sched_class->yield_task(rq);
>
>> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
>> +/*
>> + * delay_resched_rq(): Check if the task about to be preempted has
>> + *	requested an additional time slice. If it has, grant it additional
>> + *	timeslice once.
>> + */
>> +static void
>> +delay_resched_rq(struct rq *rq)
>> +{
>> +	struct task_struct *curr = rq->curr;
>> +	struct sched_entity *se;
>> +	int cpu = task_cpu(curr);
>> +	u32 __user *delay_req;
>> +	unsigned int delay_req_flag;
>> +	unsigned char *delay_flag;
>> +
>> +	/*
>> +	 * Check if task is using pre-emption delay feature. If address
>> +	 * for preemption delay request flag is not set, this task is
>> +	 * not using preemption delay feature, we can reschedule without
>> +	 * any delay
>
> So what happens if:
>
>     kernel.preempt_delay_available = 1;
>
>     prctl(PR_SET_PREEMPT_DELAY, ...);
>
>     kernel.preempt_delay_available = 0;
>
> Nothing happens at all because you fail to give the sysop control over
> the feature once you unleashed it.
>
> The proper solution for this is to use a static key to control the
> feature itself. That also reduces the overhead for those who are not
> interested in that.

I was trying to reduce adding one more check to scheduler path and that 
opened up a hole as you pointed out. Good catch! I will add the 
conditional to scheduler path to plug this hole.

>
>> +	 */
>> +	delay_req = curr->sched_preempt_delay.delay_req;
>> +
>> +	if ((delay_req == NULL) || (cpu != smp_processor_id()))
>
> check_preempt_tick() clearly does not care about that, but you inflict
> a smp_processor_id() on every caller. I can see that you really care
> about performance.

:) I do care about performance. I will remove that check.

>
>> +		goto resched_now;
>> +
>> +	/*
>> +	 * Pre-emption delay will  be granted only once. If this task
>> +	 * has already been granted delay, rechedule now
>> +	 */
>> +	if (curr->sched_preempt_delay.delay_granted) {
>> +		curr->sched_preempt_delay.delay_granted = 0;
>> +		goto resched_now;
>> +	}
>> +	/*
>> +	 * Get the value of preemption delay request flag from userspace.
>> +	 * Task had already passed us the address where the flag is stored
>> +	 * in userspace earlier. This flag is just like the PROCESS_PRIVATE
>> +	 * futex, leverage the futex code here to read the flag. If there
>> +	 * is a page fault accessing this flag in userspace, that means
>> +	 * userspace has not touched this flag recently and we can
>> +	 * assume no preemption delay is needed.
>> +	 *
>> +	 * If task is not requesting additional timeslice, resched now
>> +	 */
>> +	if (delay_req) {
>
> Surely we need to recheck delay_req here.
>

Agreed, this is superfluous. I will clean it up.

>> +		int ret;
>> +
>> +		pagefault_disable();
>> +		ret = __copy_from_user_inatomic(&delay_req_flag, delay_req,
>> +				sizeof(u32));
>> +		pagefault_enable();
>> +		delay_flag = &delay_req_flag;
>> +		if (ret || !delay_flag[0])
>
> This is really a well designed kernel/user space interface. NOT.

Please do suggest a better interface. My constraint is it has to be very 
low overhead. A system call is just too much overhead and will negate 
any benefit from this.

>
>> +			goto resched_now;
>> +	} else {
>> +		goto resched_now;
>> +	}
>> +
>> +	/*
>> +	 * Current thread has requested preemption delay and has not
>> +	 * been granted an extension yet. If this thread failed to yield
>> +	 * processor after being granted amnesty last time, penalize it
>> +	 * by not granting this delay request, otherwise give it an extra
>> +	 * timeslice.
>> +	 */
>> +	if (curr->sched_preempt_delay.yield_penalty) {
>> +		curr->sched_preempt_delay.yield_penalty = 0;
>> +		goto resched_now;
>> +	}
>> +
>> +	se = &curr->se;
>> +	curr->sched_preempt_delay.delay_granted = 1;
>> +	/*
>> +	 * Set the penalty flag for failing to yield the processor after
>> +	 * being granted immunity. This flag will be cleared in
>> +	 * sched_yield() if the thread indeed calls sched_yield
>> +	 */
>> +	curr->sched_preempt_delay.yield_penalty = 1;
>
> Why on earth do we need two flags here? Just because we can create
> more code in the guts of the scheduler hot pathes that way?
>
> And surely we want to put them into two adjacent u8 to make life
> easier for all architectures.
>

They keep track of two different things. delay_granted keeps track of 
whether a request for preemption delay was granted and is used to stop 
the task from being granted a second timeslice. yield_penalty keeps 
track of whether the task should be penalized or not. These two flags 
are cleared at two different times. delay_granted is cleared when the 
task is preempted forcibly after being granted a delay whereas 
yield_penalty is cleared when the task gives up the processor voluntarily.

>> +	/*
>> +	 * Let the thread know it got amnesty and it should call
>> +	 * sched_yield() when it is done to avoid penalty next time
>> +	 * it wants amnesty. We need to write to userspace location.
>> +	 * Since we just read from this location, chances are extremley
>> +	 * low we might page fault. If we do page fault, we will ignore
>> +	 * it and accept the cost of failed write in form of unnecessary
>> +	 * penalty for userspace task for not yielding processor.
>
> This is the completely wrong argument. We know that the task was
> asking for an extra time slice because the copy from user above
> succeeded. So we are better of to let the task actually handle its
> pagefault than scheduling it out.
>
>> +	 * This is a highly unlikely scenario.
>> +	 */
>> +	delay_flag[0] = 0;
>> +	delay_flag[1] = 1;
>
> Sigh.
>
>> +#ifdef CONFIG_SCHED_PREEMPT_DELAY
>
> And all of this needs to be in kernel/sys.c just because...

I will look for a better way to do it.

>
>> +int sysctl_preempt_delay_available;
>> +
>> +static int
>> +preempt_delay_write(struct task_struct *task, unsigned long preempt_delay_addr)
>> +{
>> +	/*
>> +	 * Do not allow write if pointer is currently set
>> +	 */
>> +	if (task->sched_preempt_delay.delay_req &&
>> +			((void *)preempt_delay_addr != NULL))
>> +		return -EINVAL;
>> +	/*
>> +	 * Validate the pointer. It should be aligned to 4-byte boundary.
>
> So 4 bytes is a perfect boundary for everyone, right? Pulled that
> number out of thin air or what?

OK. What would you suggest? I can be taught to do this better.

>
>> +	 */
>> +	if (unlikely(!IS_ALIGNED(preempt_delay_addr, 4)))
>> +		return -EFAULT;
>> +	if (unlikely(!access_ok(rw, preempt_delay_addr, sizeof(u32))))
>> +		return -EFAULT;
>> +
>> +	task->sched_preempt_delay.delay_req = (u32 __user *) preempt_delay_addr;
>> +
>> +	/* zero out flags */
>
> Brilliant comment. I can see what the code is doing. What's way more
> interesting and of course undocumented is why you are ignoring the
> return value of put_user() ..
>
>> +	put_user(0, (uint32_t *)preempt_delay_addr);
>
> Aside of the general issues I have with this (see the inline replies
> to your changelog) the overall impression of this patch is that it is
> a half baken and carelessly cobbled together extract of some data base
> specific kernel hackery, which I prefer not to see at all.
>
> Thanks,
>
> 	tglx
>

Even if it helps one of the major workloads for Linux to perform better?

Thanks! You have lot of good feedback for me and I really appreciate it.

--
Khalid


^ permalink raw reply

* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice
From: Khalid Aziz @ 2014-11-25 14:50 UTC (permalink / raw)
  To: Mike Galbraith, Thomas Gleixner
  Cc: corbet-T1hC0tSOHrs, mingo-H+wXaHxf7aLQT0dZR+AlfA,
	hpa-YMNOUZJC4hwAvxtiuMwx3w, peterz-wEGCiKHe2LqWVfeAwA7xHQ,
	riel-H+wXaHxf7aLQT0dZR+AlfA,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	rientjes-hpIqsD4AKlfQT0dZR+AlfA, ak-VuQAYsv1563Yd54FQh9/CA,
	mgorman-l3A5Bk7waGM, liwanp-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	raistlin-k2GhghHVRtY, kirill.shutemov-VuQAYsv1563Yd54FQh9/CA,
	atomlin-H+wXaHxf7aLQT0dZR+AlfA, avagin-GEFAQzZX7r8dnm+yROfE0A,
	gorcunov-GEFAQzZX7r8dnm+yROfE0A,
	serge.hallyn-Z7WLFzj8eWMS+FvcfC7Uqw, athorlton-sJ/iWh9BUns,
	oleg-H+wXaHxf7aLQT0dZR+AlfA, vdavydov-bzQdu9zFT3WakBO8gow8eQ,
	daeseok.youn-Re5JQEeQqe8AvxtiuMwx3w,
	keescook-F7+t8E8rja9g9hUCZPvPmw,
	yangds.fnst-BthXqXjhjHXQFUHtdCDX3A, sbauer-F61uvSdQLzf2fBVCVOL8/A,
	vishnu.ps-Sze3O3UU22JBDgjK7y7TUQ, axboe-b10kYP2dOMg,
	paulmck-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-doc-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1416889208.4335.127.camel-sZ+7a5bGyC/1wTEvPJ5Q0F6hYfS7NtTn@public.gmane.org>

On 11/24/2014 09:20 PM, Mike Galbraith wrote:
> On Tue, 2014-11-25 at 00:35 +0100, Thomas Gleixner wrote:
>
>> Aside of the general issues I have with this (see the inline replies
>> to your changelog) the overall impression of this patch is that it is
>> a half baken and carelessly cobbled together extract of some data base
>> specific kernel hackery, which I prefer not to see at all.
>
> It culminates in a lumbering pseudo RT class of task disguised as a fair
> class task.  I'd expect more gain by twiddling knobs to let last buddy
> do its job than the 3% mentioned.
>
> You could perhaps create a SUPER_BATCH class that is not wakeup
> preempted by any fair class task of <= priority, not only BATCH and
> IDLE, but that's as nasty as this patch, though loads prettier.  The
> tick time thing doesn't feel right at all... if you're hurt badly by the
> tick, you're likely holding the lock too long methinks.
>
> 	-Mike
>
>

It is definitely not an attempt to solve any kind of RT problem. It 
would be a poor attempt if it indeed attempted to solve an RT problem. 
RT is all about guarantees. This patch does not help there at all and 
hence I have no intention of ever applying anything like this to 
SCHED_FIFO or SCHED_RR.

This problem is not caused by task holding the lock too long. It is 
caused by the task happening to acquire the lock just before its current 
timeslice is up. In that case, it does not matter how long the task 
holds the lock for.

--
Khalid

^ permalink raw reply

* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice
From: Khalid Aziz @ 2014-11-25 14:52 UTC (permalink / raw)
  To: Rik van Riel, tglx, corbet, mingo, hpa, peterz, akpm, rientjes,
	ak, mgorman, liwanp, raistlin, kirill.shutemov, atomlin, avagin,
	gorcunov, serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn,
	keescook, yangds.fnst, sbauer, vishnu.ps, axboe, paulmck
  Cc: linux-kernel, linux-doc, linux-api
In-Reply-To: <5473E388.6000605@redhat.com>

On 11/24/2014 07:03 PM, Rik van Riel wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> On 11/24/2014 03:56 PM, Khalid Aziz wrote:
>> sched/fair: Add advisory flag for borrowing a timeslice
>>
>> This patch adds a way for a task to request to borrow one
>> timeslice from future if it is about to be preempted, so it could
>> delay preemption and complete any critical task it is in the middle
>> of.
>>
>> This feature helps with performance on databases and has been used
>> for many years on other OSs by the databases. This feature helps in
>> situation where a task acquires a lock before performing a critical
>> operation on the database and happens to get preempted
>
> Why don't the other tasks that want the lock sleep on the
> lock?
>
> I can see this "solution" help mostly with userspace spinlocks,
> which are relics of a past era that need to die. There is no
> way userspace spinlocks will not fail miserably on virtual
> machines, and it is time to get rid of them.

This solution indeed is for userspace spinlocks. Database code has been 
written with all critical locking implemented in userspace (as I have 
been told by database folks. I am not a database guy).

--
Khalid

^ permalink raw reply

* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice
From: Khalid Aziz @ 2014-11-25 14:56 UTC (permalink / raw)
  To: Srikar Dronamraju
  Cc: tglx, corbet, mingo, hpa, peterz, riel, akpm, rientjes, ak,
	mgorman, raistlin, kirill.shutemov, atomlin, avagin, gorcunov,
	serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn, keescook,
	yangds.fnst, sbauer, vishnu.ps, axboe, paulmck, linux-kernel,
	linux-doc, linux-api
In-Reply-To: <20141125101238.GA19795@linux.vnet.ibm.com>

On 11/25/2014 03:12 AM, Srikar Dronamraju wrote:
>>
>> - Request to borrow timeslice is not guaranteed to be honored.
>> - If the task is allowed to borrow, kernel will inform the task
>>    of this. When this happens, task must yield the processor as soon
>>    as it completes its critical section.
>> - If the task fails to yield processor after being allowed to
>>    borrow, it is penalized by forcing it to skip its next time slot
>>    by the scheduler.
>> - Task is charged additional time for the borrowed timeslice as
>>    accumulated run time. This pushes it further down in consideration
>>    for the next task to run.
>>
>
> Is there a way for us to identify if the lock is contended?
> Because it may not be prudent to allow a task to borrow timeslice for a
> lock which isn't contended.
>

Userspace knows that. It is hard to determine this from the kernel. Darren 
Hart had worked on a solution to a similar issue and I spent a fair 
amount of time looking through that code. Darren's solution comes into 
play after contention has already happened and does reduce the cost of 
contention. Database folks think the cost is already too high once 
contention has happened because of the resulting context switches and 
post-contention solutions do not help. This solution helps reduce 
contention on locks and userspace code designer is in best position to 
determine which locks are subject to such contention.

--
Khalid

^ permalink raw reply

* Re: [PATCH v3 4/7] crypto: AF_ALG: add AEAD support
From: Herbert Xu @ 2014-11-25 14:58 UTC (permalink / raw)
  To: Stephan Mueller
  Cc: Daniel Borkmann, 'Quentin Gouchet',
	lkml - Kernel Mailing List, linux-crypto, linux-api
In-Reply-To: <5492722.Cc6uZ9OM2L@tauon>

On Mon, Nov 24, 2014 at 03:58:34PM +0100, Stephan Mueller wrote:
> Ok. But in the code you see that skcipher is a 100% subset of AEAD. For 
> AEAD, all we need to do in addition to normal symmetric ciphers is to 
> select the AEAD kernel crypto API calls, to locate and use the AD and to 
> ensure we have the right memory size to process the tag.

There is still one fundamental difference between AEAD and ciphers.
Namely that ciphers can operate as you go while AEAD requests must
be done in one lot.  So that should make the AEAD code simpler vs.
ciphers.

I think the best course of action for now is to start with sharing
no code and then chop bits off as we see fit.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH v3 4/7] crypto: AF_ALG: add AEAD support
From: Stephan Mueller @ 2014-11-25 15:08 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Daniel Borkmann, 'Quentin Gouchet',
	lkml - Kernel Mailing List, linux-crypto-u79uwXL29TY76Z2rM5mHXA,
	linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20141125145850.GD8541-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q@public.gmane.org>

Am Dienstag, 25. November 2014, 22:58:50 schrieb Herbert Xu:

Hi Herbert,

>On Mon, Nov 24, 2014 at 03:58:34PM +0100, Stephan Mueller wrote:
>> Ok. But in the code you see that skcipher is a 100% subset of AEAD.
>> For AEAD, all we need to do in addition to normal symmetric ciphers
>> is to select the AEAD kernel crypto API calls, to locate and use the
>> AD and to ensure we have the right memory size to process the tag.
>
>There is still one fundamental difference between AEAD and ciphers.
>Namely that ciphers can operate as you go while AEAD requests must
>be done in one lot.  So that should make the AEAD code simpler vs.
>ciphers.

Yes, that is a key difference.
>
>I think the best course of action for now is to start with sharing
>no code and then chop bits off as we see fit.

Ok, I will create a new patch set with a separate algif_aead.c. I guess 
the entire sgl handling logic will be gone in AEAD.
>
>Cheers,


Ciao
Stephan

^ permalink raw reply

* Re: [PATCH v3] sched/fair: Add advisory flag for borrowing a timeslice
From: Rik van Riel @ 2014-11-25 15:25 UTC (permalink / raw)
  To: Khalid Aziz, tglx, corbet, mingo, hpa, peterz, akpm, rientjes, ak,
	mgorman, liwanp, raistlin, kirill.shutemov, atomlin, avagin,
	gorcunov, serge.hallyn, athorlton, oleg, vdavydov, daeseok.youn,
	keescook, yangds.fnst, sbauer, vishnu.ps, axboe, paulmck
  Cc: linux-kernel, linux-doc, linux-api
In-Reply-To: <547497A9.1080800@oracle.com>

On 11/25/2014 09:52 AM, Khalid Aziz wrote:
> On 11/24/2014 07:03 PM, Rik van Riel wrote:
>> -----BEGIN PGP SIGNED MESSAGE-----
>> Hash: SHA1
>>
>> On 11/24/2014 03:56 PM, Khalid Aziz wrote:
>>> sched/fair: Add advisory flag for borrowing a timeslice
>>>
>>> This patch adds a way for a task to request to borrow one
>>> timeslice from future if it is about to be preempted, so it could
>>> delay preemption and complete any critical task it is in the middle
>>> of.
>>>
>>> This feature helps with performance on databases and has been used
>>> for many years on other OSs by the databases. This feature helps in
>>> situation where a task acquires a lock before performing a critical
>>> operation on the database and happens to get preempted
>>
>> Why don't the other tasks that want the lock sleep on the
>> lock?
>>
>> I can see this "solution" help mostly with userspace spinlocks,
>> which are relics of a past era that need to die. There is no
>> way userspace spinlocks will not fail miserably on virtual
>> machines, and it is time to get rid of them.
> 
> This solution indeed is for userspace spinlocks. Database code has been
> written with all critical locking implemented in userspace (as I have
> been told by database folks. I am not a database guy).

They should fix that.

The scheme you propose can really only work on bare metal,
and not on virtual machines. That improves the problem for,
what, 60% of new installs (and dropping)?

^ permalink raw reply

* [PATCH 1/7] KVM: add commentary for kvm_debug_exit_arch struct
From: Alex Bennée @ 2014-11-25 16:09 UTC (permalink / raw)
  To: kvm, linux-arm-kernel, kvmarm, christoffer.dall, marc.zyngier,
	peter.maydell, agraf
  Cc: jan.kiszka, dahi, r65777, bp, pbonzini, Alex Bennée,
	Gleb Natapov, open list:ABI/API, open list
In-Reply-To: <1416931805-23223-1-git-send-email-alex.bennee@linaro.org>

Bring into line with the commentary for the other structures and their
KVM_EXIT_* cases.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..523f476 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -226,6 +226,7 @@ struct kvm_run {
 			__u32 count;
 			__u64 data_offset; /* relative to kvm_run start */
 		} io;
+		/* KVM_EXIT_DEBUG */
 		struct {
 			struct kvm_debug_exit_arch arch;
 		} debug;
-- 
2.1.3


^ permalink raw reply related

* [PATCH 5/7] KVM: arm64: guest debug, add support for single-step
From: Alex Bennée @ 2014-11-25 16:10 UTC (permalink / raw)
  To: kvm, linux-arm-kernel, kvmarm, christoffer.dall, marc.zyngier,
	peter.maydell, agraf
  Cc: jan.kiszka, dahi, r65777, bp, pbonzini, Alex Bennée,
	Gleb Natapov, Russell King, Catalin Marinas, Will Deacon,
	Lorenzo Pieralisi, open list, open list:ABI/API
In-Reply-To: <1416931805-23223-1-git-send-email-alex.bennee@linaro.org>

This adds support for single-stepping the guest. As userspace can and
will manipulate guest registers before restarting, any tweaking of the
registers has to occur just before control is passed back to the guest.
Furthermore while guest debugging is in effect we need to squash the
ability of the guest to single-step itself as we have no easy way of
re-entering the guest after the exception has been delivered to the
hypervisor.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 48d26bb..a76daae 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -38,6 +38,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/virt.h>
+#include <asm/debug-monitors.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
@@ -300,6 +301,17 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	kvm_arm_set_running_vcpu(NULL);
 }
 
+/**
+ * kvm_arch_vcpu_ioctl_set_guest_debug - Setup guest debugging
+ * @vcpu:	the vcpu pointer
+ * @dbg:	the ioctl data buffer
+ *
+ * This sets up the VM for guest debugging. Care has to be taken when
+ * manipulating guest registers as these will be set/cleared by the
+ * hyper-visor controller, typically before each kvm_run event. As a
+ * result modification of the guest registers needs to take place
+ * after they have been restored in the hyp.S trampoline code.
+ */
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					struct kvm_guest_debug *dbg)
 {
@@ -317,8 +329,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 
 	/* Single Step */
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-		kvm_info("SS requested, not yet implemented\n");
-		return -EINVAL;
+		kvm_info("SS requested\n");
+		route_el2 = true;
 	}
 
 	/* Software Break Points */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 8da1043..78e5ae1 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -121,6 +121,7 @@ int main(void)
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
   DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
+  DEFINE(GUEST_DEBUG,		offsetof(struct kvm_vcpu, guest_debug));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(VCPU_MDCR_EL2,	offsetof(struct kvm_vcpu, arch.mdcr_el2));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 28dc92b..6def054 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -91,6 +91,25 @@ static int kvm_handle_bkpt(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 0;
 }
 
+/**
+ * kvm_handle_ss - handle single step exceptions
+ *
+ * @vcpu:	the vcpu pointer
+ *
+ * See: ARM ARM D2.12 for the details. While the host is routing debug
+ * exceptions to its handlers we have to suppress the ability of the
+ * guest to trigger exceptions.
+ */
+static int kvm_handle_ss(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	WARN_ON(!(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP));
+
+	run->exit_reason = KVM_EXIT_DEBUG;
+	run->debug.arch.exit_type = KVM_DEBUG_EXIT_SINGLE_STEP;
+	run->debug.arch.address = *vcpu_pc(vcpu);
+	return 0;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
@@ -105,6 +124,7 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_SYS64]	= kvm_handle_sys_reg,
 	[ESR_EL2_EC_IABT]	= kvm_handle_guest_abort,
 	[ESR_EL2_EC_DABT]	= kvm_handle_guest_abort,
+	[ESR_EL2_EC_SOFTSTP]    = kvm_handle_ss,
 	[ESR_EL2_EC_BKPT32]	= kvm_handle_bkpt,
 	[ESR_EL2_EC_BRK64]	= kvm_handle_bkpt,
 };
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 3c733ea..c0bc218 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -16,6 +16,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/kvm.h>
 
 #include <asm/assembler.h>
 #include <asm/memory.h>
@@ -168,6 +169,31 @@
 	// x19-x29, lr, sp*, elr*, spsr*
 	restore_common_regs
 
+	// After restoring the guest registers but before we return to the guest
+	// we may want to make some final tweaks to support guest debugging.
+	ldr	x3, [x0, #GUEST_DEBUG]
+	tbz	x3, #KVM_GUESTDBG_ENABLE_SHIFT, 2f	// No guest debug
+
+	// x0 - preserved as VCPU ptr
+	// x1 - spsr
+	// x2 - mdscr
+	mrs	x1, spsr_el2
+	mrs 	x2, mdscr_el1
+
+	// See ARM ARM D2.12.3 The software step state machine
+	// If we are doing Single Step - set MDSCR_EL1.SS and PSTATE.SS
+	orr	x1, x1, #DBG_SPSR_SS
+	orr	x2, x2, #DBG_MDSCR_SS
+	tbnz	x3, #KVM_GUESTDBG_SINGLESTEP_SHIFT, 1f
+	// If we are not doing Single Step we want to prevent the guest doing so
+	// as otherwise we will have to deal with the re-routed exceptions as we
+	// are doing other guest debug related things
+	eor	x1, x1, #DBG_SPSR_SS
+	eor	x2, x2, #DBG_MDSCR_SS
+1:
+	msr	spsr_el2, x1
+	msr	mdscr_el1, x2
+2:
 	// Last bits of the 64bit state
 	pop	x2, x3
 	pop	x0, x1
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 523f476..347e5b0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -7,6 +7,8 @@
  * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/ioctl.h>
@@ -515,11 +517,6 @@ struct kvm_s390_irq {
 	} u;
 };
 
-/* for KVM_SET_GUEST_DEBUG */
-
-#define KVM_GUESTDBG_ENABLE		0x00000001
-#define KVM_GUESTDBG_SINGLESTEP		0x00000002
-
 struct kvm_guest_debug {
 	__u32 control;
 	__u32 pad;
@@ -1189,4 +1186,15 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#endif /* __ASSEMBLY__ */
+
+/* for KVM_SET_GUEST_DEBUG */
+
+#define KVM_GUESTDBG_ENABLE_SHIFT	0
+#define KVM_GUESTDBG_ENABLE		(1 << KVM_GUESTDBG_ENABLE_SHIFT)
+#define KVM_GUESTDBG_SINGLESTEP_SHIFT	1
+#define KVM_GUESTDBG_SINGLESTEP	(1 << KVM_GUESTDBG_SINGLESTEP_SHIFT)
+
+
+
 #endif /* __LINUX_KVM_H */
-- 
2.1.3


^ permalink raw reply related

* [PATCH 7/7] KVM: arm64: guest debug, HW assisted debug support
From: Alex Bennée @ 2014-11-25 16:10 UTC (permalink / raw)
  To: kvm, linux-arm-kernel, kvmarm, christoffer.dall, marc.zyngier,
	peter.maydell, agraf
  Cc: jan.kiszka, dahi, r65777, bp, pbonzini, Alex Bennée,
	Gleb Natapov, Jonathan Corbet, Russell King, Catalin Marinas,
	Will Deacon, Lorenzo Pieralisi, AKASHI Takahiro, Srivatsa S. Bhat,
	open list:DOCUMENTATION, open list, open list:ABI/API
In-Reply-To: <1416931805-23223-1-git-send-email-alex.bennee@linaro.org>

This adds support for userspace to control the HW debug registers for
guest debug. We'll only copy the $ARCH defined number across as that's
all that hyp.S will use anyway. I've moved some helper functions into
the hw_breakpoint.h header for re-use.

As with single step we need to tweak the guest registers to enable the
exceptions but we don't want to overwrite the guest copy of these
registers so this is done close to the guest entry.

Two new capabilities have been added to the KVM_EXTENSION ioctl to allow
userspace to query the number of hardware break and watch points
available on the host hardware.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9383359..5e8c673 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2593,7 +2593,7 @@ The top 16 bits of the control field are architecture specific control
 flags which can include the following:
 
   - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86, arm64]
-  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390]
+  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390, arm64]
   - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
   - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
   - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
@@ -2606,7 +2606,10 @@ we need to ensure the guest vCPUs architecture specific registers are
 updated to the correct (supplied) values.
 
 The second part of the structure is architecture specific and
-typically contains a set of debug registers.
+typically contains a set of debug registers. For arm64 the number of
+debug registers is implementation defined and can be determined by
+querying the KVM_CAP_GUEST_DEBUG_HW_BPS and KVM_CAP_GUEST_DEBUG_HW_WPS
+capabilities.
 
 When debug events exit the main run loop with the reason
 KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a76daae..c8ec23a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -39,6 +39,7 @@
 #include <asm/cacheflush.h>
 #include <asm/virt.h>
 #include <asm/debug-monitors.h>
+#include <asm/hw_breakpoint.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
@@ -341,8 +342,37 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 
 	/* Hardware assisted Break and Watch points */
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
-		kvm_info("HW BP support requested, not yet implemented\n");
-		return -EINVAL;
+		int i;
+		int nb = get_num_brps();
+		int nw = get_num_wrps();
+
+		/* Copy across up to IMPDEF debug registers to our
+		 * shadow copy in the vcpu structure. The hyp.S code
+		 * will then set them up before we re-enter the guest.
+		 */
+		memcpy(vcpu->arch.guest_debug_regs.dbg_bcr,
+			dbg->arch.dbg_bcr, sizeof(__u64)*nb);
+		memcpy(vcpu->arch.guest_debug_regs.dbg_bvr,
+			dbg->arch.dbg_bvr, sizeof(__u64)*nb);
+		memcpy(vcpu->arch.guest_debug_regs.dbg_wcr,
+			dbg->arch.dbg_wcr, sizeof(__u64)*nw);
+		memcpy(vcpu->arch.guest_debug_regs.dbg_wvr,
+			dbg->arch.dbg_wvr, sizeof(__u64)*nw);
+
+		kvm_info("HW BP support requested\n");
+		for (i = 0; i < nb; i++) {
+			kvm_info("%d: dbg_bcr=0x%llx dbg_bvr=0x%llx\n",
+				 i,
+				vcpu->arch.guest_debug_regs.dbg_bcr[i],
+				vcpu->arch.guest_debug_regs.dbg_bvr[i]);
+		}
+		for (i = 0; i < nw; i++) {
+			kvm_info("%d: dbg_wcr=0x%llx dbg_wvr=0x%llx\n",
+				 i,
+				 vcpu->arch.guest_debug_regs.dbg_wcr[i],
+				 vcpu->arch.guest_debug_regs.dbg_wvr[i]);
+		}
+		route_el2 = true;
 	}
 
 	/* If we are going to handle any debug exceptions we need to
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 52b484b..c450552 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -130,6 +130,18 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
 }
 #endif
 
+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+}
+
 extern struct pmu perf_ops_bp;
 
 #endif	/* __KERNEL__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 38b0f07..e386bf4 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -103,8 +103,9 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
-	/* Debug state */
+	/* Guest debug state */
 	u64 debug_flags;
+	struct kvm_guest_debug_arch guest_debug_regs;
 
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 78e5ae1..c9ecfd3 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -122,6 +122,10 @@ int main(void)
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
   DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
   DEFINE(GUEST_DEBUG,		offsetof(struct kvm_vcpu, guest_debug));
+  DEFINE(GUEST_DEBUG_BCR,	offsetof(struct kvm_vcpu, arch.guest_debug_regs.dbg_bcr));
+  DEFINE(GUEST_DEBUG_BVR,	offsetof(struct kvm_vcpu, arch.guest_debug_regs.dbg_bvr));
+  DEFINE(GUEST_DEBUG_WCR,	offsetof(struct kvm_vcpu, arch.guest_debug_regs.dbg_wcr));
+  DEFINE(GUEST_DEBUG_WVR,	offsetof(struct kvm_vcpu, arch.guest_debug_regs.dbg_wvr));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(VCPU_MDCR_EL2,	offsetof(struct kvm_vcpu, arch.mdcr_el2));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index df1cf15..45dcc6f 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -49,18 +49,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp);
 static int core_num_brps;
 static int core_num_wrps;
 
-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
-}
-
 int hw_breakpoint_slots(int type)
 {
 	/*
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 6def054..d024e47 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -110,6 +110,42 @@ static int kvm_handle_ss(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 0;
 }
 
+/**
+ * kvm_handle_hw_bp - handle HW assisted break point
+ *
+ * @vcpu:	the vcpu pointer
+ *
+ */
+static int kvm_handle_hw_bp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	WARN_ON(!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP));
+
+	run->exit_reason = KVM_EXIT_DEBUG;
+	run->debug.arch.exit_type = KVM_DEBUG_EXIT_HW_BKPT;
+	run->debug.arch.address = *vcpu_pc(vcpu);
+	return 0;
+}
+
+/**
+ * kvm_handle_watch - handle HW assisted watch point
+ *
+ * @vcpu:	the vcpu pointer
+ *
+ * These are basically the same as breakpoints (and indeed may use the
+ * breakpoint in a linked fashion). However they generate a specific
+ * exception so we trap it here for reporting to the guest.
+ *
+ */
+static int kvm_handle_watch(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	WARN_ON(!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP));
+
+	run->exit_reason = KVM_EXIT_DEBUG;
+	run->debug.arch.exit_type = KVM_DEBUG_EXIT_HW_WTPT;
+	run->debug.arch.address = *vcpu_pc(vcpu);
+	return 0;
+}
+
 static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
@@ -125,6 +161,8 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_IABT]	= kvm_handle_guest_abort,
 	[ESR_EL2_EC_DABT]	= kvm_handle_guest_abort,
 	[ESR_EL2_EC_SOFTSTP]    = kvm_handle_ss,
+	[ESR_EL2_EC_WATCHPT]	= kvm_handle_watch,
+	[ESR_EL2_EC_BREAKPT]	= kvm_handle_hw_bp,
 	[ESR_EL2_EC_BKPT32]	= kvm_handle_bkpt,
 	[ESR_EL2_EC_BRK64]	= kvm_handle_bkpt,
 };
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b38ce3d..96f71ab 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -18,6 +18,7 @@
 #include <linux/linkage.h>
 #include <linux/kvm.h>
 
+#include <uapi/asm/kvm.h>
 #include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/asm-offsets.h>
@@ -174,6 +175,7 @@
 	ldr	x3, [x0, #GUEST_DEBUG]
 	tbz	x3, #KVM_GUESTDBG_ENABLE_SHIFT, 2f	// No guest debug
 
+	// Both Step and HW BP/WP ops need to modify spsr_el2 and mdscr_el1
 	// x0 - preserved as VCPU ptr
 	// x1 - spsr
 	// x2 - mdscr
@@ -191,6 +193,11 @@
 	eor	x1, x1, #DBG_SPSR_SS
 	eor	x2, x2, #DBG_MDSCR_SS
 1:
+	// If we are doing HW BP/WP - set MDSCR_EL1.KDE/MDE
+	tbz	x3, #KVM_GUESTDBG_USE_HW_BP_SHIFT, 3f
+	orr	x2, x2, #DBG_MDSCR_KDE
+	orr	x2, x2, #DBG_MDSCR_MDE
+3:
 	msr	spsr_el2, x1
 	msr	mdscr_el1, x2
 2:
@@ -815,6 +822,33 @@ __restore_debug:
 
 	ret
 
+/* Setup debug state for debug of guest */
+__setup_debug:
+	// x0: vcpu base address
+	// x3: ptr to guest registers passed to setup_debug_registers
+	// x5..x20/x26: trashed
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	mov     x4, x24
+	add	x3, x0, #GUEST_DEBUG_BCR
+	setup_debug_registers dbgbcr
+	add	x3, x0, #GUEST_DEBUG_BVR
+	setup_debug_registers dbgbvr
+
+	mov     x4, x25
+	add	x3, x0, #GUEST_DEBUG_WCR
+	setup_debug_registers dbgwcr
+	add	x3, x0, #GUEST_DEBUG_WVR
+	setup_debug_registers dbgwvr
+
+	ret
+
 __save_fpsimd:
 	save_fpsimd
 	ret
@@ -861,6 +895,13 @@ ENTRY(__kvm_vcpu_run)
 	bl __restore_sysregs
 	bl __restore_fpsimd
 
+        // Now is the time to set-up the debug registers if we
+        // are debugging the guest
+	ldr	x3, [x0, #GUEST_DEBUG]
+	tbz	x3, #KVM_GUESTDBG_USE_HW_BP_SHIFT, 2f
+	bl	__setup_debug
+	b	1f
+2:
 	skip_debug_state x3, 1f
 	bl	__restore_debug
 1:
@@ -881,6 +922,11 @@ __kvm_vcpu_return:
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	// If we are debugging the guest don't save debug registers
+	// otherwise we'll be trashing the only good copy we have.
+	ldr	x3, [x0, #GUEST_DEBUG]
+	tbnz	x3, #KVM_GUESTDBG_USE_HW_BP_SHIFT, 1f
+
 	skip_debug_state x3, 1f
 	bl	__save_debug
 1:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 70a7816..0de6caa 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -64,6 +64,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ARM_EL1_32BIT:
 		r = cpu_has_32bit_el1();
 		break;
+	case KVM_CAP_GUEST_DEBUG_HW_BPS:
+		r = get_num_brps();
+		break;
+	case KVM_CAP_GUEST_DEBUG_HW_WPS:
+		r  = get_num_wrps();
+		break;
 	default:
 		r = 0;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 347e5b0..49a5f97 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -759,6 +759,8 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_GUEST_DEBUG_HW_BPS 106
+#define KVM_CAP_GUEST_DEBUG_HW_WPS 107
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.1.3


^ permalink raw reply related

* Re: [PATCH v14 0/3] Add drm driver for Rockchip Socs
From: Heiko Stübner @ 2014-11-25 16:38 UTC (permalink / raw)
  To: Mark Yao
  Cc: Boris BREZILLON, David Airlie, Rob Clark, Daniel Vetter,
	Rob Herring, Pawel Moll, Mark Rutland, Ian Campbell, Kumar Gala,
	Randy Dunlap, Grant Likely, Greg Kroah-Hartman, John Stultz,
	Rom Lemarchand, devicetree, linux-doc, linux-kernel, dri-devel,
	linux-api, linux-rockchip, dianders, marcheu, dbehr, olof,
	djkurtz, cf, xxm, huangtao
In-Reply-To: <1416447994-9921-1-git-send-email-mark.yao@rock-chips.com>

Mark,

Am Donnerstag, 20. November 2014, 09:46:34 schrieb Mark Yao:
> This a series of patches is a DRM Driver for Rockchip Socs, add support
> for vop devices. Future patches will add additional encoders/connectors,
> such as eDP, HDMI.
> 
> The basic "crtc" for rockchip is a "VOP" - Video Output Processor.
> the vop devices found on Rockchip rk3288 Soc, rk3288 soc have two similar
> Vop devices. Vop devices support iommu mapping, we use dma-mapping API with
> ARM_DMA_USE_IOMMU.

it looks like everybody is more or less happy with this version - in past 
versions responses voicing concerns were quite swift.

As David requested a pull request the last time, it might be time to do so, so 
that we maybe still reach 3.19. 


Heiko

^ permalink raw reply

* [PATCH v4 03/42] virtio: add virtio 1.0 feature bit
From: Michael S. Tsirkin @ 2014-11-25 16:41 UTC (permalink / raw)
  To: linux-kernel; +Cc: rusty, linux-api, virtualization, pbonzini, David Miller
In-Reply-To: <1416933600-21398-1-git-send-email-mst@redhat.com>

Based on original patches by Rusty Russell, Thomas Huth
and Cornelia Huck.

Note: at this time, we do not negotiate this feature bit
in core, drivers have to declare VERSION_1 support explicitly.

For this reason we treat this bit as a device bit
and not as a transport bit for now.

After all drivers are converted, we will be able to
move VERSION_1 to core and drop it from all drivers.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 include/uapi/linux/virtio_config.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 3ce768c..80e7381 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -54,4 +54,7 @@
 /* Can the device handle any descriptor layout? */
 #define VIRTIO_F_ANY_LAYOUT		27
 
+/* v1.0 compliant. */
+#define VIRTIO_F_VERSION_1		32
+
 #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */
-- 
MST

^ permalink raw reply related

* [PATCH v4 05/42] virtio: memory access APIs
From: Michael S. Tsirkin @ 2014-11-25 16:41 UTC (permalink / raw)
  To: linux-kernel
  Cc: Bjarke Istrup Pedersen, rusty, Greg Kroah-Hartman, virtualization,
	Geert Uytterhoeven, Laurent Pinchart, Sakari Ailus, linux-api,
	pbonzini, David Miller, Alexei Starovoitov
In-Reply-To: <1416933600-21398-1-git-send-email-mst@redhat.com>

virtio 1.0 makes all memory structures LE, so
we need APIs to conditionally do a byteswap on BE
architectures.

To make it easier to check code statically,
add virtio specific types for multi-byte integers
in memory.

Add low level wrappers that do a byteswap conditionally, these will be
useful e.g. for vhost.  Add high level wrappers that
query device endian-ness and act accordingly.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_byteorder.h  | 59 +++++++++++++++++++++++++++++++++++++++
 include/linux/virtio_config.h     | 32 +++++++++++++++++++++
 include/uapi/linux/virtio_ring.h  | 45 ++++++++++++++---------------
 include/uapi/linux/virtio_types.h | 48 +++++++++++++++++++++++++++++++
 include/uapi/linux/Kbuild         |  1 +
 5 files changed, 163 insertions(+), 22 deletions(-)
 create mode 100644 include/linux/virtio_byteorder.h
 create mode 100644 include/uapi/linux/virtio_types.h

diff --git a/include/linux/virtio_byteorder.h b/include/linux/virtio_byteorder.h
new file mode 100644
index 0000000..824ed0b
--- /dev/null
+++ b/include/linux/virtio_byteorder.h
@@ -0,0 +1,59 @@
+#ifndef _LINUX_VIRTIO_BYTEORDER_H
+#define _LINUX_VIRTIO_BYTEORDER_H
+#include <linux/types.h>
+#include <uapi/linux/virtio_types.h>
+
+/*
+ * Memory accessors for handling virtio in modern little endian and in
+ * compatibility native endian format.
+ */
+
+static inline u16 __virtio16_to_cpu(bool little_endian, __virtio16 val)
+{
+	if (little_endian)
+		return le16_to_cpu((__force __le16)val);
+	else
+		return (__force u16)val;
+}
+
+static inline __virtio16 __cpu_to_virtio16(bool little_endian, u16 val)
+{
+	if (little_endian)
+		return (__force __virtio16)cpu_to_le16(val);
+	else
+		return (__force __virtio16)val;
+}
+
+static inline u32 __virtio32_to_cpu(bool little_endian, __virtio32 val)
+{
+	if (little_endian)
+		return le32_to_cpu((__force __le32)val);
+	else
+		return (__force u32)val;
+}
+
+static inline __virtio32 __cpu_to_virtio32(bool little_endian, u32 val)
+{
+	if (little_endian)
+		return (__force __virtio32)cpu_to_le32(val);
+	else
+		return (__force __virtio32)val;
+}
+
+static inline u64 __virtio64_to_cpu(bool little_endian, __virtio64 val)
+{
+	if (little_endian)
+		return le64_to_cpu((__force __le64)val);
+	else
+		return (__force u64)val;
+}
+
+static inline __virtio64 __cpu_to_virtio64(bool little_endian, u64 val)
+{
+	if (little_endian)
+		return (__force __virtio64)cpu_to_le64(val);
+	else
+		return (__force __virtio64)val;
+}
+
+#endif /* _LINUX_VIRTIO_BYTEORDER_H */
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 022d904..b9cd689 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -4,6 +4,7 @@
 #include <linux/err.h>
 #include <linux/bug.h>
 #include <linux/virtio.h>
+#include <linux/virtio_byteorder.h>
 #include <uapi/linux/virtio_config.h>
 
 /**
@@ -152,6 +153,37 @@ int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
 	return 0;
 }
 
+/* Memory accessors */
+static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
+{
+	return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
+{
+	return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
+{
+	return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
+{
+	return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
+{
+	return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
+{
+	return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
 /* Config space accessors. */
 #define virtio_cread(vdev, structname, member, ptr)			\
 	do {								\
diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index a99f9b7..61c818a 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -32,6 +32,7 @@
  *
  * Copyright Rusty Russell IBM Corporation 2007. */
 #include <linux/types.h>
+#include <linux/virtio_types.h>
 
 /* This marks a buffer as continuing via the next field. */
 #define VRING_DESC_F_NEXT	1
@@ -61,32 +62,32 @@
 /* Virtio ring descriptors: 16 bytes.  These can chain together via "next". */
 struct vring_desc {
 	/* Address (guest-physical). */
-	__u64 addr;
+	__virtio64 addr;
 	/* Length. */
-	__u32 len;
+	__virtio32 len;
 	/* The flags as indicated above. */
-	__u16 flags;
+	__virtio16 flags;
 	/* We chain unused descriptors via this, too */
-	__u16 next;
+	__virtio16 next;
 };
 
 struct vring_avail {
-	__u16 flags;
-	__u16 idx;
-	__u16 ring[];
+	__virtio16 flags;
+	__virtio16 idx;
+	__virtio16 ring[];
 };
 
 /* u32 is used here for ids for padding reasons. */
 struct vring_used_elem {
 	/* Index of start of used descriptor chain. */
-	__u32 id;
+	__virtio32 id;
 	/* Total length of the descriptor chain which was used (written to) */
-	__u32 len;
+	__virtio32 len;
 };
 
 struct vring_used {
-	__u16 flags;
-	__u16 idx;
+	__virtio16 flags;
+	__virtio16 idx;
 	struct vring_used_elem ring[];
 };
 
@@ -109,25 +110,25 @@ struct vring {
  *	struct vring_desc desc[num];
  *
  *	// A ring of available descriptor heads with free-running index.
- *	__u16 avail_flags;
- *	__u16 avail_idx;
- *	__u16 available[num];
- *	__u16 used_event_idx;
+ *	__virtio16 avail_flags;
+ *	__virtio16 avail_idx;
+ *	__virtio16 available[num];
+ *	__virtio16 used_event_idx;
  *
  *	// Padding to the next align boundary.
  *	char pad[];
  *
  *	// A ring of used descriptor heads with free-running index.
- *	__u16 used_flags;
- *	__u16 used_idx;
+ *	__virtio16 used_flags;
+ *	__virtio16 used_idx;
  *	struct vring_used_elem used[num];
- *	__u16 avail_event_idx;
+ *	__virtio16 avail_event_idx;
  * };
  */
 /* We publish the used event index at the end of the available ring, and vice
  * versa. They are at the end for backwards compatibility. */
 #define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
-#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num])
+#define vring_avail_event(vr) (*(__virtio16 *)&(vr)->used->ring[(vr)->num])
 
 static inline void vring_init(struct vring *vr, unsigned int num, void *p,
 			      unsigned long align)
@@ -135,15 +136,15 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p,
 	vr->num = num;
 	vr->desc = p;
 	vr->avail = p + num*sizeof(struct vring_desc);
-	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__u16)
+	vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__virtio16)
 		+ align-1) & ~(align - 1));
 }
 
 static inline unsigned vring_size(unsigned int num, unsigned long align)
 {
-	return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
+	return ((sizeof(struct vring_desc) * num + sizeof(__virtio16) * (3 + num)
 		 + align - 1) & ~(align - 1))
-		+ sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
+		+ sizeof(__virtio16) * 3 + sizeof(struct vring_used_elem) * num;
 }
 
 /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
diff --git a/include/uapi/linux/virtio_types.h b/include/uapi/linux/virtio_types.h
new file mode 100644
index 0000000..b90385f
--- /dev/null
+++ b/include/uapi/linux/virtio_types.h
@@ -0,0 +1,48 @@
+#ifndef _UAPI_LINUX_VIRTIO_TYPES_H
+#define _UAPI_LINUX_VIRTIO_TYPES_H
+/* An interface for efficient virtio implementation, currently for use by KVM
+ * and lguest, but hopefully others soon.  Do NOT change this since it will
+ * break existing servers and clients.
+ *
+ * This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ */
+#include <linux/types.h>
+
+/*
+ * __virtio{16,32,64} have the following meaning:
+ * - __u{16,32,64} for virtio devices in legacy mode, accessed in native endian
+ * - __le{16,32,64} for standard-compliant virtio devices
+ */
+
+typedef __u16 __bitwise__ __virtio16;
+typedef __u32 __bitwise__ __virtio32;
+typedef __u64 __bitwise__ __virtio64;
+
+#endif /* _UAPI_LINUX_VIRTIO_TYPES_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 4c94f31..44a5581 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -423,6 +423,7 @@ header-y += virtio_blk.h
 header-y += virtio_config.h
 header-y += virtio_console.h
 header-y += virtio_ids.h
+header-y += virtio_types.h
 header-y += virtio_net.h
 header-y += virtio_pci.h
 header-y += virtio_ring.h
-- 
MST

^ permalink raw reply related

* [PATCH v4 09/42] virtio: set FEATURES_OK
From: Michael S. Tsirkin @ 2014-11-25 16:42 UTC (permalink / raw)
  To: linux-kernel; +Cc: rusty, linux-api, virtualization, pbonzini, David Miller
In-Reply-To: <1416933600-21398-1-git-send-email-mst@redhat.com>

Set FEATURES_OK as per the virtio 1.0 spec.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_config.h |  2 ++
 drivers/virtio/virtio.c            | 29 ++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h
index 80e7381..4d05671 100644
--- a/include/uapi/linux/virtio_config.h
+++ b/include/uapi/linux/virtio_config.h
@@ -38,6 +38,8 @@
 #define VIRTIO_CONFIG_S_DRIVER		2
 /* Driver has used its parts of the config, and is happy */
 #define VIRTIO_CONFIG_S_DRIVER_OK	4
+/* Driver has finished configuring features */
+#define VIRTIO_CONFIG_S_FEATURES_OK	8
 /* We've given up on this device. */
 #define VIRTIO_CONFIG_S_FAILED		0x80
 
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index d213567..a3df817 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -160,6 +160,7 @@ static int virtio_dev_probe(struct device *_d)
 	struct virtio_device *dev = dev_to_virtio(_d);
 	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
 	u64 device_features;
+	unsigned status;
 
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
@@ -183,18 +184,32 @@ static int virtio_dev_probe(struct device *_d)
 
 	dev->config->finalize_features(dev);
 
+	if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
+		add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+		status = dev->config->get_status(dev);
+		if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
+			printk(KERN_ERR "virtio: device refuses features: %x\n",
+			       status);
+			err = -ENODEV;
+			goto err;
+		}
+	}
+
 	err = drv->probe(dev);
 	if (err)
-		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else {
-		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
-		if (drv->scan)
-			drv->scan(dev);
+		goto err;
 
-		virtio_config_enable(dev);
-	}
+	add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+	if (drv->scan)
+		drv->scan(dev);
+
+	virtio_config_enable(dev);
 
+	return 0;
+err:
+	add_status(dev, VIRTIO_CONFIG_S_FAILED);
 	return err;
+
 }
 
 static int virtio_dev_remove(struct device *_d)
-- 
MST

^ permalink raw reply related

* [PATCH v4 12/42] virtio_net: v1.0 endianness
From: Michael S. Tsirkin @ 2014-11-25 16:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, Rusty Russell,
	virtualization, netdev, linux-api
In-Reply-To: <1416933600-21398-1-git-send-email-mst@redhat.com>

Based on patches by Rusty Russell, Cornelia Huck.
Note: more code changes are needed for 1.0 support
(due to different header size).
So we don't advertise support for 1.0 yet.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_net.h | 15 ++++++++-------
 drivers/net/virtio_net.c        | 33 ++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 172a7f0..b5f1677 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
 #include <linux/if_ether.h>
 
 /* The feature bitmap for virtio net */
@@ -84,17 +85,17 @@ struct virtio_net_hdr {
 #define VIRTIO_NET_HDR_GSO_TCPV6	4	// GSO frame, IPv6 TCP
 #define VIRTIO_NET_HDR_GSO_ECN		0x80	// TCP has ECN set
 	__u8 gso_type;
-	__u16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
-	__u16 gso_size;		/* Bytes to append to hdr_len per frame */
-	__u16 csum_start;	/* Position to start checksumming from */
-	__u16 csum_offset;	/* Offset after that to place checksum */
+	__virtio16 hdr_len;		/* Ethernet + IP + tcp/udp hdrs */
+	__virtio16 gso_size;		/* Bytes to append to hdr_len per frame */
+	__virtio16 csum_start;	/* Position to start checksumming from */
+	__virtio16 csum_offset;	/* Offset after that to place checksum */
 };
 
 /* This is the version of the header to use when the MRG_RXBUF
  * feature has been negotiated. */
 struct virtio_net_hdr_mrg_rxbuf {
 	struct virtio_net_hdr hdr;
-	__u16 num_buffers;	/* Number of merged rx buffers */
+	__virtio16 num_buffers;	/* Number of merged rx buffers */
 };
 
 /*
@@ -149,7 +150,7 @@ typedef __u8 virtio_net_ctrl_ack;
  * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
  */
 struct virtio_net_ctrl_mac {
-	__u32 entries;
+	__virtio32 entries;
 	__u8 macs[][ETH_ALEN];
 } __attribute__((packed));
 
@@ -193,7 +194,7 @@ struct virtio_net_ctrl_mac {
  * specified.
  */
 struct virtio_net_ctrl_mq {
-	__u16 virtqueue_pairs;
+	__virtio16 virtqueue_pairs;
 };
 
 #define VIRTIO_NET_CTRL_MQ   4
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b0bc8ea..c07e030 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -347,13 +347,14 @@ err:
 }
 
 static struct sk_buff *receive_mergeable(struct net_device *dev,
+					 struct virtnet_info *vi,
 					 struct receive_queue *rq,
 					 unsigned long ctx,
 					 unsigned int len)
 {
 	void *buf = mergeable_ctx_to_buf_address(ctx);
 	struct skb_vnet_hdr *hdr = buf;
-	int num_buf = hdr->mhdr.num_buffers;
+	u16 num_buf = virtio16_to_cpu(rq->vq->vdev, hdr->mhdr.num_buffers);
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
 	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
@@ -369,7 +370,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
 		if (unlikely(!ctx)) {
 			pr_debug("%s: rx error: %d buffers out of %d missing\n",
-				 dev->name, num_buf, hdr->mhdr.num_buffers);
+				 dev->name, num_buf,
+				 virtio16_to_cpu(rq->vq->vdev,
+						 hdr->mhdr.num_buffers));
 			dev->stats.rx_length_errors++;
 			goto err_buf;
 		}
@@ -454,7 +457,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	}
 
 	if (vi->mergeable_rx_bufs)
-		skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
+		skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
 	else if (vi->big_packets)
 		skb = receive_big(dev, rq, buf, len);
 	else
@@ -473,8 +476,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 		pr_debug("Needs csum!\n");
 		if (!skb_partial_csum_set(skb,
-					  hdr->hdr.csum_start,
-					  hdr->hdr.csum_offset))
+			  virtio16_to_cpu(vi->vdev, hdr->hdr.csum_start),
+			  virtio16_to_cpu(vi->vdev, hdr->hdr.csum_offset)))
 			goto frame_err;
 	} else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -514,7 +517,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
 			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
-		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
+		skb_shinfo(skb)->gso_size = virtio16_to_cpu(vi->vdev,
+							    hdr->hdr.gso_size);
 		if (skb_shinfo(skb)->gso_size == 0) {
 			net_warn_ratelimited("%s: zero gso size.\n", dev->name);
 			goto frame_err;
@@ -876,16 +880,19 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		hdr->hdr.csum_start = skb_checksum_start_offset(skb);
-		hdr->hdr.csum_offset = skb->csum_offset;
+		hdr->hdr.csum_start = cpu_to_virtio16(vi->vdev,
+						skb_checksum_start_offset(skb));
+		hdr->hdr.csum_offset = cpu_to_virtio16(vi->vdev,
+							 skb->csum_offset);
 	} else {
 		hdr->hdr.flags = 0;
 		hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
 	}
 
 	if (skb_is_gso(skb)) {
-		hdr->hdr.hdr_len = skb_headlen(skb);
-		hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
+		hdr->hdr.hdr_len = cpu_to_virtio16(vi->vdev, skb_headlen(skb));
+		hdr->hdr.gso_size = cpu_to_virtio16(vi->vdev,
+						    skb_shinfo(skb)->gso_size);
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
 			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
 		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
@@ -1112,7 +1119,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
 		return 0;
 
-	s.virtqueue_pairs = queue_pairs;
+	s.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
 	sg_init_one(&sg, &s, sizeof(s));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
@@ -1189,7 +1196,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	sg_init_table(sg, 2);
 
 	/* Store the unicast list and count in the front of the buffer */
-	mac_data->entries = uc_count;
+	mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
 	i = 0;
 	netdev_for_each_uc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
@@ -1200,7 +1207,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	/* multicast list and count fill the end */
 	mac_data = (void *)&mac_data->macs[uc_count][0];
 
-	mac_data->entries = mc_count;
+	mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
 	i = 0;
 	netdev_for_each_mc_addr(ha, dev)
 		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
-- 
MST

^ permalink raw reply related

* [PATCH v4 13/42] virtio_blk: v1.0 support
From: Michael S. Tsirkin @ 2014-11-25 16:42 UTC (permalink / raw)
  To: linux-kernel
  Cc: David Miller, cornelia.huck, rusty, nab, pbonzini, Thomas Huth,
	David Hildenbrand, Rusty Russell, virtualization, linux-api
In-Reply-To: <1416933600-21398-1-git-send-email-mst@redhat.com>

Based on patch by Cornelia Huck.

Note: for consistency, and to avoid sparse errors,
      convert all fields, even those no longer in use
      for virtio v1.0.

Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_blk.h | 15 ++++-----
 drivers/block/virtio_blk.c      | 70 ++++++++++++++++++++++++-----------------
 2 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ad67b2..247c8ba 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
 
 /* Feature bits */
 #define VIRTIO_BLK_F_BARRIER	0	/* Does host support barriers? */
@@ -114,18 +115,18 @@ struct virtio_blk_config {
 /* This is the first element of the read scatter-gather list. */
 struct virtio_blk_outhdr {
 	/* VIRTIO_BLK_T* */
-	__u32 type;
+	__virtio32 type;
 	/* io priority. */
-	__u32 ioprio;
+	__virtio32 ioprio;
 	/* Sector (ie. 512 byte offset) */
-	__u64 sector;
+	__virtio64 sector;
 };
 
 struct virtio_scsi_inhdr {
-	__u32 errors;
-	__u32 data_len;
-	__u32 sense_len;
-	__u32 residual;
+	__virtio32 errors;
+	__virtio32 data_len;
+	__virtio32 sense_len;
+	__virtio32 residual;
 };
 
 /* And this is the final byte of the write scatter-gather list. */
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c6a27d5..f601f16 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -80,7 +80,7 @@ static int __virtblk_add_req(struct virtqueue *vq,
 {
 	struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
 	unsigned int num_out = 0, num_in = 0;
-	int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;
+	__virtio32 type = vbr->out_hdr.type & ~cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT);
 
 	sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
 	sgs[num_out++] = &hdr;
@@ -91,19 +91,19 @@ static int __virtblk_add_req(struct virtqueue *vq,
 	 * block, and before the normal inhdr we put the sense data and the
 	 * inhdr with additional status information.
 	 */
-	if (type == VIRTIO_BLK_T_SCSI_CMD) {
+	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
 		sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
 		sgs[num_out++] = &cmd;
 	}
 
 	if (have_data) {
-		if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
+		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
 			sgs[num_out++] = data_sg;
 		else
 			sgs[num_out + num_in++] = data_sg;
 	}
 
-	if (type == VIRTIO_BLK_T_SCSI_CMD) {
+	if (type == cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_SCSI_CMD)) {
 		sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
 		sgs[num_out + num_in++] = &sense;
 		sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
@@ -119,12 +119,13 @@ static int __virtblk_add_req(struct virtqueue *vq,
 static inline void virtblk_request_done(struct request *req)
 {
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+	struct virtio_blk *vblk = req->q->queuedata;
 	int error = virtblk_result(vbr);
 
 	if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
-		req->resid_len = vbr->in_hdr.residual;
-		req->sense_len = vbr->in_hdr.sense_len;
-		req->errors = vbr->in_hdr.errors;
+		req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
+		req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
+		req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
 	} else if (req->cmd_type == REQ_TYPE_SPECIAL) {
 		req->errors = (error != 0);
 	}
@@ -173,25 +174,25 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
 
 	vbr->req = req;
 	if (req->cmd_flags & REQ_FLUSH) {
-		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+		vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH);
 		vbr->out_hdr.sector = 0;
-		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+		vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
 	} else {
 		switch (req->cmd_type) {
 		case REQ_TYPE_FS:
 			vbr->out_hdr.type = 0;
-			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
-			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, blk_rq_pos(vbr->req));
+			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
 			break;
 		case REQ_TYPE_BLOCK_PC:
-			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
+			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_SCSI_CMD);
 			vbr->out_hdr.sector = 0;
-			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
 			break;
 		case REQ_TYPE_SPECIAL:
-			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
+			vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
 			vbr->out_hdr.sector = 0;
-			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
 			break;
 		default:
 			/* We don't put anything else in the queue. */
@@ -204,9 +205,9 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
 	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
-			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
+			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
 		else
-			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
+			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
 	}
 
 	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
@@ -476,7 +477,8 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
 				   struct virtio_blk_config, wce,
 				   &writeback);
 	if (err)
-		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
+		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE) ||
+		            virtio_has_feature(vdev, VIRTIO_F_VERSION_1);
 
 	return writeback;
 }
@@ -821,25 +823,35 @@ static const struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
-static unsigned int features[] = {
+static unsigned int features_legacy[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
 	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
 	VIRTIO_BLK_F_MQ,
+}
+;
+static unsigned int features[] = {
+	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
+	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
+	VIRTIO_BLK_F_TOPOLOGY,
+	VIRTIO_BLK_F_MQ,
+	VIRTIO_F_VERSION_1,
 };
 
 static struct virtio_driver virtio_blk = {
-	.feature_table		= features,
-	.feature_table_size	= ARRAY_SIZE(features),
-	.driver.name		= KBUILD_MODNAME,
-	.driver.owner		= THIS_MODULE,
-	.id_table		= id_table,
-	.probe			= virtblk_probe,
-	.remove			= virtblk_remove,
-	.config_changed		= virtblk_config_changed,
+	.feature_table			= features,
+	.feature_table_size		= ARRAY_SIZE(features),
+	.feature_table_legacy		= features_legacy,
+	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
+	.driver.name			= KBUILD_MODNAME,
+	.driver.owner			= THIS_MODULE,
+	.id_table			= id_table,
+	.probe				= virtblk_probe,
+	.remove				= virtblk_remove,
+	.config_changed			= virtblk_config_changed,
 #ifdef CONFIG_PM_SLEEP
-	.freeze			= virtblk_freeze,
-	.restore		= virtblk_restore,
+	.freeze				= virtblk_freeze,
+	.restore			= virtblk_restore,
 #endif
 };
 
-- 
MST

^ permalink raw reply related

* [PATCH v4 33/42] tun: move internal flag defines out of uapi
From: Michael S. Tsirkin @ 2014-11-25 16:43 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
  Cc: David Miller, cornelia.huck-tA70FqPdS9bQT0dZR+AlfA,
	rusty-8fk3Idey6ehBDgjK7y7TUQ, nab-IzHhD5pYlfBP7FQvKIMDCQ,
	pbonzini-H+wXaHxf7aLQT0dZR+AlfA, Jason Wang, Zhi Yong Wu,
	Tom Herbert, Ben Hutchings, Masatake YAMATO, Herbert Xu, Xi Wang,
	netdev-u79uwXL29TY76Z2rM5mHXA, linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1416933600-21398-1-git-send-email-mst-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

TUN_ flags are internal and never exposed
to userspace. Any application using them is almost
certainly buggy.

Move them out to tun.c, we'll remove them in follow-up patches.

Signed-off-by: Michael S. Tsirkin <mst-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 include/uapi/linux/if_tun.h | 16 ++--------
 drivers/net/tun.c           | 74 ++++++++++++++-------------------------------
 2 files changed, 26 insertions(+), 64 deletions(-)

diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index e9502dd..277a260 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -22,21 +22,11 @@
 
 /* Read queue size */
 #define TUN_READQ_SIZE	500
-
-/* TUN device flags */
-#define TUN_TUN_DEV 	0x0001	
-#define TUN_TAP_DEV	0x0002
+/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
+#define TUN_TUN_DEV 	IFF_TUN
+#define TUN_TAP_DEV	IFF_TAP
 #define TUN_TYPE_MASK   0x000f
 
-#define TUN_FASYNC	0x0010
-#define TUN_NOCHECKSUM	0x0020
-#define TUN_NO_PI	0x0040
-/* This flag has no real effect */
-#define TUN_ONE_QUEUE	0x0080
-#define TUN_PERSIST 	0x0100	
-#define TUN_VNET_HDR 	0x0200
-#define TUN_TAP_MQ      0x0400
-
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
 #define TUNSETDEBUG   _IOW('T', 201, int) 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9dd3746..bc89d07 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -103,6 +103,21 @@ do {								\
 } while (0)
 #endif
 
+/* TUN device flags */
+
+/* IFF_ATTACH_QUEUE is never stored in device flags,
+ * overload it to mean fasync when stored there.
+ */
+#define TUN_FASYNC	IFF_ATTACH_QUEUE
+#define TUN_NO_PI	IFF_NO_PI
+/* This flag has no real effect */
+#define TUN_ONE_QUEUE	IFF_ONE_QUEUE
+#define TUN_PERSIST 	IFF_PERSIST
+#define TUN_VNET_HDR 	IFF_VNET_HDR
+#define TUN_TAP_MQ      IFF_MULTI_QUEUE
+
+#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
+		      IFF_MULTI_QUEUE)
 #define GOODCOPY_LEN 128
 
 #define FLT_EXACT_COUNT 8
@@ -1521,32 +1536,7 @@ static struct proto tun_proto = {
 
 static int tun_flags(struct tun_struct *tun)
 {
-	int flags = 0;
-
-	if (tun->flags & TUN_TUN_DEV)
-		flags |= IFF_TUN;
-	else
-		flags |= IFF_TAP;
-
-	if (tun->flags & TUN_NO_PI)
-		flags |= IFF_NO_PI;
-
-	/* This flag has no real effect.  We track the value for backwards
-	 * compatibility.
-	 */
-	if (tun->flags & TUN_ONE_QUEUE)
-		flags |= IFF_ONE_QUEUE;
-
-	if (tun->flags & TUN_VNET_HDR)
-		flags |= IFF_VNET_HDR;
-
-	if (tun->flags & TUN_TAP_MQ)
-		flags |= IFF_MULTI_QUEUE;
-
-	if (tun->flags & TUN_PERSIST)
-		flags |= IFF_PERSIST;
-
-	return flags;
+	return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
 }
 
 static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr,
@@ -1706,28 +1696,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
 
-	if (ifr->ifr_flags & IFF_NO_PI)
-		tun->flags |= TUN_NO_PI;
-	else
-		tun->flags &= ~TUN_NO_PI;
-
-	/* This flag has no real effect.  We track the value for backwards
-	 * compatibility.
-	 */
-	if (ifr->ifr_flags & IFF_ONE_QUEUE)
-		tun->flags |= TUN_ONE_QUEUE;
-	else
-		tun->flags &= ~TUN_ONE_QUEUE;
-
-	if (ifr->ifr_flags & IFF_VNET_HDR)
-		tun->flags |= TUN_VNET_HDR;
-	else
-		tun->flags &= ~TUN_VNET_HDR;
-
-	if (ifr->ifr_flags & IFF_MULTI_QUEUE)
-		tun->flags |= TUN_TAP_MQ;
-	else
-		tun->flags &= ~TUN_TAP_MQ;
+	tun->flags = (tun->flags & ~TUN_FEATURES) |
+		(ifr->ifr_flags & TUN_FEATURES);
 
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
@@ -1890,9 +1860,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	if (cmd == TUNGETFEATURES) {
 		/* Currently this just means: "what IFF flags are valid?".
 		 * This is needed because we never checked for invalid flags on
-		 * TUNSETIFF. */
-		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-				IFF_VNET_HDR | IFF_MULTI_QUEUE,
+		 * TUNSETIFF.  Why do we report IFF_TUN and IFF_TAP which are
+		 * not legal for TUNSETIFF here?  It's probably a bug, but it
+		 * doesn't seem to be worth fixing.
+		 */
+		return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
 				(unsigned int __user*)argp);
 	} else if (cmd == TUNSETQUEUE)
 		return tun_set_queue(file, &ifr);
-- 
MST

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox