Netdev List
 help / color / mirror / Atom feed
* [PATCH 1/2] net: qcom/emac: move phy init code to separate files
From: Timur Tabi @ 2016-12-07 20:39 UTC (permalink / raw)
  To: David Miller, netdev, alokc
In-Reply-To: <1481143186-20137-1-git-send-email-timur@codeaurora.org>

The internal PHY of the EMAC differs on each SOC, and the list will
only continue to grow.  By separating the code into individual files,
we can add support for more SOCs more cleanly.

Note: The internal PHY is also sometimes called the SGMII device.

We also stop referring to the various PHY variations by version number,
so no more "v2", "v3", etc.  Instead, the devices are named after the
SOC they belong to, which is in sync with the device tree property names.

Future patches will probably rearrange more code among the files.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/emac/Makefile        |   3 +-
 .../ethernet/qualcomm/emac/emac-sgmii-fsm9900.c    | 245 ++++++++++
 .../ethernet/qualcomm/emac/emac-sgmii-qdf2432.c    | 210 ++++++++
 drivers/net/ethernet/qualcomm/emac/emac-sgmii.c    | 538 +--------------------
 drivers/net/ethernet/qualcomm/emac/emac-sgmii.h    |   5 +-
 5 files changed, 478 insertions(+), 523 deletions(-)
 create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
 create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c

diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile
index 01ee144..204b787 100644
--- a/drivers/net/ethernet/qualcomm/emac/Makefile
+++ b/drivers/net/ethernet/qualcomm/emac/Makefile
@@ -4,4 +4,5 @@
 
 obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o
 
-qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o
+qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o \
+		  emac-sgmii-fsm9900.o emac-sgmii-qdf2432.o
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
new file mode 100644
index 0000000..faa8933
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
@@ -0,0 +1,245 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. FSM9900 EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include "emac.h"
+
+/* EMAC_QSERDES register offsets */
+#define EMAC_QSERDES_COM_SYS_CLK_CTRL		0x0000
+#define EMAC_QSERDES_COM_PLL_CNTRL		0x0014
+#define EMAC_QSERDES_COM_PLL_IP_SETI		0x0018
+#define EMAC_QSERDES_COM_PLL_CP_SETI		0x0024
+#define EMAC_QSERDES_COM_PLL_IP_SETP		0x0028
+#define EMAC_QSERDES_COM_PLL_CP_SETP		0x002c
+#define EMAC_QSERDES_COM_SYSCLK_EN_SEL		0x0038
+#define EMAC_QSERDES_COM_RESETSM_CNTRL		0x0040
+#define EMAC_QSERDES_COM_PLLLOCK_CMP1		0x0044
+#define EMAC_QSERDES_COM_PLLLOCK_CMP2		0x0048
+#define EMAC_QSERDES_COM_PLLLOCK_CMP3		0x004c
+#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN		0x0050
+#define EMAC_QSERDES_COM_DEC_START1		0x0064
+#define EMAC_QSERDES_COM_DIV_FRAC_START1	0x0098
+#define EMAC_QSERDES_COM_DIV_FRAC_START2	0x009c
+#define EMAC_QSERDES_COM_DIV_FRAC_START3	0x00a0
+#define EMAC_QSERDES_COM_DEC_START2		0x00a4
+#define EMAC_QSERDES_COM_PLL_CRCTRL		0x00ac
+#define EMAC_QSERDES_COM_RESET_SM		0x00bc
+#define EMAC_QSERDES_TX_BIST_MODE_LANENO	0x0100
+#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL	0x0108
+#define EMAC_QSERDES_TX_TX_DRV_LVL		0x010c
+#define EMAC_QSERDES_TX_LANE_MODE		0x0150
+#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN	0x0170
+#define EMAC_QSERDES_RX_CDR_CONTROL		0x0200
+#define EMAC_QSERDES_RX_CDR_CONTROL2		0x0210
+#define EMAC_QSERDES_RX_RX_EQ_GAIN12		0x0230
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_SERDES_START		0x0000
+#define EMAC_SGMII_PHY_CMN_PWR_CTRL		0x0004
+#define EMAC_SGMII_PHY_RX_PWR_CTRL		0x0008
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x0018
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x0058
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x0080
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x00b4
+
+#define PLL_IPSETI(x)				((x) & 0x3f)
+
+#define PLL_CPSETI(x)				((x) & 0xff)
+
+#define PLL_IPSETP(x)				((x) & 0x3f)
+
+#define PLL_CPSETP(x)				((x) & 0x1f)
+
+#define PLL_RCTRL(x)				(((x) & 0xf) << 4)
+#define PLL_CCTRL(x)				((x) & 0xf)
+
+#define LANE_MODE(x)				((x) & 0x1f)
+
+#define SYSCLK_CM				BIT(4)
+#define SYSCLK_AC_COUPLE			BIT(3)
+
+#define OCP_EN					BIT(5)
+#define PLL_DIV_FFEN				BIT(2)
+#define PLL_DIV_ORD				BIT(1)
+
+#define SYSCLK_SEL_CMOS				BIT(3)
+
+#define FRQ_TUNE_MODE				BIT(4)
+
+#define PLLLOCK_CMP_EN				BIT(0)
+
+#define DEC_START1_MUX				BIT(7)
+#define DEC_START1(x)				((x) & 0x7f)
+
+#define DIV_FRAC_START_MUX			BIT(7)
+#define DIV_FRAC_START(x)			((x) & 0x7f)
+
+#define DIV_FRAC_START3_MUX			BIT(4)
+#define DIV_FRAC_START3(x)			((x) & 0xf)
+
+#define DEC_START2_MUX				BIT(1)
+#define DEC_START2				BIT(0)
+
+#define READY					BIT(5)
+
+#define TX_EMP_POST1_LVL_MUX			BIT(5)
+#define TX_EMP_POST1_LVL(x)			((x) & 0x1f)
+
+#define TX_DRV_LVL_MUX				BIT(4)
+#define TX_DRV_LVL(x)				((x) & 0xf)
+
+#define EMP_EN_MUX				BIT(1)
+#define EMP_EN					BIT(0)
+
+#define SECONDORDERENABLE			BIT(6)
+#define FIRSTORDER_THRESH(x)			(((x) & 0x7) << 3)
+#define SECONDORDERGAIN(x)			((x) & 0x7)
+
+#define RX_EQ_GAIN2(x)				(((x) & 0xf) << 4)
+#define RX_EQ_GAIN1(x)				((x) & 0xf)
+
+#define SERDES_START				BIT(0)
+
+#define BIAS_EN					BIT(6)
+#define PLL_EN					BIT(5)
+#define SYSCLK_EN				BIT(4)
+#define CLKBUF_L_EN				BIT(3)
+#define PLL_TXCLK_EN				BIT(1)
+#define PLL_RXCLK_EN				BIT(0)
+
+#define L0_RX_SIGDET_EN				BIT(7)
+#define L0_RX_TERM_MODE(x)			(((x) & 3) << 4)
+#define L0_RX_I_EN				BIT(1)
+
+#define L0_TX_EN				BIT(5)
+#define L0_CLKBUF_EN				BIT(4)
+#define L0_TRAN_BIAS_EN				BIT(1)
+
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+#define L0_RESET_TSYNC_EN			BIT(4)
+#define L0_DRV_LVL(x)				((x) & 0xf)
+
+#define PWRDN_B					BIT(0)
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+#define PLLLOCK_CMP(x)				((x) & 0xff)
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;	/* added to the base address */
+	u32 val;		/* value written at that offset */
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i) /* table order = write order */
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = {
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CMN_PWR_CTRL, /* first write: PLL_EN not yet set */
+		BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN},
+	{EMAC_SGMII_PHY_RX_PWR_CTRL,
+		L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN},
+	{EMAC_SGMII_PHY_CMN_PWR_CTRL, /* second write: same bits plus PLL_EN */
+		BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN |
+		PLL_RXCLK_EN},
+	{EMAC_SGMII_PHY_LANE_CTRL1,
+		L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)},
+};
+
+static const struct emac_reg_write sysclk_refclk_setting[] = {
+	{EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS},
+	{EMAC_QSERDES_COM_SYS_CLK_CTRL,	SYSCLK_CM | SYSCLK_AC_COUPLE},
+};
+
+static const struct emac_reg_write pll_setting[] = {
+	{EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)},
+	{EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)},
+	{EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)},
+	{EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)},
+	{EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)},
+	{EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD},
+	{EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)},
+	{EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2},
+	{EMAC_QSERDES_COM_DIV_FRAC_START1,
+		DIV_FRAC_START_MUX | DIV_FRAC_START(85)},
+	{EMAC_QSERDES_COM_DIV_FRAC_START2,
+		DIV_FRAC_START_MUX | DIV_FRAC_START(42)},
+	{EMAC_QSERDES_COM_DIV_FRAC_START3,
+		DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN},
+	{EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE},
+};
+
+static const struct emac_reg_write cdr_setting[] = {
+	{EMAC_QSERDES_RX_CDR_CONTROL,
+		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)},
+	{EMAC_QSERDES_RX_CDR_CONTROL2,
+		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)},
+};
+
+static const struct emac_reg_write tx_rx_setting[] = {
+	{EMAC_QSERDES_TX_BIST_MODE_LANENO, 0},
+	{EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)},
+	{EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN},
+	{EMAC_QSERDES_TX_TX_EMP_POST1_LVL,
+		TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)},
+	{EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)},
+	{EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)},
+};
+
+int emac_sgmii_init_fsm9900(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	unsigned int i;
+
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1,
+			   ARRAY_SIZE(physical_coding_sublayer_programming_v1));
+	emac_reg_write_all(phy->base, sysclk_refclk_setting,
+			   ARRAY_SIZE(sysclk_refclk_setting));
+	emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting));
+	emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting));
+	emac_reg_write_all(phy->base, tx_rx_setting, ARRAY_SIZE(tx_rx_setting));
+
+	/* Power up the Ser/Des engine; its READY bit is polled below */
+	writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START);
+
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { /* poll for READY */
+		if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY)
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) { /* READY never seen */
+		netdev_err(adpt->netdev, "error: ser/des failed to start\n");
+		return -EIO;
+	}
+	/* Mask out all the SGMII interrupts */
+	writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
new file mode 100644
index 0000000..6170200
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. QDF2432 EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include "emac.h"
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x0018
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x0058
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x0080
+#define EMAC_SGMII_PHY_RESET_CTRL		0x00a8
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x00b4
+
+/* SGMII digital lane registers */
+#define EMAC_SGMII_LN_DRVR_CTRL0		0x000C
+#define EMAC_SGMII_LN_DRVR_TAP_EN		0x0018
+#define EMAC_SGMII_LN_TX_MARGINING		0x001C
+#define EMAC_SGMII_LN_TX_PRE			0x0020
+#define EMAC_SGMII_LN_TX_POST			0x0024
+#define EMAC_SGMII_LN_TX_BAND_MODE		0x0060
+#define EMAC_SGMII_LN_LANE_MODE			0x0064
+#define EMAC_SGMII_LN_PARALLEL_RATE		0x0078
+#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x00B8
+#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x00D0
+#define EMAC_SGMII_LN_VGA_INITVAL		0x0134
+#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x017C
+#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x0188
+#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x0194
+#define EMAC_SGMII_LN_RX_BAND			0x019C
+#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x01B8
+#define EMAC_SGMII_LN_RSM_CONFIG		0x01F0
+#define EMAC_SGMII_LN_SIGDET_ENABLES		0x0224
+#define EMAC_SGMII_LN_SIGDET_CNTRL		0x0228
+#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x022C
+#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x02A0
+#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x02AC
+#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x02BC
+
+/* SGMII digital lane register values */
+#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
+#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
+#define UCDR_ENABLE				BIT(6)
+#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS4			BIT(7)
+#define SIGDET_EN_PS0_TO_PS2			BIT(6)
+
+#define TXVAL_VALID_INIT			BIT(4)
+#define KR_PCIGEN3_MODE				BIT(0)
+
+#define MAIN_EN					BIT(0)
+
+#define TX_MARGINING_MUX			BIT(6)
+#define TX_MARGINING(x)				((x) & 0x3f)
+
+#define TX_PRE_MUX				BIT(6)
+
+#define TX_POST_MUX				BIT(6)
+
+#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
+#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
+
+#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
+#define MIXER_DATARATE_MODE(x)			((x) & 3)
+
+#define VGA_THRESH_DFE(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
+#define SIGDET_FLT_BYP				BIT(0)
+
+#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
+
+#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
+
+#define DRVR_LOGIC_CLK_EN			BIT(4)
+#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
+
+#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
+
+#define BAND_MODE0(x)				((x) & 0x3)
+
+#define LANE_MODE(x)				((x) & 0x1f)
+
+#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
+#define BYPASS_RSM_SAMP_CAL			BIT(1)
+#define BYPASS_RSM_DLL_CAL			BIT(0)
+
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+
+#define PWRDN_B					BIT(0)
+
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;	/* added to the base address */
+	u32 val;		/* value written at that offset */
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i) /* table order = write order */
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write sgmii_laned[] = {
+	/* CDR Settings */
+	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
+		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
+	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
+	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
+
+	/* TX/RX Settings */
+	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
+
+	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
+	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
+	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
+	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
+	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
+
+	{EMAC_SGMII_LN_CML_CTRL_MODE0,
+		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
+	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
+		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
+	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
+	{EMAC_SGMII_LN_SIGDET_ENABLES,
+		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
+	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
+
+	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
+	{EMAC_SGMII_LN_RX_MISC_CNTRL0, 0},
+	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
+		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
+
+	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
+	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)},
+	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)},
+	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
+	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)},
+	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
+};
+
+static const struct emac_reg_write physical_coding_sublayer_programming[] = {
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
+	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
+};
+
+int emac_sgmii_init_qdf2432(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	void __iomem *phy_regs = phy->base;
+	void __iomem *laned = phy->digital;
+	unsigned int i;
+	u32 lnstatus;
+
+	/* Program the PCS registers (phy->base, same pointer as phy_regs) */
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming,
+			   ARRAY_SIZE(physical_coding_sublayer_programming));
+
+	/* Program the digital lane registers (phy->digital, same as laned) */
+	emac_reg_write_all(phy->digital, sgmii_laned, ARRAY_SIZE(sgmii_laned));
+
+	/* Power up the PCS and start the lane reset state machine */
+
+	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
+	writel(1, laned + SGMII_LN_RSM_START);
+
+	/* Wait for c_ready (bit 1 of the lane status register) to assert */
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
+		if (lnstatus & BIT(1))
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) { /* c_ready never seen */
+		netdev_err(adpt->netdev, "SGMII failed to start\n");
+		return -EIO;
+	}
+
+	/* Disable digital and SERDES loopback */
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
+	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
+
+	/* Mask out all the SGMII interrupts */
+	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 3edb5a5..07c872a 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -20,448 +20,33 @@
 #include "emac-mac.h"
 #include "emac-sgmii.h"
 
-/* EMAC_QSERDES register offsets */
-#define EMAC_QSERDES_COM_SYS_CLK_CTRL		0x000000
-#define EMAC_QSERDES_COM_PLL_CNTRL		0x000014
-#define EMAC_QSERDES_COM_PLL_IP_SETI		0x000018
-#define EMAC_QSERDES_COM_PLL_CP_SETI		0x000024
-#define EMAC_QSERDES_COM_PLL_IP_SETP		0x000028
-#define EMAC_QSERDES_COM_PLL_CP_SETP		0x00002c
-#define EMAC_QSERDES_COM_SYSCLK_EN_SEL		0x000038
-#define EMAC_QSERDES_COM_RESETSM_CNTRL		0x000040
-#define EMAC_QSERDES_COM_PLLLOCK_CMP1		0x000044
-#define EMAC_QSERDES_COM_PLLLOCK_CMP2		0x000048
-#define EMAC_QSERDES_COM_PLLLOCK_CMP3		0x00004c
-#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN		0x000050
-#define EMAC_QSERDES_COM_DEC_START1		0x000064
-#define EMAC_QSERDES_COM_DIV_FRAC_START1	0x000098
-#define EMAC_QSERDES_COM_DIV_FRAC_START2	0x00009c
-#define EMAC_QSERDES_COM_DIV_FRAC_START3	0x0000a0
-#define EMAC_QSERDES_COM_DEC_START2		0x0000a4
-#define EMAC_QSERDES_COM_PLL_CRCTRL		0x0000ac
-#define EMAC_QSERDES_COM_RESET_SM		0x0000bc
-#define EMAC_QSERDES_TX_BIST_MODE_LANENO	0x000100
-#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL	0x000108
-#define EMAC_QSERDES_TX_TX_DRV_LVL		0x00010c
-#define EMAC_QSERDES_TX_LANE_MODE		0x000150
-#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN	0x000170
-#define EMAC_QSERDES_RX_CDR_CONTROL		0x000200
-#define EMAC_QSERDES_RX_CDR_CONTROL2		0x000210
-#define EMAC_QSERDES_RX_RX_EQ_GAIN12		0x000230
-
 /* EMAC_SGMII register offsets */
-#define EMAC_SGMII_PHY_SERDES_START		0x000000
-#define EMAC_SGMII_PHY_CMN_PWR_CTRL		0x000004
-#define EMAC_SGMII_PHY_RX_PWR_CTRL		0x000008
-#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x00000C
-#define EMAC_SGMII_PHY_LANE_CTRL1		0x000018
-#define EMAC_SGMII_PHY_AUTONEG_CFG2		0x000048
-#define EMAC_SGMII_PHY_CDR_CTRL0		0x000058
-#define EMAC_SGMII_PHY_SPEED_CFG1		0x000074
-#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x000080
-#define EMAC_SGMII_PHY_RESET_CTRL		0x0000a8
-#define EMAC_SGMII_PHY_IRQ_CMD			0x0000ac
-#define EMAC_SGMII_PHY_INTERRUPT_CLEAR		0x0000b0
-#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x0000b4
-#define EMAC_SGMII_PHY_INTERRUPT_STATUS		0x0000b8
-#define EMAC_SGMII_PHY_RX_CHK_STATUS		0x0000d4
-#define EMAC_SGMII_PHY_AUTONEG0_STATUS		0x0000e0
-#define EMAC_SGMII_PHY_AUTONEG1_STATUS		0x0000e4
-
-/* EMAC_QSERDES_COM_PLL_IP_SETI */
-#define PLL_IPSETI(x)				((x) & 0x3f)
-
-/* EMAC_QSERDES_COM_PLL_CP_SETI */
-#define PLL_CPSETI(x)				((x) & 0xff)
-
-/* EMAC_QSERDES_COM_PLL_IP_SETP */
-#define PLL_IPSETP(x)				((x) & 0x3f)
-
-/* EMAC_QSERDES_COM_PLL_CP_SETP */
-#define PLL_CPSETP(x)				((x) & 0x1f)
-
-/* EMAC_QSERDES_COM_PLL_CRCTRL */
-#define PLL_RCTRL(x)				(((x) & 0xf) << 4)
-#define PLL_CCTRL(x)				((x) & 0xf)
-
-/* SGMII v2 PHY registers per lane */
-#define EMAC_SGMII_PHY_LN_OFFSET		0x0400
-
-/* SGMII v2 digital lane registers */
-#define EMAC_SGMII_LN_DRVR_CTRL0		0x00C
-#define EMAC_SGMII_LN_DRVR_TAP_EN		0x018
-#define EMAC_SGMII_LN_TX_MARGINING		0x01C
-#define EMAC_SGMII_LN_TX_PRE			0x020
-#define EMAC_SGMII_LN_TX_POST			0x024
-#define EMAC_SGMII_LN_TX_BAND_MODE		0x060
-#define EMAC_SGMII_LN_LANE_MODE			0x064
-#define EMAC_SGMII_LN_PARALLEL_RATE		0x078
-#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x0B8
-#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x0D0
-#define EMAC_SGMII_LN_VGA_INITVAL		0x134
-#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x17C
-#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x188
-#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x194
-#define EMAC_SGMII_LN_RX_BAND			0x19C
-#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x1B8
-#define EMAC_SGMII_LN_RSM_CONFIG		0x1F0
-#define EMAC_SGMII_LN_SIGDET_ENABLES		0x224
-#define EMAC_SGMII_LN_SIGDET_CNTRL		0x228
-#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x22C
-#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x2A0
-#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x2AC
-#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x2BC
-
-/* SGMII v2 digital lane register values */
-#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
-#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
-#define UCDR_ENABLE				BIT(6)
-#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
-#define SIGDET_LP_BYP_PS4			BIT(7)
-#define SIGDET_EN_PS0_TO_PS2			BIT(6)
-#define EN_ACCOUPLEVCM_SW_MUX			BIT(5)
-#define EN_ACCOUPLEVCM_SW			BIT(4)
-#define RX_SYNC_EN				BIT(3)
-#define RXTERM_HIGHZ_PS5			BIT(2)
-#define SIGDET_EN_PS3				BIT(1)
-#define EN_ACCOUPLE_VCM_PS3			BIT(0)
-#define UFS_MODE				BIT(5)
-#define TXVAL_VALID_INIT			BIT(4)
-#define TXVAL_VALID_MUX				BIT(3)
-#define TXVAL_VALID				BIT(2)
-#define USB3P1_MODE				BIT(1)
-#define KR_PCIGEN3_MODE				BIT(0)
-#define PRE_EN					BIT(3)
-#define POST_EN					BIT(2)
-#define MAIN_EN_MUX				BIT(1)
-#define MAIN_EN					BIT(0)
-#define TX_MARGINING_MUX			BIT(6)
-#define TX_MARGINING(x)				((x) & 0x3f)
-#define TX_PRE_MUX				BIT(6)
-#define TX_PRE(x)				((x) & 0x3f)
-#define TX_POST_MUX				BIT(6)
-#define TX_POST(x)				((x) & 0x3f)
-#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
-#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
-#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
-#define MIXER_DATARATE_MODE(x)			((x) & 3)
-#define VGA_THRESH_DFE(x)			((x) & 0x3f)
-#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
-#define SIGDET_LP_BYP_MUX			BIT(4)
-#define SIGDET_LP_BYP				BIT(3)
-#define SIGDET_EN_MUX				BIT(2)
-#define SIGDET_EN				BIT(1)
-#define SIGDET_FLT_BYP				BIT(0)
-#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
-#define SIGDET_BW_CTRL(x)			((x) & 0xf)
-#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
-#define SIGDET_DEGLITCH_BYP			BIT(0)
-#define INVERT_PCS_RX_CLK			BIT(7)
-#define PWM_EN					BIT(6)
-#define RXBIAS_SEL(x)				(((x) & 0x3) << 4)
-#define EBDAC_SIGN				BIT(3)
-#define EDAC_SIGN				BIT(2)
-#define EN_AUXTAP1SIGN_INVERT			BIT(1)
-#define EN_DAC_CHOPPING				BIT(0)
-#define DRVR_LOGIC_CLK_EN			BIT(4)
-#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
-#define PARALLEL_RATE_MODE2(x)			(((x) & 0x3) << 4)
-#define PARALLEL_RATE_MODE1(x)			(((x) & 0x3) << 2)
-#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
-#define BAND_MODE2(x)				(((x) & 0x3) << 4)
-#define BAND_MODE1(x)				(((x) & 0x3) << 2)
-#define BAND_MODE0(x)				((x) & 0x3)
-#define LANE_SYNC_MODE				BIT(5)
-#define LANE_MODE(x)				((x) & 0x1f)
-#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
-#define EN_DLL_MODE0				BIT(4)
-#define EN_IQ_DCC_MODE0				BIT(3)
-#define EN_IQCAL_MODE0				BIT(2)
-#define EN_QPATH_MODE0				BIT(1)
-#define EN_EPATH_MODE0				BIT(0)
-#define FORCE_TSYNC_ACK				BIT(7)
-#define FORCE_CMN_ACK				BIT(6)
-#define FORCE_CMN_READY				BIT(5)
-#define EN_RCLK_DEGLITCH			BIT(4)
-#define BYPASS_RSM_CDR_RESET			BIT(3)
-#define BYPASS_RSM_TSYNC			BIT(2)
-#define BYPASS_RSM_SAMP_CAL			BIT(1)
-#define BYPASS_RSM_DLL_CAL			BIT(0)
-
-/* EMAC_QSERDES_COM_SYS_CLK_CTRL */
-#define SYSCLK_CM				BIT(4)
-#define SYSCLK_AC_COUPLE			BIT(3)
-
-/* EMAC_QSERDES_COM_PLL_CNTRL */
-#define OCP_EN					BIT(5)
-#define PLL_DIV_FFEN				BIT(2)
-#define PLL_DIV_ORD				BIT(1)
-
-/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */
-#define SYSCLK_SEL_CMOS				BIT(3)
-
-/* EMAC_QSERDES_COM_RESETSM_CNTRL */
-#define FRQ_TUNE_MODE				BIT(4)
-
-/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */
-#define PLLLOCK_CMP_EN				BIT(0)
-
-/* EMAC_QSERDES_COM_DEC_START1 */
-#define DEC_START1_MUX				BIT(7)
-#define DEC_START1(x)				((x) & 0x7f)
-
-/* EMAC_QSERDES_COM_DIV_FRAC_START1 * EMAC_QSERDES_COM_DIV_FRAC_START2 */
-#define DIV_FRAC_START_MUX			BIT(7)
-#define DIV_FRAC_START(x)			((x) & 0x7f)
-
-/* EMAC_QSERDES_COM_DIV_FRAC_START3 */
-#define DIV_FRAC_START3_MUX			BIT(4)
-#define DIV_FRAC_START3(x)			((x) & 0xf)
-
-/* EMAC_QSERDES_COM_DEC_START2 */
-#define DEC_START2_MUX				BIT(1)
-#define DEC_START2				BIT(0)
-
-/* EMAC_QSERDES_COM_RESET_SM */
-#define READY					BIT(5)
-
-/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */
-#define TX_EMP_POST1_LVL_MUX			BIT(5)
-#define TX_EMP_POST1_LVL(x)			((x) & 0x1f)
-#define TX_EMP_POST1_LVL_BMSK			0x1f
-#define TX_EMP_POST1_LVL_SHFT			0
-
-/* EMAC_QSERDES_TX_TX_DRV_LVL */
-#define TX_DRV_LVL_MUX				BIT(4)
-#define TX_DRV_LVL(x)				((x) & 0xf)
-
-/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */
-#define EMP_EN_MUX				BIT(1)
-#define EMP_EN					BIT(0)
-
-/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */
-#define HBW_PD_EN				BIT(7)
-#define SECONDORDERENABLE			BIT(6)
-#define FIRSTORDER_THRESH(x)			(((x) & 0x7) << 3)
-#define SECONDORDERGAIN(x)			((x) & 0x7)
-
-/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */
-#define RX_EQ_GAIN2(x)				(((x) & 0xf) << 4)
-#define RX_EQ_GAIN1(x)				((x) & 0xf)
-
-/* EMAC_SGMII_PHY_SERDES_START */
-#define SERDES_START				BIT(0)
-
-/* EMAC_SGMII_PHY_CMN_PWR_CTRL */
-#define BIAS_EN					BIT(6)
-#define PLL_EN					BIT(5)
-#define SYSCLK_EN				BIT(4)
-#define CLKBUF_L_EN				BIT(3)
-#define PLL_TXCLK_EN				BIT(1)
-#define PLL_RXCLK_EN				BIT(0)
-
-/* EMAC_SGMII_PHY_RX_PWR_CTRL */
-#define L0_RX_SIGDET_EN				BIT(7)
-#define L0_RX_TERM_MODE(x)			(((x) & 3) << 4)
-#define L0_RX_I_EN				BIT(1)
-
-/* EMAC_SGMII_PHY_TX_PWR_CTRL */
-#define L0_TX_EN				BIT(5)
-#define L0_CLKBUF_EN				BIT(4)
-#define L0_TRAN_BIAS_EN				BIT(1)
-
-/* EMAC_SGMII_PHY_LANE_CTRL1 */
-#define L0_RX_EQUALIZE_ENABLE			BIT(6)
-#define L0_RESET_TSYNC_EN			BIT(4)
-#define L0_DRV_LVL(x)				((x) & 0xf)
-
-/* EMAC_SGMII_PHY_AUTONEG_CFG2 */
+#define EMAC_SGMII_PHY_AUTONEG_CFG2		0x0048
+#define EMAC_SGMII_PHY_SPEED_CFG1		0x0074
+#define EMAC_SGMII_PHY_IRQ_CMD			0x00ac
+#define EMAC_SGMII_PHY_INTERRUPT_CLEAR		0x00b0
+#define EMAC_SGMII_PHY_INTERRUPT_STATUS		0x00b8
+
 #define FORCE_AN_TX_CFG				BIT(5)
 #define FORCE_AN_RX_CFG				BIT(4)
 #define AN_ENABLE				BIT(0)
 
-/* EMAC_SGMII_PHY_SPEED_CFG1 */
 #define DUPLEX_MODE				BIT(4)
 #define SPDMODE_1000				BIT(1)
 #define SPDMODE_100				BIT(0)
 #define SPDMODE_10				0
-#define SPDMODE_BMSK				3
-#define SPDMODE_SHFT				0
-
-/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */
-#define PWRDN_B					BIT(0)
-#define CDR_MAX_CNT(x)				((x) & 0xff)
-
-/* EMAC_QSERDES_TX_BIST_MODE_LANENO */
-#define BIST_LANE_NUMBER(x)			(((x) & 3) << 5)
-#define BISTMODE(x)				((x) & 0x1f)
-
-/* EMAC_QSERDES_COM_PLLLOCK_CMPx */
-#define PLLLOCK_CMP(x)				((x) & 0xff)
 
-/* EMAC_SGMII_PHY_RESET_CTRL */
-#define PHY_SW_RESET				BIT(0)
-
-/* EMAC_SGMII_PHY_IRQ_CMD */
 #define IRQ_GLOBAL_CLEAR			BIT(0)
 
-/* EMAC_SGMII_PHY_INTERRUPT_MASK */
 #define DECODE_CODE_ERR				BIT(7)
 #define DECODE_DISP_ERR				BIT(6)
-#define PLL_UNLOCK				BIT(5)
-#define AN_ILLEGAL_TERM				BIT(4)
-#define SYNC_FAIL				BIT(3)
-#define AN_START				BIT(2)
-#define AN_END					BIT(1)
-#define AN_REQUEST				BIT(0)
 
 #define SGMII_PHY_IRQ_CLR_WAIT_TIME		10
 
-#define SGMII_PHY_INTERRUPT_ERR (\
-	DECODE_CODE_ERR         |\
-	DECODE_DISP_ERR)
-
-#define SGMII_ISR_AN_MASK       (\
-	AN_REQUEST              |\
-	AN_START                |\
-	AN_END                  |\
-	AN_ILLEGAL_TERM         |\
-	PLL_UNLOCK              |\
-	SYNC_FAIL)
-
-#define SGMII_ISR_MASK          (\
-	SGMII_PHY_INTERRUPT_ERR |\
-	SGMII_ISR_AN_MASK)
-
-/* SGMII TX_CONFIG */
-#define TXCFG_LINK				0x8000
-#define TXCFG_MODE_BMSK				0x1c00
-#define TXCFG_1000_FULL				0x1800
-#define TXCFG_100_FULL				0x1400
-#define TXCFG_100_HALF				0x0400
-#define TXCFG_10_FULL				0x1000
-#define TXCFG_10_HALF				0x0000
+#define SGMII_PHY_INTERRUPT_ERR		(DECODE_CODE_ERR | DECODE_DISP_ERR)
 
 #define SERDES_START_WAIT_TIMES			100
 
-struct emac_reg_write {
-	unsigned int offset;
-	u32 val;
-};
-
-static void emac_reg_write_all(void __iomem *base,
-			       const struct emac_reg_write *itr, size_t size)
-{
-	size_t i;
-
-	for (i = 0; i < size; ++itr, ++i)
-		writel(itr->val, base + itr->offset);
-}
-
-static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = {
-	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
-	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
-	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
-		BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN},
-	{EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN},
-	{EMAC_SGMII_PHY_RX_PWR_CTRL,
-		L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN},
-	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
-		BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN |
-		PLL_RXCLK_EN},
-	{EMAC_SGMII_PHY_LANE_CTRL1,
-		L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)},
-};
-
-static const struct emac_reg_write sysclk_refclk_setting[] = {
-	{EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS},
-	{EMAC_QSERDES_COM_SYS_CLK_CTRL,	SYSCLK_CM | SYSCLK_AC_COUPLE},
-};
-
-static const struct emac_reg_write pll_setting[] = {
-	{EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)},
-	{EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)},
-	{EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)},
-	{EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)},
-	{EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)},
-	{EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD},
-	{EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)},
-	{EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2},
-	{EMAC_QSERDES_COM_DIV_FRAC_START1,
-		DIV_FRAC_START_MUX | DIV_FRAC_START(85)},
-	{EMAC_QSERDES_COM_DIV_FRAC_START2,
-		DIV_FRAC_START_MUX | DIV_FRAC_START(42)},
-	{EMAC_QSERDES_COM_DIV_FRAC_START3,
-		DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN},
-	{EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE},
-};
-
-static const struct emac_reg_write cdr_setting[] = {
-	{EMAC_QSERDES_RX_CDR_CONTROL,
-		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)},
-	{EMAC_QSERDES_RX_CDR_CONTROL2,
-		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)},
-};
-
-static const struct emac_reg_write tx_rx_setting[] = {
-	{EMAC_QSERDES_TX_BIST_MODE_LANENO, 0},
-	{EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)},
-	{EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN},
-	{EMAC_QSERDES_TX_TX_EMP_POST1_LVL,
-		TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)},
-	{EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)},
-	{EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)},
-};
-
-static const struct emac_reg_write sgmii_v2_laned[] = {
-	/* CDR Settings */
-	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
-		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
-	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
-	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
-
-	/* TX/RX Settings */
-	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
-
-	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
-	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
-	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
-	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
-	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
-
-	{EMAC_SGMII_LN_CML_CTRL_MODE0,
-		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
-	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
-		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
-	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
-	{EMAC_SGMII_LN_SIGDET_ENABLES,
-		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
-	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
-
-	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
-	{EMAC_SGMII_LN_RX_MISC_CNTRL0, 0},
-	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
-		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
-
-	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
-	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)},
-	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)},
-	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
-	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)},
-	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
-};
-
-static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = {
-	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
-	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
-	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
-	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
-};
-
 static int emac_sgmii_link_init(struct emac_adapter *adpt)
 {
 	struct phy_device *phydev = adpt->phydev;
@@ -536,98 +121,6 @@ static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
 	return 0;
 }
 
-int emac_sgmii_init_v1(struct emac_adapter *adpt)
-{
-	struct emac_phy *phy = &adpt->phy;
-	unsigned int i;
-	int ret;
-
-	ret = emac_sgmii_link_init(adpt);
-	if (ret)
-		return ret;
-
-	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1,
-			   ARRAY_SIZE(physical_coding_sublayer_programming_v1));
-	emac_reg_write_all(phy->base, sysclk_refclk_setting,
-			   ARRAY_SIZE(sysclk_refclk_setting));
-	emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting));
-	emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting));
-	emac_reg_write_all(phy->base, tx_rx_setting,
-			   ARRAY_SIZE(tx_rx_setting));
-
-	/* Power up the Ser/Des engine */
-	writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START);
-
-	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
-		if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY)
-			break;
-		usleep_range(100, 200);
-	}
-
-	if (i == SERDES_START_WAIT_TIMES) {
-		netdev_err(adpt->netdev, "error: ser/des failed to start\n");
-		return -EIO;
-	}
-	/* Mask out all the SGMII Interrupt */
-	writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
-
-	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
-
-	return 0;
-}
-
-int emac_sgmii_init_v2(struct emac_adapter *adpt)
-{
-	struct emac_phy *phy = &adpt->phy;
-	void __iomem *phy_regs = phy->base;
-	void __iomem *laned = phy->digital;
-	unsigned int i;
-	u32 lnstatus;
-	int ret;
-
-	ret = emac_sgmii_link_init(adpt);
-	if (ret)
-		return ret;
-
-	/* PCS lane-x init */
-	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2,
-			   ARRAY_SIZE(physical_coding_sublayer_programming_v2));
-
-	/* SGMII lane-x init */
-	emac_reg_write_all(phy->digital,
-			   sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned));
-
-	/* Power up PCS and start reset lane state machine */
-
-	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
-	writel(1, laned + SGMII_LN_RSM_START);
-
-	/* Wait for c_ready assertion */
-	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
-		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
-		if (lnstatus & BIT(1))
-			break;
-		usleep_range(100, 200);
-	}
-
-	if (i == SERDES_START_WAIT_TIMES) {
-		netdev_err(adpt->netdev, "SGMII failed to start\n");
-		return -EIO;
-	}
-
-	/* Disable digital and SERDES loopback */
-	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
-	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
-	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
-
-	/* Mask out all the SGMII Interrupt */
-	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
-
-	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
-
-	return 0;
-}
-
 static void emac_sgmii_reset_prepare(struct emac_adapter *adpt)
 {
 	struct emac_phy *phy = &adpt->phy;
@@ -651,16 +144,19 @@ void emac_sgmii_reset(struct emac_adapter *adpt)
 {
 	int ret;
 
-	clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000);
 	emac_sgmii_reset_prepare(adpt);
 
+	ret = emac_sgmii_link_init(adpt);
+	if (ret) {
+		netdev_err(adpt->netdev, "unsupported link speed\n");
+		return;
+	}
+
 	ret = adpt->phy.initialize(adpt);
 	if (ret)
 		netdev_err(adpt->netdev,
 			   "could not reinitialize internal PHY (error=%i)\n",
 			   ret);
-
-	clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000);
 }
 
 static int emac_sgmii_acpi_match(struct device *dev, void *data)
@@ -668,7 +164,7 @@ static int emac_sgmii_acpi_match(struct device *dev, void *data)
 	static const struct acpi_device_id match_table[] = {
 		{
 			.id = "QCOM8071",
-			.driver_data = (kernel_ulong_t)emac_sgmii_init_v2,
+			.driver_data = (kernel_ulong_t)emac_sgmii_init_qdf2432,
 		},
 		{}
 	};
@@ -684,11 +180,11 @@ static int emac_sgmii_acpi_match(struct device *dev, void *data)
 static const struct of_device_id emac_sgmii_dt_match[] = {
 	{
 		.compatible = "qcom,fsm9900-emac-sgmii",
-		.data = emac_sgmii_init_v1,
+		.data = emac_sgmii_init_fsm9900,
 	},
 	{
 		.compatible = "qcom,qdf2432-emac-sgmii",
-		.data = emac_sgmii_init_v2,
+		.data = emac_sgmii_init_qdf2432,
 	},
 	{}
 };
@@ -760,6 +256,8 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
 	if (ret)
 		goto error;
 
+	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
+
 	/* We've remapped the addresses, so we don't need the device any
 	 * more.  of_find_device_by_node() says we should release it.
 	 */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
index ce79212..e2bef14 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
@@ -16,9 +16,10 @@
 struct emac_adapter;
 struct platform_device;
 
-int emac_sgmii_init_v1(struct emac_adapter *adpt);
-int emac_sgmii_init_v2(struct emac_adapter *adpt);
 int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt);
 void emac_sgmii_reset(struct emac_adapter *adpt);
 
+int emac_sgmii_init_fsm9900(struct emac_adapter *adpt);
+int emac_sgmii_init_qdf2432(struct emac_adapter *adpt);
+
 #endif
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.  Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply related

* [PATCH 2/2] [v2] net: qcom/emac: add support for the Qualcomm Technologies QDF2400
From: Timur Tabi @ 2016-12-07 20:39 UTC (permalink / raw)
  To: David Miller, netdev, alokc
In-Reply-To: <1481143186-20137-1-git-send-email-timur@codeaurora.org>

The QDF2432 and the QDF2400 have slightly different internal PHYs,
so there are some programming differences.  Some of the registers in
the QDF2400 have moved, and some registers require different values
during initialization.

Because of the differences, and because HIDs are a scarce resource,
the ACPI tables specify the hardware version in an _HRV property.
Version 1 is the QDF2432, and version 2 is the QDF2400.  Any future
SOC that has the same internal PHY but different programming
requirements will be assigned the next available version number.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
---

v2:
   use _HRV instead of new HID for QDF2400

 drivers/net/ethernet/qualcomm/emac/Makefile        |   3 +-
 .../ethernet/qualcomm/emac/emac-sgmii-qdf2400.c    | 217 +++++++++++++++++++++
 drivers/net/ethernet/qualcomm/emac/emac-sgmii.c    |  31 ++-
 drivers/net/ethernet/qualcomm/emac/emac-sgmii.h    |   1 +
 4 files changed, 247 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c

diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile
index 204b787..7a66879 100644
--- a/drivers/net/ethernet/qualcomm/emac/Makefile
+++ b/drivers/net/ethernet/qualcomm/emac/Makefile
@@ -5,4 +5,5 @@
 obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o
 
 qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o \
-		  emac-sgmii-fsm9900.o emac-sgmii-qdf2432.o
+		  emac-sgmii-fsm9900.o emac-sgmii-qdf2432.o \
+		  emac-sgmii-qdf2400.o
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
new file mode 100644
index 0000000..5b84194
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. QDF2400 EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include "emac.h"
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x0018
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x0058
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x0080
+#define EMAC_SGMII_PHY_RESET_CTRL		0x00a8
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x00b4
+
+/* SGMII digital lane registers */
+#define EMAC_SGMII_LN_DRVR_CTRL0		0x000C
+#define EMAC_SGMII_LN_DRVR_TAP_EN		0x0018
+#define EMAC_SGMII_LN_TX_MARGINING		0x001C
+#define EMAC_SGMII_LN_TX_PRE			0x0020
+#define EMAC_SGMII_LN_TX_POST			0x0024
+#define EMAC_SGMII_LN_TX_BAND_MODE		0x0060
+#define EMAC_SGMII_LN_LANE_MODE			0x0064
+#define EMAC_SGMII_LN_PARALLEL_RATE		0x007C
+#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x00C0
+#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x00D8
+#define EMAC_SGMII_LN_VGA_INITVAL		0x013C
+#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x0184
+#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x0190
+#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x019C
+#define EMAC_SGMII_LN_RX_BAND			0x01A4
+#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x01C0
+#define EMAC_SGMII_LN_RSM_CONFIG		0x01F8
+#define EMAC_SGMII_LN_SIGDET_ENABLES		0x0230
+#define EMAC_SGMII_LN_SIGDET_CNTRL		0x0234
+#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x0238
+#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x02AC
+#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x02B8
+#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x02C8
+
+/* SGMII digital lane register values */
+#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
+#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
+#define UCDR_ENABLE				BIT(6)
+#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS4			BIT(7)
+#define SIGDET_EN_PS0_TO_PS2			BIT(6)
+
+#define TXVAL_VALID_INIT			BIT(4)
+#define KR_PCIGEN3_MODE				BIT(0)
+
+#define MAIN_EN					BIT(0)
+
+#define TX_MARGINING_MUX			BIT(6)
+#define TX_MARGINING(x)				((x) & 0x3f)
+
+#define TX_PRE_MUX				BIT(6)
+
+#define TX_POST_MUX				BIT(6)
+
+#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
+#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
+
+#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
+#define MIXER_DATARATE_MODE(x)			((x) & 3)
+
+#define VGA_THRESH_DFE(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
+#define SIGDET_FLT_BYP				BIT(0)
+
+#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
+
+#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
+
+#define INVERT_PCS_RX_CLK			BIT(7)
+
+#define DRVR_LOGIC_CLK_EN			BIT(4)
+#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
+
+#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
+
+#define BAND_MODE0(x)				((x) & 0x3)
+
+#define LANE_MODE(x)				((x) & 0x1f)
+
+#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
+#define EN_DLL_MODE0				BIT(4)
+#define EN_IQ_DCC_MODE0				BIT(3)
+#define EN_IQCAL_MODE0				BIT(2)
+
+#define BYPASS_RSM_SAMP_CAL			BIT(1)
+#define BYPASS_RSM_DLL_CAL			BIT(0)
+
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+
+#define PWRDN_B					BIT(0)
+
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;
+	u32 val;
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i)
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write sgmii_laned[] = {
+	/* CDR Settings */
+	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
+		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
+	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
+	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
+
+	/* TX/RX Settings */
+	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
+
+	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
+	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
+	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
+	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
+	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
+
+	{EMAC_SGMII_LN_CML_CTRL_MODE0,
+		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
+	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
+		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
+	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
+	{EMAC_SGMII_LN_SIGDET_ENABLES,
+		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
+	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
+
+	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
+	{EMAC_SGMII_LN_RX_MISC_CNTRL0, INVERT_PCS_RX_CLK},
+	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
+		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
+
+	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
+	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(1)},
+	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(2)},
+	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
+	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(2) |
+		EN_DLL_MODE0 | EN_IQ_DCC_MODE0 | EN_IQCAL_MODE0},
+	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
+};
+
+static const struct emac_reg_write physical_coding_sublayer_programming[] = {
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
+	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
+};
+
+int emac_sgmii_init_qdf2400(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	void __iomem *phy_regs = phy->base;
+	void __iomem *laned = phy->digital;
+	unsigned int i;
+	u32 lnstatus;
+
+	/* PCS lane-x init */
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming,
+			   ARRAY_SIZE(physical_coding_sublayer_programming));
+
+	/* SGMII lane-x init */
+	emac_reg_write_all(phy->digital, sgmii_laned, ARRAY_SIZE(sgmii_laned));
+
+	/* Power up PCS and start reset lane state machine */
+
+	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
+	writel(1, laned + SGMII_LN_RSM_START);
+
+	/* Wait for c_ready assertion */
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
+		if (lnstatus & BIT(1))
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) {
+		netdev_err(adpt->netdev, "SGMII failed to start\n");
+		return -EIO;
+	}
+
+	/* Disable digital and SERDES loopback */
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
+	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
+
+	/* Mask out all the SGMII Interrupt */
+	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 07c872a..0c6da54 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -164,17 +164,40 @@ static int emac_sgmii_acpi_match(struct device *dev, void *data)
 	static const struct acpi_device_id match_table[] = {
 		{
 			.id = "QCOM8071",
-			.driver_data = (kernel_ulong_t)emac_sgmii_init_qdf2432,
 		},
 		{}
 	};
 	const struct acpi_device_id *id = acpi_match_device(match_table, dev);
 	emac_sgmii_initialize *initialize = data;
 
-	if (id)
-		*initialize = (emac_sgmii_initialize)id->driver_data;
+	if (id) {
+		acpi_handle handle = ACPI_HANDLE(dev);
+		unsigned long long hrv;
+		acpi_status status;
+
+		status = acpi_evaluate_integer(handle, "_HRV", NULL, &hrv);
+		if (status) {
+			if (status == AE_NOT_FOUND)
+				/* Older versions of the QDF2432 ACPI tables do
+				 * not have an _HRV property.
+				 */
+				hrv = 1;
+			else
+				/* Something is wrong with the tables */
+				return 0;
+		}
 
-	return !!id;
+		switch (hrv) {
+		case 1:
+			*initialize = emac_sgmii_init_qdf2432;
+			return 1;
+		case 2:
+			*initialize = emac_sgmii_init_qdf2400;
+			return 1;
+		}
+	}
+
+	return 0;
 }
 
 static const struct of_device_id emac_sgmii_dt_match[] = {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
index e2bef14..80ed3dc 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
@@ -21,5 +21,6 @@ void emac_sgmii_reset(struct emac_adapter *adpt);
 
 int emac_sgmii_init_fsm9900(struct emac_adapter *adpt);
 int emac_sgmii_init_qdf2432(struct emac_adapter *adpt);
+int emac_sgmii_init_qdf2400(struct emac_adapter *adpt);
 
 #endif
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.  Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply related

* Re: [net-next][PATCH v2 18/18] RDS: IB: add missing connection cache usage info
From: Santosh Shilimkar @ 2016-12-07 20:42 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20161207.123642.1553217272033137094.davem@davemloft.net>

On 12/7/2016 9:36 AM, David Miller wrote:

[...]

> What does the newer tool do on an older kernel if it doesn't see
> the fields?  Does it check the size of the structure given back
> to it, and conditionally handle the older vs. the newer layout?
>
> It must do this.
>
Right but the rds-tool doesn't handle it well and needs
to be fixed before this or any additional change in rds-info
structures. To handle the kernel struct size issues with its
user copy, there is a provision to probe kernel struct len
and then decide on layout, but it's not used/implemented
in tools. The parsing comment I made is also not completely
accurate.

For now, I will drop this patch and submit it once the tools
code is fixed along with other used fields to handle both
layouts.

I should have checked the full compatibility matrix as you
commented. Sorry for the oversight. Thanks for your comments, Dave.

Regards,
Santosh

^ permalink raw reply

* Re: [patch] drivers: net: xgene: uninitialized variable in xgene_enet_free_pagepool()
From: Iyappan Subramanian @ 2016-12-07 20:46 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Keyur Chudgar, netdev, linux-kernel@vger.kernel.org,
	kernel-janitors
In-Reply-To: <20161207111424.GA5507@elgon.mountain>

On Wed, Dec 7, 2016 at 3:14 AM, Dan Carpenter <dan.carpenter@oracle.com> wrote:
> We never set "slots" in this function.
>
> Fixes: a9380b0f7be8 ("drivers: net: xgene: Add support for Jumbo frame")
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
> ---
> I copied how slots gets set in xgene_enet_rx_frame().  Static analysis.
> Not tested.
>
> diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> index 6c7eea8b36af..884a334e82d0 100644
> --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> @@ -635,6 +635,7 @@ static void xgene_enet_free_pagepool(struct xgene_enet_desc_ring *buf_pool,
>                 return;
>
>         dev = ndev_to_dev(buf_pool->ndev);
> +       slots = buf_pool->slots - 1;
>         head = buf_pool->head;
>
>         for (i = 0; i < 4; i++) {

Thanks.

Acked-by: Iyappan Subramanian <isubramanian@apm.com>

^ permalink raw reply

* Re: [PATCH] net: ethernet: slicoss: use module_pci_driver()
From: Lino Sanfilippo @ 2016-12-07 20:51 UTC (permalink / raw)
  To: Tobias Klauser; +Cc: netdev
In-Reply-To: <20161207134330.8829-1-tklauser@distanz.ch>

Hi Tobias,

On 07.12.2016 14:43, Tobias Klauser wrote:
> Use module_pci_driver() to get rid of some boilerplate code.
> 
> Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
> ---
>  drivers/net/ethernet/alacritech/slicoss.c | 13 +------------
>  1 file changed, 1 insertion(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
> index e77ecd5b307c..b9fbd0107008 100644
> --- a/drivers/net/ethernet/alacritech/slicoss.c
> +++ b/drivers/net/ethernet/alacritech/slicoss.c
> @@ -1863,18 +1863,7 @@ static struct pci_driver slic_driver = {
>  	.remove = slic_remove,
>  };
>  
> -static int __init slic_init_module(void)
> -{
> -	return pci_register_driver(&slic_driver);
> -}
> -
> -static void __exit slic_cleanup_module(void)
> -{
> -	pci_unregister_driver(&slic_driver);
> -}
> -
> -module_init(slic_init_module);
> -module_exit(slic_cleanup_module);
> +module_pci_driver(slic_driver);
>  
>  MODULE_DESCRIPTION("Alacritech non-accelerated SLIC driver");
>  MODULE_AUTHOR("Lino Sanfilippo <LinoSanfilippo@gmx.de>");
> 

Yes, makes sense.

Acked-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>

Thanks,
Lino

^ permalink raw reply

* [PATCH iproute2] Makefile: really suppress printing of directories
From: David Ahern @ 2016-12-07 20:55 UTC (permalink / raw)
  To: netdev, stephen; +Cc: David Ahern

The Makefile adds --no-print-directory to MAKEFLAGS if VERBOSE is not
defined; however, Config always defines VERBOSE. Update the check to
test whether VERBOSE is 0.

Fixes: 57bdf8b76451 ("Make builds default to quiet mode")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 37b68ad87f06..18de7dcb315b 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ ifneq ($(wildcard Config),)
 include Config
 endif
 
-ifndef VERBOSE
+ifeq ($(VERBOSE),0)
 MAKEFLAGS += --no-print-directory
 endif
 
-- 
2.1.4

^ permalink raw reply related

* Re: [PATCH 2/2] net: ethernet: stmmac: remove private tx queue lock
From: Pavel Machek @ 2016-12-07 20:55 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
	davem, linux-kernel, netdev
In-Reply-To: <1481141138-19466-3-git-send-email-LinoSanfilippo@gmx.de>

[-- Attachment #1: Type: text/plain, Size: 1719 bytes --]

Hi!

> The driver uses a private lock for synchronization between the xmit
> function and the xmit completion handler, but since the NETIF_F_LLTX flag
> is not set, the xmit function is also called with the xmit_lock held.
> 
> On the other hand the xmit completion handler first takes the private lock
> and (in case that the tx queue has been stopped) the xmit_lock, leading to
> a reverse locking order and the potential danger of a deadlock.
> 
> Fix this by removing the private lock completely and synchronizing the xmit
> function and completion handler solely by means of the xmit_lock. By doing
> this remove also the now unnecessary double check for a stopped tx queue.
> 
> Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>

Does not seem to apply to net-next based on
adc176c5472214971d77c1a61c83db9b01e9cdc7. Aha, that's the printk()
changes, probably would apply to mainline.

> index caf069a..db46ec4 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -1307,7 +1307,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
>  	unsigned int bytes_compl = 0, pkts_compl = 0;
>  	unsigned int entry = priv->dirty_tx;
>  
> -	spin_lock(&priv->tx_lock);
> +	netif_tx_lock(priv->dev);
>  
>  	priv->xstats.tx_clean++;
>

Should it use "netif_tx_lock_bh"?

I could not reproduce the deadlock without this patch, nor can I
detect anything wrong with this patch, so I guess that is:

Tested-by: Pavel Machek <pavel@denx.de>

Thanks,
									Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* Re: [PATCH 2/2] net: ethernet: stmmac: remove private tx queue lock
From: Pavel Machek @ 2016-12-07 20:59 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
	davem, linux-kernel, netdev
In-Reply-To: <1481141138-19466-3-git-send-email-LinoSanfilippo@gmx.de>

[-- Attachment #1: Type: text/plain, Size: 1881 bytes --]

Hi!

> The driver uses a private lock for synchronization between the xmit
> function and the xmit completion handler, but since the NETIF_F_LLTX flag
> is not set, the xmit function is also called with the xmit_lock held.
> 
> On the other hand the xmit completion handler first takes the private lock
> and (in case that the tx queue has been stopped) the xmit_lock, leading to
> a reverse locking order and the potential danger of a deadlock.
> 
> Fix this by removing the private lock completely and synchronizing the xmit
> function and completion handler solely by means of the xmit_lock. By doing
> this remove also the now unnecessary double check for a stopped tx queue.
> 
> Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>

Oops, sorry no, that broke the driver after a while:

(So please ignore my tested-by:)

root@wagabuibui:/data/tmp/udpt# ./udp-test raw 10.0.0.6 1234 1000 100
30
Sending 100 packets (1000b each) at an interval of 30ms, expected data
rate:3333333b/s (3373333b/s incl udp overhead)
[   30.948626] socfpga-dwmac ff702000.ethernet eth0: Link is Up -
100Mbps/Full - flow control rx/tx
[   31.076064] Link is Up - 100/Full
[   32.979526] random: crng init done
[  262.244030] ------------[ cut here ]------------
[  262.248669] WARNING: CPU: 0 PID: 0 at net/sched/sch_generic.c:316
dev_watchdog+0x254/0x26c
[  262.256916] NETDEV WATCHDOG: eth0 (socfpga-dwmac): transmit queue 0
timed out
[  262.264028] Modules linked in:
[  262.267102] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
4.9.0-rc7-118095-g2d70d9b-dirty #339
[  262.275328] Hardware name: Altera SOCFPGA
[  262.279352] [<8010f758>] (unwind_backtrace) from [<8010affc>]
(show_stack+0x10/0x14)

									Pavel

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* Re: [RFC PATCH net-next v3 1/2] macb: Add 1588 support in Cadence GEM.
From: Richard Cochran @ 2016-12-07 21:04 UTC (permalink / raw)
  To: Andrei Pistirica
  Cc: tbultel, boris.brezillon, rafalo, netdev, alexandre.belloni,
	nicolas.ferre, linux-kernel, harinikatakamlinux, michals, anirudh,
	punnaia, harini.katakam, davem, linux-arm-kernel
In-Reply-To: <20161207193908.GA13062@netboy>

On Wed, Dec 07, 2016 at 08:39:09PM +0100, Richard Cochran wrote:
> > +static s32 gem_ptp_max_adj(unsigned int f_nom)
> > +{
> > +	u64 adj;
> > +
> > +	/* The 48 bits of seconds for the GEM overflows every:
> > +	 * 2^48/(365.25 * 24 * 60 *60) =~ 8 925 512 years (~= 9 mil years),
> > +	 * thus the maximum adjust frequency must not overflow CNS register:
> > +	 *
> > +	 * addend  = 10^9/nominal_freq
> > +	 * adj_max = +/- addend*ppb_max/10^9
> > +	 * max_ppb = (2^8-1)*nominal_freq-10^9
> > +	 */
> > +	adj = f_nom;
> > +	adj *= 0xffff;
> > +	adj -= 1000000000ULL;
> 
> What is this computation, and how does it relate to the comment?

I am not sure what you meant, but it sounds like you are on the wrong
track.  Let me explain...

The max_adj has nothing at all to do with the width of the time
register.  Rather, it should reflect the maximum possible change in
the tuning word.

For example, with a nominal 8 ns period, the tuning word is 0x80000.
Looking at running the clock more slowly, the slowest possible word is
0x00001, meaning a difference of 0x7FFFF.  This implies an adjustment
of 0x7FFFF/0x80000 or 999998092 ppb.  Running more quickly, we can
already have 0x100000, twice as fast, or just under 2 billion ppb.

You should consider the extreme cases to determine the most limited
(smallest) max_adj value:

Case 1 - high frequency
~~~~~~~~~~~~~~~~~~~~~~~

With a nominal 1 ns period, we have the nominal tuning word 0x10000.
The smallest is 0x1 for a difference of 0xFFFF.  This corresponds to
an adjustment of 0xFFFF/0x10000 = .9999847412109375 or 999984741 ppb.

Case 2 - low frequency
~~~~~~~~~~~~~~~~~~~~~~

With a nominal 255 ns period, the nominal word is 0xFF0000, the
largest 0xFFFFFF, and the difference is 0xFFFF.  This corresponds to
an adjustment of 0xFFFF/0xFF0000 = .0039215087890625 or 3921508 ppb.

Since 3921508 ppb is a huge adjustment, you can simply use that as a
safe maximum, ignoring the actual input clock.

Thanks,
Richard

^ permalink raw reply

* Re: [patch] ser_gigaset: return -ENOMEM on error instead of success
From: Tilman Schmidt @ 2016-12-07 20:57 UTC (permalink / raw)
  To: Paul Bolle, Dan Carpenter
  Cc: Karsten Keil, David S. Miller, gigaset307x-common, netdev,
	kernel-janitors
In-Reply-To: <1481137583.12596.19.camel@tiscali.nl>


[-- Attachment #1.1: Type: text/plain, Size: 1067 bytes --]

Am 07.12.2016 um 20:06 schrieb Paul Bolle:
> On Wed, 2016-12-07 at 14:22 +0300, Dan Carpenter wrote:
>> If we can't allocate the resources in gigaset_initdriver() then we
>> should return -ENOMEM instead of zero.
> 
> That's entirely correct.

Agree.

> I'll be back (probably tomorrow) after a short test to see whether this really
> needs to go into stable. It almost certainly should, but I'd like to first see
> the mess the current code leaves behind once gigaset_initdriver() fails before
> saying so.

Not much of a mess, I reckon. Everything that has been allocated and
registered up to that point is properly deallocated and unregistered.
The code just fails to tell the kernel that module initialization has
failed, so the module remains loaded even though it can never be
called because it isn't hooked anywhere. That's a nuisance and a
waste of RAM, but not much more.

HTH
Tilman

-- 
Tilman Schmidt                              E-Mail: tilman@imap.cc
Bonn, Germany
Nous, on a des fleurs et des bougies pour nous protéger.


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply

* Re: [patch] ser_gigaset: return -ENOMEM on error instead of success
From: Paul Bolle @ 2016-12-07 21:08 UTC (permalink / raw)
  To: Tilman Schmidt, Dan Carpenter
  Cc: Karsten Keil, David S. Miller, gigaset307x-common, netdev,
	kernel-janitors
In-Reply-To: <d8eb1f69-e68e-7792-00de-b14e6bac1606@imap.cc>

Hi Tilman,

On Wed, 2016-12-07 at 21:57 +0100, Tilman Schmidt wrote:
> Not much of a mess, I reckon. Everything that has been allocated and
> registered up to that point is properly deallocated and unregistered.
> The code just fails to tell the kernel that module initialization has
> failed, so the module remains loaded even though it can never be
> called because it isn't hooked anywhere. That's a nuisance and a
> waste of RAM, but not much more.

Yes.

But then the removal of the module, which is the only reasonable thing to do
after all this has happened, seems to trigger a WARN in driver_unregister().
And it's that WARN that I think requires the entire stable song and dance.

Otherwise it would be, as far as I can tell, a hard to hit problem in an
obscure driver without any side effects.


Paul Bolle

^ permalink raw reply

* net-next closing, README
From: David Miller @ 2016-12-07 21:28 UTC (permalink / raw)
  To: netdev


The merge window is about to open, and next week I will be
having sporadic internet access while travelling around, therefore
I am closing net-next up tonight.

Therefore, please do not submit any new features or cleanups for
net-next.  Bug fixes for problems introduced in net-next are fine,
however.

Thank you.

^ permalink raw reply

* Re: [PATCH 2/2] net: ethernet: stmmac: remove private tx queue lock
From: Pavel Machek @ 2016-12-07 21:37 UTC (permalink / raw)
  To: Lino Sanfilippo
  Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
	davem, linux-kernel, netdev
In-Reply-To: <1481141138-19466-3-git-send-email-LinoSanfilippo@gmx.de>

[-- Attachment #1: Type: text/plain, Size: 5197 bytes --]

On Wed 2016-12-07 21:05:38, Lino Sanfilippo wrote:
> The driver uses a private lock for synchronization between the xmit
> function and the xmit completion handler, but since the NETIF_F_LLTX flag
> is not set, the xmit function is also called with the xmit_lock held.
> 
> On the other hand the xmit completion handler first takes the private lock
> and (in case that the tx queue has been stopped) the xmit_lock, leading to
> a reverse locking order and the potential danger of a deadlock.
> 
> Fix this by removing the private lock completely and synchronizing the xmit
> function and completion handler solely by means of the xmit_lock. By doing
> this remove also the now unnecessary double check for a stopped tx queue.
> 

FYI, here's modified version. I believe _bh versions are needed, and
I'm testing that version now. (Oh and I also ported it to net-next).

It survived 30 minutes of testing so far...

Best regards,
									Pavel

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Pavel Machek <pavel@denx.de>

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index dbacb80..eab04ae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -64,7 +64,6 @@ struct stmmac_priv {
 	dma_addr_t dma_tx_phy;
 	int tx_coalesce;
 	int hwts_tx_en;
-	spinlock_t tx_lock;
 	bool tx_path_in_lpi_mode;
 	struct timer_list txtimer;
 	bool tso;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 982c952..7415bc2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1308,7 +1308,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 	unsigned int bytes_compl = 0, pkts_compl = 0;
 	unsigned int entry = priv->dirty_tx;
 
-	spin_lock(&priv->tx_lock);
+	netif_tx_lock_bh(priv->dev);
 
 	priv->xstats.tx_clean++;
 
@@ -1378,23 +1378,18 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 
 	netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
 
-	if (unlikely(netif_queue_stopped(priv->dev) &&
-		     stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
-		netif_tx_lock(priv->dev);
-		if (netif_queue_stopped(priv->dev) &&
-		    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
-			netif_dbg(priv, tx_done, priv->dev,
-				  "%s: restart transmit\n", __func__);
-			netif_wake_queue(priv->dev);
-		}
-		netif_tx_unlock(priv->dev);
+	if (netif_queue_stopped(priv->dev) &&
+	    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
+		netif_dbg(priv, tx_done, priv->dev,
+			  "%s: restart transmit\n", __func__);
+		netif_wake_queue(priv->dev);
 	}
 
 	if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
 		stmmac_enable_eee_mode(priv);
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 	}
-	spin_unlock(&priv->tx_lock);
+	netif_tx_unlock_bh(priv->dev);
 }
 
 static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
@@ -2006,8 +2001,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	u8 proto_hdr_len;
 	int i;
 
-	spin_lock(&priv->tx_lock);
-
 	/* Compute header lengths */
 	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
@@ -2021,7 +2014,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 				   "%s: Tx Ring full when queue awake\n",
 				   __func__);
 		}
-		spin_unlock(&priv->tx_lock);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -2156,11 +2148,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
 				       STMMAC_CHAN0);
 
-	spin_unlock(&priv->tx_lock);
 	return NETDEV_TX_OK;
 
 dma_map_err:
-	spin_unlock(&priv->tx_lock);
 	dev_err(priv->device, "Tx dma map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
@@ -2192,10 +2182,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			return stmmac_tso_xmit(skb, dev);
 	}
 
-	spin_lock(&priv->tx_lock);
-
 	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
-		spin_unlock(&priv->tx_lock);
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 			/* This is a hard error, log it. */
@@ -2366,11 +2353,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
 					       STMMAC_CHAN0);
 
-	spin_unlock(&priv->tx_lock);
 	return NETDEV_TX_OK;
 
 dma_map_err:
-	spin_unlock(&priv->tx_lock);
 	netdev_err(priv->dev, "Tx DMA map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
@@ -3357,7 +3342,6 @@ int stmmac_dvr_probe(struct device *device,
 	netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
 
 	spin_lock_init(&priv->lock);
-	spin_lock_init(&priv->tx_lock);
 
 	ret = register_netdev(ndev);
 	if (ret) {




-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply related

* Re: [PATCH 2/2] net: ethernet: stmmac: remove private tx queue lock
From: Lino Sanfilippo @ 2016-12-07 21:43 UTC (permalink / raw)
  To: Pavel Machek
  Cc: bh74.an, ks.giri, vipul.pandya, peppe.cavallaro, alexandre.torgue,
	davem, linux-kernel, netdev
In-Reply-To: <20161207213757.GC2250@amd>

Hi Pavel,

On 07.12.2016 22:37, Pavel Machek wrote:
> On Wed 2016-12-07 21:05:38, Lino Sanfilippo wrote:
>> The driver uses a private lock for synchronization between the xmit
>> function and the xmit completion handler, but since the NETIF_F_LLTX flag
>> is not set, the xmit function is also called with the xmit_lock held.
>> 
>> On the other hand the xmit completion handler first takes the private lock
>> and (in case that the tx queue has been stopped) the xmit_lock, leading to
>> a reverse locking order and the potential danger of a deadlock.
>> 
>> Fix this by removing the private lock completely and synchronizing the xmit
>> function and completion handler solely by means of the xmit_lock. By doing
>> this remove also the now unnecessary double check for a stopped tx queue.
>> 
> 
> FYI, here's modified version. I believe _bh versions are needed, and
> I'm testing that version now. (Oh and I also ported it to net-next).
> 
> It survived 30 minutes of testing so far...
> 

First off, thanks for testing.
Hmm. I don't understand why _bh would be needed. We call that function from
BH context only (napi poll and timer).
Any idea?

Lino

^ permalink raw reply

* RE: [net-next 20/20] i40e: don't allow i40e_vsi_(add|kill)_vlan to operate when VID<1
From: Keller, Jacob E @ 2016-12-07 21:50 UTC (permalink / raw)
  To: Sergei Shtylyov, Kirsher, Jeffrey T, davem@davemloft.net
  Cc: netdev@vger.kernel.org, nhorman@redhat.com, sassmann@redhat.com,
	jogreene@redhat.com, guru.anbalagane@oracle.com
In-Reply-To: <8f82cd1d-0118-7b37-1a05-fa7b77d4e75c@cogentembedded.com>

> -----Original Message-----
> From: Sergei Shtylyov [mailto:sergei.shtylyov@cogentembedded.com]
> Sent: Wednesday, December 07, 2016 2:11 AM
> To: Kirsher, Jeffrey T <jeffrey.t.kirsher@intel.com>; davem@davemloft.net
> Cc: Keller, Jacob E <jacob.e.keller@intel.com>; netdev@vger.kernel.org;
> nhorman@redhat.com; sassmann@redhat.com; jogreene@redhat.com;
> guru.anbalagane@oracle.com
> Subject: Re: [net-next 20/20] i40e: don't allow i40e_vsi_(add|kill)_vlan to operate
> when VID<1
> 
> Hello!
> > +	if (!(vid > 0) || vsi->info.pvid)
> 
>     Why not just '!vid'?

Left over artifact of this previously being a signed value. We can fix this.

Thanks,
Jake

> > -void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
> > +void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
> >  {
> > +	if (!(vid > 0) || vsi->info.pvid)
> 
>     Likewise.

Same here. Can get this fixed.

Thanks,
Jake

> 
> > +		return;
> > +
> >  	spin_lock_bh(&vsi->mac_filter_hash_lock);
> >  	i40e_rm_vlan_all_mac(vsi, vid);
> >  	spin_unlock_bh(&vsi->mac_filter_hash_lock);
> 
> MBR, Sergei


^ permalink raw reply

* [PATCH 01/50] ipvs: Use IS_ERR_OR_NULL(svc) instead of IS_ERR(svc) || svc == NULL
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Gao Feng <fgao@ikuai8.com>

This minor refactoring does not change the logic of function
ip_vs_genl_dump_dests.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6b85ded4f91d..217e0105b5e0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3260,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
 
 
 	svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
-	if (IS_ERR(svc) || svc == NULL)
+	if (IS_ERR_OR_NULL(svc))
 		goto out_err;
 
 	/* Dump the destinations */
-- 
2.1.4


^ permalink raw reply related

* [PATCH 02/50] ipvs: Decrement ttl
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Dwip Banerjee <dwip@linux.vnet.ibm.com>

We decrement the IP ttl in all the modes in order to prevent infinite
route loops. The changes were done based on Julian Anastasov's
suggestions in a prior thread.

The ttl based check/discard and the actual decrement are done in
__ip_vs_get_out_rt() and in __ip_vs_get_out_rt_v6(), for the IPv6
case. decrement_ttl() implements the actual functionality for the
two cases.

Signed-off-by: Dwip Banerjee <dwip@linux.vnet.ibm.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 54 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 01d3d894de46..4e1a98fcc8c3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -254,6 +254,54 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
 	return true;
 }
 
+static inline bool decrement_ttl(struct netns_ipvs *ipvs,
+				 int skb_af,
+				 struct sk_buff *skb)
+{
+	struct net *net = ipvs->net;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (skb_af == AF_INET6) {
+		struct dst_entry *dst = skb_dst(skb);
+
+		/* check and decrement ttl */
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
+			/* Force OUTPUT device used as source address */
+			skb->dev = dst->dev;
+			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+				    ICMPV6_EXC_HOPLIMIT, 0);
+			__IP6_INC_STATS(net, ip6_dst_idev(dst),
+					IPSTATS_MIB_INHDRERRORS);
+
+			return false;
+		}
+
+		/* don't propagate ttl change to cloned packets */
+		if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
+			return false;
+
+		ipv6_hdr(skb)->hop_limit--;
+	} else
+#endif
+	{
+		if (ip_hdr(skb)->ttl <= 1) {
+			/* Tell the sender its packet died... */
+			__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+			icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+			return false;
+		}
+
+		/* don't propagate ttl change to cloned packets */
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
+			return false;
+
+		/* Decrease ttl */
+		ip_decrease_ttl(ip_hdr(skb));
+	}
+
+	return true;
+}
+
 /* Get route to destination or remote server */
 static int
 __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
@@ -326,6 +374,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
 		return local;
 	}
 
+	if (!decrement_ttl(ipvs, skb_af, skb))
+		goto err_put;
+
 	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
 		mtu = dst_mtu(&rt->dst);
 	} else {
@@ -473,6 +524,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
 		return local;
 	}
 
+	if (!decrement_ttl(ipvs, skb_af, skb))
+		goto err_put;
+
 	/* MTU checking */
 	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
 		mtu = dst_mtu(&rt->dst);
-- 
2.1.4


^ permalink raw reply related

* [PATCH 03/50] netfilter: update Arturo Borrero Gonzalez email address
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Arturo Borrero Gonzalez <arturo@debian.org>

The email address has changed, let's update the copyright statements.

Signed-off-by: Arturo Borrero Gonzalez <arturo@debian.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nft_masq_ipv4.c  | 4 ++--
 net/ipv4/netfilter/nft_redir_ipv4.c | 4 ++--
 net/ipv6/netfilter/nft_masq_ipv6.c  | 4 ++--
 net/ipv6/netfilter/nft_redir_ipv6.c | 4 ++--
 net/netfilter/nft_masq.c            | 4 ++--
 net/netfilter/nft_redir.c           | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 4f697e431811..4d69f99b8707 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -77,5 +77,5 @@ module_init(nft_masq_ipv4_module_init);
 module_exit(nft_masq_ipv4_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 16df0493c5ce..62c18e68ac58 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -71,5 +71,5 @@ module_init(nft_redir_ipv4_module_init);
 module_exit(nft_redir_ipv4_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index a2aff1277b40..93d758f70334 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -78,5 +78,5 @@ module_init(nft_masq_ipv6_module_init);
 module_exit(nft_masq_ipv6_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index bfcd5af6bc15..2850fcd8583f 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -72,5 +72,5 @@ module_init(nft_redir_ipv6_module_init);
 module_exit(nft_redir_ipv6_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
 MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 81b5ad6165ac..bf92de01410f 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -105,4 +105,4 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
 EXPORT_SYMBOL_GPL(nft_masq_dump);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 03f7bf40ae75..967e09b099b2 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -108,4 +108,4 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
 EXPORT_SYMBOL_GPL(nft_redir_dump);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-- 
2.1.4


^ permalink raw reply related

* [PATCH 04/50] netfilter: built-in NAT support for DCCP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Davide Caratti <dcaratti@redhat.com>

CONFIG_NF_NAT_PROTO_DCCP is no longer a tristate. When set to y, NAT
support for the DCCP protocol is built into nf_nat.ko.

footprint test:

(nf_nat_proto_)           | dccp   || nf_nat
--------------------------+--------++--------
no builtin                | 409800 || 2241312
DCCP builtin              |   -    || 2578968

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l4proto.h |  3 +++
 net/netfilter/Kconfig                  |  2 +-
 net/netfilter/Makefile                 |  3 ++-
 net/netfilter/nf_nat_core.c            |  4 ++++
 net/netfilter/nf_nat_proto_dccp.c      | 36 +---------------------------------
 5 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 12f4cc841b6e..92b147be00ef 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -54,6 +54,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
 extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
 extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
 extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
+#endif
 
 bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 44410d30d461..13092e5cd245 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -384,7 +384,7 @@ config NF_NAT_NEEDED
 	default y
 
 config NF_NAT_PROTO_DCCP
-	tristate
+	bool
 	depends on NF_NAT && NF_CT_PROTO_DCCP
 	default NF_NAT && NF_CT_PROTO_DCCP
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5bbf767672ec..9ea0c98e51e6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,8 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 nf_nat-y	:= nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
 		   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
+nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
 
@@ -55,7 +57,6 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o
 obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 
 # NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
 obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
 obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5b9c884a452e..69b121d11275 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -682,6 +682,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
 			 &nf_nat_l4proto_tcp);
 	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
 			 &nf_nat_l4proto_udp);
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
+			 &nf_nat_l4proto_dccp);
+#endif
 	mutex_unlock(&nf_nat_proto_mutex);
 
 	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 15c47b246d0d..269fcd5dc34c 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -10,8 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/skbuff.h>
 #include <linux/dccp.h>
 
@@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb,
 	return true;
 }
 
-static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
+const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
 	.l4proto		= IPPROTO_DCCP,
 	.manip_pkt		= dccp_manip_pkt,
 	.in_range		= nf_nat_l4proto_in_range,
@@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
 	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
-
-static int __init nf_nat_proto_dccp_init(void)
-{
-	int err;
-
-	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-	if (err < 0)
-		goto err1;
-	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
-	if (err < 0)
-		goto err2;
-	return 0;
-
-err2:
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-err1:
-	return err;
-}
-
-static void __exit nf_nat_proto_dccp_fini(void)
-{
-	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
-
-}
-
-module_init(nf_nat_proto_dccp_init);
-module_exit(nf_nat_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP NAT protocol helper");
-MODULE_LICENSE("GPL");
-- 
2.1.4


^ permalink raw reply related

* [PATCH 05/50] netfilter: built-in NAT support for SCTP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Davide Caratti <dcaratti@redhat.com>

CONFIG_NF_NAT_PROTO_SCTP is no longer a tristate. When set to y, NAT
support for the SCTP protocol is built into nf_nat.ko.

footprint test:

(nf_nat_proto_)           | sctp   || nf_nat
--------------------------+--------++--------
no builtin                | 428344 || 2241312
SCTP builtin              |   -    || 2597032

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l4proto.h |  3 +++
 net/netfilter/Kconfig                  |  2 +-
 net/netfilter/Makefile                 |  2 +-
 net/netfilter/nf_nat_core.c            |  4 ++++
 net/netfilter/nf_nat_proto_sctp.c      | 35 +---------------------------------
 5 files changed, 10 insertions(+), 36 deletions(-)

diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 92b147be00ef..2cbaf3856e21 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -57,6 +57,9 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
 #ifdef CONFIG_NF_NAT_PROTO_DCCP
 extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
 #endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
+#endif
 
 bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
 			     enum nf_nat_manip_type maniptype,
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 13092e5cd245..ad72edf1f6ec 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -394,7 +394,7 @@ config NF_NAT_PROTO_UDPLITE
 	default NF_NAT && NF_CT_PROTO_UDPLITE
 
 config NF_NAT_PROTO_SCTP
-	tristate
+	bool
 	default NF_NAT && NF_CT_PROTO_SCTP
 	depends on NF_NAT && NF_CT_PROTO_SCTP
 	select LIBCRC32C
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 9ea0c98e51e6..02ef6decf94d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -46,6 +46,7 @@ nf_nat-y	:= nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
 		   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
 
 nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
@@ -58,7 +59,6 @@ obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 
 # NAT protocols (nf_nat)
 obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
 # NAT helpers
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 69b121d11275..80858bd110cc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -686,6 +686,10 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
 	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
 			 &nf_nat_l4proto_dccp);
 #endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
+			 &nf_nat_l4proto_sctp);
+#endif
 	mutex_unlock(&nf_nat_proto_mutex);
 
 	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index cbc7ade1487b..2e14108ff697 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -7,9 +7,7 @@
  */
 
 #include <linux/types.h>
-#include <linux/init.h>
 #include <linux/sctp.h>
-#include <linux/module.h>
 #include <net/sctp/checksum.h>
 
 #include <net/netfilter/nf_nat_l4proto.h>
@@ -54,7 +52,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	return true;
 }
 
-static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
+const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
 	.l4proto		= IPPROTO_SCTP,
 	.manip_pkt		= sctp_manip_pkt,
 	.in_range		= nf_nat_l4proto_in_range,
@@ -63,34 +61,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
 	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
-
-static int __init nf_nat_proto_sctp_init(void)
-{
-	int err;
-
-	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-	if (err < 0)
-		goto err1;
-	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
-	if (err < 0)
-		goto err2;
-	return 0;
-
-err2:
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-err1:
-	return err;
-}
-
-static void __exit nf_nat_proto_sctp_exit(void)
-{
-	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
-	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
-}
-
-module_init(nf_nat_proto_sctp_init);
-module_exit(nf_nat_proto_sctp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCTP NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-- 
2.1.4


^ permalink raw reply related

* [PATCH 10/50] netfilter: conntrack: built-in support for DCCP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Davide Caratti <dcaratti@redhat.com>

CONFIG_NF_CT_PROTO_DCCP is no longer a tristate. When set to y, connection
tracking support for the DCCP protocol is built into nf_conntrack.ko.

footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \
        net/ipv4/netfilter/nf_conntrack_ipv4.ko \
        net/ipv6/netfilter/nf_conntrack_ipv6.ko

(builtin)||  dccp  |  ipv4  |  ipv6  | nf_conntrack
---------++--------+--------+--------+--------------
none     || 469140 | 828755 | 828676 | 6141434
DCCP     ||   -    | 830566 | 829935 | 6533526

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_dccp.h    |  2 +-
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 +
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h |  3 +
 include/net/netns/conntrack.h                  | 14 +++++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  3 +
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  3 +
 net/netfilter/Kconfig                          |  6 +-
 net/netfilter/Makefile                         |  3 +-
 net/netfilter/nf_conntrack_proto_dccp.c        | 79 ++++----------------------
 9 files changed, 41 insertions(+), 75 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h
index 40dcc82058d1..ff721d7325cf 100644
--- a/include/linux/netfilter/nf_conntrack_dccp.h
+++ b/include/linux/netfilter/nf_conntrack_dccp.h
@@ -25,7 +25,7 @@ enum ct_dccp_roles {
 #define CT_DCCP_ROLE_MAX	(__CT_DCCP_ROLE_MAX - 1)
 
 #ifdef __KERNEL__
-#include <net/netfilter/nf_conntrack_tuple.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
 
 struct nf_ct_dccp {
 	u_int8_t	role[IP_CT_DIR_MAX];
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 981c327374da..c2f155fd9299 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -15,6 +15,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+#endif
 
 int nf_conntrack_ipv4_compat_init(void);
 void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index a4c993685795..5ec66c0d21c4 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -6,6 +6,9 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+#endif
 
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3d06d94d2e52..440b781baf0b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -6,6 +6,9 @@
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
 #include <linux/netfilter/nf_conntrack_tcp.h>
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+#include <linux/netfilter/nf_conntrack_dccp.h>
+#endif
 #include <linux/seqlock.h>
 
 struct ctl_table_header;
@@ -48,12 +51,23 @@ struct nf_icmp_net {
 	unsigned int timeout;
 };
 
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+struct nf_dccp_net {
+	struct nf_proto_net pn;
+	int dccp_loose;
+	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
+};
+#endif
+
 struct nf_ip_net {
 	struct nf_generic_net   generic;
 	struct nf_tcp_net	tcp;
 	struct nf_udp_net	udp;
 	struct nf_icmp_net	icmp;
 	struct nf_icmp_net	icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	struct nf_dccp_net	dccp;
+#endif
 };
 
 struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7130ed5dc1fa..cb3cf770b00c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 	&nf_conntrack_l4proto_tcp4,
 	&nf_conntrack_l4proto_udp4,
 	&nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp4,
+#endif
 };
 
 static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 500be28ff563..f52338d02951 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -340,6 +340,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
 	&nf_conntrack_l4proto_tcp6,
 	&nf_conntrack_l4proto_udp6,
 	&nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp6,
+#endif
 };
 
 static int ipv6_net_init(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 496e1dcbd003..27a3d8c8f8ce 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -146,14 +146,14 @@ config NF_CONNTRACK_LABELS
 	  to connection tracking entries.  It selected by the connlabel match.
 
 config NF_CT_PROTO_DCCP
-	tristate 'DCCP protocol connection tracking support'
+	bool 'DCCP protocol connection tracking support'
 	depends on NETFILTER_ADVANCED
-	default IP_DCCP
+	default y
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on DCCP connections.
 
-	  If unsure, say 'N'.
+	  If unsure, say Y.
 
 config NF_CT_PROTO_GRE
 	tristate
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3b97d89df2cd..bbd0cc08eff0 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -5,6 +5,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -16,8 +17,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
 
-# SCTP protocol connection tracking
-obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
 obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 073b047314dc..b68ce6ac13b3 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -9,7 +9,6 @@
  *
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sysctl.h>
 #include <linux/spinlock.h>
@@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 	},
 };
 
-/* this module per-net specifics */
-static unsigned int dccp_net_id __read_mostly;
-struct dccp_net {
-	struct nf_proto_net pn;
-	int dccp_loose;
-	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
-};
-
-static inline struct dccp_net *dccp_pernet(struct net *net)
+static inline struct nf_dccp_net *dccp_pernet(struct net *net)
 {
-	return net_generic(net, dccp_net_id);
+	return &net->ct.nf_ct_proto.dccp;
 }
 
 static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		     unsigned int dataoff, unsigned int *timeouts)
 {
 	struct net *net = nf_ct_net(ct);
-	struct dccp_net *dn;
+	struct nf_dccp_net *dn;
 	struct dccp_hdr _dh, *dh;
 	const char *msg;
 	u_int8_t state;
@@ -719,7 +710,7 @@ static int dccp_nlattr_size(void)
 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
-	struct dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = dccp_pernet(net);
 	unsigned int *timeouts = data;
 	int i;
 
@@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = {
 #endif /* CONFIG_SYSCTL */
 
 static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
-				     struct dccp_net *dn)
+				     struct nf_dccp_net *dn)
 {
 #ifdef CONFIG_SYSCTL
 	if (pn->ctl_table)
@@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
 
 static int dccp_init_net(struct net *net, u_int16_t proto)
 {
-	struct dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = dccp_pernet(net);
 	struct nf_proto_net *pn = &dn->pn;
 
 	if (!pn->users) {
@@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
 	return dccp_kmemdup_sysctl_table(net, pn, dn);
 }
 
-static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = {
 	.l3proto		= AF_INET,
 	.l4proto		= IPPROTO_DCCP,
 	.name			= "dccp",
@@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.net_id			= &dccp_net_id,
 	.init_net		= dccp_init_net,
 };
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4);
 
-static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = {
 	.l3proto		= AF_INET6,
 	.l4proto		= IPPROTO_DCCP,
 	.name			= "dccp",
@@ -932,56 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.net_id			= &dccp_net_id,
 	.init_net		= dccp_init_net,
 };
-
-static struct nf_conntrack_l4proto *dccp_proto[] = {
-	&dccp_proto4,
-	&dccp_proto6,
-};
-
-static __net_init int dccp_net_init(struct net *net)
-{
-	return nf_ct_l4proto_pernet_register(net, dccp_proto,
-					     ARRAY_SIZE(dccp_proto));
-}
-
-static __net_exit void dccp_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister(net, dccp_proto,
-					ARRAY_SIZE(dccp_proto));
-}
-
-static struct pernet_operations dccp_net_ops = {
-	.init = dccp_net_init,
-	.exit = dccp_net_exit,
-	.id   = &dccp_net_id,
-	.size = sizeof(struct dccp_net),
-};
-
-static int __init nf_conntrack_proto_dccp_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&dccp_net_ops);
-	if (ret < 0)
-		return ret;
-	ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto));
-	if (ret < 0)
-		unregister_pernet_subsys(&dccp_net_ops);
-	return ret;
-}
-
-static void __exit nf_conntrack_proto_dccp_fini(void)
-{
-	nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto));
-	unregister_pernet_subsys(&dccp_net_ops);
-}
-
-module_init(nf_conntrack_proto_dccp_init);
-module_exit(nf_conntrack_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP connection tracking protocol helper");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6);
-- 
2.1.4


^ permalink raw reply related

* [PATCH 11/50] netfilter: conntrack: built-in support for SCTP
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Davide Caratti <dcaratti@redhat.com>

CONFIG_NF_CT_PROTO_SCTP is no longer a tristate. When set to y, connection
tracking support for the SCTP protocol is built into nf_conntrack.ko.

footprint test:
$ ls -l net/netfilter/nf_conntrack{_proto_sctp,}.ko \
        net/ipv4/netfilter/nf_conntrack_ipv4.ko \
        net/ipv6/netfilter/nf_conntrack_ipv6.ko

(builtin)||  sctp  |  ipv4  |  ipv6  | nf_conntrack
---------++--------+--------+--------+--------------
none     || 498243 | 828755 | 828676 | 6141434
SCTP     ||   -    | 829254 | 829175 | 6547872

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |  3 +
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h |  3 +
 include/net/netns/conntrack.h                  | 13 +++++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  3 +
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  3 +
 net/netfilter/Kconfig                          |  7 +--
 net/netfilter/Makefile                         |  2 +-
 net/netfilter/nf_conntrack_proto_sctp.c        | 76 +++-----------------------
 8 files changed, 38 insertions(+), 72 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index c2f155fd9299..5f1fc15a51fb 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -18,6 +18,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+#endif
 
 int nf_conntrack_ipv4_compat_init(void);
 void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 5ec66c0d21c4..f70d191a8820 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -9,6 +9,9 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
+#endif
 
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 440b781baf0b..17724c62de97 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -9,6 +9,9 @@
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 #include <linux/netfilter/nf_conntrack_dccp.h>
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+#include <linux/netfilter/nf_conntrack_sctp.h>
+#endif
 #include <linux/seqlock.h>
 
 struct ctl_table_header;
@@ -59,6 +62,13 @@ struct nf_dccp_net {
 };
 #endif
 
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+struct nf_sctp_net {
+	struct nf_proto_net pn;
+	unsigned int timeouts[SCTP_CONNTRACK_MAX];
+};
+#endif
+
 struct nf_ip_net {
 	struct nf_generic_net   generic;
 	struct nf_tcp_net	tcp;
@@ -68,6 +78,9 @@ struct nf_ip_net {
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 	struct nf_dccp_net	dccp;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	struct nf_sctp_net	sctp;
+#endif
 };
 
 struct ct_pcpu {
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index cb3cf770b00c..0a9d354ef314 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 	&nf_conntrack_l4proto_dccp4,
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	&nf_conntrack_l4proto_sctp4,
+#endif
 };
 
 static int ipv4_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index f52338d02951..1d8daafb1685 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -343,6 +343,9 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 	&nf_conntrack_l4proto_dccp6,
 #endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	&nf_conntrack_l4proto_sctp6,
+#endif
 };
 
 static int ipv6_net_init(struct net *net)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 27a3d8c8f8ce..29c0bf0a315d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -159,15 +159,14 @@ config NF_CT_PROTO_GRE
 	tristate
 
 config NF_CT_PROTO_SCTP
-	tristate 'SCTP protocol connection tracking support'
+	bool 'SCTP protocol connection tracking support'
 	depends on NETFILTER_ADVANCED
-	default IP_SCTP
+	default y
 	help
 	  With this option enabled, the layer 3 independent connection
 	  tracking code will be able to do state tracking on SCTP connections.
 
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
+	  If unsure, say Y.
 
 config NF_CT_PROTO_UDPLITE
 	tristate 'UDP-Lite protocol connection tracking support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index bbd0cc08eff0..6545c28ab746 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -6,6 +6,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -18,7 +19,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
 
 obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
-obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o
 
 # netlink interface for nf_conntrack
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d096c2d6b87b..a0efde38da44 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -15,7 +15,6 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/netfilter.h>
-#include <linux/module.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/sctp.h>
@@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
 	}
 };
 
-static unsigned int sctp_net_id	__read_mostly;
-struct sctp_net {
-	struct nf_proto_net pn;
-	unsigned int timeouts[SCTP_CONNTRACK_MAX];
-};
-
-static inline struct sctp_net *sctp_pernet(struct net *net)
+static inline struct nf_sctp_net *sctp_pernet(struct net *net)
 {
-	return net_generic(net, sctp_net_id);
+	return &net->ct.nf_ct_proto.sctp;
 }
 
 static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct sctp_net *sn = sctp_pernet(net);
+	struct nf_sctp_net *sn = sctp_pernet(net);
 	int i;
 
 	/* set default SCTP timeouts. */
@@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = {
 #endif
 
 static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				     struct sctp_net *sn)
+				     struct nf_sctp_net *sn)
 {
 #ifdef CONFIG_SYSCTL
 	if (pn->ctl_table)
@@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int sctp_init_net(struct net *net, u_int16_t proto)
 {
-	struct sctp_net *sn = sctp_pernet(net);
+	struct nf_sctp_net *sn = sctp_pernet(net);
 	struct nf_proto_net *pn = &sn->pn;
 
 	if (!pn->users) {
@@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
 	return sctp_kmemdup_sysctl_table(pn, sn);
 }
 
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_SCTP,
 	.name 			= "sctp",
@@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
 		.nla_policy	= sctp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.net_id			= &sctp_net_id,
 	.init_net		= sctp_init_net,
 };
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4);
 
-static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
+struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_SCTP,
 	.name 			= "sctp",
@@ -812,57 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
 	},
 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
 #endif
-	.net_id			= &sctp_net_id,
 	.init_net		= sctp_init_net,
 };
-
-static struct nf_conntrack_l4proto *sctp_proto[] = {
-	&nf_conntrack_l4proto_sctp4,
-	&nf_conntrack_l4proto_sctp6,
-};
-
-static int sctp_net_init(struct net *net)
-{
-	return nf_ct_l4proto_pernet_register(net, sctp_proto,
-					     ARRAY_SIZE(sctp_proto));
-}
-
-static void sctp_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister(net, sctp_proto,
-					ARRAY_SIZE(sctp_proto));
-}
-
-static struct pernet_operations sctp_net_ops = {
-	.init = sctp_net_init,
-	.exit = sctp_net_exit,
-	.id   = &sctp_net_id,
-	.size = sizeof(struct sctp_net),
-};
-
-static int __init nf_conntrack_proto_sctp_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&sctp_net_ops);
-	if (ret < 0)
-		return ret;
-	ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto));
-	if (ret < 0)
-		unregister_pernet_subsys(&sctp_net_ops);
-	return ret;
-}
-
-static void __exit nf_conntrack_proto_sctp_fini(void)
-{
-	nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto));
-	unregister_pernet_subsys(&sctp_net_ops);
-}
-
-module_init(nf_conntrack_proto_sctp_init);
-module_exit(nf_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
-MODULE_ALIAS("ip_conntrack_proto_sctp");
+EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6);
-- 
2.1.4


^ permalink raw reply related

* [PATCH 15/50] netfilter: nat: add dependencies on conntrack module
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

MASQUERADE, S/DNAT and REDIRECT already call functions that depend on the
conntrack module.

However, since the conntrack hooks are now registered in a lazy fashion
(i.e., only when needed) a symbol reference is not enough.

Thus, when something is added to a nat table, make sure that it will see
packets by calling nf_ct_netns_get() which will register the conntrack
hooks in the current netns.

An alternative would be to add these dependencies to the NAT table.

However, that has problems when using non-modular builds -- we might
register e.g. ipv6 conntrack before its initcall has run, leading to NULL
deref crashes since its per-netns storage has not yet been allocated.

Adding the dependency in the modules instead has the advantage that the nat
table also does not register its hooks until rules are added.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_MASQUERADE.c |  8 +++++++-
 net/netfilter/xt_NETMAP.c           | 11 +++++++++--
 net/netfilter/xt_REDIRECT.c         | 12 ++++++++++--
 net/netfilter/xt_nat.c              | 18 +++++++++++++++++-
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 34cfb9b0bc0a..a03e4e7ef5f9 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -41,7 +41,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 		pr_debug("bad rangesize %u\n", mr->rangesize);
 		return -EINVAL;
 	}
-	return 0;
+	return nf_ct_netns_get(par->net, par->family);
 }
 
 static unsigned int
@@ -59,6 +59,11 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 				      xt_out(par));
 }
 
+static void masquerade_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
+}
+
 static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV4,
@@ -67,6 +72,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
 	.table		= "nat",
 	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= masquerade_tg_check,
+	.destroy	= masquerade_tg_destroy,
 	.me		= THIS_MODULE,
 };
 
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
index 94d0b5411192..e45a01255e70 100644
--- a/net/netfilter/xt_NETMAP.c
+++ b/net/netfilter/xt_NETMAP.c
@@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
 
 	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
 		return -EINVAL;
-	return 0;
+	return nf_ct_netns_get(par->net, par->family);
+}
+
+static void netmap_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
 }
 
 static unsigned int
@@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param *par)
 		pr_debug("bad rangesize %u.\n", mr->rangesize);
 		return -EINVAL;
 	}
-	return 0;
+	return nf_ct_netns_get(par->net, par->family);
 }
 
 static struct xt_target netmap_tg_reg[] __read_mostly = {
@@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
 		              (1 << NF_INET_LOCAL_OUT) |
 		              (1 << NF_INET_LOCAL_IN),
 		.checkentry = netmap_tg6_checkentry,
+		.destroy    = netmap_tg_destroy,
 		.me         = THIS_MODULE,
 	},
 	{
@@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = {
 		              (1 << NF_INET_LOCAL_OUT) |
 		              (1 << NF_INET_LOCAL_IN),
 		.checkentry = netmap_tg4_check,
+		.destroy    = netmap_tg_destroy,
 		.me         = THIS_MODULE,
 	},
 };
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
index 651dce65a30b..98a4c6d4f1cb 100644
--- a/net/netfilter/xt_REDIRECT.c
+++ b/net/netfilter/xt_REDIRECT.c
@@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
 
 	if (range->flags & NF_NAT_RANGE_MAP_IPS)
 		return -EINVAL;
-	return 0;
+
+	return nf_ct_netns_get(par->net, par->family);
+}
+
+static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
 }
 
 /* FIXME: Take multiple ranges --RR */
@@ -56,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par)
 		pr_debug("bad rangesize %u.\n", mr->rangesize);
 		return -EINVAL;
 	}
-	return 0;
+	return nf_ct_netns_get(par->net, par->family);
 }
 
 static unsigned int
@@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
 		.revision   = 0,
 		.table      = "nat",
 		.checkentry = redirect_tg6_checkentry,
+		.destroy    = redirect_tg_destroy,
 		.target     = redirect_tg6,
 		.targetsize = sizeof(struct nf_nat_range),
 		.hooks      = (1 << NF_INET_PRE_ROUTING) |
@@ -85,6 +92,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = {
 		.table      = "nat",
 		.target     = redirect_tg4,
 		.checkentry = redirect_tg4_check,
+		.destroy    = redirect_tg_destroy,
 		.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.hooks      = (1 << NF_INET_PRE_ROUTING) |
 		              (1 << NF_INET_LOCAL_OUT),
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index bea7464cc43f..8107b3eb865f 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -23,7 +23,17 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
 			par->target->name);
 		return -EINVAL;
 	}
-	return 0;
+	return nf_ct_netns_get(par->net, par->family);
+}
+
+static int xt_nat_checkentry(const struct xt_tgchk_param *par)
+{
+	return nf_ct_netns_get(par->net, par->family);
+}
+
+static void xt_nat_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
 }
 
 static void xt_nat_convert_range(struct nf_nat_range *dst,
@@ -106,6 +116,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
 		.name		= "SNAT",
 		.revision	= 0,
 		.checkentry	= xt_nat_checkentry_v0,
+		.destroy	= xt_nat_destroy,
 		.target		= xt_snat_target_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.family		= NFPROTO_IPV4,
@@ -118,6 +129,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
 		.name		= "DNAT",
 		.revision	= 0,
 		.checkentry	= xt_nat_checkentry_v0,
+		.destroy	= xt_nat_destroy,
 		.target		= xt_dnat_target_v0,
 		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 		.family		= NFPROTO_IPV4,
@@ -129,6 +141,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
 	{
 		.name		= "SNAT",
 		.revision	= 1,
+		.checkentry	= xt_nat_checkentry,
+		.destroy	= xt_nat_destroy,
 		.target		= xt_snat_target_v1,
 		.targetsize	= sizeof(struct nf_nat_range),
 		.table		= "nat",
@@ -139,6 +153,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = {
 	{
 		.name		= "DNAT",
 		.revision	= 1,
+		.checkentry	= xt_nat_checkentry,
+		.destroy	= xt_nat_destroy,
 		.target		= xt_dnat_target_v1,
 		.targetsize	= sizeof(struct nf_nat_range),
 		.table		= "nat",
-- 
2.1.4


^ permalink raw reply related

* [PATCH 23/50] netfilter: x_tables: pass xt_counters struct instead of packet counter
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

On SMP we overload the packet counter (unsigned long) to contain the
percpu offset.  Hide this from callers and pass the xt_counters address
instead.

Preparation patch to allocate the percpu counters in page-sized batch
chunks.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 6 +-----
 net/ipv4/netfilter/arp_tables.c    | 4 ++--
 net/ipv4/netfilter/ip_tables.c     | 4 ++--
 net/ipv6/netfilter/ip6_tables.c    | 5 ++---
 net/netfilter/x_tables.c           | 9 +++++++++
 5 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index cd4eaf8df445..6e61edeb68e3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void)
 
 	return 0;
 }
-static inline void xt_percpu_counter_free(u64 pcnt)
-{
-	if (nr_cpu_ids > 1)
-		free_percpu((void __percpu *) (unsigned long) pcnt);
-}
+void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
 xt_get_this_cpu_counter(struct xt_counters *cnt)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 848a0704b28f..019f8e8dda6d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -439,7 +439,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
 err:
 	module_put(t->u.kernel.target->me);
 out:
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 
 	return ret;
 }
@@ -519,7 +519,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 46815c8a60d7..acc9a0c45bdf 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -582,7 +582,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 		cleanup_match(ematch, net);
 	}
 
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 
 	return ret;
 }
@@ -670,7 +670,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6ff42b8301cc..88b56a98905b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -612,7 +612,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 		cleanup_match(ematch, net);
 	}
 
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 
 	return ret;
 }
@@ -699,8 +699,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 	module_put(par.target->me);
-
-	xt_percpu_counter_free(e->counters.pcnt);
+	xt_percpu_counter_free(&e->counters);
 }
 
 /* Checks and translates the user-supplied table segment (held in
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ad818e52859b..0580029eb0ee 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1615,6 +1615,15 @@ void xt_proto_fini(struct net *net, u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_proto_fini);
 
+void xt_percpu_counter_free(struct xt_counters *counters)
+{
+	unsigned long pcnt = counters->pcnt;
+
+	if (nr_cpu_ids > 1)
+		free_percpu((void __percpu *)pcnt);
+}
+EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
+
 static int __net_init xt_net_init(struct net *net)
 {
 	int i;
-- 
2.1.4


^ permalink raw reply related

* [PATCH 25/50] netfilter: x_tables: pack percpu counter allocations
From: Pablo Neira Ayuso @ 2016-12-07 21:52 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1481147576-5690-1-git-send-email-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

Instead of allocating each xt_counter individually, allocate 4k chunks
and then use these for counter allocation requests.

This should speed up rule evaluation by increasing data locality; it
also speeds up ruleset loading because we reduce calls to the percpu
allocator.

As Eric points out, we can't use PAGE_SIZE, since page_allocator would fail
on arches with a 64k page size.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  7 ++++++-
 net/ipv4/netfilter/arp_tables.c    |  9 ++++++---
 net/ipv4/netfilter/ip_tables.c     |  9 ++++++---
 net/ipv6/netfilter/ip6_tables.c    |  9 ++++++---
 net/netfilter/x_tables.c           | 33 ++++++++++++++++++++++++---------
 5 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 05a94bd32c55..5117e4d2ddfa 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
+struct xt_percpu_counter_alloc_state {
+	unsigned int off;
+	const char __percpu *mem;
+};
 
-bool xt_percpu_counter_alloc(struct xt_counters *counters);
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+			     struct xt_counters *counter);
 void xt_percpu_counter_free(struct xt_counters *cnt);
 
 static inline struct xt_counters *
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 808deb275ceb..1258a9ab62ef 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -411,13 +411,14 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 }
 
 static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+		 struct xt_percpu_counter_alloc_state *alloc_state)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
 	int ret;
 
-	if (!xt_percpu_counter_alloc(&e->counters))
+	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
 		return -ENOMEM;
 
 	t = arpt_get_target(e);
@@ -525,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
 static int translate_table(struct xt_table_info *newinfo, void *entry0,
 			   const struct arpt_replace *repl)
 {
+	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 	struct arpt_entry *iter;
 	unsigned int *offsets;
 	unsigned int i;
@@ -587,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	xt_entry_foreach(iter, entry0, newinfo->size) {
-		ret = find_check_entry(iter, repl->name, repl->size);
+		ret = find_check_entry(iter, repl->name, repl->size,
+				       &alloc_state);
 		if (ret != 0)
 			break;
 		++i;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a48430d3420f..308b456723f0 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 
 static int
 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
-		 unsigned int size)
+		 unsigned int size,
+		 struct xt_percpu_counter_alloc_state *alloc_state)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
@@ -540,7 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
 
-	if (!xt_percpu_counter_alloc(&e->counters))
+	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
 		return -ENOMEM;
 
 	j = 0;
@@ -676,6 +677,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		const struct ipt_replace *repl)
 {
+	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 	struct ipt_entry *iter;
 	unsigned int *offsets;
 	unsigned int i;
@@ -735,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	xt_entry_foreach(iter, entry0, newinfo->size) {
-		ret = find_check_entry(iter, net, repl->name, repl->size);
+		ret = find_check_entry(iter, net, repl->name, repl->size,
+				       &alloc_state);
 		if (ret != 0)
 			break;
 		++i;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index a5a92083fd62..d56d8ac09a94 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -562,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
 
 static int
 find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
-		 unsigned int size)
+		 unsigned int size,
+		 struct xt_percpu_counter_alloc_state *alloc_state)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
@@ -571,7 +572,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
 
-	if (!xt_percpu_counter_alloc(&e->counters))
+	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
 		return -ENOMEM;
 
 	j = 0;
@@ -705,6 +706,7 @@ static int
 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		const struct ip6t_replace *repl)
 {
+	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
 	struct ip6t_entry *iter;
 	unsigned int *offsets;
 	unsigned int i;
@@ -764,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 	/* Finally, each sanity check must pass */
 	i = 0;
 	xt_entry_foreach(iter, entry0, newinfo->size) {
-		ret = find_check_entry(iter, net, repl->name, repl->size);
+		ret = find_check_entry(iter, net, repl->name, repl->size,
+				       &alloc_state);
 		if (ret != 0)
 			break;
 		++i;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index be5e83047594..f6ce4a7036e6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+#define XT_PCPU_BLOCK_SIZE 4096
 
 struct compat_delta {
 	unsigned int offset; /* offset in kernel */
@@ -1618,6 +1619,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
 /**
  * xt_percpu_counter_alloc - allocate x_tables rule counter
  *
+ * @state: pointer to xt_percpu allocation state
  * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
  *
  * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
@@ -1626,21 +1628,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
  * Rule evaluation needs to use xt_get_this_cpu_counter() helper
  * to fetch the real percpu counter.
  *
+ * To speed up allocation and improve data locality, a 4kb block is
+ * allocated and counters are handed out from it one after another.
+ *
+ * xt_percpu_counter_alloc_state contains the base address of the
+ * allocated block and the current sub-offset into it.
+ *
  * returns false on error.
  */
-bool xt_percpu_counter_alloc(struct xt_counters *counter)
+bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
+			     struct xt_counters *counter)
 {
-	void __percpu *res;
+	BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
 
 	if (nr_cpu_ids <= 1)
 		return true;
 
-	res = __alloc_percpu(sizeof(struct xt_counters),
-			     sizeof(struct xt_counters));
-	if (!res)
-		return false;
-
-	counter->pcnt = (__force unsigned long)res;
+	if (!state->mem) {
+		state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
+					    XT_PCPU_BLOCK_SIZE);
+		if (!state->mem)
+			return false;
+	}
+	counter->pcnt = (__force unsigned long)(state->mem + state->off);
+	state->off += sizeof(*counter);
+	if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
+		state->mem = NULL;
+		state->off = 0;
+	}
 	return true;
 }
 EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
@@ -1649,7 +1664,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
 {
 	unsigned long pcnt = counters->pcnt;
 
-	if (nr_cpu_ids > 1)
+	if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
 		free_percpu((void __percpu *)pcnt);
 }
 EXPORT_SYMBOL_GPL(xt_percpu_counter_free);
-- 
2.1.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox