* [net-next 10/18] ixgbe: Add support for x550em_a 10G MAC type
From: Jeff Kirsher @ 2016-04-08 3:21 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
Add support for x550em_a 10G MAC type to the ixgbe driver. The new
MAC includes new firmware commands that need to be used to control
PHY and IOSF access, so that support is also added. The interface
supported is a native SFP+ interface.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe.h | 3 +
drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 1 +
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 1 +
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c | 6 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 9 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 3 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 43 ++++-
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c | 2 +
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 6 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 38 +++++
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 208 ++++++++++++++++++++++-
11 files changed, 311 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 4590fab..d10ed62 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -817,6 +817,7 @@ static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
return IXGBE_MAX_RSS_INDICES;
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
return IXGBE_MAX_RSS_INDICES_X550;
default:
return 0;
@@ -860,6 +861,7 @@ enum ixgbe_boards {
board_X540,
board_X550,
board_X550EM_x,
+ board_x550em_a,
};
extern const struct ixgbe_info ixgbe_82598_info;
@@ -867,6 +869,7 @@ extern const struct ixgbe_info ixgbe_82599_info;
extern const struct ixgbe_info ixgbe_X540_info;
extern const struct ixgbe_info ixgbe_X550_info;
extern const struct ixgbe_info ixgbe_X550EM_x_info;
+extern const struct ixgbe_info ixgbe_x550em_a_info;
#ifdef CONFIG_IXGBE_DCB
extern const struct dcbnl_rtnl_ops dcbnl_ops;
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 4bb6b68..0151978 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1633,6 +1633,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
switch (hw->mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm);
break;
default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 8c560da..11450bd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -2855,6 +2855,7 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS;
max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599;
break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
index 02c7333..f8fb2ac 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
+ Copyright(c) 1999 - 2016 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -293,6 +293,7 @@ s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
return ixgbe_dcb_hw_config_82599(hw, pfc_en, refill, max,
bwgid, ptype, prio_tc);
default:
@@ -311,6 +312,7 @@ s32 ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
return ixgbe_dcb_config_pfc_82599(hw, pfc_en, prio_tc);
default:
break;
@@ -368,6 +370,7 @@ s32 ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max,
bwg_id, prio_type, prio_tc);
ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max,
@@ -398,6 +401,7 @@ void ixgbe_dcb_read_rtrup2tc(struct ixgbe_hw *hw, u8 *map)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
ixgbe_dcb_read_rtrup2tc_82599(hw, map);
break;
default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index b3530e1..9f76be1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2014 Intel Corporation.
+ Copyright(c) 1999 - 2016 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -547,6 +547,7 @@ static void ixgbe_get_regs(struct net_device *netdev,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL_82599(i));
regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH_82599(i));
break;
@@ -660,6 +661,7 @@ static void ixgbe_get_regs(struct net_device *netdev,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RTRPCS);
for (i = 0; i < 8; i++)
@@ -1443,6 +1445,7 @@ static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
toggle = 0x7FFFF30F;
test = reg_test_82599;
break;
@@ -1681,6 +1684,7 @@ static void ixgbe_free_desc_rings(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
reg_ctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
reg_ctl &= ~IXGBE_DMATXCTL_TE;
IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, reg_ctl);
@@ -1720,6 +1724,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_DMATXCTL);
reg_data |= IXGBE_DMATXCTL_TE;
IXGBE_WRITE_REG(&adapter->hw, IXGBE_DMATXCTL, reg_data);
@@ -1780,6 +1785,7 @@ static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
reg_data = IXGBE_READ_REG(hw, IXGBE_MACC);
reg_data |= IXGBE_MACC_FLU;
IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data);
@@ -2991,6 +2997,7 @@ static int ixgbe_get_ts_info(struct net_device *dev,
switch (adapter->hw.mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
case ixgbe_mac_X540:
case ixgbe_mac_82599EB:
info->so_timestamping =
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index e771e76..bcdc884 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2013 Intel Corporation.
+ Copyright(c) 1999 - 2016 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -128,6 +128,7 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
if (num_tcs > 4) {
/*
* TCs : TC0/1 TC2/3 TC4-7
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 9594438..eb93319 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -73,7 +73,7 @@ static char ixgbe_default_device_descr[] =
#define DRV_VERSION "4.2.1-k"
const char ixgbe_driver_version[] = DRV_VERSION;
static const char ixgbe_copyright[] =
- "Copyright (c) 1999-2015 Intel Corporation.";
+ "Copyright (c) 1999-2016 Intel Corporation.";
static const char ixgbe_overheat_msg[] = "Network adapter has been stopped because it has over heated. Restart the computer. If the problem persists, power off the system and replace the adapter";
@@ -83,6 +83,7 @@ static const struct ixgbe_info *ixgbe_info_tbl[] = {
[board_X540] = &ixgbe_X540_info,
[board_X550] = &ixgbe_X550_info,
[board_X550EM_x] = &ixgbe_X550EM_x_info,
+ [board_x550em_a] = &ixgbe_x550em_a_info,
};
/* ixgbe_pci_tbl - PCI Device ID Table
@@ -130,6 +131,7 @@ static const struct pci_device_id ixgbe_pci_tbl[] = {
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KR), board_X550EM_x},
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_10G_T), board_X550EM_x},
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_SFP), board_X550EM_x},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a },
/* required last entry */
{0, }
};
@@ -861,6 +863,7 @@ static void ixgbe_set_ivar(struct ixgbe_adapter *adapter, s8 direction,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
if (direction == -1) {
/* other causes */
msix_vector |= IXGBE_IVAR_ALLOC_VAL;
@@ -899,6 +902,7 @@ static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
mask = (qmask & 0xFFFFFFFF);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
mask = (qmask >> 32);
@@ -2245,6 +2249,7 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
ixgbe_set_ivar(adapter, -1, 1, v_idx);
break;
default:
@@ -2356,6 +2361,7 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
/*
* set the WDIS bit to not clear the timer bits and cause an
* immediate assertion of the interrupt
@@ -2517,6 +2523,7 @@ static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw)
return false;
case ixgbe_mac_82599EB:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
switch (hw->mac.ops.get_media_type(hw)) {
case ixgbe_media_type_fiber:
case ixgbe_media_type_fiber_qsfp:
@@ -2591,6 +2598,7 @@ static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
mask = (qmask & 0xFFFFFFFF);
if (mask)
IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
@@ -2619,6 +2627,7 @@ static inline void ixgbe_irq_disable_queues(struct ixgbe_adapter *adapter,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
mask = (qmask & 0xFFFFFFFF);
if (mask)
IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
@@ -2654,6 +2663,7 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
mask |= IXGBE_EIMS_TS;
break;
default:
@@ -2669,7 +2679,9 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
- if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_SFP)
+ case ixgbe_mac_x550em_a:
+ if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
+ adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP_N)
mask |= IXGBE_EIMS_GPI_SDP0(&adapter->hw);
if (adapter->hw.phy.type == ixgbe_phy_x550em_ext_t)
mask |= IXGBE_EICR_GPI_SDP0_X540;
@@ -2727,6 +2739,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
if (hw->phy.type == ixgbe_phy_x550em_ext_t &&
(eicr & IXGBE_EICR_GPI_SDP0_X540)) {
adapter->flags2 |= IXGBE_FLAG2_PHY_INTERRUPT;
@@ -2963,6 +2976,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
if (eicr & IXGBE_EICR_ECC) {
e_info(link, "Received ECC Err, initiating reset\n");
adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
@@ -3059,6 +3073,7 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
@@ -3858,6 +3873,7 @@ static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter)
break;
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
if (adapter->num_vfs)
rdrxctl |= IXGBE_RDRXCTL_PSP;
/* fall through for older HW */
@@ -4021,6 +4037,7 @@ static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
for (i = 0; i < adapter->num_rx_queues; i++) {
struct ixgbe_ring *ring = adapter->rx_ring[i];
@@ -4057,6 +4074,7 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
for (i = 0; i < adapter->num_rx_queues; i++) {
struct ixgbe_ring *ring = adapter->rx_ring[i];
@@ -4083,6 +4101,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
default:
if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
break;
@@ -4173,6 +4192,7 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
default:
if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
break;
@@ -4561,6 +4581,7 @@ static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter)
switch (adapter->hw.mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0);
adapter->vxlan_port = 0;
break;
@@ -4661,6 +4682,7 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
dv_id = IXGBE_DV_X540(link, tc);
break;
default:
@@ -4721,6 +4743,7 @@ static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
dv_id = IXGBE_LOW_DV_X540(tc);
break;
default:
@@ -5137,6 +5160,7 @@ static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
default:
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
@@ -5187,6 +5211,7 @@ static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter)
gpie |= IXGBE_SDP1_GPIEN_8259X | IXGBE_SDP2_GPIEN_8259X;
break;
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
gpie |= IXGBE_SDP0_GPIEN_X540;
break;
default:
@@ -5498,6 +5523,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
(IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
~IXGBE_DMATXCTL_TE));
@@ -5616,6 +5642,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
break;
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
case ixgbe_mac_X550:
#ifdef CONFIG_IXGBE_DCA
adapter->flags &= ~IXGBE_FLAG_DCA_CAPABLE;
@@ -5641,6 +5668,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
adapter->dcb_cfg.num_tcs.pg_tcs = X540_TRAFFIC_CLASS;
adapter->dcb_cfg.num_tcs.pfc_tcs = X540_TRAFFIC_CLASS;
break;
@@ -6248,6 +6276,7 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
pci_wake_from_d3(pdev, !!wufc);
break;
default:
@@ -6383,6 +6412,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
hwstats->pxonrxc[i] +=
IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
break;
@@ -6398,7 +6428,8 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
if ((hw->mac.type == ixgbe_mac_82599EB) ||
(hw->mac.type == ixgbe_mac_X540) ||
(hw->mac.type == ixgbe_mac_X550) ||
- (hw->mac.type == ixgbe_mac_X550EM_x)) {
+ (hw->mac.type == ixgbe_mac_X550EM_x) ||
+ (hw->mac.type == ixgbe_mac_x550em_a)) {
hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */
hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
@@ -6423,6 +6454,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
/* OS2BMC stats are X540 and later */
hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
@@ -6693,6 +6725,7 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
case ixgbe_mac_82599EB: {
u32 mflcn = IXGBE_READ_REG(hw, IXGBE_MFLCN);
u32 fccfg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
@@ -9146,6 +9179,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
break;
default:
@@ -9578,6 +9612,9 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
case ixgbe_mac_X550EM_x:
device_id = IXGBE_DEV_ID_X550EM_X_VF;
break;
+ case ixgbe_mac_x550em_a:
+ device_id = IXGBE_DEV_ID_X550EM_A_VF;
+ break;
default:
device_id = 0;
break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
index 2837c94..b2125e3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
@@ -307,6 +307,7 @@ static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number)
case ixgbe_mac_X540:
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
vflre = IXGBE_READ_REG(hw, IXGBE_VFLREC(reg_offset));
break;
default:
@@ -430,6 +431,7 @@ void ixgbe_init_mbx_params_pf(struct ixgbe_hw *hw)
if (hw->mac.type != ixgbe_mac_82599EB &&
hw->mac.type != ixgbe_mac_X550 &&
hw->mac.type != ixgbe_mac_X550EM_x &&
+ hw->mac.type != ixgbe_mac_x550em_a &&
hw->mac.type != ixgbe_mac_X540)
return;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index ef1504d..bdc8fdc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1,7 +1,7 @@
/*******************************************************************************
Intel 10 Gigabit PCI Express Linux driver
- Copyright(c) 1999 - 2015 Intel Corporation.
+ Copyright(c) 1999 - 2016 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
@@ -333,6 +333,7 @@ static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter,
*/
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
/* Upper 32 bits represent billions of cycles, lower 32 bits
* represent cycles. However, we use timespec64_to_ns for the
* correct math even though the units haven't been corrected
@@ -921,6 +922,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
switch (hw->mac.type) {
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
/* enable timestamping all packets only if at least some
* packets were requested. Otherwise, play nice and disable
* timestamping
@@ -1083,6 +1085,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
cc.shift = 2;
}
/* fallthrough */
+ case ixgbe_mac_x550em_a:
case ixgbe_mac_X550:
cc.read = ixgbe_ptp_read_X550;
@@ -1223,6 +1226,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
break;
case ixgbe_mac_X550:
case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name);
adapter->ptp_caps.owner = THIS_MODULE;
adapter->ptp_caps.max_adj = 30000000;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index fef2264..ced38c1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2627,6 +2627,20 @@ enum ixgbe_fdir_pballoc_type {
#define FW_MAX_READ_BUFFER_SIZE 1024
#define FW_DISABLE_RXEN_CMD 0xDE
#define FW_DISABLE_RXEN_LEN 0x1
+#define FW_PHY_MGMT_REQ_CMD 0x20
+#define FW_PHY_TOKEN_REQ_CMD 0x0A
+#define FW_PHY_TOKEN_REQ_LEN 2
+#define FW_PHY_TOKEN_REQ 0
+#define FW_PHY_TOKEN_REL 1
+#define FW_PHY_TOKEN_OK 1
+#define FW_PHY_TOKEN_RETRY 0x80
+#define FW_PHY_TOKEN_DELAY 5 /* milliseconds */
+#define FW_PHY_TOKEN_WAIT 5 /* seconds */
+#define FW_PHY_TOKEN_RETRIES ((FW_PHY_TOKEN_WAIT * 1000) / FW_PHY_TOKEN_DELAY)
+#define FW_INT_PHY_REQ_CMD 0xB
+#define FW_INT_PHY_REQ_LEN 10
+#define FW_INT_PHY_REQ_READ 0
+#define FW_INT_PHY_REQ_WRITE 1
/* Host Interface Command Structures */
struct ixgbe_hic_hdr {
@@ -2695,6 +2709,28 @@ struct ixgbe_hic_disable_rxen {
u16 pad3;
};
+struct ixgbe_hic_phy_token_req {
+ struct ixgbe_hic_hdr hdr;
+ u8 port_number;
+ u8 command_type;
+ u16 pad;
+};
+
+struct ixgbe_hic_internal_phy_req {
+ struct ixgbe_hic_hdr hdr;
+ u8 port_number;
+ u8 command_type;
+ __be16 address;
+ u16 rsv1;
+ __be32 write_data;
+ u16 pad;
+} __packed;
+
+struct ixgbe_hic_internal_phy_resp {
+ struct ixgbe_hic_hdr hdr;
+ __be32 read_data;
+};
+
/* Transmit Descriptor - Advanced */
union ixgbe_adv_tx_desc {
struct {
@@ -3528,6 +3564,8 @@ struct ixgbe_info {
#define IXGBE_ERR_INVALID_ARGUMENT -32
#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33
#define IXGBE_ERR_FDIR_CMD_INCOMPLETE -38
+#define IXGBE_ERR_FW_RESP_INVALID -39
+#define IXGBE_ERR_TOKEN_RETRY -40
#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF
#define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 878ea1e..ba161b5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -278,6 +278,8 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
ixgbe_setup_mux_ctl(hw);
ixgbe_check_cs4227(hw);
+ /* Fallthrough */
+ case IXGBE_DEV_ID_X550EM_A_SFP_N:
return ixgbe_identify_module_generic(hw);
case IXGBE_DEV_ID_X550EM_X_KX4:
hw->phy.type = ixgbe_phy_x550em_kx4;
@@ -413,6 +415,121 @@ out:
return ret;
}
+/**
+ * ixgbe_get_phy_token - Get the token for shared PHY access
+ * @hw: Pointer to hardware structure
+ */
+static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw)
+{
+ struct ixgbe_hic_phy_token_req token_cmd;
+ s32 status;
+
+ token_cmd.hdr.cmd = FW_PHY_TOKEN_REQ_CMD;
+ token_cmd.hdr.buf_len = FW_PHY_TOKEN_REQ_LEN;
+ token_cmd.hdr.cmd_or_resp.cmd_resv = 0;
+ token_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+ token_cmd.port_number = hw->bus.lan_id;
+ token_cmd.command_type = FW_PHY_TOKEN_REQ;
+ token_cmd.pad = 0;
+ status = ixgbe_host_interface_command(hw, &token_cmd, sizeof(token_cmd),
+ IXGBE_HI_COMMAND_TIMEOUT,
+ true);
+ if (status)
+ return status;
+ if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK)
+ return 0;
+ if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY)
+ return IXGBE_ERR_FW_RESP_INVALID;
+
+ return IXGBE_ERR_TOKEN_RETRY;
+}
+
+/**
+ * ixgbe_put_phy_token - Put the token for shared PHY access
+ * @hw: Pointer to hardware structure
+ */
+static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw)
+{
+ struct ixgbe_hic_phy_token_req token_cmd;
+ s32 status;
+
+ token_cmd.hdr.cmd = FW_PHY_TOKEN_REQ_CMD;
+ token_cmd.hdr.buf_len = FW_PHY_TOKEN_REQ_LEN;
+ token_cmd.hdr.cmd_or_resp.cmd_resv = 0;
+ token_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+ token_cmd.port_number = hw->bus.lan_id;
+ token_cmd.command_type = FW_PHY_TOKEN_REL;
+ token_cmd.pad = 0;
+ status = ixgbe_host_interface_command(hw, &token_cmd, sizeof(token_cmd),
+ IXGBE_HI_COMMAND_TIMEOUT,
+ true);
+ if (status)
+ return status;
+ if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK)
+ return 0;
+ return IXGBE_ERR_FW_RESP_INVALID;
+}
+
+/**
+ * ixgbe_write_iosf_sb_reg_x550a - Write to IOSF PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @device_type: 3 bit device type
+ * @data: Data to write to the register
+ **/
+static s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+ __always_unused u32 device_type,
+ u32 data)
+{
+ struct ixgbe_hic_internal_phy_req write_cmd;
+
+ memset(&write_cmd, 0, sizeof(write_cmd));
+ write_cmd.hdr.cmd = FW_INT_PHY_REQ_CMD;
+ write_cmd.hdr.buf_len = FW_INT_PHY_REQ_LEN;
+ write_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+ write_cmd.port_number = hw->bus.lan_id;
+ write_cmd.command_type = FW_INT_PHY_REQ_WRITE;
+ write_cmd.address = cpu_to_be16(reg_addr);
+ write_cmd.write_data = cpu_to_be32(data);
+
+ return ixgbe_host_interface_command(hw, &write_cmd, sizeof(write_cmd),
+ IXGBE_HI_COMMAND_TIMEOUT, false);
+}
+
+/**
+ * ixgbe_read_iosf_sb_reg_x550a - Read from IOSF PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @device_type: 3 bit device type
+ * @data: Pointer to read data from the register
+ **/
+static s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+ __always_unused u32 device_type,
+ u32 *data)
+{
+ union {
+ struct ixgbe_hic_internal_phy_req cmd;
+ struct ixgbe_hic_internal_phy_resp rsp;
+ } hic;
+ s32 status;
+
+ memset(&hic, 0, sizeof(hic));
+ hic.cmd.hdr.cmd = FW_INT_PHY_REQ_CMD;
+ hic.cmd.hdr.buf_len = FW_INT_PHY_REQ_LEN;
+ hic.cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+ hic.cmd.port_number = hw->bus.lan_id;
+ hic.cmd.command_type = FW_INT_PHY_REQ_READ;
+ hic.cmd.address = cpu_to_be16(reg_addr);
+
+ status = ixgbe_host_interface_command(hw, &hic.cmd, sizeof(hic.cmd),
+ IXGBE_HI_COMMAND_TIMEOUT, true);
+
+ /* Extract the register value from the response. */
+ *data = be32_to_cpu(hic.rsp.read_data);
+
+ return status;
+}
+
/** ixgbe_read_ee_hostif_data_X550 - Read EEPROM word using a host interface
* command assuming that the semaphore is already obtained.
* @hw: pointer to hardware structure
@@ -1339,9 +1456,9 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
mac->ops.disable_tx_laser = NULL;
mac->ops.enable_tx_laser = NULL;
mac->ops.flap_tx_laser = NULL;
+ mac->ops.setup_mac_link = ixgbe_setup_mac_link_sfp_x550em;
mac->ops.setup_link = ixgbe_setup_mac_link_multispeed_fiber;
mac->ops.setup_fc = ixgbe_setup_fc_x550em;
- mac->ops.setup_mac_link = ixgbe_setup_mac_link_sfp_x550em;
mac->ops.set_rate_select_speed =
ixgbe_set_soft_rate_select_speed;
break;
@@ -1349,6 +1466,8 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em;
mac->ops.setup_fc = ixgbe_setup_fc_generic;
mac->ops.check_link = ixgbe_check_link_t_X550em;
+ return;
+ case ixgbe_media_type_backplane:
break;
default:
mac->ops.setup_fc = ixgbe_setup_fc_x550em;
@@ -2107,11 +2226,12 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
media_type = ixgbe_media_type_backplane;
break;
case IXGBE_DEV_ID_X550EM_X_SFP:
+ case IXGBE_DEV_ID_X550EM_A_SFP_N:
media_type = ixgbe_media_type_fiber;
break;
case IXGBE_DEV_ID_X550EM_X_1G_T:
case IXGBE_DEV_ID_X550EM_X_10G_T:
- media_type = ixgbe_media_type_copper;
+ media_type = ixgbe_media_type_copper;
break;
default:
media_type = ixgbe_media_type_unknown;
@@ -2375,6 +2495,59 @@ static void ixgbe_release_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
ixgbe_release_swfw_sync_X540(hw, mask);
}
+/**
+ * ixgbe_acquire_swfw_sync_x550em_a - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore and get the shared PHY token as needed
+ */
+static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
+{
+ u32 hmask = mask & ~IXGBE_GSSR_TOKEN_SM;
+ int retries = FW_PHY_TOKEN_RETRIES;
+ s32 status;
+
+ while (--retries) {
+ status = 0;
+ if (hmask)
+ status = ixgbe_acquire_swfw_sync_X540(hw, hmask);
+ if (status)
+ return status;
+ if (!(mask & IXGBE_GSSR_TOKEN_SM))
+ return 0;
+
+ status = ixgbe_get_phy_token(hw);
+ if (!status)
+ return 0;
+ if (hmask)
+ ixgbe_release_swfw_sync_X540(hw, hmask);
+ if (status != IXGBE_ERR_TOKEN_RETRY)
+ return status;
+ udelay(FW_PHY_TOKEN_DELAY * 1000);
+ }
+
+ return status;
+}
+
+/**
+ * ixgbe_release_swfw_sync_x550em_a - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Release the SWFW semaphore and puts the shared PHY token as needed
+ */
+static void ixgbe_release_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
+{
+ u32 hmask = mask & ~IXGBE_GSSR_TOKEN_SM;
+
+ if (mask & IXGBE_GSSR_TOKEN_SM)
+ ixgbe_put_phy_token(hw);
+
+ if (hmask)
+ ixgbe_release_swfw_sync_X540(hw, hmask);
+}
+
#define X550_COMMON_MAC \
.init_hw = &ixgbe_init_hw_generic, \
.start_hw = &ixgbe_start_hw_X540, \
@@ -2455,6 +2628,23 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550,
};
+static struct ixgbe_mac_operations mac_ops_x550em_a = {
+ X550_COMMON_MAC
+ .reset_hw = ixgbe_reset_hw_X550em,
+ .get_media_type = ixgbe_get_media_type_X550em,
+ .get_san_mac_addr = NULL,
+ .get_wwn_prefix = NULL,
+ .setup_link = NULL, /* defined later */
+ .get_link_capabilities = ixgbe_get_link_capabilities_X550em,
+ .get_bus_info = ixgbe_get_bus_info_X550em,
+ .setup_sfp = ixgbe_setup_sfp_modules_X550em,
+ .acquire_swfw_sync = ixgbe_acquire_swfw_sync_x550em_a,
+ .release_swfw_sync = ixgbe_release_swfw_sync_x550em_a,
+ .setup_fc = ixgbe_setup_fc_generic,
+ .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a,
+ .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a,
+};
+
#define X550_COMMON_EEP \
.read = &ixgbe_read_ee_hostif_X550, \
.read_buffer = &ixgbe_read_ee_hostif_buffer_X550, \
@@ -2515,6 +2705,10 @@ static const u32 ixgbe_mvals_X550EM_x[IXGBE_MVALS_IDX_LIMIT] = {
IXGBE_MVALS_INIT(X550EM_x)
};
+static const u32 ixgbe_mvals_x550em_a[IXGBE_MVALS_IDX_LIMIT] = {
+ IXGBE_MVALS_INIT(X550EM_a)
+};
+
const struct ixgbe_info ixgbe_X550_info = {
.mac = ixgbe_mac_X550,
.get_invariants = &ixgbe_get_invariants_X540,
@@ -2534,3 +2728,13 @@ const struct ixgbe_info ixgbe_X550EM_x_info = {
.mbx_ops = &mbx_ops_generic,
.mvals = ixgbe_mvals_X550EM_x,
};
+
+const struct ixgbe_info ixgbe_x550em_a_info = {
+ .mac = ixgbe_mac_x550em_a,
+ .get_invariants = &ixgbe_get_invariants_X550_x,
+ .mac_ops = &mac_ops_x550em_a,
+ .eeprom_ops = &eeprom_ops_X550EM_x,
+ .phy_ops = &phy_ops_X550EM_x,
+ .mbx_ops = &mbx_ops_generic,
+ .mvals = ixgbe_mvals_x550em_a,
+};
--
2.5.5
^ permalink raw reply related
* [net-next 09/18] ixgbe: Use method pointer to access IOSF devices
From: Jeff Kirsher @ 2016-04-08 3:21 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
Provide method pointers and use them to access IOSF-attached
devices. A new MAC will introduce a new access method.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 ++
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 32 +++++++++++++++------------
2 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index b505da3..fef2264 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3332,6 +3332,8 @@ struct ixgbe_mac_operations {
s32 (*dmac_config)(struct ixgbe_hw *hw);
s32 (*dmac_update_tcs)(struct ixgbe_hw *hw);
s32 (*dmac_config_tcs)(struct ixgbe_hw *hw);
+ s32 (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *);
+ s32 (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32);
};
struct ixgbe_phy_operations {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 65832fa..878ea1e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1615,7 +1615,7 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
s32 status;
u32 reg_val;
- status = ixgbe_read_iosf_sb_reg_x550(hw,
+ status = hw->mac.ops.read_iosf_sb_reg(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val);
if (status)
@@ -1637,7 +1637,7 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
/* Restart auto-negotiation. */
reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
- status = ixgbe_write_iosf_sb_reg_x550(hw,
+ status = hw->mac.ops.write_iosf_sb_reg(hw,
IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
@@ -1654,9 +1654,9 @@ static s32 ixgbe_setup_kx4_x550em(struct ixgbe_hw *hw)
s32 status;
u32 reg_val;
- status = ixgbe_read_iosf_sb_reg_x550(hw, IXGBE_KX4_LINK_CNTL_1,
- IXGBE_SB_IOSF_TARGET_KX4_PCS0 +
- hw->bus.lan_id, ®_val);
+ status = hw->mac.ops.read_iosf_sb_reg(hw, IXGBE_KX4_LINK_CNTL_1,
+ IXGBE_SB_IOSF_TARGET_KX4_PCS0 +
+ hw->bus.lan_id, ®_val);
if (status)
return status;
@@ -1675,9 +1675,9 @@ static s32 ixgbe_setup_kx4_x550em(struct ixgbe_hw *hw)
/* Restart auto-negotiation. */
reg_val |= IXGBE_KX4_LINK_CNTL_1_TETH_AN_RESTART;
- status = ixgbe_write_iosf_sb_reg_x550(hw, IXGBE_KX4_LINK_CNTL_1,
- IXGBE_SB_IOSF_TARGET_KX4_PCS0 +
- hw->bus.lan_id, reg_val);
+ status = hw->mac.ops.write_iosf_sb_reg(hw, IXGBE_KX4_LINK_CNTL_1,
+ IXGBE_SB_IOSF_TARGET_KX4_PCS0 +
+ hw->bus.lan_id, reg_val);
return status;
}
@@ -1897,9 +1897,10 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
if (hw->device_id != IXGBE_DEV_ID_X550EM_X_KR)
return 0;
- rc = ixgbe_read_iosf_sb_reg_x550(hw,
- IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
- IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val);
+ rc = hw->mac.ops.read_iosf_sb_reg(hw,
+ IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY,
+ ®_val);
if (rc)
return rc;
@@ -1909,9 +1910,10 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
if (asm_dir)
reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
- rc = ixgbe_write_iosf_sb_reg_x550(hw,
- IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
- IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+ rc = hw->mac.ops.write_iosf_sb_reg(hw,
+ IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+ IXGBE_SB_IOSF_TARGET_KR_PHY,
+ reg_val);
/* This device does not fully support AN. */
hw->fc.disable_fc_autoneg = true;
@@ -2449,6 +2451,8 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
.release_swfw_sync = &ixgbe_release_swfw_sync_X550em,
.init_swfw_sync = &ixgbe_init_swfw_sync_X540,
.setup_fc = NULL, /* defined later */
+ .read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550,
+ .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550,
};
#define X550_COMMON_EEP \
--
2.5.5
^ permalink raw reply related
* [net-next 08/18] ixgbe: Add definitions for x550em_a 10G MAC
From: Jeff Kirsher @ 2016-04-08 3:21 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
Add definitions for a x550em_a 10G MAC device with a native SFP
interface.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index bd95be2..b505da3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -81,16 +81,18 @@
#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC
#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD
#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE
+#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4
+
+/* VF Device IDs */
#define IXGBE_DEV_ID_X550_VF_HV 0x1564
#define IXGBE_DEV_ID_X550_VF 0x1565
#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8
#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9
-
-/* VF Device IDs */
#define IXGBE_DEV_ID_82599_VF 0x10ED
#define IXGBE_DEV_ID_X540_VF 0x1515
#define IXGBE_DEV_ID_X550_VF 0x1565
#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8
+#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5
#define IXGBE_CAT(r, m) IXGBE_##r##_##m
@@ -129,7 +131,7 @@
#define IXGBE_FLA_X540 IXGBE_FLA_8259X
#define IXGBE_FLA_X550 IXGBE_FLA_8259X
#define IXGBE_FLA_X550EM_x IXGBE_FLA_8259X
-#define IXGBE_FLA_X550EM_a 0x15F6C
+#define IXGBE_FLA_X550EM_a 0x15F68
#define IXGBE_FLA(_hw) IXGBE_BY_MAC((_hw), FLA)
#define IXGBE_EEMNGCTL 0x10110
#define IXGBE_EEMNGDATA 0x10114
@@ -369,6 +371,8 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4))
#define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4))
#define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4))
+#define IXGBE_WQBR_RX(_i) (0x2FB0 + ((_i) * 4)) /* 4 total */
+#define IXGBE_WQBR_TX(_i) (0x8130 + ((_i) * 4)) /* 4 total */
#define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/
#define IXGBE_RXFECCERR0 0x051B8
#define IXGBE_LLITHRESH 0x0EC90
@@ -440,6 +444,8 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_DMATXCTL_TE 0x1 /* Transmit Enable */
#define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */
#define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */
+#define IXGBE_DMATXCTL_MDP_EN 0x20 /* Bit 5 */
+#define IXGBE_DMATXCTL_MBINTEN 0x40 /* Bit 6 */
#define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */
#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */
@@ -548,7 +554,6 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_TDPT2TCCR(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
#define IXGBE_TDPT2TCSR(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
-
/* Security Control Registers */
#define IXGBE_SECTXCTRL 0x08800
#define IXGBE_SECTXSTAT 0x08804
@@ -1197,6 +1202,8 @@ struct ixgbe_thermal_sensor_data {
#define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disable RSC compl on LLI */
#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC enabled */
#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC enabled */
+#define IXGBE_RDRXCTL_MBINTEN 0x10000000
+#define IXGBE_RDRXCTL_MDP_EN 0x20000000
/* RQTC Bit Masks and Shifts */
#define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4)
@@ -1951,7 +1958,9 @@ enum {
#define IXGBE_GSSR_PHY1_SM 0x0004
#define IXGBE_GSSR_MAC_CSR_SM 0x0008
#define IXGBE_GSSR_FLASH_SM 0x0010
+#define IXGBE_GSSR_NVM_UPDATE_SM 0x0200
#define IXGBE_GSSR_SW_MNG_SM 0x0400
+#define IXGBE_GSSR_TOKEN_SM 0x40000000 /* SW bit for shared access */
#define IXGBE_GSSR_SHARED_I2C_SM 0x1806 /* Wait for both phys & I2Cs */
#define IXGBE_GSSR_I2C_MASK 0x1800
#define IXGBE_GSSR_NVM_PHY_MASK 0xF
@@ -2524,6 +2533,10 @@ enum ixgbe_fdir_pballoc_type {
#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS 0x00000080
#define IXGBE_FDIRCTRL_DROP_Q_SHIFT 8
#define IXGBE_FDIRCTRL_FLEX_SHIFT 16
+#define IXGBE_FDIRCTRL_DROP_NO_MATCH 0x00008000
+#define IXGBE_FDIRCTRL_FILTERMODE_SHIFT 21
+#define IXGBE_FDIRCTRL_FILTERMODE_MACVLAN 0x0001 /* bit 23:21, 001b */
+#define IXGBE_FDIRCTRL_FILTERMODE_CLOUD 0x0002 /* bit 23:21, 010b */
#define IXGBE_FDIRCTRL_SEARCHLIM 0x00800000
#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT 24
#define IXGBE_FDIRCTRL_FULL_THRESH_MASK 0xF0000000
@@ -2982,6 +2995,7 @@ enum ixgbe_mac_type {
ixgbe_mac_X540,
ixgbe_mac_X550,
ixgbe_mac_X550EM_x,
+ ixgbe_mac_x550em_a,
ixgbe_num_macs
};
--
2.5.5
^ permalink raw reply related
* [net-next 06/18] ixgbe/ixgbevf: Add support for bulk free in Tx cleanup & cleanup boolean logic
From: Jeff Kirsher @ 2016-04-08 3:21 UTC (permalink / raw)
To: davem; +Cc: Alexander Duyck, netdev, nhorman, sassmann, jogreene,
Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Alexander Duyck <aduyck@mirantis.com>
This patch enables bulk free in Tx cleanup for ixgbevf and cleans up the
boolean logic in the polling routines for ixgbe and ixgbevf in the hopes of
avoiding any mix-ups similar to what occurred with i40e and i40evf.
Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 +++++++---
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 +++++++++-----
2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 19bf386..d5509cc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1111,6 +1111,7 @@ static int ixgbe_tx_maxrate(struct net_device *netdev,
* ixgbe_clean_tx_irq - Reclaim resources after transmit completes
* @q_vector: structure containing interrupt and ring information
* @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
**/
static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring *tx_ring, int napi_budget)
@@ -2807,8 +2808,10 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
ixgbe_update_dca(q_vector);
#endif
- ixgbe_for_each_ring(ring, q_vector->tx)
- clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring, budget);
+ ixgbe_for_each_ring(ring, q_vector->tx) {
+ if (!ixgbe_clean_tx_irq(q_vector, ring, budget))
+ clean_complete = false;
+ }
/* Exit if we are called by netpoll or busy polling is active */
if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector))
@@ -2826,7 +2829,8 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
per_ring_budget);
work_done += cleaned;
- clean_complete &= (cleaned < per_ring_budget);
+ if (cleaned >= per_ring_budget)
+ clean_complete = false;
}
ixgbe_qv_unlock_napi(q_vector);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 50b6bff..007cbe0 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -288,9 +288,10 @@ static void ixgbevf_tx_timeout(struct net_device *netdev)
* ixgbevf_clean_tx_irq - Reclaim resources after transmit completes
* @q_vector: board private structure
* @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
**/
static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
- struct ixgbevf_ring *tx_ring)
+ struct ixgbevf_ring *tx_ring, int napi_budget)
{
struct ixgbevf_adapter *adapter = q_vector->adapter;
struct ixgbevf_tx_buffer *tx_buffer;
@@ -328,7 +329,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
total_packets += tx_buffer->gso_segs;
/* free the skb */
- dev_kfree_skb_any(tx_buffer->skb);
+ napi_consume_skb(tx_buffer->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -1013,8 +1014,10 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget)
int per_ring_budget, work_done = 0;
bool clean_complete = true;
- ixgbevf_for_each_ring(ring, q_vector->tx)
- clean_complete &= ixgbevf_clean_tx_irq(q_vector, ring);
+ ixgbevf_for_each_ring(ring, q_vector->tx) {
+ if (!ixgbevf_clean_tx_irq(q_vector, ring, budget))
+ clean_complete = false;
+ }
if (budget <= 0)
return budget;
@@ -1035,7 +1038,8 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget)
int cleaned = ixgbevf_clean_rx_irq(q_vector, ring,
per_ring_budget);
work_done += cleaned;
- clean_complete &= (cleaned < per_ring_budget);
+ if (cleaned >= per_ring_budget)
+ clean_complete = false;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
--
2.5.5
^ permalink raw reply related
* [net-next 07/18] ixgbe: Add support for single-port X550 device
From: Jeff Kirsher @ 2016-04-08 3:21 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
Add support for a single-port X550 device.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 1 +
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 ++
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1 +
3 files changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index ee43a38..8c560da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -97,6 +97,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
case IXGBE_DEV_ID_X540T:
case IXGBE_DEV_ID_X540T1:
case IXGBE_DEV_ID_X550T:
+ case IXGBE_DEV_ID_X550T1:
case IXGBE_DEV_ID_X550EM_X_10G_T:
supported = true;
break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index d5509cc..9594438 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -125,6 +125,7 @@ static const struct pci_device_id ixgbe_pci_tbl[] = {
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP), board_82599 },
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T1), board_X540 },
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T), board_X550},
+ {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T1), board_X550},
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KX4), board_X550EM_x},
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KR), board_X550EM_x},
{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_10G_T), board_X550EM_x},
@@ -8983,6 +8984,7 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
case IXGBE_DEV_ID_X540T:
case IXGBE_DEV_ID_X540T1:
case IXGBE_DEV_ID_X550T:
+ case IXGBE_DEV_ID_X550T1:
case IXGBE_DEV_ID_X550EM_X_KX4:
case IXGBE_DEV_ID_X550EM_X_KR:
case IXGBE_DEV_ID_X550EM_X_10G_T:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 7ae4bbd..bd95be2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -75,6 +75,7 @@
#define IXGBE_DEV_ID_X540T1 0x1560
#define IXGBE_DEV_ID_X550T 0x1563
+#define IXGBE_DEV_ID_X550T1 0x15D1
#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA
#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB
#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC
--
2.5.5
^ permalink raw reply related
* [net-next 05/18] ixgbe: Take manageability semaphore for firmware commands
From: Jeff Kirsher @ 2016-04-08 3:21 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
We need to take the manageability semaphore when issuing firmware
commands to avoid problems. With this in place, the semaphore is
no longer taken in the ixgbe_set_fw_drv_ver_generic function, since
it will now always be taken by the ixgbe_host_interface_command
function.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 30 ++++++++++++++++---------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index b8cdff7..ee43a38 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3494,11 +3494,16 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
struct ixgbe_hic_hdr hdr;
u32 u32arr[1];
} *bp = buffer;
+ s32 status;
if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length);
return IXGBE_ERR_HOST_INTERFACE_COMMAND;
}
+ /* Take management host interface semaphore */
+ status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+ if (status)
+ return status;
/* Set bit 9 of FWSTS clearing FW reset indication */
fwsts = IXGBE_READ_REG(hw, IXGBE_FWSTS);
@@ -3508,13 +3513,15 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
if (!(hicr & IXGBE_HICR_EN)) {
hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
- return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ status = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto rel_out;
}
/* Calculate length in DWORDs. We must be DWORD aligned */
if (length % sizeof(u32)) {
hw_dbg(hw, "Buffer length failure, not aligned to dword");
- return IXGBE_ERR_INVALID_ARGUMENT;
+ status = IXGBE_ERR_INVALID_ARGUMENT;
+ goto rel_out;
}
dword_len = length >> 2;
@@ -3540,11 +3547,12 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
if ((timeout && i == timeout) ||
!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) {
hw_dbg(hw, "Command has failed with no status valid.\n");
- return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ status = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto rel_out;
}
if (!return_data)
- return 0;
+ goto rel_out;
/* Calculate length in DWORDs */
dword_len = hdr_size >> 2;
@@ -3558,11 +3566,12 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
/* If there is any thing in data position pull it in */
buf_len = bp->hdr.buf_len;
if (!buf_len)
- return 0;
+ goto rel_out;
if (length < round_up(buf_len, 4) + hdr_size) {
hw_dbg(hw, "Buffer not large enough for reply message.\n");
- return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ status = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+ goto rel_out;
}
/* Calculate length in DWORDs, add 3 for odd lengths */
@@ -3574,7 +3583,10 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
le32_to_cpus(&bp->u32arr[bi]);
}
- return 0;
+rel_out:
+ hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+
+ return status;
}
/**
@@ -3597,9 +3609,6 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
int i;
s32 ret_val;
- if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM))
- return IXGBE_ERR_SWFW_SYNC;
-
fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
@@ -3631,7 +3640,6 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
break;
}
- hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
return ret_val;
}
--
2.5.5
^ permalink raw reply related
* [net-next 03/18] ixgbe: Correct length check for round up
From: Jeff Kirsher @ 2016-04-08 3:20 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
The function ixgbe_host_interface_command actually uses a multiple
of word sized buffer to do its business, but only checks against
the actual length passed in. This means that on read operations it
could be possible to modify locations beyond the length passed in.
Change the check to round up in the same way, just to avoid any
possible hazard.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index dfdb114..a2ca9ef 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3557,7 +3557,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
if (buf_len == 0)
return 0;
- if (length < (buf_len + hdr_size)) {
+ if (length < round_up(buf_len, 4) + hdr_size) {
hw_dbg(hw, "Buffer not large enough for reply message.\n");
return IXGBE_ERR_HOST_INTERFACE_COMMAND;
}
--
2.5.5
^ permalink raw reply related
* [net-next 04/18] ixgbe: Clean up interface for firmware commands
From: Jeff Kirsher @ 2016-04-08 3:20 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
Clean up the interface for issuing firmware commands to use a
void * instead of a u32 *. This eliminates a number of casts.
Also clean up ixgbe_host_interface_command in a few other ways,
eliminating comparisons with 0, redundant parens and minor
formatting issues.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 39 +++++++++++++------------
drivers/net/ethernet/intel/ixgbe/ixgbe_common.h | 4 +--
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 13 ++++-----
3 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index a2ca9ef..b8cdff7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3483,15 +3483,19 @@ static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
* Communicates with the manageability block. On success return 0
* else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
**/
-s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
+s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
u32 length, u32 timeout,
bool return_data)
{
- u32 hicr, i, bi, fwsts;
u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
+ u32 hicr, i, bi, fwsts;
u16 buf_len, dword_len;
+ union {
+ struct ixgbe_hic_hdr hdr;
+ u32 u32arr[1];
+ } *bp = buffer;
- if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+ if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length);
return IXGBE_ERR_HOST_INTERFACE_COMMAND;
}
@@ -3502,26 +3506,25 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
/* Check that the host interface is enabled. */
hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
- if ((hicr & IXGBE_HICR_EN) == 0) {
+ if (!(hicr & IXGBE_HICR_EN)) {
hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
return IXGBE_ERR_HOST_INTERFACE_COMMAND;
}
/* Calculate length in DWORDs. We must be DWORD aligned */
- if ((length % (sizeof(u32))) != 0) {
+ if (length % sizeof(u32)) {
hw_dbg(hw, "Buffer length failure, not aligned to dword");
return IXGBE_ERR_INVALID_ARGUMENT;
}
dword_len = length >> 2;
- /*
- * The device driver writes the relevant command block
+ /* The device driver writes the relevant command block
* into the ram area.
*/
for (i = 0; i < dword_len; i++)
IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
- i, cpu_to_le32(buffer[i]));
+ i, cpu_to_le32(bp->u32arr[i]));
/* Setting this bit tells the ARC that a new command is pending. */
IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
@@ -3534,8 +3537,8 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
}
/* Check command successful completion. */
- if ((timeout != 0 && i == timeout) ||
- (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) {
+ if ((timeout && i == timeout) ||
+ !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) {
hw_dbg(hw, "Command has failed with no status valid.\n");
return IXGBE_ERR_HOST_INTERFACE_COMMAND;
}
@@ -3548,13 +3551,13 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
/* first pull in the header so we know the buffer length */
for (bi = 0; bi < dword_len; bi++) {
- buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
- le32_to_cpus(&buffer[bi]);
+ bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+ le32_to_cpus(&bp->u32arr[bi]);
}
/* If there is any thing in data position pull it in */
- buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len;
- if (buf_len == 0)
+ buf_len = bp->hdr.buf_len;
+ if (!buf_len)
return 0;
if (length < round_up(buf_len, 4) + hdr_size) {
@@ -3565,10 +3568,10 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
/* Calculate length in DWORDs, add 3 for odd lengths */
dword_len = (buf_len + 3) >> 2;
- /* Pull in the rest of the buffer (bi is where we left off)*/
+ /* Pull in the rest of the buffer (bi is where we left off) */
for (; bi <= dword_len; bi++) {
- buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
- le32_to_cpus(&buffer[bi]);
+ bp->u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+ le32_to_cpus(&bp->u32arr[bi]);
}
return 0;
@@ -3612,7 +3615,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
fw_cmd.pad2 = 0;
for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
- ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+ ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
sizeof(fw_cmd),
IXGBE_HI_COMMAND_TIMEOUT,
true);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 2e29015..6f8e6a5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -111,8 +111,8 @@ void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
u8 build, u8 ver);
-s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer,
- u32 length, u32 timeout, bool return_data);
+s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *, u32 length,
+ u32 timeout, bool return_data);
void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
bool ixgbe_mng_present(struct ixgbe_hw *hw);
bool ixgbe_mng_enabled(struct ixgbe_hw *hw);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 5affac1..65832fa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -437,8 +437,7 @@ static s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
/* one word */
buffer.length = cpu_to_be16(sizeof(u16));
- status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
- sizeof(buffer),
+ status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer),
IXGBE_HI_COMMAND_TIMEOUT, false);
if (status)
return status;
@@ -488,7 +487,7 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
buffer.address = cpu_to_be32((offset + current_word) * 2);
buffer.length = cpu_to_be16(words_to_read * 2);
- status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
+ status = ixgbe_host_interface_command(hw, &buffer,
sizeof(buffer),
IXGBE_HI_COMMAND_TIMEOUT,
false);
@@ -771,8 +770,7 @@ static s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
buffer.data = data;
buffer.address = cpu_to_be32(offset * 2);
- status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
- sizeof(buffer),
+ status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer),
IXGBE_HI_COMMAND_TIMEOUT, false);
return status;
}
@@ -814,8 +812,7 @@ static s32 ixgbe_update_flash_X550(struct ixgbe_hw *hw)
buffer.req.buf_lenl = FW_SHADOW_RAM_DUMP_LEN;
buffer.req.checksum = FW_DEFAULT_CHECKSUM;
- status = ixgbe_host_interface_command(hw, (u32 *)&buffer,
- sizeof(buffer),
+ status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer),
IXGBE_HI_COMMAND_TIMEOUT, false);
return status;
}
@@ -864,7 +861,7 @@ static void ixgbe_disable_rx_x550(struct ixgbe_hw *hw)
fw_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
fw_cmd.port_number = hw->bus.lan_id;
- status = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+ status = ixgbe_host_interface_command(hw, &fw_cmd,
sizeof(struct ixgbe_hic_disable_rxen),
IXGBE_HI_COMMAND_TIMEOUT, true);
--
2.5.5
^ permalink raw reply related
* [net-next 01/18] ixgbe: Delete some unused register definitions
From: Jeff Kirsher @ 2016-04-08 3:20 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
I noticed the SRAMREL registers are not referenced for any device,
so delete the definitions.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index bc012ab..d02a0a3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -143,13 +143,6 @@
#define IXGBE_GRC_X550EM_a 0x15F64
#define IXGBE_GRC(_hw) IXGBE_BY_MAC((_hw), GRC)
-#define IXGBE_SRAMREL_8259X 0x10210
-#define IXGBE_SRAMREL_X540 IXGBE_SRAMREL_8259X
-#define IXGBE_SRAMREL_X550 IXGBE_SRAMREL_8259X
-#define IXGBE_SRAMREL_X550EM_x IXGBE_SRAMREL_8259X
-#define IXGBE_SRAMREL_X550EM_a 0x15F6C
-#define IXGBE_SRAMREL(_hw) IXGBE_BY_MAC((_hw), SRAMREL)
-
/* General Receive Control */
#define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */
#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */
@@ -2948,7 +2941,6 @@ union ixgbe_atr_hash_dword {
IXGBE_CAT(EEC, m), \
IXGBE_CAT(FLA, m), \
IXGBE_CAT(GRC, m), \
- IXGBE_CAT(SRAMREL, m), \
IXGBE_CAT(FACTPS, m), \
IXGBE_CAT(SWSM, m), \
IXGBE_CAT(SWFW_SYNC, m), \
--
2.5.5
^ permalink raw reply related
* [net-next 02/18] ixgbe: Change the lan_id and func fields to a u8 to avoid casts
From: Jeff Kirsher @ 2016-04-08 3:20 UTC (permalink / raw)
To: davem; +Cc: Mark Rustad, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <1460085673-87056-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Mark Rustad <mark.d.rustad@intel.com>
Since the lan_id and func fields only ever hold small values, make
them u8 to avoid casts used to silence warnings.
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 4 ++--
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 8c7e78b..dfdb114 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3600,7 +3600,7 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
- fw_cmd.port_num = (u8)hw->bus.func;
+ fw_cmd.port_num = hw->bus.func;
fw_cmd.ver_maj = maj;
fw_cmd.ver_min = min;
fw_cmd.ver_build = build;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index d02a0a3..7ae4bbd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3122,8 +3122,8 @@ struct ixgbe_bus_info {
enum ixgbe_bus_width width;
enum ixgbe_bus_type type;
- u16 func;
- u16 lan_id;
+ u8 func;
+ u8 lan_id;
};
/* Flow control parameters */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 9d3f765..5affac1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -862,7 +862,7 @@ static void ixgbe_disable_rx_x550(struct ixgbe_hw *hw)
fw_cmd.hdr.cmd = FW_DISABLE_RXEN_CMD;
fw_cmd.hdr.buf_len = FW_DISABLE_RXEN_LEN;
fw_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
- fw_cmd.port_number = (u8)hw->bus.lan_id;
+ fw_cmd.port_number = hw->bus.lan_id;
status = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
sizeof(struct ixgbe_hic_disable_rxen),
--
2.5.5
^ permalink raw reply related
* [net-next 00/18][pull request] 10GbE Intel Wired LAN Driver Updates 2016-04-07
From: Jeff Kirsher @ 2016-04-08 3:20 UTC (permalink / raw)
To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann, jogreene, john.ronciak
This series contains updates to ixgbe and ixgbevf.
This entire series (except for one patch from Alex) comes from Mark and
is mainly to add support for our new MAC (x550em_a).
So let's get Alex's patch out of the way first before we cover Mark's
many changes. Alex does his enable bulk free in transmit cleanup for
ixgbe and ixgbevf, like his has done for all of our other drivers.
First Mark cleans up registers that were not being used, so do some
house cleaning. Then to avoid casting lan_id and func fields, just
make them u8 since they only hold small values anyways. Found and
fixed an issue where on read operations it could be possible to
modify locations beyond the length passed in, so change the check
to round up in the same way. Cleaned up the interface for issuing
firmware commands to use a void * instead of a u32 * which eliminates
a number of casts. Added support for the new MAC and provided method
pointers and use them to access IOSF-attached devices, since the
new MAC will also need a new access method. Added support for SFPs
with an external retimer and for an SGMII backplane interface.
The following are changes since commit 889750bd2e08a94d52a116056d462b3a8e5616a7:
Merge branch 'tipc-next'
and are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 10GbE
Alexander Duyck (1):
ixgbe/ixgbevf: Add support for bulk free in Tx cleanup & cleanup
boolean logic
Mark Rustad (17):
ixgbe: Delete some unused register definitions
ixgbe: Change the lan_id and func fields to a u8 to avoid casts
ixgbe: Correct length check for round up
ixgbe: Clean up interface for firmware commands
ixgbe: Take manageability semaphore for firmware commands
ixgbe: Add support for single-port X550 device
ixgbe: Add definitions for x550em_a 10G MAC
ixgbe: Use method pointer to access IOSF devices
ixgbe: Add support for x550em_a 10G MAC type
ixgbe: Use new methods for PHY access
ixgbe: Read and set instance id
ixgbe: Read and parse NW_MNG_IF_SEL register
ixgbe: Introduce function to control MDIO speed
ixgbe: Add support for SFPs with retimer
ixgbe: Add support for SGMII backplane interface
ixgbe: Add KR backplane support for x550em_a
ixgbe: Bump version number
drivers/net/ethernet/intel/ixgbe/ixgbe.h | 3 +
drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 1 +
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 83 +--
drivers/net/ethernet/intel/ixgbe/ixgbe_common.h | 4 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c | 6 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 9 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 3 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 63 ++-
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c | 2 +
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h | 6 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 6 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 101 +++-
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 591 ++++++++++++++++++++--
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 +-
14 files changed, 786 insertions(+), 106 deletions(-)
--
2.5.5
^ permalink raw reply
* Re: [PATCH 0/3] crypto: af_alg - add TLS type encryption
From: Tom Herbert @ 2016-04-08 2:58 UTC (permalink / raw)
To: Herbert Xu
Cc: Tadeusz Struk, linux-crypto, LKML, David S. Miller,
Linux Kernel Network Developers, davejwatson
In-Reply-To: <20160408025250.GA7596@gondor.apana.org.au>
On Thu, Apr 7, 2016 at 11:52 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote:
> On Wed, Apr 06, 2016 at 10:56:12AM -0700, Tadeusz Struk wrote:
>>
>> The intend is to enable HW acceleration of the TLS protocol.
>> The way it will work is that the user space will send a packet of data
>> via AF_ALG and HW will authenticate and encrypt it in one go.
>
> There have been suggestions to implement TLS data-path within
> the kernel. So we should decide whether we pursue that or go
> with your approach before we start adding algorithms.
>
Yes, please see Dave Watson's patches on this.
Tom
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH 0/3] crypto: af_alg - add TLS type encryption
From: Herbert Xu @ 2016-04-08 2:52 UTC (permalink / raw)
To: Tadeusz Struk; +Cc: linux-crypto, linux-kernel, davem, netdev
In-Reply-To: <57054DBC.8010507@intel.com>
On Wed, Apr 06, 2016 at 10:56:12AM -0700, Tadeusz Struk wrote:
>
> The intend is to enable HW acceleration of the TLS protocol.
> The way it will work is that the user space will send a packet of data
> via AF_ALG and HW will authenticate and encrypt it in one go.
There have been suggestions to implement TLS data-path within
the kernel. So we should decide whether we pursue that or go
with your approach before we start adding algorithms.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: mpls's find_outdev()
From: roopa @ 2016-04-08 2:50 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20160407.214109.32624032556886158.davem@davemloft.net>
On 4/7/16, 6:41 PM, David Miller wrote:
> While auditing something unrelated, I noticed that this function seems
> to potentially dev_put() on an error pointer.
>
> I guess the problem comes from the fact that there are two methods
> by which the device pointer is obtained.
>
> First, inet{,6}_fib_lookup_dev() which uses error pointers.
>
> Second, dev_get_by_index() which returns a valid device or NULL,
> and therefore does not use error pointers.
>
> If inet{,6}_fib_lookup_dev() returns an error pointer, the !dev check
> will not pass and dev_put() will operate on an error pointer and
> crash.
>
> If you agree with my analysis, could you please cook up and test a
> fix?
>
> Thank you!
certainly!. I see it. Thanks for catching it.
I will test and post a fix in a few hours.
^ permalink raw reply
* Re: [PATCH net] ipv6: Count in extension headers in skb->network_header
From: David Miller @ 2016-04-08 2:44 UTC (permalink / raw)
To: jkbs; +Cc: netdev, jiji, hannes
In-Reply-To: <4697d0324d0b4313cbc2053b49ac85ec955b81cb.1459872592.git.jkbs@redhat.com>
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Tue, 5 Apr 2016 18:41:08 +0200
> When sending a UDPv6 message longer than MTU, account for the length
> of fragmentable IPv6 extension headers in skb->network_header offset.
> Same as we do in alloc_new_skb path in __ip6_append_data().
>
> This ensures that later on __ip6_make_skb() will make space in
> headroom for fragmentable extension headers:
>
> /* move skb->data to ip header from ext header */
> if (skb->data < skb_network_header(skb))
> __skb_pull(skb, skb_network_offset(skb));
>
> Prevents a splat due to skb_under_panic:
...
> Reported-by: Ji Jianwen <jiji@redhat.com>
> Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Applied and queued up for -stable, thanks.
^ permalink raw reply
* RE: [PATCH v8 net-next 1/1] hv_sock: introduce Hyper-V Sockets
From: Dexuan Cui @ 2016-04-08 1:56 UTC (permalink / raw)
To: Joe Perches, gregkh@linuxfoundation.org, davem@davemloft.net,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
devel@linuxdriverproject.org, olaf@aepfle.de, apw@canonical.com,
jasowang@redhat.com, cavery@redhat.com, KY Srinivasan,
Haiyang Zhang
In-Reply-To: <1460078109.1800.16.camel@perches.com>
> From: Joe Perches [mailto:joe@perches.com]
> Sent: Friday, April 8, 2016 9:15
> On Thu, 2016-04-07 at 18:36 -0700, Dexuan Cui wrote:
> > diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
> []
> > +#define VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV (5 * PAGE_SIZE)
> > +#define VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND (5 * PAGE_SIZE)
> > +
> > +#define HVSOCK_RCV_BUF_SZ
> VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV
> > +#define HVSOCK_SND_BUF_SZ PAGE_SIZE
> []
> > +struct hvsock_sock {
> []
> > + struct {
> > + struct vmpipe_proto_header hdr;
> > + char buf[HVSOCK_SND_BUF_SZ];
> > + } __packed send;
> > +
> > + struct {
> > + struct vmpipe_proto_header hdr;
> > + char buf[HVSOCK_RCV_BUF_SZ];
> > + unsigned int data_len;
> > + unsigned int data_offset;
> > + } __packed recv;
> > +};
>
> These bufs are not page aligned and so can span pages.
>
> Is there any value in allocating these bufs separately
> as pages instead of as a kmalloc?
The bufs are not required to be page aligned.
Here the 'hdr' and the 'buf' must be consecutive, i.e., the 'buf' must be
an array rather than a pointer: please see hvsock_send_data().
It looks to me there is no big value to make sure the 'buf' is page
aligned: on x86_64, at least it should already be 8-byte aligned due to the
adjacent channel pointer, so memcpy_from_msg() should work
enough good and in hvsock_send_data() -> vmbus_sendpacket(),
we don't copy the 'buf'.
Thanks,
-- Dexuan
^ permalink raw reply
* Re: [PATCH V2 1/2] net: socket: return EADDRNOTAVAIL when source address becomes nonlocal
From: David Miller @ 2016-04-08 1:48 UTC (permalink / raw)
To: zlpnobody; +Cc: netdev, liping.zhang
In-Reply-To: <1459865986-18271-2-git-send-email-zlpnobody@163.com>
From: Liping Zhang <zlpnobody@163.com>
Date: Tue, 5 Apr 2016 22:19:45 +0800
> From: Liping Zhang <liping.zhang@spreadtrum.com>
>
> A socket can use bind(directly) or connect(indirectly) to bind to a local
> ip address, and later if the network becomes down, that cause the source
> address becomes nonlocal, then send() call will fail and return EINVAL.
> But this error code is confusing, acctually we did not pass any invalid
> arguments. Furthermore, send() maybe return ok at first, it now returns
> fail just because of a temporary network problem, i.e. when the network
> recovery, send() call will become ok. Return EADDRNOTAVAIL instead of
> EINVAL in such situation is better.
>
> Take the ping utility for example, we can use -I option to specify the
> source address (e.g ping -I 10.19.145.26 117.135.169.41), when network
> becomes down, error message will be printed out as follows:
> 64 bytes from (117.135.169.41): icmp_seq=9 ttl=54 time=46.7 ms
> ping: sendmsg: Invalid argument
> ping: sendmsg: Invalid argument
>
> Apply this patch, error message will become:
> 64 bytes from (117.135.169.41): icmp_seq=5 ttl=54 time=47.5 ms
> ping: sendmsg: Cannot assign requested address
> ping: sendmsg: Cannot assign requested address
>
> Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Changing the behavior of such a core, and fundamental function, with
hundreds of direct and indirect call sites is extremely dangerous.
I was worried that someone might depend upon the -EINVAL return value,
and indeed I quickly found that the code in net/netfilter/ipvs/ip_vs_xmit.c
does want to see EINVAL in this case.
I refuse to audit the entire call chain of all users of this helper
function, as I said there are hundreds. But there are potentially
many other call sites, either direct or indirect, that have this
problem as well.
And furthermore, USERSPACE itself might depend upon the error code
being -EINVAL since we've been returning it for several decades.
To be quite honest I think the value of this change is very low and
with the risk factors involved here I really would rather not make
this change at all.
Sorry.
^ permalink raw reply
* mpls's find_outdev()
From: David Miller @ 2016-04-08 1:41 UTC (permalink / raw)
To: roopa; +Cc: netdev
While auditing something unrelated, I noticed that this function seems
to potentially dev_put() on an error pointer.
I guess the problem comes from the fact that there are two methods
by which the device pointer is obtained.
First, inet{,6}_fib_lookup_dev() which uses error pointers.
Second, dev_get_by_index() which returns a valid device or NULL,
and therefore does not use error pointers.
If inet{,6}_fib_lookup_dev() returns an error pointer, the !dev check
will not pass and dev_put() will operate on an error pointer and
crash.
If you agree with my analysis, could you please cook up and test a
fix?
Thank you!
^ permalink raw reply
* Re: [PATCH 2/2] net-ath9k_htc: Replace a variable initialisation by an assignment in ath9k_htc_set_channel()
From: Julian Calaby @ 2016-04-08 1:40 UTC (permalink / raw)
To: Kalle Valo
Cc: ath9k-devel, linux-wireless, netdev, QCA ath9k Development, LKML,
SF Markus Elfring, kernel-janitors, Julia Lawall
In-Reply-To: <5686C47E.1040906@users.sourceforge.net>
Hi Kalle,
On Sat, Jan 2, 2016 at 5:25 AM, SF Markus Elfring
<elfring@users.sourceforge.net> wrote:
> From: Markus Elfring <elfring@users.sourceforge.net>
> Date: Fri, 1 Jan 2016 19:09:32 +0100
>
> Replace an explicit initialisation for one local variable at the beginning
> by a conditional assignment.
>
> Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
This looks sane to me.
Reviewed-by: Julian Calaby <julian.calaby@gmail.com>
Thanks,
Julian Calaby
> ---
> drivers/net/wireless/ath/ath9k/htc_drv_main.c | 7 ++-----
> 1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
> index a680a97..30bd59e 100644
> --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
> +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
> @@ -246,7 +246,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv,
> struct ieee80211_conf *conf = &common->hw->conf;
> bool fastcc;
> struct ieee80211_channel *channel = hw->conf.chandef.chan;
> - struct ath9k_hw_cal_data *caldata = NULL;
> + struct ath9k_hw_cal_data *caldata;
> enum htc_phymode mode;
> __be16 htc_mode;
> u8 cmd_rsp;
> @@ -274,10 +274,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv,
> priv->ah->curchan->channel,
> channel->center_freq, conf_is_ht(conf), conf_is_ht40(conf),
> fastcc);
> -
> - if (!fastcc)
> - caldata = &priv->caldata;
> -
> + caldata = fastcc ? NULL : &priv->caldata;
> ret = ath9k_hw_reset(ah, hchan, caldata, fastcc);
> if (ret) {
> ath_err(common,
> --
> 2.6.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Julian Calaby
Email: julian.calaby@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/
^ permalink raw reply
* [PATCH v8 net-next 1/1] hv_sock: introduce Hyper-V Sockets
From: Dexuan Cui @ 2016-04-08 1:36 UTC (permalink / raw)
To: gregkh, davem, netdev, linux-kernel, devel, olaf, apw, jasowang,
cavery, kys, haiyangz
Cc: joe
Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
mechanism between the host and the guest. It's somewhat like TCP over
VMBus, but the transportation layer (VMBus) is much simpler than IP.
With Hyper-V Sockets, applications between the host and the guest can talk
to each other directly by the traditional BSD-style socket APIs.
Hyper-V Sockets is only available on new Windows hosts, like Windows Server
2016. More info is in this article "Make your own integration services":
https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service
The patch implements the necessary support in the guest side by introducing
a new socket address family AF_HYPERV.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
---
MAINTAINERS | 2 +
include/linux/hyperv.h | 16 +
include/linux/socket.h | 5 +-
include/net/af_hvsock.h | 51 ++
include/uapi/linux/hyperv.h | 25 +
net/Kconfig | 1 +
net/Makefile | 1 +
net/hv_sock/Kconfig | 10 +
net/hv_sock/Makefile | 3 +
net/hv_sock/af_hvsock.c | 1483 +++++++++++++++++++++++++++++++++++++++++++
10 files changed, 1595 insertions(+), 2 deletions(-)
create mode 100644 include/net/af_hvsock.h
create mode 100644 net/hv_sock/Kconfig
create mode 100644 net/hv_sock/Makefile
create mode 100644 net/hv_sock/af_hvsock.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 67d99dd..7b6f203 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5267,7 +5267,9 @@ F: drivers/pci/host/pci-hyperv.c
F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/video/fbdev/hyperv_fb.c
+F: net/hv_sock/
F: include/linux/hyperv.h
+F: include/net/af_hvsock.h
F: tools/hv/
F: Documentation/ABI/stable/sysfs-bus-vmbus
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index aa0fadc..b92439d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1338,4 +1338,20 @@ extern __u32 vmbus_proto_version;
int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
const uuid_le *shv_host_servie_id);
+struct vmpipe_proto_header {
+ u32 pkt_type;
+ u32 data_size;
+} __packed;
+
+#define HVSOCK_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \
+ sizeof(struct vmpipe_proto_header))
+
+/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write() */
+#define PREV_INDICES_LEN (sizeof(u64))
+
+#define HVSOCK_PKT_LEN(payload_len) (HVSOCK_HEADER_LEN + \
+ ALIGN((payload_len), 8) + \
+ PREV_INDICES_LEN)
+#define HVSOCK_MIN_PKT_LEN HVSOCK_PKT_LEN(1)
+
#endif /* _HYPERV_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 73bf6c6..88b1ccd 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -201,8 +201,8 @@ struct ucred {
#define AF_NFC 39 /* NFC sockets */
#define AF_VSOCK 40 /* vSockets */
#define AF_KCM 41 /* Kernel Connection Multiplexor*/
-
-#define AF_MAX 42 /* For now.. */
+#define AF_HYPERV 42 /* Hyper-V Sockets */
+#define AF_MAX 43 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -249,6 +249,7 @@ struct ucred {
#define PF_NFC AF_NFC
#define PF_VSOCK AF_VSOCK
#define PF_KCM AF_KCM
+#define PF_HYPERV AF_HYPERV
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
new file mode 100644
index 0000000..a5aa28d
--- /dev/null
+++ b/include/net/af_hvsock.h
@@ -0,0 +1,51 @@
+#ifndef __AF_HVSOCK_H__
+#define __AF_HVSOCK_H__
+
+#include <linux/kernel.h>
+#include <linux/hyperv.h>
+#include <net/sock.h>
+
+#define VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV (5 * PAGE_SIZE)
+#define VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND (5 * PAGE_SIZE)
+
+#define HVSOCK_RCV_BUF_SZ VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV
+#define HVSOCK_SND_BUF_SZ PAGE_SIZE
+
+#define sk_to_hvsock(__sk) ((struct hvsock_sock *)(__sk))
+#define hvsock_to_sk(__hvsk) ((struct sock *)(__hvsk))
+
+struct hvsock_sock {
+ /* sk must be the first member. */
+ struct sock sk;
+
+ struct sockaddr_hv local_addr;
+ struct sockaddr_hv remote_addr;
+
+ /* protected by the global hvsock_mutex */
+ struct list_head bound_list;
+ struct list_head connected_list;
+
+ struct list_head accept_queue;
+ /* used by enqueue and dequeue */
+ struct mutex accept_queue_mutex;
+
+ struct delayed_work dwork;
+
+ u32 peer_shutdown;
+
+ struct vmbus_channel *channel;
+
+ struct {
+ struct vmpipe_proto_header hdr;
+ char buf[HVSOCK_SND_BUF_SZ];
+ } __packed send;
+
+ struct {
+ struct vmpipe_proto_header hdr;
+ char buf[HVSOCK_RCV_BUF_SZ];
+ unsigned int data_len;
+ unsigned int data_offset;
+ } __packed recv;
+};
+
+#endif /* __AF_HVSOCK_H__ */
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index e347b24..f1d0bca 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -26,6 +26,7 @@
#define _UAPI_HYPERV_H
#include <linux/uuid.h>
+#include <linux/socket.h>
/*
* Framework version for util services.
@@ -396,4 +397,28 @@ struct hv_kvp_ip_msg {
struct hv_kvp_ipaddr_value kvp_ip_val;
} __attribute__((packed));
+/*
+ * This is the address fromat of Hyper-V Sockets.
+ * Note: here we just borrow the kernel's built-in type uuid_le. When
+ * an application calls bind() or connect(), the 2 members of struct
+ * sockaddr_hv must be of GUID.
+ * The GUID format differs from the UUID format only in the byte order of
+ * the first 3 fields. Refer to:
+ * https://en.wikipedia.org/wiki/Globally_unique_identifier
+ */
+#define guid_t uuid_le
+struct sockaddr_hv {
+ __kernel_sa_family_t shv_family; /* Address family */
+ __le16 reserved; /* Must be Zero */
+ guid_t shv_vm_id; /* Not used. Must be Zero. */
+ guid_t shv_service_id; /* Service ID */
+};
+
+#define SHV_VMID_GUEST NULL_UUID_LE
+#define SHV_VMID_HOST NULL_UUID_LE
+
+#define SHV_SERVICE_ID_ANY NULL_UUID_LE
+
+#define SHV_PROTO_RAW 1
+
#endif /* _UAPI_HYPERV_H */
diff --git a/net/Kconfig b/net/Kconfig
index a8934d8..68d13d7 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -231,6 +231,7 @@ source "net/dns_resolver/Kconfig"
source "net/batman-adv/Kconfig"
source "net/openvswitch/Kconfig"
source "net/vmw_vsock/Kconfig"
+source "net/hv_sock/Kconfig"
source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 81d1411..d115c31 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/
obj-$(CONFIG_NFC) += nfc/
obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
+obj-$(CONFIG_HYPERV_SOCK) += hv_sock/
obj-$(CONFIG_MPLS) += mpls/
obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
diff --git a/net/hv_sock/Kconfig b/net/hv_sock/Kconfig
new file mode 100644
index 0000000..1f41848
--- /dev/null
+++ b/net/hv_sock/Kconfig
@@ -0,0 +1,10 @@
+config HYPERV_SOCK
+ tristate "Hyper-V Sockets"
+ depends on HYPERV
+ default m if HYPERV
+ help
+ Hyper-V Sockets is somewhat like TCP over VMBus, allowing
+ communication between Linux guest and Hyper-V host without TCP/IP.
+
+ To compile this driver as a module, choose M here: the module
+ will be called hv_sock.
diff --git a/net/hv_sock/Makefile b/net/hv_sock/Makefile
new file mode 100644
index 0000000..716c012
--- /dev/null
+++ b/net/hv_sock/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_HYPERV_SOCK) += hv_sock.o
+
+hv_sock-y += af_hvsock.o
diff --git a/net/hv_sock/af_hvsock.c b/net/hv_sock/af_hvsock.c
new file mode 100644
index 0000000..185a382
--- /dev/null
+++ b/net/hv_sock/af_hvsock.c
@@ -0,0 +1,1483 @@
+/*
+ * Hyper-V Sockets -- a socket-based communication channel between the
+ * Hyper-V host and the virtual machines running on it.
+ *
+ * Copyright(c) 2016, Microsoft Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <net/af_hvsock.h>
+
+static struct proto hvsock_proto = {
+ .name = "HV_SOCK",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct hvsock_sock),
+};
+
+#define SS_LISTEN 255
+
+static LIST_HEAD(hvsock_bound_list);
+static LIST_HEAD(hvsock_connected_list);
+static DEFINE_MUTEX(hvsock_mutex);
+
+static bool uuid_equals(uuid_le u1, uuid_le u2)
+{
+ return !uuid_le_cmp(u1, u2);
+}
+
+/* NOTE: hvsock_mutex must be held when the below helper functions, whose
+ * names begin with __ hvsock, are invoked.
+ */
+static void __hvsock_insert_bound(struct list_head *list,
+ struct hvsock_sock *hvsk)
+{
+ sock_hold(&hvsk->sk);
+ list_add(&hvsk->bound_list, list);
+}
+
+static void __hvsock_insert_connected(struct list_head *list,
+ struct hvsock_sock *hvsk)
+{
+ sock_hold(&hvsk->sk);
+ list_add(&hvsk->connected_list, list);
+}
+
+static void __hvsock_remove_bound(struct hvsock_sock *hvsk)
+{
+ list_del_init(&hvsk->bound_list);
+ sock_put(&hvsk->sk);
+}
+
+static void __hvsock_remove_connected(struct hvsock_sock *hvsk)
+{
+ list_del_init(&hvsk->connected_list);
+ sock_put(&hvsk->sk);
+}
+
+static struct sock *__hvsock_find_bound_socket(const struct sockaddr_hv *addr)
+{
+ struct hvsock_sock *hvsk;
+
+ list_for_each_entry(hvsk, &hvsock_bound_list, bound_list) {
+ if (uuid_equals(addr->shv_service_id,
+ hvsk->local_addr.shv_service_id))
+ return hvsock_to_sk(hvsk);
+ }
+ return NULL;
+}
+
+static struct sock *__hvsock_find_connected_socket_by_channel(
+ const struct vmbus_channel *channel)
+{
+ struct hvsock_sock *hvsk;
+
+ list_for_each_entry(hvsk, &hvsock_connected_list, connected_list) {
+ if (hvsk->channel == channel)
+ return hvsock_to_sk(hvsk);
+ }
+ return NULL;
+}
+
+static bool __hvsock_in_bound_list(struct hvsock_sock *hvsk)
+{
+ return !list_empty(&hvsk->bound_list);
+}
+
+static bool __hvsock_in_connected_list(struct hvsock_sock *hvsk)
+{
+ return !list_empty(&hvsk->connected_list);
+}
+
+static void hvsock_insert_connected(struct hvsock_sock *hvsk)
+{
+ __hvsock_insert_connected(&hvsock_connected_list, hvsk);
+}
+
+static
+void hvsock_enqueue_accept(struct sock *listener, struct sock *connected)
+{
+ struct hvsock_sock *hvlistener;
+ struct hvsock_sock *hvconnected;
+
+ hvlistener = sk_to_hvsock(listener);
+ hvconnected = sk_to_hvsock(connected);
+
+ sock_hold(connected);
+ sock_hold(listener);
+
+ mutex_lock(&hvlistener->accept_queue_mutex);
+ list_add_tail(&hvconnected->accept_queue, &hvlistener->accept_queue);
+ listener->sk_ack_backlog++;
+ mutex_unlock(&hvlistener->accept_queue_mutex);
+}
+
+static struct sock *hvsock_dequeue_accept(struct sock *listener)
+{
+ struct hvsock_sock *hvlistener;
+ struct hvsock_sock *hvconnected;
+
+ hvlistener = sk_to_hvsock(listener);
+
+ mutex_lock(&hvlistener->accept_queue_mutex);
+
+ if (list_empty(&hvlistener->accept_queue)) {
+ mutex_unlock(&hvlistener->accept_queue_mutex);
+ return NULL;
+ }
+
+ hvconnected = list_entry(hvlistener->accept_queue.next,
+ struct hvsock_sock, accept_queue);
+
+ list_del_init(&hvconnected->accept_queue);
+ listener->sk_ack_backlog--;
+
+ mutex_unlock(&hvlistener->accept_queue_mutex);
+
+ sock_put(listener);
+ /* The caller will need a reference on the connected socket so we let
+ * it call sock_put().
+ */
+
+ return hvsock_to_sk(hvconnected);
+}
+
+static bool hvsock_is_accept_queue_empty(struct sock *sk)
+{
+ struct hvsock_sock *hvsk = sk_to_hvsock(sk);
+ int ret;
+
+ mutex_lock(&hvsk->accept_queue_mutex);
+ ret = list_empty(&hvsk->accept_queue);
+ mutex_unlock(&hvsk->accept_queue_mutex);
+
+ return ret;
+}
+
+static void hvsock_addr_init(struct sockaddr_hv *addr, uuid_le service_id)
+{
+ memset(addr, 0, sizeof(*addr));
+ addr->shv_family = AF_HYPERV;
+ addr->shv_service_id = service_id;
+}
+
+static int hvsock_addr_validate(const struct sockaddr_hv *addr)
+{
+ if (!addr)
+ return -EFAULT;
+
+ if (addr->shv_family != AF_HYPERV)
+ return -EAFNOSUPPORT;
+
+ if (addr->reserved != 0)
+ return -EINVAL;
+
+ if (!uuid_equals(addr->shv_vm_id, NULL_UUID_LE))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool hvsock_addr_bound(const struct sockaddr_hv *addr)
+{
+ return !uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY);
+}
+
+static int hvsock_addr_cast(const struct sockaddr *addr, size_t len,
+ struct sockaddr_hv **out_addr)
+{
+ if (len < sizeof(**out_addr))
+ return -EFAULT;
+
+ *out_addr = (struct sockaddr_hv *)addr;
+ return hvsock_addr_validate(*out_addr);
+}
+
+static int __hvsock_do_bind(struct hvsock_sock *hvsk,
+ struct sockaddr_hv *addr)
+{
+ struct sockaddr_hv hv_addr;
+ int ret = 0;
+
+ hvsock_addr_init(&hv_addr, addr->shv_service_id);
+
+ mutex_lock(&hvsock_mutex);
+
+ if (uuid_equals(addr->shv_service_id, SHV_SERVICE_ID_ANY)) {
+ do {
+ uuid_le_gen(&hv_addr.shv_service_id);
+ } while (__hvsock_find_bound_socket(&hv_addr));
+ } else {
+ if (__hvsock_find_bound_socket(&hv_addr)) {
+ ret = -EADDRINUSE;
+ goto out;
+ }
+ }
+
+ hvsock_addr_init(&hvsk->local_addr, hv_addr.shv_service_id);
+ __hvsock_insert_bound(&hvsock_bound_list, hvsk);
+
+out:
+ mutex_unlock(&hvsock_mutex);
+
+ return ret;
+}
+
+static int __hvsock_bind(struct sock *sk, struct sockaddr_hv *addr)
+{
+ struct hvsock_sock *hvsk = sk_to_hvsock(sk);
+ int ret;
+
+ if (hvsock_addr_bound(&hvsk->local_addr))
+ return -EINVAL;
+
+ switch (sk->sk_socket->type) {
+ case SOCK_STREAM:
+ ret = __hvsock_do_bind(hvsk, addr);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/* Autobind this socket to the local address if necessary. */
+static int hvsock_auto_bind(struct hvsock_sock *hvsk)
+{
+ struct sock *sk = hvsock_to_sk(hvsk);
+ struct sockaddr_hv local_addr;
+
+ if (hvsock_addr_bound(&hvsk->local_addr))
+ return 0;
+ hvsock_addr_init(&local_addr, SHV_SERVICE_ID_ANY);
+ return __hvsock_bind(sk, &local_addr);
+}
+
+static void hvsock_sk_destruct(struct sock *sk)
+{
+ struct hvsock_sock *hvsk = sk_to_hvsock(sk);
+ struct vmbus_channel *channel = hvsk->channel;
+
+ if (!channel)
+ return;
+
+ vmbus_hvsock_device_unregister(channel);
+}
+
+static void __hvsock_release(struct sock *sk)
+{
+ struct hvsock_sock *hvsk;
+ struct sock *pending;
+
+ hvsk = sk_to_hvsock(sk);
+
+ mutex_lock(&hvsock_mutex);
+ if (__hvsock_in_bound_list(hvsk))
+ __hvsock_remove_bound(hvsk);
+
+ if (__hvsock_in_connected_list(hvsk))
+ __hvsock_remove_connected(hvsk);
+ mutex_unlock(&hvsock_mutex);
+
+ lock_sock(sk);
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ /* Clean up any sockets that never were accepted. */
+ while ((pending = hvsock_dequeue_accept(sk)) != NULL) {
+ __hvsock_release(pending);
+ sock_put(pending);
+ }
+
+ release_sock(sk);
+ sock_put(sk);
+}
+
+static int hvsock_release(struct socket *sock)
+{
+ /* If accept() is interrupted by a signal, the temporary socket
+ * struct's sock->sk is NULL.
+ */
+ if (sock->sk) {
+ __hvsock_release(sock->sk);
+ sock->sk = NULL;
+ }
+
+ sock->state = SS_FREE;
+ return 0;
+}
+
+static struct sock *__hvsock_create(struct net *net, struct socket *sock,
+ gfp_t priority, unsigned short type)
+{
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+
+ sk = sk_alloc(net, AF_HYPERV, priority, &hvsock_proto, 0);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk);
+
+ /* sk->sk_type is normally set in sock_init_data, but only if sock is
+ * non-NULL. We make sure that our sockets always have a type by
+ * setting it here if needed.
+ */
+ if (!sock)
+ sk->sk_type = type;
+
+ hvsk = sk_to_hvsock(sk);
+ hvsock_addr_init(&hvsk->local_addr, SHV_SERVICE_ID_ANY);
+ hvsock_addr_init(&hvsk->remote_addr, SHV_SERVICE_ID_ANY);
+
+ sk->sk_destruct = hvsock_sk_destruct;
+
+ /* Looks stream-based socket doesn't need this. */
+ sk->sk_backlog_rcv = NULL;
+
+ sk->sk_state = 0;
+ sock_reset_flag(sk, SOCK_DONE);
+
+ INIT_LIST_HEAD(&hvsk->bound_list);
+ INIT_LIST_HEAD(&hvsk->connected_list);
+
+ INIT_LIST_HEAD(&hvsk->accept_queue);
+ mutex_init(&hvsk->accept_queue_mutex);
+
+ hvsk->peer_shutdown = 0;
+
+ hvsk->recv.data_len = 0;
+ hvsk->recv.data_offset = 0;
+
+ return sk;
+}
+
+static int hvsock_bind(struct socket *sock, struct sockaddr *addr,
+ int addr_len)
+{
+ struct sockaddr_hv *hv_addr;
+ struct sock *sk;
+ int ret;
+
+ sk = sock->sk;
+
+ if (hvsock_addr_cast(addr, addr_len, &hv_addr) != 0)
+ return -EINVAL;
+
+ lock_sock(sk);
+ ret = __hvsock_bind(sk, hv_addr);
+ release_sock(sk);
+
+ return ret;
+}
+
+static int hvsock_getname(struct socket *sock,
+ struct sockaddr *addr, int *addr_len, int peer)
+{
+ struct sockaddr_hv *hv_addr;
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+ int ret;
+
+ sk = sock->sk;
+ hvsk = sk_to_hvsock(sk);
+ ret = 0;
+
+ lock_sock(sk);
+
+ if (peer) {
+ if (sock->state != SS_CONNECTED) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+ hv_addr = &hvsk->remote_addr;
+ } else {
+ hv_addr = &hvsk->local_addr;
+ }
+
+ __sockaddr_check_size(sizeof(*hv_addr));
+
+ memcpy(addr, hv_addr, sizeof(*hv_addr));
+ *addr_len = sizeof(*hv_addr);
+
+out:
+ release_sock(sk);
+ return ret;
+}
+
+static int hvsock_shutdown(struct socket *sock, int mode)
+{
+ struct sock *sk;
+
+ if (mode < SHUT_RD || mode > SHUT_RDWR)
+ return -EINVAL;
+ /* This maps:
+ * SHUT_RD (0) -> RCV_SHUTDOWN (1)
+ * SHUT_WR (1) -> SEND_SHUTDOWN (2)
+ * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
+ */
+ ++mode;
+
+ if (sock->state == SS_UNCONNECTED)
+ return -ENOTCONN;
+
+ sock->state = SS_DISCONNECTING;
+
+ sk = sock->sk;
+
+ lock_sock(sk);
+
+ sk->sk_shutdown |= mode;
+ sk->sk_state_change(sk);
+
+ /* TODO: how to send a FIN if we haven't done that? */
+ if (mode & SEND_SHUTDOWN)
+ ;
+
+ release_sock(sk);
+
+ return 0;
+}
+
+static void get_ringbuffer_rw_status(struct vmbus_channel *channel,
+ bool *can_read, bool *can_write)
+{
+ u32 avl_read_bytes, avl_write_bytes, dummy;
+
+ if (can_read) {
+ hv_get_ringbuffer_availbytes(&channel->inbound,
+ &avl_read_bytes,
+ &dummy);
+ *can_read = avl_read_bytes >= HVSOCK_MIN_PKT_LEN;
+ }
+
+ /* We write into the ringbuffer only when we're able to write a
+ * a payload of 4096 bytes (the actual written payload's length may be
+ * less than 4096).
+ */
+ if (can_write) {
+ hv_get_ringbuffer_availbytes(&channel->outbound,
+ &dummy,
+ &avl_write_bytes);
+ *can_write = avl_write_bytes > HVSOCK_PKT_LEN(PAGE_SIZE);
+ }
+}
+
+static unsigned int hvsock_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct vmbus_channel *channel;
+ bool can_read, can_write;
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+ unsigned int mask;
+
+ sk = sock->sk;
+ hvsk = sk_to_hvsock(sk);
+
+ poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
+
+ if (sk->sk_err)
+ /* Signify that there has been an error on this socket. */
+ mask |= POLLERR;
+
+ /* INET sockets treat local write shutdown and peer write shutdown as a
+ * case of POLLHUP set.
+ */
+ if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
+ ((sk->sk_shutdown & SEND_SHUTDOWN) &&
+ (hvsk->peer_shutdown & SEND_SHUTDOWN))) {
+ mask |= POLLHUP;
+ }
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN ||
+ hvsk->peer_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLRDHUP;
+ }
+
+ lock_sock(sk);
+
+ /* Listening sockets that have connections in their accept
+ * queue can be read.
+ */
+ if (sk->sk_state == SS_LISTEN && !hvsock_is_accept_queue_empty(sk))
+ mask |= POLLIN | POLLRDNORM;
+
+ /* The mutex is to against hvsock_open_connection() */
+ mutex_lock(&hvsock_mutex);
+
+ channel = hvsk->channel;
+ if (channel) {
+ /* If there is something in the queue then we can read */
+ get_ringbuffer_rw_status(channel, &can_read, &can_write);
+
+ if (!can_read && hvsk->recv.data_len > 0)
+ can_read = true;
+
+ if (!(sk->sk_shutdown & RCV_SHUTDOWN) && can_read)
+ mask |= POLLIN | POLLRDNORM;
+ } else {
+ can_read = false;
+ can_write = false;
+ }
+
+ mutex_unlock(&hvsock_mutex);
+
+ /* Sockets whose connections have been closed terminated should
+ * also be considered read, and we check the shutdown flag for that.
+ */
+ if (sk->sk_shutdown & RCV_SHUTDOWN ||
+ hvsk->peer_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLIN | POLLRDNORM;
+ }
+
+ /* Connected sockets that can produce data can be written. */
+ if (sk->sk_state == SS_CONNECTED && can_write &&
+ !(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ /* Remove POLLWRBAND since INET sockets are not setting it.
+ */
+ mask |= POLLOUT | POLLWRNORM;
+ }
+
+ /* Simulate INET socket poll behaviors, which sets
+ * POLLOUT|POLLWRNORM when peer is closed and nothing to read,
+ * but local send is not shutdown.
+ */
+ if (sk->sk_state == SS_UNCONNECTED &&
+ !(sk->sk_shutdown & SEND_SHUTDOWN))
+ mask |= POLLOUT | POLLWRNORM;
+
+ release_sock(sk);
+
+ return mask;
+}
+
+/* This function runs in the tasklet context of process_chn_event() */
+static void hvsock_on_channel_cb(void *ctx)
+{
+ struct sock *sk = (struct sock *)ctx;
+ struct hvsock_sock *hvsk = sk_to_hvsock(sk);
+ struct vmbus_channel *channel = hvsk->channel;
+ bool can_read, can_write;
+
+ if (!channel) {
+ WARN_ONCE(1, "NULL channel! There is a programming bug.\n");
+ return;
+ }
+
+ get_ringbuffer_rw_status(channel, &can_read, &can_write);
+
+ if (can_read)
+ sk->sk_data_ready(sk);
+
+ if (can_write)
+ sk->sk_write_space(sk);
+}
+
+static void hvsock_close_connection(struct vmbus_channel *channel)
+{
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+
+ mutex_lock(&hvsock_mutex);
+
+ sk = __hvsock_find_connected_socket_by_channel(channel);
+
+ /* The guest has already closed the connection? */
+ if (!sk)
+ goto out;
+
+ sk->sk_socket->state = SS_UNCONNECTED;
+ sk->sk_state = SS_UNCONNECTED;
+ sock_set_flag(sk, SOCK_DONE);
+
+ hvsk = sk_to_hvsock(sk);
+ hvsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
+
+ sk->sk_state_change(sk);
+out:
+ mutex_unlock(&hvsock_mutex);
+}
+
+static int hvsock_open_connection(struct vmbus_channel *channel)
+{
+ struct hvsock_sock *hvsk, *new_hvsk;
+ struct sockaddr_hv hv_addr;
+ struct sock *sk, *new_sk;
+
+ uuid_le *instance, *service_id;
+ int ret;
+
+ instance = &channel->offermsg.offer.if_instance;
+ service_id = &channel->offermsg.offer.if_type;
+
+ hvsock_addr_init(&hv_addr, *instance);
+
+ mutex_lock(&hvsock_mutex);
+
+ sk = __hvsock_find_bound_socket(&hv_addr);
+
+ if (sk) {
+ /* It is from the guest client's connect() */
+ if (sk->sk_state != SS_CONNECTING) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ hvsk = sk_to_hvsock(sk);
+ hvsk->channel = channel;
+ set_channel_read_state(channel, false);
+ vmbus_set_chn_rescind_callback(channel,
+ hvsock_close_connection);
+ ret = vmbus_open(channel, VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND,
+ VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV, NULL, 0,
+ hvsock_on_channel_cb, sk);
+ if (ret != 0) {
+ hvsk->channel = NULL;
+ goto out;
+ }
+
+ set_channel_pending_send_size(channel,
+ HVSOCK_PKT_LEN(PAGE_SIZE));
+ sk->sk_state = SS_CONNECTED;
+ sk->sk_socket->state = SS_CONNECTED;
+ hvsock_insert_connected(hvsk);
+ sk->sk_state_change(sk);
+ goto out;
+ }
+
+ /* Now we suppose it is from a host client's connect() */
+ hvsock_addr_init(&hv_addr, *service_id);
+ sk = __hvsock_find_bound_socket(&hv_addr);
+
+ /* No guest server listening? Well, let's ignore the offer */
+ if (!sk || sk->sk_state != SS_LISTEN) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
+ ret = -EMFILE;
+ goto out;
+ }
+
+ new_sk = __hvsock_create(sock_net(sk), NULL, GFP_KERNEL, sk->sk_type);
+ if (!new_sk) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ new_hvsk = sk_to_hvsock(new_sk);
+ new_sk->sk_state = SS_CONNECTING;
+ hvsock_addr_init(&new_hvsk->local_addr, *service_id);
+ hvsock_addr_init(&new_hvsk->remote_addr, *instance);
+
+ set_channel_read_state(channel, false);
+ new_hvsk->channel = channel;
+ vmbus_set_chn_rescind_callback(channel, hvsock_close_connection);
+ ret = vmbus_open(channel, VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND,
+ VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV, NULL, 0,
+ hvsock_on_channel_cb, new_sk);
+ if (ret != 0) {
+ new_hvsk->channel = NULL;
+ sock_put(new_sk);
+ goto out;
+ }
+ set_channel_pending_send_size(channel, HVSOCK_PKT_LEN(PAGE_SIZE));
+
+ new_sk->sk_state = SS_CONNECTED;
+ hvsock_insert_connected(new_hvsk);
+ hvsock_enqueue_accept(sk, new_sk);
+ sk->sk_state_change(sk);
+out:
+ mutex_unlock(&hvsock_mutex);
+ return ret;
+}
+
+static void hvsock_connect_timeout(struct work_struct *work)
+{
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+
+ hvsk = container_of(work, struct hvsock_sock, dwork.work);
+ sk = hvsock_to_sk(hvsk);
+
+ lock_sock(sk);
+ if ((sk->sk_state == SS_CONNECTING) &&
+ (sk->sk_shutdown != SHUTDOWN_MASK)) {
+ sk->sk_state = SS_UNCONNECTED;
+ sk->sk_err = ETIMEDOUT;
+ sk->sk_error_report(sk);
+ }
+ release_sock(sk);
+
+ sock_put(sk);
+}
+
+static int hvsock_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags)
+{
+ struct sockaddr_hv *remote_addr;
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+
+ DEFINE_WAIT(wait);
+ long timeout;
+
+ int ret = 0;
+
+ sk = sock->sk;
+ hvsk = sk_to_hvsock(sk);
+
+ lock_sock(sk);
+
+ switch (sock->state) {
+ case SS_CONNECTED:
+ ret = -EISCONN;
+ goto out;
+ case SS_DISCONNECTING:
+ ret = -EINVAL;
+ goto out;
+ case SS_CONNECTING:
+ /* This continues on so we can move sock into the SS_CONNECTED
+ * state once the connection has completed (at which point err
+ * will be set to zero also). Otherwise, we will either wait
+ * for the connection or return -EALREADY should this be a
+ * non-blocking call.
+ */
+ ret = -EALREADY;
+ break;
+ default:
+ if ((sk->sk_state == SS_LISTEN) ||
+ hvsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Set the remote address that we are connecting to. */
+ memcpy(&hvsk->remote_addr, remote_addr,
+ sizeof(hvsk->remote_addr));
+
+ ret = hvsock_auto_bind(hvsk);
+ if (ret)
+ goto out;
+
+ sk->sk_state = SS_CONNECTING;
+
+ ret = vmbus_send_tl_connect_request(
+ &hvsk->local_addr.shv_service_id,
+ &hvsk->remote_addr.shv_service_id);
+ if (ret < 0)
+ goto out;
+
+ /* Mark sock as connecting and set the error code to in
+ * progress in case this is a non-blocking connect.
+ */
+ sock->state = SS_CONNECTING;
+ ret = -EINPROGRESS;
+ }
+
+ /* The receive path will handle all communication until we are able to
+ * enter the connected state. Here we wait for the connection to be
+ * completed or a notification of an error.
+ */
+ timeout = 30 * HZ;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+ while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+ if (flags & O_NONBLOCK) {
+ /* If we're not going to block, we schedule a timeout
+ * function to generate a timeout on the connection
+ * attempt, in case the peer doesn't respond in a
+ * timely manner. We hold on to the socket until the
+ * timeout fires.
+ */
+ sock_hold(sk);
+ INIT_DELAYED_WORK(&hvsk->dwork,
+ hvsock_connect_timeout);
+ schedule_delayed_work(&hvsk->dwork, timeout);
+
+ /* Skip ahead to preserve error code set above. */
+ goto out_wait;
+ }
+
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
+
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeout);
+ goto out_wait_error;
+ } else if (timeout == 0) {
+ ret = -ETIMEDOUT;
+ goto out_wait_error;
+ }
+
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ }
+
+ ret = sk->sk_err ? -sk->sk_err : 0;
+
+out_wait_error:
+ if (ret < 0) {
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ }
+out_wait:
+ finish_wait(sk_sleep(sk), &wait);
+out:
+ release_sock(sk);
+ return ret;
+}
+
+static
+int hvsock_accept(struct socket *sock, struct socket *newsock, int flags)
+{
+ struct hvsock_sock *hvconnected;
+ struct sock *connected;
+ struct sock *listener;
+
+ DEFINE_WAIT(wait);
+ long timeout;
+
+ int ret = 0;
+
+ listener = sock->sk;
+
+ lock_sock(listener);
+
+ if (sock->type != SOCK_STREAM) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (listener->sk_state != SS_LISTEN) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Wait for children sockets to appear; these are the new sockets
+ * created upon connection establishment.
+ */
+ timeout = sock_sndtimeo(listener, flags & O_NONBLOCK);
+ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+
+ while ((connected = hvsock_dequeue_accept(listener)) == NULL &&
+ listener->sk_err == 0) {
+ release_sock(listener);
+ timeout = schedule_timeout(timeout);
+ lock_sock(listener);
+
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeout);
+ goto out_wait;
+ } else if (timeout == 0) {
+ ret = -EAGAIN;
+ goto out_wait;
+ }
+
+ prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
+ }
+
+ if (listener->sk_err)
+ ret = -listener->sk_err;
+
+ if (connected) {
+ lock_sock(connected);
+ hvconnected = sk_to_hvsock(connected);
+
+ /* If the listener socket has received an error, then we should
+ * reject this socket and return. Note that we simply mark the
+ * socket rejected, drop our reference, and let the cleanup
+ * function handle the cleanup; the fact that we found it in
+ * the listener's accept queue guarantees that the cleanup
+ * function hasn't run yet.
+ */
+ if (ret) {
+ release_sock(connected);
+ sock_put(connected);
+ goto out_wait;
+ }
+
+ newsock->state = SS_CONNECTED;
+ sock_graft(connected, newsock);
+ release_sock(connected);
+ sock_put(connected);
+ }
+
+out_wait:
+ finish_wait(sk_sleep(listener), &wait);
+out:
+ release_sock(listener);
+ return ret;
+}
+
+static int hvsock_listen(struct socket *sock, int backlog)
+{
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+ int ret = 0;
+
+ sk = sock->sk;
+ lock_sock(sk);
+
+ if (sock->type != SOCK_STREAM) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (sock->state != SS_UNCONNECTED) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (backlog <= 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ /* This is an artificial limit */
+ if (backlog > 128)
+ backlog = 128;
+
+ hvsk = sk_to_hvsock(sk);
+ if (!hvsock_addr_bound(&hvsk->local_addr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ sk->sk_ack_backlog = 0;
+ sk->sk_max_ack_backlog = backlog;
+ sk->sk_state = SS_LISTEN;
+out:
+ release_sock(sk);
+ return ret;
+}
+
+static int hvsock_setsockopt(struct socket *sock,
+ int level,
+ int optname,
+ char __user *optval, unsigned int optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static int hvsock_getsockopt(struct socket *sock,
+ int level,
+ int optname,
+ char __user *optval, int __user *optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static int hvsock_send_data(struct vmbus_channel *channel,
+ struct hvsock_sock *hvsk,
+ size_t to_write)
+{
+ hvsk->send.hdr.pkt_type = 1;
+ hvsk->send.hdr.data_size = to_write;
+ return vmbus_sendpacket(channel, &hvsk->send.hdr,
+ sizeof(hvsk->send.hdr) + to_write,
+ 0, VM_PKT_DATA_INBAND, 0);
+}
+
+static int hvsock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+ struct vmbus_channel *channel;
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+
+ size_t total_to_write = len;
+ size_t total_written = 0;
+
+ bool can_write;
+ long timeout;
+ int ret = 0;
+
+ DEFINE_WAIT(wait);
+
+ if (len == 0)
+ return -EINVAL;
+
+ if (msg->msg_flags & ~MSG_DONTWAIT) {
+ pr_err("%s: unsupported flags=0x%x\n", __func__,
+ msg->msg_flags);
+ return -EOPNOTSUPP;
+ }
+
+ sk = sock->sk;
+ hvsk = sk_to_hvsock(sk);
+ channel = hvsk->channel;
+
+ lock_sock(sk);
+
+ /* Callers should not provide a destination with stream sockets. */
+ if (msg->msg_namelen) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* Send data only if both sides are not shutdown in the direction. */
+ if (sk->sk_shutdown & SEND_SHUTDOWN ||
+ hvsk->peer_shutdown & RCV_SHUTDOWN) {
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (sk->sk_state != SS_CONNECTED ||
+ !hvsock_addr_bound(&hvsk->local_addr)) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ if (!hvsock_addr_bound(&hvsk->remote_addr)) {
+ ret = -EDESTADDRREQ;
+ goto out;
+ }
+
+ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+ while (total_to_write > 0) {
+ size_t to_write;
+
+ while (1) {
+ get_ringbuffer_rw_status(channel, NULL, &can_write);
+
+ if (can_write || sk->sk_err != 0 ||
+ (sk->sk_shutdown & SEND_SHUTDOWN) ||
+ (hvsk->peer_shutdown & RCV_SHUTDOWN))
+ break;
+
+ /* Don't wait for non-blocking sockets. */
+ if (timeout == 0) {
+ ret = -EAGAIN;
+ goto out_wait;
+ }
+
+ release_sock(sk);
+
+ timeout = schedule_timeout(timeout);
+
+ lock_sock(sk);
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeout);
+ goto out_wait;
+ } else if (timeout == 0) {
+ ret = -EAGAIN;
+ goto out_wait;
+ }
+
+ prepare_to_wait(sk_sleep(sk), &wait,
+ TASK_INTERRUPTIBLE);
+ }
+
+ /* These checks occur both as part of and after the loop
+ * conditional since we need to check before and after
+ * sleeping.
+ */
+ if (sk->sk_err) {
+ ret = -sk->sk_err;
+ goto out_wait;
+ } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+ (hvsk->peer_shutdown & RCV_SHUTDOWN)) {
+ ret = -EPIPE;
+ goto out_wait;
+ }
+
+ /* Note: that write will only write as many bytes as possible
+ * in the ringbuffer. It is the caller's responsibility to
+ * check how many bytes we actually wrote.
+ */
+ do {
+ to_write = min_t(size_t, HVSOCK_SND_BUF_SZ,
+ total_to_write);
+ ret = memcpy_from_msg(hvsk->send.buf, msg, to_write);
+ if (ret != 0)
+ goto out_wait;
+
+ ret = hvsock_send_data(channel, hvsk, to_write);
+ if (ret != 0)
+ goto out_wait;
+
+ total_written += to_write;
+ total_to_write -= to_write;
+ } while (total_to_write > 0);
+ }
+out_wait:
+ if (total_written > 0)
+ ret = total_written;
+
+ finish_wait(sk_sleep(sk), &wait);
+out:
+ release_sock(sk);
+
+ /* ret is a bigger-than-0 total_written or a negative err code. */
+ if (ret == 0) {
+ WARN(1, "unexpected return value of 0\n");
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int hvsock_recv_data(struct vmbus_channel *channel,
+ struct hvsock_sock *hvsk,
+ size_t *payload_len)
+{
+ u32 buffer_actual_len;
+ u64 dummy_req_id;
+ int ret;
+
+ ret = vmbus_recvpacket(channel, &hvsk->recv.hdr,
+ sizeof(hvsk->recv.hdr) + sizeof(hvsk->recv.buf),
+ &buffer_actual_len, &dummy_req_id);
+ if (ret != 0 || buffer_actual_len <= sizeof(hvsk->recv.hdr))
+ *payload_len = 0;
+ else
+ *payload_len = hvsk->recv.hdr.data_size;
+
+ return ret;
+}
+
+static int hvsock_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct vmbus_channel *channel;
+ struct hvsock_sock *hvsk;
+ struct sock *sk;
+
+ size_t total_to_read = len;
+ size_t copied;
+
+ bool can_read;
+ long timeout;
+
+ int ret = 0;
+
+ DEFINE_WAIT(wait);
+
+ sk = sock->sk;
+ hvsk = sk_to_hvsock(sk);
+ channel = hvsk->channel;
+
+ lock_sock(sk);
+
+ if (sk->sk_state != SS_CONNECTED) {
+ /* Recvmsg is supposed to return 0 if a peer performs an
+ * orderly shutdown. Differentiate between that case and when a
+ * peer has not connected or a local shutdown occurred with the
+ * SOCK_DONE flag.
+ */
+ if (sock_flag(sk, SOCK_DONE))
+ ret = 0;
+ else
+ ret = -ENOTCONN;
+
+ goto out;
+ }
+
+ /* We ignore msg->addr_name/len. */
+ if (flags & ~MSG_DONTWAIT) {
+ pr_err("%s: unsupported flags=0x%x\n", __func__, flags);
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* We don't check peer_shutdown flag here since peer may actually shut
+ * down, but there can be data in the queue that a local socket can
+ * receive.
+ */
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ ret = 0;
+ goto out;
+ }
+
+ /* It is valid on Linux to pass in a zero-length receive buffer. This
+ * is not an error. We may as well bail out now.
+ */
+ if (!len) {
+ ret = 0;
+ goto out;
+ }
+
+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ copied = 0;
+
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+ while (1) {
+ bool need_refill = hvsk->recv.data_len == 0;
+
+ if (need_refill)
+ get_ringbuffer_rw_status(channel, &can_read, NULL);
+ else
+ can_read = true;
+
+ if (can_read) {
+ size_t payload_len;
+
+ if (need_refill) {
+ ret = hvsock_recv_data(channel, hvsk,
+ &payload_len);
+ if (ret != 0 || payload_len == 0 ||
+ payload_len > HVSOCK_RCV_BUF_SZ) {
+ ret = -EIO;
+ goto out_wait;
+ }
+
+ hvsk->recv.data_len = payload_len;
+ hvsk->recv.data_offset = 0;
+ }
+
+ if (hvsk->recv.data_len <= total_to_read) {
+ ret = memcpy_to_msg(msg, hvsk->recv.buf +
+ hvsk->recv.data_offset,
+ hvsk->recv.data_len);
+ if (ret != 0)
+ break;
+
+ copied += hvsk->recv.data_len;
+ total_to_read -= hvsk->recv.data_len;
+ hvsk->recv.data_len = 0;
+ hvsk->recv.data_offset = 0;
+
+ if (total_to_read == 0)
+ break;
+ } else {
+ ret = memcpy_to_msg(msg, hvsk->recv.buf +
+ hvsk->recv.data_offset,
+ total_to_read);
+ if (ret != 0)
+ break;
+
+ copied += total_to_read;
+ hvsk->recv.data_len -= total_to_read;
+ hvsk->recv.data_offset += total_to_read;
+ total_to_read = 0;
+ break;
+ }
+ } else {
+ if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ (hvsk->peer_shutdown & SEND_SHUTDOWN))
+ break;
+
+ /* Don't wait for non-blocking sockets. */
+ if (timeout == 0) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ if (copied > 0)
+ break;
+
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
+
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeout);
+ break;
+ } else if (timeout == 0) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ prepare_to_wait(sk_sleep(sk), &wait,
+ TASK_INTERRUPTIBLE);
+ }
+ }
+
+ if (sk->sk_err)
+ ret = -sk->sk_err;
+ else if (sk->sk_shutdown & RCV_SHUTDOWN)
+ ret = 0;
+
+ if (copied > 0) {
+ ret = copied;
+
+ /* If the other side has shutdown for sending and there
+ * is nothing more to read, then we modify the socket
+ * state.
+ */
+ if ((hvsk->peer_shutdown & SEND_SHUTDOWN) &&
+ hvsk->recv.data_len == 0) {
+ get_ringbuffer_rw_status(channel, &can_read, NULL);
+ if (!can_read) {
+ sk->sk_state = SS_UNCONNECTED;
+ sock_set_flag(sk, SOCK_DONE);
+ sk->sk_state_change(sk);
+ }
+ }
+ }
+out_wait:
+ finish_wait(sk_sleep(sk), &wait);
+out:
+ release_sock(sk);
+ return ret;
+}
+
+static const struct proto_ops hvsock_ops = {
+ .family = PF_HYPERV,
+ .owner = THIS_MODULE,
+ .release = hvsock_release,
+ .bind = hvsock_bind,
+ .connect = hvsock_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = hvsock_accept,
+ .getname = hvsock_getname,
+ .poll = hvsock_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = hvsock_listen,
+ .shutdown = hvsock_shutdown,
+ .setsockopt = hvsock_setsockopt,
+ .getsockopt = hvsock_getsockopt,
+ .sendmsg = hvsock_sendmsg,
+ .recvmsg = hvsock_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static int hvsock_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
+{
+ if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (protocol != 0 && protocol != SHV_PROTO_RAW)
+ return -EPROTONOSUPPORT;
+
+ switch (sock->type) {
+ case SOCK_STREAM:
+ sock->ops = &hvsock_ops;
+ break;
+ default:
+ return -ESOCKTNOSUPPORT;
+ }
+
+ sock->state = SS_UNCONNECTED;
+
+ return __hvsock_create(net, sock, GFP_KERNEL, 0) ? 0 : -ENOMEM;
+}
+
+static const struct net_proto_family hvsock_family_ops = {
+ .family = AF_HYPERV,
+ .create = hvsock_create,
+ .owner = THIS_MODULE,
+};
+
+static int hvsock_probe(struct hv_device *hdev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ struct vmbus_channel *channel = hdev->channel;
+
+ /* We ignore the error return code to suppress the unnecessary
+ * error message in vmbus_probe(): on error the host will rescind
+ * the offer in 30 seconds and we can do cleanup at that time.
+ */
+ (void)hvsock_open_connection(channel);
+
+ return 0;
+}
+
+static int hvsock_remove(struct hv_device *hdev)
+{
+ struct vmbus_channel *channel = hdev->channel;
+
+ vmbus_close(channel);
+
+ return 0;
+}
+
+/* It's not really used. See vmbus_match() and vmbus_probe(). */
+static const struct hv_vmbus_device_id id_table[] = {
+ {},
+};
+
+static struct hv_driver hvsock_drv = {
+ .name = "hv_sock",
+ .hvsock = true,
+ .id_table = id_table,
+ .probe = hvsock_probe,
+ .remove = hvsock_remove,
+};
+
+static int __init hvsock_init(void)
+{
+ int ret;
+
+ /* Hyper-V Sockets requires at least VMBus 4.0 */
+ if ((vmbus_proto_version >> 16) < 4) {
+ pr_err("failed to load: VMBus 4 or later is required\n");
+ return -ENODEV;
+ }
+
+ ret = vmbus_driver_register(&hvsock_drv);
+ if (ret) {
+ pr_err("failed to register hv_sock driver\n");
+ return ret;
+ }
+
+ ret = proto_register(&hvsock_proto, 0);
+ if (ret) {
+ pr_err("failed to register protocol\n");
+ goto unreg_hvsock_drv;
+ }
+
+ ret = sock_register(&hvsock_family_ops);
+ if (ret) {
+ pr_err("failed to register address family\n");
+ goto unreg_proto;
+ }
+
+ return 0;
+
+unreg_proto:
+ proto_unregister(&hvsock_proto);
+unreg_hvsock_drv:
+ vmbus_driver_unregister(&hvsock_drv);
+ return ret;
+}
+
+static void __exit hvsock_exit(void)
+{
+ sock_unregister(AF_HYPERV);
+ proto_unregister(&hvsock_proto);
+ vmbus_driver_unregister(&hvsock_drv);
+}
+
+module_init(hvsock_init);
+module_exit(hvsock_exit);
+
+MODULE_DESCRIPTION("Hyper-V Sockets");
+MODULE_LICENSE("Dual BSD/GPL");
--
2.1.0
^ permalink raw reply related
* [PATCH v8 net-next 0/1] introduce Hyper-V VM Sockets(hv_sock)
From: Dexuan Cui @ 2016-04-08 1:36 UTC (permalink / raw)
To: gregkh, davem, netdev, linux-kernel, devel, olaf, apw, jasowang,
cavery, kys, haiyangz
Cc: joe
Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
mechanism between the host and the guest. It's somewhat like TCP over
VMBus, but the transportation layer (VMBus) is much simpler than IP.
With Hyper-V Sockets, applications between the host and the guest can talk
to each other directly by the traditional BSD-style socket APIs.
Hyper-V Sockets is only available on new Windows hosts, like Windows Server
2016. More info is in this article "Make your own integration services":
https://msdn.microsoft.com/en-us/virtualization/hyperv_on_windows/develop/make_mgmt_service
The patch implements the necessary support in the guest side by
introducing a new socket address family AF_HYPERV.
Note: the VMBus driver side's supporting patches have been in the mainline
tree.
I know the kernel has already had a VM Sockets driver (AF_VSOCK) based
on VMware VMCI (net/vmw_vsock/, drivers/misc/vmw_vmci), and KVM is
proposing AF_VSOCK of virtio version:
http://marc.info/?l=linux-netdev&m=145952064004765&w=2
However, though Hyper-V Sockets may seem conceptually similar to
AF_VOSCK, there are differences in the transportation layer, and IMO these
make the direct code reusing impractical:
1. In AF_VSOCK, the endpoint type is: <u32 ContextID, u32 Port>, but in
AF_HYPERV, the endpoint type is: <GUID VM_ID, GUID ServiceID>. Here GUID
is 128-bit.
2. AF_VSOCK supports SOCK_DGRAM, while AF_HYPERV doesn't.
3. AF_VSOCK supports some special sock opts, like SO_VM_SOCKETS_BUFFER_SIZE,
SO_VM_SOCKETS_BUFFER_MIN/MAX_SIZE and SO_VM_SOCKETS_CONNECT_TIMEOUT.
These are meaningless to AF_HYPERV.
4. Some AF_VSOCK's VMCI transportation ops are meanless to AF_HYPERV/VMBus,
like .notify_recv_init
.notify_recv_pre_block
.notify_recv_pre_dequeue
.notify_recv_post_dequeue
.notify_send_init
.notify_send_pre_block
.notify_send_pre_enqueue
.notify_send_post_enqueue
etc.
So I think we'd better introduce a new address family: AF_HYPERV.
Please review the patch.
Looking forward to your comments!
Changes since v1:
- updated "[PATCH 6/7] hvsock: introduce Hyper-V VM Sockets feature"
- added __init and __exit for the module init/exit functions
- net/hv_sock/Kconfig: "default m" -> "default m if HYPERV"
- MODULE_LICENSE: "Dual MIT/GPL" -> "Dual BSD/GPL"
Changes since v2:
- fixed various coding issue pointed out by David Miller
- fixed indentation issues
- removed pr_debug in net/hv_sock/af_hvsock.c
- used reverse-Chrismas-tree style for local variables.
- EXPORT_SYMBOL -> EXPORT_SYMBOL_GPL
Changes since v3:
- fixed a few coding issue pointed by Vitaly Kuznetsov and Dan Carpenter
- fixed the ret value in vmbus_recvpacket_hvsock on error
- fixed the style of multi-line comment: vmbus_get_hvsock_rw_status()
Changes since v4 (https://lkml.org/lkml/2015/7/28/404):
- addressed all the comments about V4.
- treat the hvsock offers/channels as special VMBus devices
- add a mechanism to pass hvsock events to the hvsock driver
- fixed some corner cases with proper locking when a connection is closed
- rebased to the latest Greg's tree
Changes since v5 (https://lkml.org/lkml/2015/12/24/103):
- addressed the coding style issues (Vitaly Kuznetsov & David Miller, thanks!)
- used a better coding for the per-channel rescind callback (Thank Vitaly!)
- avoided the introduction of new VMBUS driver APIs vmbus_sendpacket_hvsock()
and vmbus_recvpacket_hvsock() and used vmbus_sendpacket()/vmbus_recvpacket()
in the higher level (i.e., the vmsock driver). Thank Vitaly!
Changes since v6 (http://lkml.iu.edu/hypermail/linux/kernel/1601.3/01813.html)
- only a few minor changes of coding style and comments
Changes since v7
- a few minor changes of coding style: thanks, Joe Perches!
- added some lines of comments about GUID/UUID before the struct sockaddr_hv.
Dexuan Cui (1):
hv_sock: introduce Hyper-V Sockets
MAINTAINERS | 2 +
include/linux/hyperv.h | 16 +
include/linux/socket.h | 5 +-
include/net/af_hvsock.h | 51 ++
include/uapi/linux/hyperv.h | 25 +
net/Kconfig | 1 +
net/Makefile | 1 +
net/hv_sock/Kconfig | 10 +
net/hv_sock/Makefile | 3 +
net/hv_sock/af_hvsock.c | 1483 +++++++++++++++++++++++++++++++++++++++++++
10 files changed, 1595 insertions(+), 2 deletions(-)
create mode 100644 include/net/af_hvsock.h
create mode 100644 net/hv_sock/Kconfig
create mode 100644 net/hv_sock/Makefile
create mode 100644 net/hv_sock/af_hvsock.c
--
2.1.0
^ permalink raw reply
* Re: [PATCH v8 net-next 1/1] hv_sock: introduce Hyper-V Sockets
From: Joe Perches @ 2016-04-08 1:15 UTC (permalink / raw)
To: Dexuan Cui, gregkh, davem, netdev, linux-kernel, devel, olaf, apw,
jasowang, cavery, kys, haiyangz
Cc: vkuznets
In-Reply-To: <1460079411-31982-1-git-send-email-decui@microsoft.com>
On Thu, 2016-04-07 at 18:36 -0700, Dexuan Cui wrote:
> Hyper-V Sockets (hv_sock) supplies a byte-stream based communication
> mechanism between the host and the guest. It's somewhat like TCP over
> VMBus, but the transportation layer (VMBus) is much simpler than IP.
[]
> diff --git a/include/net/af_hvsock.h b/include/net/af_hvsock.h
[]
> +#define VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV (5 * PAGE_SIZE)
> +#define VMBUS_RINGBUFFER_SIZE_HVSOCK_SEND (5 * PAGE_SIZE)
> +
> +#define HVSOCK_RCV_BUF_SZ VMBUS_RINGBUFFER_SIZE_HVSOCK_RECV
> +#define HVSOCK_SND_BUF_SZ PAGE_SIZE
[]
> +struct hvsock_sock {
[]
> + struct {
> + struct vmpipe_proto_header hdr;
> + char buf[HVSOCK_SND_BUF_SZ];
> + } __packed send;
> +
> + struct {
> + struct vmpipe_proto_header hdr;
> + char buf[HVSOCK_RCV_BUF_SZ];
> + unsigned int data_len;
> + unsigned int data_offset;
> + } __packed recv;
> +};
These bufs are not page aligned and so can span pages.
Is there any value in allocating these bufs separately
as pages instead of as a kmalloc?
^ permalink raw reply
* [PATCH net-next] macvlan: Set nocarrier when lowerdev admin down
From: Debabrata Banerjee @ 2016-04-08 1:06 UTC (permalink / raw)
To: Nikolay Aleksandrov, Patrick McHardy, netdev; +Cc: Debabrata Banerjee
In-Reply-To: <57063EDD.3040406@cumulusnetworks.com>
When the lowerdev is set administratively down disable carrier on the
macvlan interface. This means operstate gets set properly instead of
still being "up".
Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com>
---
drivers/net/macvlan.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 2bcf1f3..16d0e56 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1525,10 +1525,14 @@ static int macvlan_device_event(struct notifier_block *unused,
switch (event) {
case NETDEV_UP:
+ case NETDEV_DOWN:
case NETDEV_CHANGE:
- list_for_each_entry(vlan, &port->vlans, list)
+ list_for_each_entry(vlan, &port->vlans, list) {
netif_stacked_transfer_operstate(vlan->lowerdev,
vlan->dev);
+ if (!(vlan->lowerdev->flags & IFF_UP))
+ netif_carrier_off(vlan->dev);
+ }
break;
case NETDEV_FEAT_CHANGE:
list_for_each_entry(vlan, &port->vlans, list) {
--
2.8.0
^ permalink raw reply related
* Re: [PATCH v2 net-next 00/10] allow bpf attach to tracepoints
From: David Miller @ 2016-04-08 1:04 UTC (permalink / raw)
To: ast
Cc: rostedt, peterz, mingo, daniel, acme, wangnan0, jbacik,
brendan.d.gregg, netdev, linux-kernel, kernel-team
In-Reply-To: <1459993411-2754735-1-git-send-email-ast@fb.com>
Series applied, thanks Alexei.
^ permalink raw reply
* Re: [PATCH net-next v2] macvlan: Support interface operstate properly
From: Banerjee, Debabrata @ 2016-04-08 1:03 UTC (permalink / raw)
To: Nikolay Aleksandrov, Patrick McHardy, netdev@vger.kernel.org
In-Reply-To: <57063EDD.3040406@cumulusnetworks.com>
On 4/7/16, 7:05 AM, "Nikolay Aleksandrov" <nikolay@cumulusnetworks.com> wrote:
>On 04/07/2016 12:36 AM, Debabrata Banerjee wrote:
>> Set appropriate macvlan interface status based on lower device and our
>> status. Can be up, down, or lowerlayerdown.
>What about dormant ?
>
>That being said I understand the need to switch to lowerlayerdown when the lower
>device is in "down", which is basically the most important change of this patch.
>The rest is already handled by link watch based on carrier state. By now people
>are used to having lowerlayerdown when there's no carrier, now it can also mean
>that the lower device has been brought admin down.
>
>Here's another interesting state:
>6: mac1@eth2: <NO-CARRIER,BROADCAST,MULTICAST,UP,LOWER_UP,DORMANT,M-DOWN> mtu 1500 qdisc noqueue state LOWERLAYERDOWN mode DEFAULT group default qlen 1000
>Prior to this patch the macvlan would stay in dormant state and it will also propagate
>to devices stacked on top of it.
The no carrier issue gave me an idea. What if we just set no carrier on the macvlan device?
This seems appropriate, but it doesn't directly address the dormant issue, although that could
be a separate patch. Will this cause any new issues? It's very simple, new patch incoming.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox