Netdev List
 help / color / mirror / Atom feed
* [RFC ethtool PATCH] ethtool: Add new commands to set VFs, VM queues and spoof checking
From: Greg Rose @ 2011-07-27 22:23 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 ethtool-copy.h |   11 ++++++++++-
 ethtool.c      |   40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/ethtool-copy.h b/ethtool-copy.h
index c7a18f7..0360070 100644
--- a/ethtool-copy.h
+++ b/ethtool-copy.h
@@ -33,12 +33,14 @@ struct ethtool_cmd {
 	__u8	mdio_support;
 	__u32	maxtxpkt;	/* Tx pkts before generating tx int */
 	__u32	maxrxpkt;	/* Rx pkts before generating rx int */
+	__u32	num_vfs;	/* Enable SR-IOV VFs */
+	__u32	num_vmqs;	/* Set number of queues for VMDq */
 	__u16	speed_hi;       /* The forced speed (upper
 				 * bits) in Mbps. Please use
 				 * ethtool_cmd_speed()/_set() to
 				 * access it */
 	__u8	eth_tp_mdix;
-	__u8	reserved2;
+	__u8	spoof_check;	/* Enable/Disable anti-spoofing */
 	__u32	lp_advertising;	/* Features the link partner advertises */
 	__u32	reserved[2];
 };
@@ -846,6 +848,13 @@ enum ethtool_sfeatures_retval_bits {
 #define AUTONEG_DISABLE		0x00
 #define AUTONEG_ENABLE		0x01
 
+/* Enable or disable MAC and/or VLAN spoofchecking.If this is
+ * set to enable, then depending on the controller capabilities
+ * MAC and/or VLAN spoofing will be turned on.
+ */
+#define SPOOFCHECK_DISABLE	0x00
+#define SPOOFCHECK_ENABLE	0x01
+
 /* Mode MDI or MDI-X */
 #define ETH_TP_MDI_INVALID	0x00
 #define ETH_TP_MDI		0x01
diff --git a/ethtool.c b/ethtool.c
index c189c78..0154f41 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -148,6 +148,9 @@ static struct option {
 		"		[ autoneg on|off ]\n"
 		"		[ advertise %x ]\n"
 		"		[ phyad %d ]\n"
+		"		[ vfs %d ]\n"
+		"		[ vmqueues %d]\n"
+		"		[ spoofcheck on|off ]\n"
 		"		[ xcvr internal|external ]\n"
 		"		[ wol p|u|m|b|a|g|s|d... ]\n"
 		"		[ sopass %x:%x:%x:%x:%x:%x ]\n"
@@ -362,6 +365,9 @@ static int port_wanted = -1;
 static int autoneg_wanted = -1;
 static int phyad_wanted = -1;
 static int xcvr_wanted = -1;
+static int num_vfs_wanted = -1;
+static int num_vmqs_wanted = -1;
+static u8 spoofchk_wanted = -1;
 static int advertising_wanted = -1;
 static int gset_changed = 0; /* did anything in GSET change? */
 static u32  wol_wanted = 0;
@@ -1074,6 +1080,34 @@ static void parse_cmdline(int argc, char **argp)
 					exit_bad_args();
 				phyad_wanted = get_int(argp[i], 0);
 				break;
+			} else if (!strcmp(argp[i], "vfs")) {
+				gset_changed = 1;
+				i += 1;
+				if (i >= argc)
+					exit_bad_args();
+				num_vfs_wanted = get_int(argp[i], 0);
+				break;
+			} else if (!strcmp(argp[i], "vmqueues")) {
+				gset_changed = 1;
+				i += 1;
+				if (i >= argc)
+					exit_bad_args();
+				num_vmqs_wanted = get_int(argp[i], 0);
+				break;
+			} else if (!strcmp(argp[i], "spoofcheck")) {
+				i += 1;
+				if (i >= argc)
+					exit_bad_args();
+				if (!strcmp(argp[i], "on")) {
+					gset_changed = 1;
+					spoofchk_wanted = SPOOFCHECK_ENABLE;
+				} else if (!strcmp(argp[i], "off")) {
+					gset_changed = 1;
+					spoofchk_wanted = SPOOFCHECK_DISABLE;
+				} else {
+					exit_bad_args();
+				}
+				break;
 			} else if (!strcmp(argp[i], "xcvr")) {
 				gset_changed = 1;
 				i += 1;
@@ -2447,6 +2481,12 @@ static int do_sset(int fd, struct ifreq *ifr)
 				ecmd.phy_address = phyad_wanted;
 			if (xcvr_wanted != -1)
 				ecmd.transceiver = xcvr_wanted;
+			if (num_vfs_wanted != -1)
+				ecmd.num_vfs = num_vfs_wanted;
+			if (num_vmqs_wanted != -1)
+				ecmd.num_vmqs = num_vmqs_wanted;
+			if (spoofchk_wanted != -1)
+				ecmd.spoof_check = spoofchk_wanted;
 			/* XXX If the user specified speed or duplex
 			 * then we should mask the advertised modes
 			 * accordingly.  For now, warn that we aren't


^ permalink raw reply related

* [RFC net-next PATCH 4/4] ixgbe: Add support for new ethtool settings
From: Greg Rose @ 2011-07-27 22:18 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher
In-Reply-To: <20110727221406.8435.44324.stgit@gitlad.jf.intel.com>

Adds ixgbe driver support for new ethtool settings for SR-IOV re-init,
number of VM queues and anti-spoofing ON/OFF switch.


Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 drivers/net/ixgbe/ixgbe.h         |    1 
 drivers/net/ixgbe/ixgbe_ethtool.c |   96 +++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_main.c    |    5 ++
 drivers/net/ixgbe/ixgbe_sriov.c   |    2 -
 drivers/net/ixgbe/ixgbe_sriov.h   |    3 -
 5 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index ed9836f..c826d7e 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -504,6 +504,7 @@ struct ixgbe_adapter {
 	struct hlist_head fdir_filter_list;
 	union ixgbe_atr_input fdir_mask;
 	int fdir_filter_count;
+	const struct ixgbe_info *saved_ii;
 };
 
 struct ixgbe_fdir_filter {
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index dc64955..4a8d3e5 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -38,6 +38,7 @@
 #include <linux/uaccess.h>
 
 #include "ixgbe.h"
+#include "ixgbe_sriov.h"
 
 
 #define IXGBE_ALL_RAR_ENTRIES 16
@@ -314,6 +315,67 @@ static int ixgbe_get_settings(struct net_device *netdev,
 	return 0;
 }
 
+static int ixgbe_reinit_sriov(struct net_device *netdev, int new_vfs)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+	int i;
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		if (ixgbe_check_vf_assignment(adapter)) {
+			netdev_warn(netdev, "%s	",
+				    "reconfigure of SR-IOV VFs "
+				    "not supported while VFs are "
+				    "assigned to guest VMs\n");
+			return -EBUSY;
+		}
+	}
+	if (netif_running(netdev)) {
+		netdev_warn(netdev, "%s",
+			    "Cannot reconfigure SR-IOV "
+			    "while interface is up\n"
+			    "Please bring the interface "
+			    "down first\n");
+		return -EBUSY;
+	}
+
+	ixgbe_clear_interrupt_scheme(adapter);
+
+	if (adapter->num_vfs)
+		ixgbe_disable_sriov(adapter);
+
+	adapter->num_vfs = (new_vfs > 63) ? 63 : new_vfs;
+
+	if (adapter->num_vfs) {
+		ixgbe_enable_sriov(adapter, adapter->saved_ii);
+		for (i = 0; i < adapter->num_vfs; i++)
+			ixgbe_vf_configure(pdev, (i | 0x10000000));
+	}
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		adapter->flags &= ~(IXGBE_FLAG_RSS_ENABLED |
+				    IXGBE_FLAG_DCB_ENABLED);
+		netdev->features &= ~NETIF_F_RXHASH;
+	} else {
+		adapter->flags |= IXGBE_FLAG_RSS_ENABLED;
+		netdev->features |= NETIF_F_RXHASH;
+	}
+
+	err = ixgbe_init_interrupt_scheme(adapter);
+	/*
+	 * If we can't init some sort of interrupt scheme then the device
+	 * is hosed - just print a warning and bail.  Nothing will work
+	 * but at least we've put a message in the system log telling why.
+	 */
+	if (err)
+		e_dev_err("Cannot initialize interrupts for device\n");
+	else
+		ixgbe_reset(adapter);
+
+	return err;
+}
+
 static int ixgbe_set_settings(struct net_device *netdev,
                               struct ethtool_cmd *ecmd)
 {
@@ -322,6 +384,40 @@ static int ixgbe_set_settings(struct net_device *netdev,
 	u32 advertised, old;
 	s32 err = 0;
 
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		goto skip_sriov_checks;
+
+	if (ecmd->num_vfs != adapter->num_vfs) {
+		if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) {
+			err = ixgbe_reinit_sriov(netdev, ecmd->num_vfs);
+			if (err)
+				return err;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if ((ecmd->spoof_check == SPOOFCHECK_ENABLE)
+	    && !adapter->antispoofing_enabled) {
+		int i;
+		hw->mac.ops.set_mac_anti_spoofing(hw, true,
+						  adapter->num_vfs);
+		for (i = 0; i < adapter->num_vfs; i++)
+			hw->mac.ops.set_vlan_anti_spoofing(hw, true, i);
+		adapter->antispoofing_enabled = true;
+	} else if ((ecmd->spoof_check == SPOOFCHECK_DISABLE)
+		 && adapter->antispoofing_enabled) {
+		int i;
+		hw->mac.ops.set_mac_anti_spoofing(hw, false,
+						  adapter->num_vfs);
+		for (i = 0; i < adapter->num_vfs; i++)
+			hw->mac.ops.set_vlan_anti_spoofing(hw, false, i);
+		adapter->antispoofing_enabled = false;
+	}
+
+skip_sriov_checks:
+
+
 	if ((hw->phy.media_type == ixgbe_media_type_copper) ||
 	    (hw->phy.multispeed_fiber)) {
 		/* 10000/copper and 1000/copper must autoneg
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 06ba9f2..fba7ff0 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -7206,6 +7206,9 @@ static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter,
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		return;
 
+	/* need to save this away in case SR-IOV is reconfigured */
+	adapter->saved_ii = ii;
+
 	/* The 82599 supports up to 64 VFs per physical function
 	 * but this implementation limits allocation to 63 so that
 	 * basic networking resources are still available to the
@@ -7589,7 +7592,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
 		e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
 		for (i = 0; i < adapter->num_vfs; i++)
-			ixgbe_vf_configuration(pdev, (i | 0x10000000));
+			ixgbe_vf_configure(pdev, (i | 0x10000000));
 	}
 
 	/* Inform firmware of driver version */
diff --git a/drivers/net/ixgbe/ixgbe_sriov.c b/drivers/net/ixgbe/ixgbe_sriov.c
index cdb2f0c..0da639e 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ixgbe/ixgbe_sriov.c
@@ -437,7 +437,7 @@ int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter)
 	return false;
 }
 
-int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
+int ixgbe_vf_configure(struct pci_dev *pdev, unsigned int event_mask)
 {
 	unsigned char vf_mac_addr[6];
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
diff --git a/drivers/net/ixgbe/ixgbe_sriov.h b/drivers/net/ixgbe/ixgbe_sriov.h
index 2781847..f588bf5 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ixgbe/ixgbe_sriov.h
@@ -30,7 +30,7 @@
 
 void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
 void ixgbe_msg_task(struct ixgbe_adapter *adapter);
-int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
+int ixgbe_vf_configure(struct pci_dev *pdev, unsigned int event_mask);
 void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter);
 void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
 void ixgbe_dump_registers(struct ixgbe_adapter *adapter);
@@ -46,6 +46,5 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 			const struct ixgbe_info *ii);
 int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
 
-
 #endif /* _IXGBE_SRIOV_H_ */
 


^ permalink raw reply related

* [RFC net-next PATCH 3/4] ethtool: Add new set commands
From: Greg Rose @ 2011-07-27 22:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher
In-Reply-To: <20110727221406.8435.44324.stgit@gitlad.jf.intel.com>

Add new set commands to configure the number of SR-IOV VFs, the
number of VM queues and spoof checking on/off switch.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 include/linux/ethtool.h |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c6e427a..c4972ba 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -36,12 +36,14 @@ struct ethtool_cmd {
 	__u8	mdio_support;
 	__u32	maxtxpkt;	/* Tx pkts before generating tx int */
 	__u32	maxrxpkt;	/* Rx pkts before generating rx int */
+	__u32	num_vfs;	/* Enable SR-IOV VFs */
+	__u32	num_vmqs;	/* Set number of queues for VMDq */
 	__u16	speed_hi;       /* The forced speed (upper
 				 * bits) in Mbps. Please use
 				 * ethtool_cmd_speed()/_set() to
 				 * access it */
 	__u8	eth_tp_mdix;
-	__u8	reserved2;
+	__u8	spoof_check;	/* Enable/Disable anti-spoofing */
 	__u32	lp_advertising;	/* Features the link partner advertises */
 	__u32	reserved[2];
 };
@@ -1121,6 +1123,13 @@ struct ethtool_ops {
 #define AUTONEG_DISABLE		0x00
 #define AUTONEG_ENABLE		0x01
 
+/* Enable or disable MAC and/or VLAN spoofchecking.If this is
+ * set to enable, then depending on the controller capabilities
+ * MAC and/or VLAN spoofing will be turned on.
+ */
+#define SPOOFCHECK_DISABLE     0x00
+#define SPOOFCHECK_ENABLE      0x01
+
 /* Mode MDI or MDI-X */
 #define ETH_TP_MDI_INVALID	0x00
 #define ETH_TP_MDI		0x01


^ permalink raw reply related

* [RFC net-next PATCH 2/4] ixgbe: Reconfigure SR-IOV Init
From: Greg Rose @ 2011-07-27 22:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher
In-Reply-To: <20110727221406.8435.44324.stgit@gitlad.jf.intel.com>

Use the PCI device flag indicating if a VF is assigned to a guest VM
to guard against destroying VFs upon driver removal.  Implement
additional feature to detect if VFs already exist when the driver
is loaded and if so configure them and set the driver state to
SR-IOV enabled.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 drivers/net/ixgbe/ixgbe.h       |    1 
 drivers/net/ixgbe/ixgbe_main.c  |  104 ++-------------------
 drivers/net/ixgbe/ixgbe_sriov.c |  195 +++++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_sriov.h |    5 +
 drivers/net/ixgbe/ixgbe_type.h  |    4 +
 5 files changed, 213 insertions(+), 96 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index e04a8e4..ed9836f 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -120,6 +120,7 @@ struct vf_data_storage {
 	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
 	u16 pf_qos;
 	u16 tx_rate;
+	struct pci_dev *vfdev;
 };
 
 struct vf_macvlans {
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 1be6175..06ba9f2 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -160,41 +160,6 @@ MODULE_VERSION(DRV_VERSION);
 
 #define DEFAULT_DEBUG_LEVEL_SHIFT 3
 
-static inline void ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
-{
-	struct ixgbe_hw *hw = &adapter->hw;
-	u32 gcr;
-	u32 gpie;
-	u32 vmdctl;
-
-#ifdef CONFIG_PCI_IOV
-	/* disable iov and allow time for transactions to clear */
-	pci_disable_sriov(adapter->pdev);
-#endif
-
-	/* turn off device IOV mode */
-	gcr = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
-	gcr &= ~(IXGBE_GCR_EXT_SRIOV);
-	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr);
-	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
-	gpie &= ~IXGBE_GPIE_VTMODE_MASK;
-	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
-
-	/* set default pool back to 0 */
-	vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
-	vmdctl &= ~IXGBE_VT_CTL_POOL_MASK;
-	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
-
-	/* take a breather then clean up driver data */
-	msleep(100);
-
-	kfree(adapter->vfinfo);
-	adapter->vfinfo = NULL;
-
-	adapter->num_vfs = 0;
-	adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
-}
-
 static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
 {
 	if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
@@ -7237,11 +7202,8 @@ static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter,
 {
 #ifdef CONFIG_PCI_IOV
 	struct ixgbe_hw *hw = &adapter->hw;
-	int err;
-	int num_vf_macvlans, i;
-	struct vf_macvlans *mv_list;
 
-	if (hw->mac.type == ixgbe_mac_82598EB || !max_vfs)
+	if (hw->mac.type == ixgbe_mac_82598EB)
 		return;
 
 	/* The 82599 supports up to 64 VFs per physical function
@@ -7250,60 +7212,7 @@ static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter,
 	 * physical function
 	 */
 	adapter->num_vfs = (max_vfs > 63) ? 63 : max_vfs;
-	adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED;
-	err = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
-	if (err) {
-		e_err(probe, "Failed to enable PCI sriov: %d\n", err);
-		goto err_novfs;
-	}
-
-	num_vf_macvlans = hw->mac.num_rar_entries -
-		(IXGBE_MAX_PF_MACVLANS + 1 + adapter->num_vfs);
-
-	adapter->mv_list = mv_list = kcalloc(num_vf_macvlans,
-					     sizeof(struct vf_macvlans),
-					     GFP_KERNEL);
-	if (mv_list) {
-		/* Initialize list of VF macvlans */
-		INIT_LIST_HEAD(&adapter->vf_mvs.l);
-		for (i = 0; i < num_vf_macvlans; i++) {
-			mv_list->vf = -1;
-			mv_list->free = true;
-			mv_list->rar_entry = hw->mac.num_rar_entries -
-				(i + adapter->num_vfs + 1);
-			list_add(&mv_list->l, &adapter->vf_mvs.l);
-			mv_list++;
-		}
-	}
-
-	/* If call to enable VFs succeeded then allocate memory
-	 * for per VF control structures.
-	 */
-	adapter->vfinfo =
-		kcalloc(adapter->num_vfs,
-			sizeof(struct vf_data_storage), GFP_KERNEL);
-	if (adapter->vfinfo) {
-		/* Now that we're sure SR-IOV is enabled
-		 * and memory allocated set up the mailbox parameters
-		 */
-		ixgbe_init_mbx_params_pf(hw);
-		memcpy(&hw->mbx.ops, ii->mbx_ops,
-		       sizeof(hw->mbx.ops));
-
-		/* Disable RSC when in SR-IOV mode */
-		adapter->flags2 &= ~(IXGBE_FLAG2_RSC_CAPABLE |
-				     IXGBE_FLAG2_RSC_ENABLED);
-		return;
-	}
-
-	/* Oh oh */
-	e_err(probe, "Unable to allocate memory for VF Data Storage - "
-	      "SRIOV disabled\n");
-	pci_disable_sriov(adapter->pdev);
-
-err_novfs:
-	adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
-	adapter->num_vfs = 0;
+	ixgbe_enable_sriov(adapter, ii);
 #endif /* CONFIG_PCI_IOV */
 }
 
@@ -7752,8 +7661,13 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev)
 	if (netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(netdev);
 
-	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
-		ixgbe_disable_sriov(adapter);
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		if (!(ixgbe_check_vf_assignment(adapter)))
+			ixgbe_disable_sriov(adapter);
+		else
+			e_dev_warn("Unloading driver while VFs are assigned "
+				   "- VFs will not be deallocated\n");
+	}
 
 	ixgbe_clear_interrupt_scheme(adapter);
 
diff --git a/drivers/net/ixgbe/ixgbe_sriov.c b/drivers/net/ixgbe/ixgbe_sriov.c
index d99d01e..cdb2f0c 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ixgbe/ixgbe_sriov.c
@@ -43,6 +43,158 @@
 
 #include "ixgbe_sriov.h"
 
+static int ixgbe_find_enabled_vfs(struct ixgbe_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct pci_dev *pvfdev;
+	u16 vf_devfn = 0;
+	int device_id;
+	int vfs_found = 0;
+
+	switch (adapter->hw.mac.type) {
+		case ixgbe_mac_82599EB:
+			device_id = IXGBE_DEV_ID_82599_VF;
+			break;
+		case ixgbe_mac_X540:
+			device_id = IXGBE_DEV_ID_X540_VF;
+			break;
+		default:
+			device_id = 0;
+			break;
+	}
+
+	vf_devfn = pdev->devfn + 0x80;
+	pvfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID, device_id, NULL);
+	while (pvfdev) {
+		if (pvfdev->devfn == vf_devfn)
+			vfs_found++;
+		vf_devfn += 2;
+		pvfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID,
+					device_id, pvfdev);
+	}
+
+	return vfs_found;
+}
+
+void ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
+			 const struct ixgbe_info *ii)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err = 0;
+	int num_vf_macvlans, i;
+	struct vf_macvlans *mv_list;
+	int pre_existing_vfs = 0;
+
+	pre_existing_vfs = ixgbe_find_enabled_vfs(adapter);
+	if (!pre_existing_vfs && !adapter->num_vfs)
+		return;
+
+	/* If there are pre-existing VFs then we have to force
+ 	 * use of that many because they were not deleted the last
+ 	 * time someone removed the PF driver.  That would have
+ 	 * been because they were allocated to guest VMs and can't
+ 	 * be removed.  Go ahead and just re-enable the old amount.
+ 	 * If the user wants to change the number of VFs they can
+ 	 * use ethtool while making sure no VFs are allocated to
+ 	 * guest VMs... i.e. the right way.
+ 	 */
+	if (pre_existing_vfs)
+		adapter->num_vfs = pre_existing_vfs;
+	else
+		err = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
+	if (err) {
+		e_err(probe, "Failed to enable PCI sriov: %d\n", err);
+		goto err_novfs;
+	}
+	adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED;
+
+	e_info(probe, "SR-IOV enabled with %d VFs\n", adapter->num_vfs);
+
+	num_vf_macvlans = hw->mac.num_rar_entries -
+	(IXGBE_MAX_PF_MACVLANS + 1 + adapter->num_vfs);
+
+	adapter->mv_list = mv_list = kcalloc(num_vf_macvlans,
+					     sizeof(struct vf_macvlans),
+					     GFP_KERNEL);
+	if (mv_list) {
+		/* Initialize list of VF macvlans */
+		INIT_LIST_HEAD(&adapter->vf_mvs.l);
+		for (i = 0; i < num_vf_macvlans; i++) {
+			mv_list->vf = -1;
+			mv_list->free = true;
+			mv_list->rar_entry = hw->mac.num_rar_entries -
+				(i + adapter->num_vfs + 1);
+			list_add(&mv_list->l, &adapter->vf_mvs.l);
+			mv_list++;
+		}
+	}
+
+	/* If call to enable VFs succeeded then allocate memory
+	 * for per VF control structures.
+	 */
+	adapter->vfinfo =
+		kcalloc(adapter->num_vfs,
+			sizeof(struct vf_data_storage), GFP_KERNEL);
+	if (adapter->vfinfo) {
+		/* Now that we're sure SR-IOV is enabled
+		 * and memory allocated set up the mailbox parameters
+		 */
+		ixgbe_init_mbx_params_pf(hw);
+		memcpy(&hw->mbx.ops, ii->mbx_ops,
+		       sizeof(hw->mbx.ops));
+
+		/* Disable RSC when in SR-IOV mode */
+		adapter->flags2 &= ~(IXGBE_FLAG2_RSC_CAPABLE |
+				     IXGBE_FLAG2_RSC_ENABLED);
+		return;
+	}
+
+	/* Oh oh */
+	e_err(probe, "Unable to allocate memory for VF Data Storage - "
+	      "SRIOV disabled\n");
+	pci_disable_sriov(adapter->pdev);
+
+err_novfs:
+	adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+	adapter->num_vfs = 0;
+}
+
+void ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gcr;
+	u32 gpie;
+	u32 vmdctl;
+
+#ifdef CONFIG_PCI_IOV
+	/* disable iov and allow time for transactions to clear */
+	pci_disable_sriov(adapter->pdev);
+#endif
+
+	/* turn off device IOV mode */
+	gcr = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+	gcr &= ~(IXGBE_GCR_EXT_SRIOV);
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr);
+	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
+	gpie &= ~IXGBE_GPIE_VTMODE_MASK;
+	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+
+	/* set default pool back to 0 */
+	vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
+	vmdctl &= ~IXGBE_VT_CTL_POOL_MASK;
+	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
+
+	/* take a breather then clean up driver data */
+	msleep(100);
+
+	kfree(adapter->vfinfo);
+	kfree(adapter->mv_list);
+	adapter->vfinfo = NULL;
+
+	adapter->num_vfs = 0;
+	adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+}
+
 static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
 				   int entries, u16 *hash_list, u32 vf)
 {
@@ -273,12 +425,28 @@ static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
 	return 0;
 }
 
+int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter)
+{
+	int i;
+	for (i = 0; i < adapter->num_vfs; i++) {
+		if (adapter->vfinfo[i].vfdev->dev_flags &
+			PCI_DEV_FLAGS_ASSIGNED) {
+		return true;
+		}
+	}
+	return false;
+}
+
 int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
 {
 	unsigned char vf_mac_addr[6];
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	unsigned int vfn = (event_mask & 0x3f);
-
+	struct pci_dev *pvfdev;
+	unsigned int device_id;
+	u16 thisvf_devfn = (pdev->devfn + 0x80 + (vfn << 1)) |
+				(pdev->devfn & 1);
+ 
 	bool enable = ((event_mask & 0x10000000U) != 0);
 
 	if (enable) {
@@ -290,6 +458,31 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
 		 * for it later.
 		 */
 		memcpy(adapter->vfinfo[vfn].vf_mac_addresses, vf_mac_addr, 6);
+
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_82599EB:
+			device_id = IXGBE_DEV_ID_82599_VF;
+			break;
+		case ixgbe_mac_X540:
+			device_id = IXGBE_DEV_ID_X540_VF;
+			break;
+		default:
+			device_id = 0;
+			break;
+		}
+
+		pvfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID, device_id, NULL);
+		while (pvfdev) {
+			if (pvfdev->devfn == thisvf_devfn)
+				break;
+			pvfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID,
+						device_id, pvfdev);
+		}
+		if (pvfdev)
+			adapter->vfinfo[vfn].vfdev = pvfdev;
+		else
+			e_err(drv, "Couldn't find pci dev ptr for VF %4.4x\n",
+			      thisvf_devfn);
 	}
 
 	return 0;
diff --git a/drivers/net/ixgbe/ixgbe_sriov.h b/drivers/net/ixgbe/ixgbe_sriov.h
index 3417556..2781847 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ixgbe/ixgbe_sriov.h
@@ -41,6 +41,11 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
 int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 			    int vf, struct ifla_vf_info *ivi);
 void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
+void ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
+void ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
+			const struct ixgbe_info *ii);
+int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter);
+
 
 #endif /* _IXGBE_SRIOV_H_ */
 
diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h
index e0d970e..648cf15 100644
--- a/drivers/net/ixgbe/ixgbe_type.h
+++ b/drivers/net/ixgbe/ixgbe_type.h
@@ -65,6 +65,10 @@
 #define IXGBE_DEV_ID_82599_LS            0x154F
 #define IXGBE_DEV_ID_X540T               0x1528
 
+/* VF Device IDs */
+#define IXGBE_DEV_ID_82599_VF           0x10ED
+#define IXGBE_DEV_ID_X540_VF            0x1515
+
 /* General Registers */
 #define IXGBE_CTRL      0x00000
 #define IXGBE_STATUS    0x00008


^ permalink raw reply related

* [RFC net-next PATCH 1/4] pci: Add flag indicating device has been assigned by KVM
From: Greg Rose @ 2011-07-27 22:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher
In-Reply-To: <20110727221406.8435.44324.stgit@gitlad.jf.intel.com>

Device drivers that create and destroy SR-IOV virtual functions via
calls to pci_enable_sriov() and pci_disable_sriov can cause catastrophic
failures if they attempt to destroy VFs while they are assigned to
guest virtual machines.  By adding a flag for use by the KVM module
to indicate that a device is assigned a device driver can check that
flag and avoid destroying VFs while they are assigned and avoid system
failures.

Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
---

 include/linux/pci.h     |    2 ++
 virt/kvm/assigned-dev.c |    2 ++
 virt/kvm/iommu.c        |    4 ++++
 3 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2d29218..a297ca2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -174,6 +174,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
 	/* Device configuration is irrevocably lost if disabled into D3 */
 	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
+	/* Provide indication device is assigned by KVM */
+	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
 };
 
 enum pci_irq_reroute_variant {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 6cc4b97..f401de1 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -205,6 +205,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 	else
 		pci_restore_state(assigned_dev->dev);
 
+	assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+
 	pci_release_regions(assigned_dev->dev);
 	pci_disable_device(assigned_dev->dev);
 	pci_dev_put(assigned_dev->dev);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 62a9caf..cffc530 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -181,6 +181,8 @@ int kvm_assign_device(struct kvm *kvm,
 			goto out_unmap;
 	}
 
+	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+
 	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
 		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,
@@ -209,6 +211,8 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	iommu_detach_device(domain, &pdev->dev);
 
+	pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+
 	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
 		assigned_dev->host_segnr,
 		assigned_dev->host_busnr,


^ permalink raw reply related

* [RFC net-next PATCH 0/4] Add new settings for ethtool
From: Greg Rose @ 2011-07-27 22:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, bhutchings, jeffrey.t.kirsher

This series of patches implements several changes to the way SR-IOV
can be configured to allow for per port assignment of the number of
VFs.

In addition, patches 1/4 and 2/4 of this patch series adds a new flag
bit to the PCI device structure dev_flags that is used by the kvm
module to indicate when a PCI device is assigned to a guest virtual
machine (VM) and changes to the ixgbe driver to make use of this new
bit.  The advantage these first two patches provide is to prevent
destruction of virtual functions (VFs) by the physical function
(PF) driver when it is removed if the PF driver finds that any VFs
are still assigned to guest VMs.  This in turn prevents panics that
will result when the VF device disappears from the guest VM without
notification.

Patches 3/4 and 4/4 of this patch series implement new settings for
Ethtool that allow for reconfiguration of the number of VFs per PF
without resorting to use of the module parameter which is only capable
of setting a single number of VFs per PF, to set the number of VM
queues for devices that support it and an on/off switch for the
anti-spoofing feature.  A follow on patch for Ethtool that implements
these settings will be sent separately.

Patches 1/4 and 2/4 of this series could also be considered for
submission to the current net tree and also the stable tree since
they prevent catastrophic results when the PF driver is removed
while VFs are assigned to guest VMs.

---

Greg Rose (4):
      ixgbe: Add support for new ethtool settings
      ethtool: Add new set commands
      ixgbe: Reconfigure SR-IOV Init
      pci: Add flag indicating device has been assigned by KVM


 drivers/net/ixgbe/ixgbe.h         |    2 
 drivers/net/ixgbe/ixgbe_ethtool.c |   96 ++++++++++++++++++
 drivers/net/ixgbe/ixgbe_main.c    |  109 ++------------------
 drivers/net/ixgbe/ixgbe_sriov.c   |  197 +++++++++++++++++++++++++++++++++++++
 drivers/net/ixgbe/ixgbe_sriov.h   |    6 +
 drivers/net/ixgbe/ixgbe_type.h    |    4 +
 include/linux/ethtool.h           |   11 ++
 include/linux/pci.h               |    2 
 virt/kvm/assigned-dev.c           |    2 
 virt/kvm/iommu.c                  |    4 +
 10 files changed, 333 insertions(+), 100 deletions(-)

-- 
Signed-off-by: Greg Rose <gregory.v.rose@intel.com>

^ permalink raw reply

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Christoph Hellwig @ 2011-07-27 21:01 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Christoph Hellwig, Eric Dumazet, Tim Chen, Al Viro, David Miller,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <20110727205957.GC8006@one.firstfloor.org>

On Wed, Jul 27, 2011 at 10:59:57PM +0200, Andi Kleen wrote:
> > Btw, I wonder if you should micro-optimize things a bit further by
> > moving the unhashed checks from the deletion functions into the callers
> > and thus save a function call for each of them.
> 
> If the caller is in the same file modern gcc is able to do that automatically
> if you're lucky enough ("partial inlining")
> 
> I would not uglify the code for it.

Depending on how you look at it the code might actually be a tad
cleaner.  One of called functions is outside of inode.c.


^ permalink raw reply

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Andi Kleen @ 2011-07-27 20:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Eric Dumazet, Tim Chen, Al Viro, David Miller, Andi Kleen,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <20110727204415.GA13308@infradead.org>

> Btw, I wonder if you should micro-optimize things a bit further by
> moving the unhashed checks from the deletion functions into the callers
> and thus save a function call for each of them.

If the caller is in the same file modern gcc is able to do that automatically
if you're lucky enough ("partial inlining")

I would not uglify the code for it.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Christoph Hellwig @ 2011-07-27 20:44 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Tim Chen, Al Viro, David Miller, Andi Kleen,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <1311780065.2356.18.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Wed, Jul 27, 2011 at 05:21:05PM +0200, Eric Dumazet wrote:
> If I am not mistaken, we can add unlocked checks on the three hot spots.
> 
> After following patch, a close(socket(PF_INET, SOCK_DGRAM, 0)) pair on
> my dev machine takes ~3us instead of ~9us.
> 
> Maybe its better to split it in three patches, just let me know.

I think three patches would be a lot cleaner.

As for safety of the unlocked checks:

 - inode are either hashed when created or never, so that one looks
   fine.
 - same for the sb list.
 - the writeback list is a bit more dynamic as we move things around
   quite a bit.  But in additon to the inode_wb_list_del call from
   evict() it only ever gets remove in writeback_single_inode, which
   for a freeing inode can only be called from the callers of evict().

Btw, I wonder if you should micro-optimize things a bit further by
moving the unhashed checks from the deletion functions into the callers
and thus save a function call for each of them.


^ permalink raw reply

* Re: [PATCH net-next-2.6 v2] bonding: reduce noise during init
From: Andy Gospodarek @ 2011-07-27 20:09 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jay Vosburgh, Andy Gospodarek, netdev
In-Reply-To: <1311727227.15386.40.camel@Joe-Laptop>

On Tue, Jul 26, 2011 at 05:40:27PM -0700, Joe Perches wrote:
> On Tue, 2011-07-26 at 17:37 -0700, Jay Vosburgh wrote:
> > Joe Perches <joe@perches.com> wrote:
> > >I'd prefer you don't separate the format string
> > >into multiple pieces.
> > Why not?  To me, it looks easier to read split into sections
> > that don't wrap lines.
> 
> Harder to grep for a dmesg and the
> defect rate of these split formats is
> typically higher than single strings
> because of bad spacing between string
> segments.
> 

I noticed that you took some time back in late 2009 to 'consolidate' the
split format-strings present in the bonding driver at the time and I've
decided I'm fine to leave them the way they are.  The main point of my
patch was to change the output and I would like to get that included.
Here is my updated patch...


Subject: [PATCH net-next-2.6 v2] bonding: reduce noise during init

Many are using sysfs to configure bonding rather than module options, so
there is no need for bonding to throw this warning in normal cases.

Keep the message around when debugging is enabled as it might be useful
for someone desperate enough to enable debugging, but eliminate it
otherwise.

Signed-off-by: Andy Gospodarek <andy@greyhouse.net>

---
 drivers/net/bonding/bond_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 61265f7..b37c602 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4745,7 +4745,7 @@ static int bond_check_params(struct bond_params *params)
 		/* miimon and arp_interval not set, we need one so things
 		 * work as expected, see bonding.txt for details
 		 */
-		pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
+		pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n");
 	}
 
 	if (primary && !USES_PRIMARY(bond_mode)) {
-- 
1.7.4.4




^ permalink raw reply related

* Re: IPv6: autoconfiguration and suspend/resume or link down/up
From: Dan Williams @ 2011-07-27 19:48 UTC (permalink / raw)
  To: David Miller; +Cc: jbohac, netdev, herbert, shemminger
In-Reply-To: <20110722.010628.1678943945721626312.davem@davemloft.net>

On Fri, 2011-07-22 at 01:06 -0700, David Miller wrote:
> From: Jiri Bohac <jbohac@suse.cz>
> Date: Tue, 19 Jul 2011 20:02:53 +0200
> 
> > When the cable is unplugged and plugged in again, we already get
> > notified through linkwatch -> netdev_state_change ->
> >   -> call_netdevice_notifiers(NETDEV_CHANGE, ...)
> > However, if the device has already been autoconfigured,
> > addrconf_notify() only handles this event by printing a
> > message.
> > 
> > So my idea was to:
> > - handle link up/down in addrconf_notify() similarly to
> >   NETDEV_UP/NETDEV_DOWN
> > 
> > - on suspend, faking a link down event; on resume, faking a link up event
> >   (or better, having a special event type for suspend/resume)
> > 
> > This would cause autoconfiguration to be restarted on resume as
> > well as cable plug/unplug, solving both the above problems.
> > 
> > Or do we want to completely rely on userspace tools
> > (networkmanager/ifplug) and expect them to do NETDEV_DOWN on
> > unplug/suspend and NETDEV_UP on plug/resume?
> > 
> > Any thoughts?
> 
> This is an oft-reocurring discussion, what to do on link up/down
> events wrt. all sorts of autoconfiguration.
> 
> My gut instinct is that on any link state change (physical link down,
> suspend) we should be prepared to renegotiate everything and anything
> since as you state we could be on a different physical network.
> 
> Suspend is even more important because while we were suspended we
> could be on the same network but the routers present and available
> might have changed completely.
> 
> In userspace I've noticed that we've grown an ecosystem of stupid
> tools and facilities, none (or very few) of which monitor link status
> in order to do handle things intelligently.  DHCP servers are a great
> example.  DHCP servers spit out broadcast discover packets before we
> even have a link up.
> 
> Filling this void is NetworkManager, which does listen on a netlink
> socket for device state changes, hotplug, etc.  And in response it
> explicitly tells various facilities to reprobe the network.
> 
> This is why I'm reluctant to give NetworkManager a hard time, because
> whilst it's a huge beast, it is at least trying to do the right thing.
> :-)

Oh, it's not that huge :)  What's huge is the networking problem space,
and thus when you build something that tries to be aware of much of
what's going on (which in the modern world you do really need) it's
going to need to talk to all sorts of different subsystems...  Such is
life :(  It's sufficiently module though that if you have no need to
modems, or PPP, or WiFi, or 802.1x, or WiMAX, you don't need to run
those parts.  Of course now we're adding bridging, VLAN, bonding, etc
support, so the amoeba gets larger.  If only people stopped adding
features to the kernel networking stack :)

Dan


^ permalink raw reply

* Re: strange behaviour of MC7700 with sierra_net
From: Dan Williams @ 2011-07-27 19:40 UTC (permalink / raw)
  To: Phil Sutter
  Cc: Elina Pasheva, dbrownell-Rn4VEauK+AKRv+LV9MX5uipxlwaOVQ5f,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, Rory Filer
In-Reply-To: <20110727141246.GC29616-2COwY06ShZsYt6otZODPyA@public.gmane.org>

On Wed, 2011-07-27 at 16:12 +0200, Phil Sutter wrote:
> Hello everyone,
> 
> I am testing the above module on linux-2.6.39.2 which should contain the
> latest changes to at least sierra_net.c. Although I am able to bring the
> module up and then can get traffic through it, initialisation seems to
> be a little picky.

I wonder if the firmware changed and Sierra hasn't updated the driver
yet.  I've got a usb306 which also uses sierra_net which doesn't have
this problem.  It looks like the driver tries to sync with the firmware
right after binding to the net interface, so that means at least a
couple of SYNC requests have already happened by the time you start
getting errors.  My first thought here is simply that the firmware on
the MC7700 doesn't work like the rest of the devices that sierra_net
expects, and for that you'd have to get Sierra to weigh in on the
required changes :(

Since the MC7700 is an LTE module it's 100% likely the firmware is
different, or at least significantly rebased, from the existing Sierra
HSPA/EVDO parts that use sierra_net and thus I wouldn't necessarily
expect it to work out of the box.

Dan

> After connecting the module via USB, I get the following final
> initialisation message:
> | Jul 27 14:01:15 (none) user.info kernel: [166371.982356] sierra_net 1-1:1.7: wwan0: register 'sierra_net' at usb-0000:00:08.2-1, Sierra Wireless USB-to-WWAN Modem, 4a:82:22:b9:05:07
> 
> Doing nothing (successfully), the driver starts printing errors after a
> while:
> | Jul 27 14:02:15 (none) user.err kernel: [166432.201461] sierra_net 1-1:1.7: wwan0: Submit SYNC failed -32
> | Jul 27 14:02:15 (none) user.err kernel: [166432.201609] sierra_net 1-1:1.7: wwan0: Send SYNC failed, status -32
> | Jul 27 14:02:15 (none) user.err kernel: [166432.205446] sierra_net 1-1:1.7: wwan0: Submit SYNC failed -32
> | Jul 27 14:02:15 (none) user.err kernel: [166432.205590] sierra_net 1-1:1.7: wwan0: Send SYNC failed, status -32
> 
> The above messages are the first ones to appear (so there's a delay of
> 60 seconds in between), and they repeat each 2 seconds from then on.
> Depending on how I continue at that point, results vary:
> 
> a) 'ip link set wwan0 up': setting the interface up stops the above error
>    messages from being printed. This worked every time I tried.
> 
> b) Sending 'ATZ' on the control-tty: this makes the error-messages
>    disappear for about 6 seconds, so two iterations of the sync-timer
>    seem to succeed. When I then try to continue initialisation, I usually
>    get to 'AT+C' (for AT+CPIN='1234'), then the control-tty dies (neither
>    echoing of typed characters, nor feedback from the modem printed).
> 
> Trying a) from the state after b) indeed makes the error-messages go
> away, but the tty stays dead until I power-cycle the module. Needless to
> say, without the control-tty the module is completely useless.
> 
> My first thought was that the driver shouldn't try to SYNC while the
> interface being down, but apparently sierra_net_send_sync() doesn't get
> called anymore after setting the interface up.
> 
> So in order to prevent the module from dieing, I need to up the
> interface before initialisation via AT-interface.
> 
> Another interesting aspect: the above error stays gone after the
> interface has been upped once, even if it's brought down right
> afterwards without doing anything else. OK, not completely - there needs
> to be a little delay in which the interface stays up, but a tenth of a
> second was enough.
> 
> What else can I do to track this problem down further? What additional
> information do you need from me? Any advice is highly appreciated, of
> course!
> 
> Greetings, Phil
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* pull request: wireless-next-2.6 2011-07-27
From: John W. Linville @ 2011-07-27 18:49 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Dave,

Here is a handful of fixes intended for 3.1.  This includes a
user-visible typo fix, a fix for a user after free in the new pn533
NFC driver, a cfg80211 fix for a possible NULL pointer dereference,
a fix for an invalid memory access in b43, and another b43 fix for
a memory corruption problem.

On top of that b43 memory corruption fix, there is a patch to remove
BROKEN from the B43_BCMA Kconfig entry, which is key to enabling
support for some of the more modern Broadcom wireless hardware.
I'm sure the Rafał (and a number of others) would love to see that
merged while the 3.1 merge window is still open as well.

Please let me know if there are problems...

Thanks!

John

---

The following changes since commit 41bf37117b47fc5ce2aae91f6a108e7e42e0b046:

  Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6 into for-davem (2011-07-22 17:51:16 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-next-2.6.git for-davem

Dan Carpenter (1):
      NFC: pn533: use after free in pn533_disconnect()

John W. Linville (1):
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless-next-2.6 into for-davem

Mihai Moldovan (1):
      wireless: fix a typo in ignore_reg_update

Pavel Roskin (1):
      b43: fix invalid memory access in b43_ssb_remove()

Rafał Miłecki (2):
      b43: bus: fix memory corruption when setting driver's data
      b43: bcma: drop BROKEN

Sven Neumann (1):
      cfg80211: really ignore the regulatory request

 drivers/net/wireless/b43/Kconfig |    2 +-
 drivers/net/wireless/b43/bus.c   |    2 ++
 drivers/net/wireless/b43/main.c  |    5 +++--
 drivers/nfc/pn533.c              |    2 +-
 net/wireless/reg.c               |    7 ++++---
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig
index d2293dc..3cab843 100644
--- a/drivers/net/wireless/b43/Kconfig
+++ b/drivers/net/wireless/b43/Kconfig
@@ -28,7 +28,7 @@ config B43
 
 config B43_BCMA
 	bool "Support for BCMA bus"
-	depends on B43 && BCMA && BROKEN
+	depends on B43 && BCMA
 	default y
 
 config B43_SSB
diff --git a/drivers/net/wireless/b43/bus.c b/drivers/net/wireless/b43/bus.c
index 64c3f65..05f6c7b 100644
--- a/drivers/net/wireless/b43/bus.c
+++ b/drivers/net/wireless/b43/bus.c
@@ -244,10 +244,12 @@ void b43_bus_set_wldev(struct b43_bus_dev *dev, void *wldev)
 #ifdef CONFIG_B43_BCMA
 	case B43_BUS_BCMA:
 		bcma_set_drvdata(dev->bdev, wldev);
+		break;
 #endif
 #ifdef CONFIG_B43_SSB
 	case B43_BUS_SSB:
 		ssb_set_drvdata(dev->sdev, wldev);
+		break;
 #endif
 	}
 }
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 73fbf03..a92cde8 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -5350,6 +5350,7 @@ static void b43_ssb_remove(struct ssb_device *sdev)
 {
 	struct b43_wl *wl = ssb_get_devtypedata(sdev);
 	struct b43_wldev *wldev = ssb_get_drvdata(sdev);
+	struct b43_bus_dev *dev = wldev->dev;
 
 	/* We must cancel any work here before unregistering from ieee80211,
 	 * as the ieee80211 unreg will destroy the workqueue. */
@@ -5365,14 +5366,14 @@ static void b43_ssb_remove(struct ssb_device *sdev)
 		ieee80211_unregister_hw(wl->hw);
 	}
 
-	b43_one_core_detach(wldev->dev);
+	b43_one_core_detach(dev);
 
 	if (list_empty(&wl->devlist)) {
 		b43_leds_unregister(wl);
 		/* Last core on the chip unregistered.
 		 * We can destroy common struct b43_wl.
 		 */
-		b43_wireless_exit(wldev->dev, wl);
+		b43_wireless_exit(dev, wl);
 	}
 }
 
diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c
index 0372315..c77e054 100644
--- a/drivers/nfc/pn533.c
+++ b/drivers/nfc/pn533.c
@@ -1596,7 +1596,7 @@ static void pn533_disconnect(struct usb_interface *interface)
 	usb_free_urb(dev->out_urb);
 	kfree(dev);
 
-	nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected");
+	nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected");
 }
 
 static struct usb_driver pn533_driver = {
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 1ad0f39..02751db 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -903,7 +903,7 @@ static bool ignore_reg_update(struct wiphy *wiphy,
 	    initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
 	    !is_world_regdom(last_request->alpha2)) {
 		REG_DBG_PRINT("Ignoring regulatory request %s "
-			      "since the driver requires its own regulaotry "
+			      "since the driver requires its own regulatory "
 			      "domain to be set first",
 			      reg_initiator_name(initiator));
 		return true;
@@ -1125,12 +1125,13 @@ void wiphy_update_regulatory(struct wiphy *wiphy,
 	enum ieee80211_band band;
 
 	if (ignore_reg_update(wiphy, initiator))
-		goto out;
+		return;
+
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
 			handle_band(wiphy, band, initiator);
 	}
-out:
+
 	reg_process_beacons(wiphy);
 	reg_process_ht_flags(wiphy);
 	if (wiphy->reg_notifier)
-- 
John W. Linville		Someday the world will need a hero, and you
linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org			might be all we have.  Be ready.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH] vfs: avoid taking locks if inode not in lists
From: Andi Kleen @ 2011-07-27 17:12 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Tim Chen, Al Viro, David Miller, Andi Kleen,
	Matthew Wilcox, Anton Blanchard, npiggin, linux-kernel,
	linux-fsdevel, netdev
In-Reply-To: <1311780065.2356.18.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

> Before even taking locks, we can perform an unlocked test to check if
> inode can possibly be in the lists.
> 
> On a 2x4x2 machine, a close(socket()) pair can be 200% faster with these
> changes.

Great result! Seems simple and obvious to me.

-Andi

^ permalink raw reply

* [PATCH 1/1] tc: filter: fix default 'protocol all' on little-endian platforms
From: Florian Westphal @ 2011-07-27 16:47 UTC (permalink / raw)
  To: netdev; +Cc: Florian Westphal

when specifiying filters without 'protocol' keyword, tc will
default to 'protocol all'.

Unfortunately, this missed a byte-ordering conversion.
---
 tc/tc_filter.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tc/tc_filter.c b/tc/tc_filter.c
index 919c57c..4e55812 100644
--- a/tc/tc_filter.c
+++ b/tc/tc_filter.c
@@ -73,7 +73,7 @@ int tc_filter_modify(int cmd, unsigned flags, int argc, char **argv)
 	req.t.tcm_family = AF_UNSPEC;
 
 	if (cmd == RTM_NEWTFILTER && flags & NLM_F_CREATE)
-		protocol = ETH_P_ALL;
+		protocol = htons(ETH_P_ALL);
 
 	while (argc > 0) {
 		if (strcmp(*argv, "dev") == 0) {
-- 
1.7.3.4


^ permalink raw reply related

* Re: IP over 802.2 with LLC/SNAP
From: Rick Jones @ 2011-07-27 16:26 UTC (permalink / raw)
  To: Alan Cox; +Cc: Alan Ott, linux-kernel, linux-net, netdev
In-Reply-To: <20110727133209.0d9dd6c4@lxorguk.ukuu.org.uk>

On 07/27/2011 05:32 AM, Alan Cox wrote:
>> So the question is, does Linux support IP over 802.2 with LLC/SNAP? Is
>> there a sysfs/proc entry that I have to turn on to make this work (I
>> didn't find one)? I have the LLC2 module loaded, and I believe my packet
>> to be correct, since Windows recognizes it and since Wireshark doesn't
>> give any red flags on it. I've been unable to find anything about this
>> kind of thing in my searching.
>
> Linux supports LLC/SNAP and various things over it (IPX/Appletalk DDP
> etc) but not IP over it, as it's one of those standards bodies driven
> bogosities which nobody ever actually deployed.

Well...  Hewlett-Packard deployed it in the 80's and 90's in MPE and 
HP-UX, HP-UX because until the mid-ish 1990's MPE only spoke 802.2 not 
"Ethernet."  By the late 1990's I think it was gone from HP-UX.

rick jones

^ permalink raw reply

* [patch 04/11] [PATCH] af_iucv: cleanup - use iucv_sk(sk) early
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Ursula Braun
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-cleanup-use-iucv_sk-early.patch --]
[-- Type: text/plain, Size: 3567 bytes --]

From: Ursula Braun <ursula.braun@de.ibm.com>

Code cleanup making make use of local variable for struct iucv_sock.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 net/iucv/af_iucv.c |   44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -446,23 +446,25 @@ static void iucv_sock_init(struct sock *
 static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
 {
 	struct sock *sk;
+	struct iucv_sock *iucv;
 
 	sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
 	if (!sk)
 		return NULL;
+	iucv = iucv_sk(sk);
 
 	sock_init_data(sock, sk);
-	INIT_LIST_HEAD(&iucv_sk(sk)->accept_q);
-	spin_lock_init(&iucv_sk(sk)->accept_q_lock);
-	skb_queue_head_init(&iucv_sk(sk)->send_skb_q);
-	INIT_LIST_HEAD(&iucv_sk(sk)->message_q.list);
-	spin_lock_init(&iucv_sk(sk)->message_q.lock);
-	skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q);
-	iucv_sk(sk)->send_tag = 0;
-	iucv_sk(sk)->flags = 0;
-	iucv_sk(sk)->msglimit = IUCV_QUEUELEN_DEFAULT;
-	iucv_sk(sk)->path = NULL;
-	memset(&iucv_sk(sk)->src_user_id , 0, 32);
+	INIT_LIST_HEAD(&iucv->accept_q);
+	spin_lock_init(&iucv->accept_q_lock);
+	skb_queue_head_init(&iucv->send_skb_q);
+	INIT_LIST_HEAD(&iucv->message_q.list);
+	spin_lock_init(&iucv->message_q.lock);
+	skb_queue_head_init(&iucv->backlog_skb_q);
+	iucv->send_tag = 0;
+	iucv->flags = 0;
+	iucv->msglimit = IUCV_QUEUELEN_DEFAULT;
+	iucv->path = NULL;
+	memset(&iucv->src_user_id , 0, 32);
 
 	sk->sk_destruct = iucv_sock_destruct;
 	sk->sk_sndtimeo = IUCV_CONN_TIMEOUT;
@@ -669,7 +671,7 @@ static int iucv_sock_connect(struct sock
 {
 	struct sockaddr_iucv *sa = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
-	struct iucv_sock *iucv;
+	struct iucv_sock *iucv = iucv_sk(sk);
 	unsigned char user_data[16];
 	int err;
 
@@ -691,14 +693,13 @@ static int iucv_sock_connect(struct sock
 	lock_sock(sk);
 
 	/* Set the destination information */
-	memcpy(iucv_sk(sk)->dst_user_id, sa->siucv_user_id, 8);
-	memcpy(iucv_sk(sk)->dst_name, sa->siucv_name, 8);
+	memcpy(iucv->dst_user_id, sa->siucv_user_id, 8);
+	memcpy(iucv->dst_name, sa->siucv_name, 8);
 
 	high_nmcpy(user_data, sa->siucv_name);
-	low_nmcpy(user_data, iucv_sk(sk)->src_name);
+	low_nmcpy(user_data, iucv->src_name);
 	ASCEBC(user_data, sizeof(user_data));
 
-	iucv = iucv_sk(sk);
 	/* Create path. */
 	iucv->path = iucv_path_alloc(iucv->msglimit,
 				     IUCV_IPRMDATA, GFP_KERNEL);
@@ -836,20 +837,21 @@ static int iucv_sock_getname(struct sock
 {
 	struct sockaddr_iucv *siucv = (struct sockaddr_iucv *) addr;
 	struct sock *sk = sock->sk;
+	struct iucv_sock *iucv = iucv_sk(sk);
 
 	addr->sa_family = AF_IUCV;
 	*len = sizeof(struct sockaddr_iucv);
 
 	if (peer) {
-		memcpy(siucv->siucv_user_id, iucv_sk(sk)->dst_user_id, 8);
-		memcpy(siucv->siucv_name, &iucv_sk(sk)->dst_name, 8);
+		memcpy(siucv->siucv_user_id, iucv->dst_user_id, 8);
+		memcpy(siucv->siucv_name, iucv->dst_name, 8);
 	} else {
-		memcpy(siucv->siucv_user_id, iucv_sk(sk)->src_user_id, 8);
-		memcpy(siucv->siucv_name, iucv_sk(sk)->src_name, 8);
+		memcpy(siucv->siucv_user_id, iucv->src_user_id, 8);
+		memcpy(siucv->siucv_name, iucv->src_name, 8);
 	}
 	memset(&siucv->siucv_port, 0, sizeof(siucv->siucv_port));
 	memset(&siucv->siucv_addr, 0, sizeof(siucv->siucv_addr));
-	memset(siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
+	memset(&siucv->siucv_nodeid, 0, sizeof(siucv->siucv_nodeid));
 
 	return 0;
 }


^ permalink raw reply

* [patch 10/11] [PATCH] qeth: exploit asynchronous delivery of storage blocks
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qeth_cq.patch --]
[-- Type: text/plain, Size: 28255 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch exploits the QDIO support for asynchronous delivery of storage 
blocks for Hipersockets. The exploitation is not configured per default and
may be enabled via the function qeth_configure_cq.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 drivers/s390/net/qeth_core.h      |   28 +-
 drivers/s390/net/qeth_core_main.c |  495 ++++++++++++++++++++++++++++++++++----
 2 files changed, 480 insertions(+), 43 deletions(-)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -110,6 +110,10 @@ struct qeth_perf_stats {
 
 	unsigned int sc_dp_p;
 	unsigned int sc_p_dp;
+	/* qdio_cq_handler: number of times called, time spent in */
+	__u64 cq_start_time;
+	unsigned int cq_cnt;
+	unsigned int cq_time;
 	/* qdio_input_handler: number of times called, time spent in */
 	__u64 inbound_start_time;
 	unsigned int inbound_cnt;
@@ -376,6 +380,11 @@ enum qeth_qdio_buffer_states {
 	 * outbound: filled by driver; owned by hardware in order to be sent
 	 */
 	QETH_QDIO_BUF_PRIMED,
+	/*
+	 * inbound: not applicable
+	 * outbound: handled via transfer pending / completion queue
+	 */
+	QETH_QDIO_BUF_HANDLED_DELAYED,
 };
 
 enum qeth_qdio_info_states {
@@ -413,8 +422,11 @@ struct qeth_qdio_out_buffer {
 	atomic_t state;
 	int next_element_to_fill;
 	struct sk_buff_head skb_list;
-	struct list_head ctx_list;
 	int is_header[16];
+
+	struct qaob *aob;
+	struct qeth_qdio_out_q *q;
+	struct qeth_qdio_out_buffer *next_pending;
 };
 
 struct qeth_card;
@@ -427,7 +439,8 @@ enum qeth_out_q_states {
 
 struct qeth_qdio_out_q {
 	struct qdio_buffer qdio_bufs[QDIO_MAX_BUFFERS_PER_Q];
-	struct qeth_qdio_out_buffer bufs[QDIO_MAX_BUFFERS_PER_Q];
+	struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
+	struct qdio_outbuf_state *bufstates; /* convenience pointer */
 	int queue_no;
 	struct qeth_card *card;
 	atomic_t state;
@@ -448,7 +461,9 @@ struct qeth_qdio_out_q {
 struct qeth_qdio_info {
 	atomic_t state;
 	/* input */
+	int no_in_queues;
 	struct qeth_qdio_q *in_q;
+	struct qeth_qdio_q *c_q;
 	struct qeth_qdio_buffer_pool in_buf_pool;
 	struct qeth_qdio_buffer_pool init_pool;
 	int in_buf_size;
@@ -456,6 +471,7 @@ struct qeth_qdio_info {
 	/* output */
 	int no_out_queues;
 	struct qeth_qdio_out_q **out_qs;
+	struct qdio_outbuf_state *out_bufstates;
 
 	/* priority queueing */
 	int do_prio_queueing;
@@ -527,6 +543,12 @@ enum qeth_cmd_buffer_state {
 	BUF_STATE_PROCESSED,
 };
 
+enum qeth_cq {
+	QETH_CQ_DISABLED = 0,
+	QETH_CQ_ENABLED = 1,
+	QETH_CQ_NOTAVAILABLE = 2,
+};
+
 struct qeth_ipato {
 	int enabled;
 	int invert4;
@@ -651,6 +673,7 @@ struct qeth_card_options {
 	int rx_sg_cb;
 	enum qeth_ipa_isolation_modes isolation;
 	int sniffer;
+	enum qeth_cq cq;
 };
 
 /*
@@ -888,6 +911,7 @@ void qeth_dbf_longtext(debug_info_t *id,
 int qeth_core_ethtool_get_settings(struct net_device *, struct ethtool_cmd *);
 int qeth_set_access_ctrl_online(struct qeth_card *card);
 int qeth_hdr_chk_and_bounce(struct sk_buff *, int);
+int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 int qeth_query_ipassists(struct qeth_card *, enum qeth_prot_versions prot);
 
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -44,6 +44,7 @@ struct qeth_card_list_struct qeth_core_c
 EXPORT_SYMBOL_GPL(qeth_core_card_list);
 struct kmem_cache *qeth_core_header_cache;
 EXPORT_SYMBOL_GPL(qeth_core_header_cache);
+static struct kmem_cache *qeth_qdio_outbuf_cache;
 
 static struct device *qeth_core_root_dev;
 static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY;
@@ -56,6 +57,10 @@ static struct qeth_cmd_buffer *qeth_get_
 static void qeth_setup_ccw(struct qeth_channel *, unsigned char *, __u32);
 static void qeth_free_buffer_pool(struct qeth_card *);
 static int qeth_qdio_establish(struct qeth_card *);
+static void qeth_free_qdio_buffers(struct qeth_card *);
+static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
+		struct qeth_qdio_out_buffer *buf,
+		enum qeth_qdio_buffer_states newbufstate);
 
 
 static inline const char *qeth_get_cardname(struct qeth_card *card)
@@ -239,6 +244,150 @@ int qeth_realloc_buffer_pool(struct qeth
 }
 EXPORT_SYMBOL_GPL(qeth_realloc_buffer_pool);
 
+static inline int qeth_cq_init(struct qeth_card *card)
+{
+	int rc;
+
+	if (card->options.cq == QETH_CQ_ENABLED) {
+		QETH_DBF_TEXT(SETUP, 2, "cqinit");
+		memset(card->qdio.c_q->qdio_bufs, 0,
+		       QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer));
+		card->qdio.c_q->next_buf_to_init = 127;
+		rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT,
+			     card->qdio.no_in_queues - 1, 0,
+			     127);
+		if (rc) {
+			QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);
+			goto out;
+		}
+	}
+	rc = 0;
+out:
+	return rc;
+}
+
+static inline int qeth_alloc_cq(struct qeth_card *card)
+{
+	int rc;
+
+	if (card->options.cq == QETH_CQ_ENABLED) {
+		int i;
+		struct qdio_outbuf_state *outbuf_states;
+
+		QETH_DBF_TEXT(SETUP, 2, "cqon");
+		card->qdio.c_q = kzalloc(sizeof(struct qeth_qdio_q),
+					 GFP_KERNEL);
+		if (!card->qdio.c_q) {
+			rc = -1;
+			goto kmsg_out;
+		}
+		QETH_DBF_HEX(SETUP, 2, &card->qdio.c_q, sizeof(void *));
+
+		for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
+			card->qdio.c_q->bufs[i].buffer =
+				&card->qdio.c_q->qdio_bufs[i];
+		}
+
+		card->qdio.no_in_queues = 2;
+
+		card->qdio.out_bufstates = (struct qdio_outbuf_state *)
+			kzalloc(card->qdio.no_out_queues *
+				QDIO_MAX_BUFFERS_PER_Q *
+				sizeof(struct qdio_outbuf_state), GFP_KERNEL);
+		outbuf_states = card->qdio.out_bufstates;
+		if (outbuf_states == NULL) {
+			rc = -1;
+			goto free_cq_out;
+		}
+		for (i = 0; i < card->qdio.no_out_queues; ++i) {
+			card->qdio.out_qs[i]->bufstates = outbuf_states;
+			outbuf_states += QDIO_MAX_BUFFERS_PER_Q;
+		}
+	} else {
+		QETH_DBF_TEXT(SETUP, 2, "nocq");
+		card->qdio.c_q = NULL;
+		card->qdio.no_in_queues = 1;
+	}
+	QETH_DBF_TEXT_(SETUP, 2, "iqc%d", card->qdio.no_in_queues);
+	rc = 0;
+out:
+	return rc;
+free_cq_out:
+	kfree(card->qdio.c_q);
+	card->qdio.c_q = NULL;
+kmsg_out:
+	dev_err(&card->gdev->dev, "Failed to create completion queue\n");
+	goto out;
+}
+
+static inline void qeth_free_cq(struct qeth_card *card)
+{
+	if (card->qdio.c_q) {
+		--card->qdio.no_in_queues;
+		kfree(card->qdio.c_q);
+		card->qdio.c_q = NULL;
+	}
+	kfree(card->qdio.out_bufstates);
+	card->qdio.out_bufstates = NULL;
+}
+
+static inline void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q,
+	int bidx, int forced_cleanup)
+{
+	if (q->bufs[bidx]->next_pending != NULL) {
+		struct qeth_qdio_out_buffer *head = q->bufs[bidx];
+		struct qeth_qdio_out_buffer *c = q->bufs[bidx]->next_pending;
+
+		while (c) {
+			if (forced_cleanup ||
+			    atomic_read(&c->state) ==
+			      QETH_QDIO_BUF_HANDLED_DELAYED) {
+				struct qeth_qdio_out_buffer *f = c;
+				QETH_CARD_TEXT(f->q->card, 5, "fp");
+				QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f);
+				c = f->next_pending;
+				BUG_ON(head->next_pending != f);
+				head->next_pending = c;
+				kmem_cache_free(qeth_qdio_outbuf_cache, f);
+			} else {
+				head = c;
+				c = c->next_pending;
+			}
+
+		}
+	}
+}
+
+
+static inline void qeth_qdio_handle_aob(struct qeth_card *card,
+		unsigned long phys_aob_addr) {
+	struct qaob *aob;
+	struct qeth_qdio_out_buffer *buffer;
+
+	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
+	QETH_CARD_TEXT(card, 5, "haob");
+	QETH_CARD_TEXT_(card, 5, "%lx", phys_aob_addr);
+	buffer = (struct qeth_qdio_out_buffer *) aob->user1;
+	QETH_CARD_TEXT_(card, 5, "%lx", aob->user1);
+
+	BUG_ON(buffer == NULL);
+
+	buffer->aob = NULL;
+	qeth_clear_output_buffer(buffer->q, buffer,
+				QETH_QDIO_BUF_HANDLED_DELAYED);
+	/* from here on: do not touch buffer anymore */
+	qdio_release_aob(aob);
+}
+
+static inline int qeth_is_cq(struct qeth_card *card, unsigned int queue)
+{
+	return card->options.cq == QETH_CQ_ENABLED &&
+	    card->qdio.c_q != NULL &&
+	    queue != 0 &&
+	    queue == card->qdio.no_in_queues - 1;
+}
+
+
 static int qeth_issue_next_read(struct qeth_card *card)
 {
 	int rc;
@@ -681,6 +830,7 @@ EXPORT_SYMBOL_GPL(qeth_do_run_thread);
 void qeth_schedule_recovery(struct qeth_card *card)
 {
 	QETH_CARD_TEXT(card, 2, "startrec");
+	WARN_ON(1);
 	if (qeth_set_thread_start_bit(card, QETH_RECOVER_THREAD) == 0)
 		schedule_work(&card->kernel_thread_starter);
 }
@@ -884,7 +1034,8 @@ out:
 }
 
 static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
-		struct qeth_qdio_out_buffer *buf)
+		struct qeth_qdio_out_buffer *buf,
+		enum qeth_qdio_buffer_states newbufstate)
 {
 	int i;
 	struct sk_buff *skb;
@@ -912,21 +1063,36 @@ static void qeth_clear_output_buffer(str
 	buf->buffer->element[15].eflags = 0;
 	buf->buffer->element[15].sflags = 0;
 	buf->next_element_to_fill = 0;
-	atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
+	atomic_set(&buf->state, newbufstate);
+}
+
+static void qeth_clear_outq_buffers(struct qeth_qdio_out_q *q, int free)
+{
+	int j;
+
+	for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
+		if (!q->bufs[j])
+			continue;
+		qeth_cleanup_handled_pending(q, j, free);
+		qeth_clear_output_buffer(q, q->bufs[j], QETH_QDIO_BUF_EMPTY);
+		if (free) {
+			kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]);
+			q->bufs[j] = NULL;
+		}
+	}
 }
 
 void qeth_clear_qdio_buffers(struct qeth_card *card)
 {
-	int i, j;
+	int i;
 
 	QETH_CARD_TEXT(card, 2, "clearqdbf");
 	/* clear outbound buffers to free skbs */
-	for (i = 0; i < card->qdio.no_out_queues; ++i)
+	for (i = 0; i < card->qdio.no_out_queues; ++i) {
 		if (card->qdio.out_qs[i]) {
-			for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j)
-				qeth_clear_output_buffer(card->qdio.out_qs[i],
-						&card->qdio.out_qs[i]->bufs[j]);
+			qeth_clear_outq_buffers(card->qdio.out_qs[i], 0);
 		}
+	}
 }
 EXPORT_SYMBOL_GPL(qeth_clear_qdio_buffers);
 
@@ -945,11 +1111,14 @@ static void qeth_free_buffer_pool(struct
 
 static void qeth_free_qdio_buffers(struct qeth_card *card)
 {
-	int i, j;
+	int i;
 
 	if (atomic_xchg(&card->qdio.state, QETH_QDIO_UNINITIALIZED) ==
 		QETH_QDIO_UNINITIALIZED)
 		return;
+
+	qeth_free_cq(card);
+
 	kfree(card->qdio.in_q);
 	card->qdio.in_q = NULL;
 	/* inbound buffer pool */
@@ -957,9 +1126,7 @@ static void qeth_free_qdio_buffers(struc
 	/* free outbound qdio_qs */
 	if (card->qdio.out_qs) {
 		for (i = 0; i < card->qdio.no_out_queues; ++i) {
-			for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j)
-				qeth_clear_output_buffer(card->qdio.out_qs[i],
-						&card->qdio.out_qs[i]->bufs[j]);
+			qeth_clear_outq_buffers(card->qdio.out_qs[i], 1);
 			kfree(card->qdio.out_qs[i]);
 		}
 		kfree(card->qdio.out_qs);
@@ -1051,6 +1218,7 @@ static void qeth_set_intial_options(stru
 	card->options.performance_stats = 0;
 	card->options.rx_sg_cb = QETH_RX_SG_CB;
 	card->options.isolation = ISOLATION_MODE_NONE;
+	card->options.cq = QETH_CQ_DISABLED;
 }
 
 static int qeth_do_start_thread(struct qeth_card *card, unsigned long thread)
@@ -1180,6 +1348,7 @@ static int qeth_determine_card_type(stru
 			card->info.type = known_devices[i][QETH_DEV_MODEL_IND];
 			card->qdio.no_out_queues =
 				known_devices[i][QETH_QUEUE_NO_IND];
+			card->qdio.no_in_queues = 1;
 			card->info.is_multicast_different =
 				known_devices[i][QETH_MULTICAST_IND];
 			qeth_get_channel_path_desc(card);
@@ -2027,6 +2196,37 @@ static int qeth_ulp_setup(struct qeth_ca
 	return rc;
 }
 
+static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
+{
+	int rc;
+	struct qeth_qdio_out_buffer *newbuf;
+
+	rc = 0;
+	newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC);
+	if (!newbuf) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	newbuf->buffer = &q->qdio_bufs[bidx];
+	skb_queue_head_init(&newbuf->skb_list);
+	lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
+	newbuf->q = q;
+	newbuf->aob = NULL;
+	newbuf->next_pending = q->bufs[bidx];
+	atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
+	q->bufs[bidx] = newbuf;
+	if (q->bufstates) {
+		q->bufstates[bidx].user = newbuf;
+		QETH_CARD_TEXT_(q->card, 2, "nbs%d", bidx);
+		QETH_CARD_TEXT_(q->card, 2, "%lx", (long) newbuf);
+		QETH_CARD_TEXT_(q->card, 2, "%lx",
+				(long) newbuf->next_pending);
+	}
+out:
+	return rc;
+}
+
+
 static int qeth_alloc_qdio_buffers(struct qeth_card *card)
 {
 	int i, j;
@@ -2038,7 +2238,7 @@ static int qeth_alloc_qdio_buffers(struc
 		return 0;
 
 	card->qdio.in_q = kmalloc(sizeof(struct qeth_qdio_q),
-				  GFP_KERNEL);
+				   GFP_KERNEL);
 	if (!card->qdio.in_q)
 		goto out_nomem;
 	QETH_DBF_TEXT(SETUP, 2, "inq");
@@ -2051,6 +2251,7 @@ static int qeth_alloc_qdio_buffers(struc
 	/* inbound buffer pool */
 	if (qeth_alloc_buffer_pool(card))
 		goto out_freeinq;
+
 	/* outbound */
 	card->qdio.out_qs =
 		kmalloc(card->qdio.no_out_queues *
@@ -2068,21 +2269,30 @@ static int qeth_alloc_qdio_buffers(struc
 		card->qdio.out_qs[i]->queue_no = i;
 		/* give outbound qeth_qdio_buffers their qdio_buffers */
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
-			card->qdio.out_qs[i]->bufs[j].buffer =
-				&card->qdio.out_qs[i]->qdio_bufs[j];
-			skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j].
-					    skb_list);
-			lockdep_set_class(
-				&card->qdio.out_qs[i]->bufs[j].skb_list.lock,
-				&qdio_out_skb_queue_key);
-			INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list);
+			BUG_ON(card->qdio.out_qs[i]->bufs[j] != NULL);
+			if (qeth_init_qdio_out_buf(card->qdio.out_qs[i], j))
+				goto out_freeoutqbufs;
 		}
 	}
+
+	/* completion */
+	if (qeth_alloc_cq(card))
+		goto out_freeoutq;
+
 	return 0;
 
+out_freeoutqbufs:
+	while (j > 0) {
+		--j;
+		kmem_cache_free(qeth_qdio_outbuf_cache,
+				card->qdio.out_qs[i]->bufs[j]);
+		card->qdio.out_qs[i]->bufs[j] = NULL;
+	}
 out_freeoutq:
-	while (i > 0)
+	while (i > 0) {
 		kfree(card->qdio.out_qs[--i]);
+		qeth_clear_outq_buffers(card->qdio.out_qs[i], 1);
+	}
 	kfree(card->qdio.out_qs);
 	card->qdio.out_qs = NULL;
 out_freepool:
@@ -2399,13 +2609,21 @@ int qeth_init_qdio_queues(struct qeth_ca
 		QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc);
 		return rc;
 	}
+
+	/* completion */
+	rc = qeth_cq_init(card);
+	if (rc) {
+		return rc;
+	}
+
 	/* outbound queue */
 	for (i = 0; i < card->qdio.no_out_queues; ++i) {
 		memset(card->qdio.out_qs[i]->qdio_bufs, 0,
 		       QDIO_MAX_BUFFERS_PER_Q * sizeof(struct qdio_buffer));
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
 			qeth_clear_output_buffer(card->qdio.out_qs[i],
-					&card->qdio.out_qs[i]->bufs[j]);
+					card->qdio.out_qs[i]->bufs[j],
+					QETH_QDIO_BUF_EMPTY);
 		}
 		card->qdio.out_qs[i]->card = card;
 		card->qdio.out_qs[i]->next_buf_to_fill = 0;
@@ -2787,8 +3005,6 @@ void qeth_queue_input_buffer(struct qeth
 				qeth_get_micros() -
 				card->perf_stats.inbound_do_qdio_start_time;
 		if (rc) {
-			dev_warn(&card->gdev->dev,
-				"QDIO reported an error, rc=%i\n", rc);
 			QETH_CARD_TEXT(card, 2, "qinberr");
 		}
 		queue->next_buf_to_init = (queue->next_buf_to_init + count) %
@@ -2862,12 +3078,12 @@ static int qeth_switch_to_nonpacking_if_
 				queue->card->perf_stats.sc_p_dp++;
 			queue->do_pack = 0;
 			/* flush packing buffers */
-			buffer = &queue->bufs[queue->next_buf_to_fill];
+			buffer = queue->bufs[queue->next_buf_to_fill];
 			if ((atomic_read(&buffer->state) ==
 						QETH_QDIO_BUF_EMPTY) &&
 			    (buffer->next_element_to_fill > 0)) {
 				atomic_set(&buffer->state,
-						QETH_QDIO_BUF_PRIMED);
+					   QETH_QDIO_BUF_PRIMED);
 				flush_count++;
 				queue->next_buf_to_fill =
 					(queue->next_buf_to_fill + 1) %
@@ -2878,6 +3094,7 @@ static int qeth_switch_to_nonpacking_if_
 	return flush_count;
 }
 
+
 /*
  * Called to flush a packing buffer if no more pci flags are on the queue.
  * Checks if there is a packing buffer and prepares it to be flushed.
@@ -2887,7 +3104,7 @@ static int qeth_flush_buffers_on_no_pci(
 {
 	struct qeth_qdio_out_buffer *buffer;
 
-	buffer = &queue->bufs[queue->next_buf_to_fill];
+	buffer = queue->bufs[queue->next_buf_to_fill];
 	if ((atomic_read(&buffer->state) == QETH_QDIO_BUF_EMPTY) &&
 	   (buffer->next_element_to_fill > 0)) {
 		/* it's a packing buffer */
@@ -2908,10 +3125,14 @@ static void qeth_flush_buffers(struct qe
 	unsigned int qdio_flags;
 
 	for (i = index; i < index + count; ++i) {
-		buf = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q];
+		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+		buf = queue->bufs[bidx];
 		buf->buffer->element[buf->next_element_to_fill - 1].eflags |=
 				SBAL_EFLAGS_LAST_ENTRY;
 
+		if (queue->bufstates)
+			queue->bufstates[bidx].user = buf;
+
 		if (queue->card->info.type == QETH_CARD_TYPE_IQD)
 			continue;
 
@@ -2963,6 +3184,9 @@ static void qeth_flush_buffers(struct qe
 		if (rc == QDIO_ERROR_SIGA_TARGET)
 			return;
 		QETH_CARD_TEXT(queue->card, 2, "flushbuf");
+		QETH_CARD_TEXT_(queue->card, 2, " q%d", queue->queue_no);
+		QETH_CARD_TEXT_(queue->card, 2, " idx%d", index);
+		QETH_CARD_TEXT_(queue->card, 2, " c%d", count);
 		QETH_CARD_TEXT_(queue->card, 2, " err%d", rc);
 
 		/* this must not happen under normal circumstances. if it
@@ -3024,14 +3248,120 @@ void qeth_qdio_start_poll(struct ccw_dev
 }
 EXPORT_SYMBOL_GPL(qeth_qdio_start_poll);
 
+int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
+{
+	int rc;
+
+	if (card->options.cq ==  QETH_CQ_NOTAVAILABLE) {
+		rc = -1;
+		goto out;
+	} else {
+		if (card->options.cq == cq) {
+			rc = 0;
+			goto out;
+		}
+
+		if (card->state != CARD_STATE_DOWN &&
+		    card->state != CARD_STATE_RECOVER) {
+			rc = -1;
+			goto out;
+		}
+
+		qeth_free_qdio_buffers(card);
+		card->options.cq = cq;
+		rc = 0;
+	}
+out:
+	return rc;
+
+}
+EXPORT_SYMBOL_GPL(qeth_configure_cq);
+
+
+static void qeth_qdio_cq_handler(struct qeth_card *card,
+		unsigned int qdio_err,
+		unsigned int queue, int first_element, int count) {
+	struct qeth_qdio_q *cq = card->qdio.c_q;
+	int i;
+	int rc;
+
+	if (!qeth_is_cq(card, queue))
+		goto out;
+
+	QETH_CARD_TEXT_(card, 5, "qcqhe%d", first_element);
+	QETH_CARD_TEXT_(card, 5, "qcqhc%d", count);
+	QETH_CARD_TEXT_(card, 5, "qcqherr%d", qdio_err);
+
+	if (qdio_err) {
+		netif_stop_queue(card->dev);
+		qeth_schedule_recovery(card);
+		goto out;
+	}
+
+	if (card->options.performance_stats) {
+		card->perf_stats.cq_cnt++;
+		card->perf_stats.cq_start_time = qeth_get_micros();
+	}
+
+	for (i = first_element; i < first_element + count; ++i) {
+		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+		struct qdio_buffer *buffer = &cq->qdio_bufs[bidx];
+		int e;
+
+		e = 0;
+		while (buffer->element[e].addr) {
+			unsigned long phys_aob_addr;
+
+			phys_aob_addr = (unsigned long) buffer->element[e].addr;
+			qeth_qdio_handle_aob(card, phys_aob_addr);
+			buffer->element[e].addr = NULL;
+			buffer->element[e].eflags = 0;
+			buffer->element[e].sflags = 0;
+			buffer->element[e].length = 0;
+
+			++e;
+		}
+
+		buffer->element[15].eflags = 0;
+		buffer->element[15].sflags = 0;
+	}
+	rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue,
+		    card->qdio.c_q->next_buf_to_init,
+		    count);
+	if (rc) {
+		dev_warn(&card->gdev->dev,
+			"QDIO reported an error, rc=%i\n", rc);
+		QETH_CARD_TEXT(card, 2, "qcqherr");
+	}
+	card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init
+				   + count) % QDIO_MAX_BUFFERS_PER_Q;
+
+	netif_wake_queue(card->dev);
+
+	if (card->options.performance_stats) {
+		int delta_t = qeth_get_micros();
+		delta_t -= card->perf_stats.cq_start_time;
+		card->perf_stats.cq_time += delta_t;
+	}
+out:
+	return;
+}
+
 void qeth_qdio_input_handler(struct ccw_device *ccwdev, unsigned int qdio_err,
-		unsigned int queue, int first_element, int count,
+		unsigned int queue, int first_elem, int count,
 		unsigned long card_ptr)
 {
 	struct qeth_card *card = (struct qeth_card *)card_ptr;
 
-	if (qdio_err)
+	QETH_CARD_TEXT_(card, 2, "qihq%d", queue);
+	QETH_CARD_TEXT_(card, 2, "qiec%d", qdio_err);
+
+	if (qeth_is_cq(card, queue))
+		qeth_qdio_cq_handler(card, qdio_err, queue, first_elem, count);
+	else if (qdio_err)
 		qeth_schedule_recovery(card);
+
+
 }
 EXPORT_SYMBOL_GPL(qeth_qdio_input_handler);
 
@@ -3057,9 +3387,26 @@ void qeth_qdio_output_handler(struct ccw
 			qeth_get_micros();
 	}
 	for (i = first_element; i < (first_element + count); ++i) {
-		buffer = &queue->bufs[i % QDIO_MAX_BUFFERS_PER_Q];
+		int bidx = i % QDIO_MAX_BUFFERS_PER_Q;
+		buffer = queue->bufs[bidx];
 		qeth_handle_send_error(card, buffer, qdio_error);
-		qeth_clear_output_buffer(queue, buffer);
+
+		if (queue->bufstates &&
+		    (queue->bufstates[bidx].flags &
+		     QDIO_OUTBUF_STATE_FLAG_PENDING) != 0) {
+			buffer->aob = queue->bufstates[bidx].aob;
+			QETH_CARD_TEXT_(queue->card, 5, "pel%d", bidx);
+			QETH_CARD_TEXT_(queue->card, 5, "aob");
+			QETH_CARD_TEXT_(queue->card, 5, "%lx",
+					virt_to_phys(buffer->aob));
+			BUG_ON(bidx < 0 || bidx >= QDIO_MAX_BUFFERS_PER_Q);
+			if (qeth_init_qdio_out_buf(queue, bidx))
+				qeth_schedule_recovery(card);
+		} else {
+			qeth_clear_output_buffer(queue, buffer,
+						QETH_QDIO_BUF_EMPTY);
+		}
+		qeth_cleanup_handled_pending(queue, bidx, 0);
 	}
 	atomic_sub(count, &queue->used_buffers);
 	/* check if we need to do something on this outbound queue */
@@ -3291,7 +3638,7 @@ int qeth_do_send_packet_fast(struct qeth
 			      QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
 	/* ... now we've got the queue */
 	index = queue->next_buf_to_fill;
-	buffer = &queue->bufs[queue->next_buf_to_fill];
+	buffer = queue->bufs[queue->next_buf_to_fill];
 	/*
 	 * check if buffer is empty to make sure that we do not 'overtake'
 	 * ourselves and try to fill a buffer that is already primed
@@ -3325,7 +3672,7 @@ int qeth_do_send_packet(struct qeth_card
 	while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
 			      QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
 	start_index = queue->next_buf_to_fill;
-	buffer = &queue->bufs[queue->next_buf_to_fill];
+	buffer = queue->bufs[queue->next_buf_to_fill];
 	/*
 	 * check if buffer is empty to make sure that we do not 'overtake'
 	 * ourselves and try to fill a buffer that is already primed
@@ -3347,7 +3694,7 @@ int qeth_do_send_packet(struct qeth_card
 			queue->next_buf_to_fill =
 				(queue->next_buf_to_fill + 1) %
 				QDIO_MAX_BUFFERS_PER_Q;
-			buffer = &queue->bufs[queue->next_buf_to_fill];
+			buffer = queue->bufs[queue->next_buf_to_fill];
 			/* we did a step forward, so check buffer state
 			 * again */
 			if (atomic_read(&buffer->state) !=
@@ -3925,6 +4272,20 @@ static void qeth_determine_capabilities(
 	if (rc)
 		QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc);
 
+	QETH_DBF_TEXT_(SETUP, 2, "qfmt%d", card->ssqd.qfmt);
+	QETH_DBF_TEXT_(SETUP, 2, "%d", card->ssqd.qdioac1);
+	QETH_DBF_TEXT_(SETUP, 2, "%d", card->ssqd.qdioac3);
+	QETH_DBF_TEXT_(SETUP, 2, "icnt%d", card->ssqd.icnt);
+	if (!((card->ssqd.qfmt != QDIO_IQDIO_QFMT) ||
+	    ((card->ssqd.qdioac1 & CHSC_AC1_INITIATE_INPUTQ) == 0) ||
+	    ((card->ssqd.qdioac3 & CHSC_AC3_FORMAT2_CQ_AVAILABLE) == 0))) {
+		dev_info(&card->gdev->dev,
+			"Completion Queueing supported\n");
+	} else {
+		card->options.cq = QETH_CQ_NOTAVAILABLE;
+	}
+
+
 out_offline:
 	if (ddev_offline == 1)
 		ccw_device_set_offline(ddev);
@@ -3932,6 +4293,24 @@ out:
 	return;
 }
 
+static inline void qeth_qdio_establish_cq(struct qeth_card *card,
+	struct qdio_buffer **in_sbal_ptrs,
+	void (**queue_start_poll) (struct ccw_device *, int, unsigned long)) {
+	int i;
+
+	if (card->options.cq == QETH_CQ_ENABLED) {
+		int offset = QDIO_MAX_BUFFERS_PER_Q *
+			     (card->qdio.no_in_queues - 1);
+		i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1);
+		for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
+			in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
+				virt_to_phys(card->qdio.c_q->bufs[i].buffer);
+		}
+
+		queue_start_poll[card->qdio.no_in_queues - 1] = NULL;
+	}
+}
+
 static int qeth_qdio_establish(struct qeth_card *card)
 {
 	struct qdio_initialize init_data;
@@ -3954,22 +4333,28 @@ static int qeth_qdio_establish(struct qe
 	qeth_create_qib_param_field(card, qib_param_field);
 	qeth_create_qib_param_field_blkt(card, qib_param_field);
 
-	in_sbal_ptrs = kmalloc(QDIO_MAX_BUFFERS_PER_Q * sizeof(void *),
+	in_sbal_ptrs = kmalloc(card->qdio.no_in_queues *
+			       QDIO_MAX_BUFFERS_PER_Q * sizeof(void *),
 			       GFP_KERNEL);
 	if (!in_sbal_ptrs) {
 		rc = -ENOMEM;
 		goto out_free_qib_param;
 	}
-	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i)
+	for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
 		in_sbal_ptrs[i] = (struct qdio_buffer *)
 			virt_to_phys(card->qdio.in_q->bufs[i].buffer);
+	}
 
-	queue_start_poll = kmalloc(sizeof(void *) * 1, GFP_KERNEL);
+	queue_start_poll = kzalloc(sizeof(void *) * card->qdio.no_in_queues,
+				   GFP_KERNEL);
 	if (!queue_start_poll) {
 		rc = -ENOMEM;
 		goto out_free_in_sbals;
 	}
-	queue_start_poll[0] = card->discipline.start_poll;
+	for (i = 0; i < card->qdio.no_in_queues; ++i)
+		queue_start_poll[i] = card->discipline.start_poll;
+
+	qeth_qdio_establish_cq(card, in_sbal_ptrs, queue_start_poll);
 
 	out_sbal_ptrs =
 		kmalloc(card->qdio.no_out_queues * QDIO_MAX_BUFFERS_PER_Q *
@@ -3981,7 +4366,7 @@ static int qeth_qdio_establish(struct qe
 	for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i)
 		for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j, ++k) {
 			out_sbal_ptrs[k] = (struct qdio_buffer *)virt_to_phys(
-				card->qdio.out_qs[i]->bufs[j].buffer);
+				card->qdio.out_qs[i]->bufs[j]->buffer);
 		}
 
 	memset(&init_data, 0, sizeof(struct qdio_initialize));
@@ -3989,7 +4374,7 @@ static int qeth_qdio_establish(struct qe
 	init_data.q_format               = qeth_get_qdio_q_format(card);
 	init_data.qib_param_field_format = 0;
 	init_data.qib_param_field        = qib_param_field;
-	init_data.no_input_qs            = 1;
+	init_data.no_input_qs            = card->qdio.no_in_queues;
 	init_data.no_output_qs           = card->qdio.no_out_queues;
 	init_data.input_handler          = card->discipline.input_handler;
 	init_data.output_handler         = card->discipline.output_handler;
@@ -3997,6 +4382,7 @@ static int qeth_qdio_establish(struct qe
 	init_data.int_parm               = (unsigned long) card;
 	init_data.input_sbal_addr_array  = (void **) in_sbal_ptrs;
 	init_data.output_sbal_addr_array = (void **) out_sbal_ptrs;
+	init_data.output_sbal_state_array = card->qdio.out_bufstates;
 	init_data.scan_threshold =
 		(card->info.type == QETH_CARD_TYPE_IQD) ? 8 : 32;
 
@@ -4013,6 +4399,17 @@ static int qeth_qdio_establish(struct qe
 			qdio_free(CARD_DDEV(card));
 		}
 	}
+
+	switch (card->options.cq) {
+	case QETH_CQ_ENABLED:
+		dev_info(&card->gdev->dev, "Completion Queue support enabled");
+		break;
+	case QETH_CQ_DISABLED:
+		dev_info(&card->gdev->dev, "Completion Queue support disabled");
+		break;
+	default:
+		break;
+	}
 out:
 	kfree(out_sbal_ptrs);
 out_free_queue_start_poll:
@@ -4191,6 +4588,8 @@ static inline int qeth_create_skb_frag(s
 		(*pskb)->truesize += data_len;
 		(*pfrag)++;
 	}
+
+
 	return 0;
 }
 
@@ -4664,6 +5063,8 @@ static struct {
 	{"tx do_QDIO count"},
 	{"tx csum"},
 	{"tx lin"},
+	{"cq handler count"},
+	{"cq handler time"}
 };
 
 int qeth_core_get_sset_count(struct net_device *dev, int stringset)
@@ -4722,6 +5123,8 @@ void qeth_core_get_ethtool_stats(struct
 	data[32] = card->perf_stats.outbound_do_qdio_cnt;
 	data[33] = card->perf_stats.tx_csum;
 	data[34] = card->perf_stats.tx_lin;
+	data[35] = card->perf_stats.cq_cnt;
+	data[36] = card->perf_stats.cq_time;
 }
 EXPORT_SYMBOL_GPL(qeth_core_get_ethtool_stats);
 
@@ -4880,7 +5283,16 @@ static int __init qeth_core_init(void)
 		goto slab_err;
 	}
 
+	qeth_qdio_outbuf_cache = kmem_cache_create("qeth_buf",
+			sizeof(struct qeth_qdio_out_buffer), 0, 0, NULL);
+	if (!qeth_qdio_outbuf_cache) {
+		rc = -ENOMEM;
+		goto cqslab_err;
+	}
+
 	return 0;
+cqslab_err:
+	kmem_cache_destroy(qeth_core_header_cache);
 slab_err:
 	root_device_unregister(qeth_core_root_dev);
 register_err:
@@ -4905,6 +5317,7 @@ static void __exit qeth_core_exit(void)
 			   &driver_attr_group);
 	ccwgroup_driver_unregister(&qeth_core_ccwgroup_driver);
 	ccw_driver_unregister(&qeth_ccw_driver);
+	kmem_cache_destroy(qeth_qdio_outbuf_cache);
 	kmem_cache_destroy(qeth_core_header_cache);
 	qeth_unregister_dbf_views();
 	pr_info("core functions removed\n");


^ permalink raw reply

* [patch 03/11] [PATCH] af_iucv: use loadable iucv interface
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-use-loadable-iucv-interface.patch --]
[-- Type: text/plain, Size: 8680 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

For future af_iucv extensions the module should be able to run in LPAR
mode too. For this we use the new dynamic loading iucv interface.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 net/iucv/af_iucv.c |  119 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 74 insertions(+), 45 deletions(-)

--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -42,6 +42,8 @@ static struct proto iucv_proto = {
 	.obj_size	= sizeof(struct iucv_sock),
 };
 
+static struct iucv_interface *pr_iucv;
+
 /* special AF_IUCV IPRM messages */
 static const u8 iprm_shutdown[8] =
 	{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
@@ -165,7 +167,7 @@ static int afiucv_pm_freeze(struct devic
 		case IUCV_CLOSING:
 		case IUCV_CONNECTED:
 			if (iucv->path) {
-				err = iucv_path_sever(iucv->path, NULL);
+				err = pr_iucv->path_sever(iucv->path, NULL);
 				iucv_path_free(iucv->path);
 				iucv->path = NULL;
 			}
@@ -229,7 +231,7 @@ static const struct dev_pm_ops afiucv_pm
 static struct device_driver af_iucv_driver = {
 	.owner = THIS_MODULE,
 	.name = "afiucv",
-	.bus  = &iucv_bus,
+	.bus  = NULL,
 	.pm   = &afiucv_pm_ops,
 };
 
@@ -412,7 +414,7 @@ static void iucv_sock_close(struct sock 
 			low_nmcpy(user_data, iucv->src_name);
 			high_nmcpy(user_data, iucv->dst_name);
 			ASCEBC(user_data, sizeof(user_data));
-			iucv_path_sever(iucv->path, user_data);
+			pr_iucv->path_sever(iucv->path, user_data);
 			iucv_path_free(iucv->path);
 			iucv->path = NULL;
 		}
@@ -704,8 +706,9 @@ static int iucv_sock_connect(struct sock
 		err = -ENOMEM;
 		goto done;
 	}
-	err = iucv_path_connect(iucv->path, &af_iucv_handler,
-				sa->siucv_user_id, NULL, user_data, sk);
+	err = pr_iucv->path_connect(iucv->path, &af_iucv_handler,
+				    sa->siucv_user_id, NULL, user_data,
+				    sk);
 	if (err) {
 		iucv_path_free(iucv->path);
 		iucv->path = NULL;
@@ -738,7 +741,7 @@ static int iucv_sock_connect(struct sock
 	}
 
 	if (err) {
-		iucv_path_sever(iucv->path, NULL);
+		pr_iucv->path_sever(iucv->path, NULL);
 		iucv_path_free(iucv->path);
 		iucv->path = NULL;
 	}
@@ -871,7 +874,7 @@ static int iucv_send_iprm(struct iucv_pa
 
 	memcpy(prmdata, (void *) skb->data, skb->len);
 	prmdata[7] = 0xff - (u8) skb->len;
-	return iucv_message_send(path, msg, IUCV_IPRMDATA, 0,
+	return pr_iucv->message_send(path, msg, IUCV_IPRMDATA, 0,
 				 (void *) prmdata, 8);
 }
 
@@ -999,13 +1002,13 @@ static int iucv_sock_sendmsg(struct kioc
 		/* this error should never happen since the
 		 * IUCV_IPRMDATA path flag is set... sever path */
 		if (err == 0x15) {
-			iucv_path_sever(iucv->path, NULL);
+			pr_iucv->path_sever(iucv->path, NULL);
 			skb_unlink(skb, &iucv->send_skb_q);
 			err = -EPIPE;
 			goto fail;
 		}
 	} else
-		err = iucv_message_send(iucv->path, &txmsg, 0, 0,
+		err = pr_iucv->message_send(iucv->path, &txmsg, 0, 0,
 					(void *) skb->data, skb->len);
 	if (err) {
 		if (err == 3) {
@@ -1095,8 +1098,9 @@ static void iucv_process_message(struct 
 			skb->len = 0;
 		}
 	} else {
-		rc = iucv_message_receive(path, msg, msg->flags & IUCV_IPRMDATA,
-					  skb->data, len, NULL);
+		rc = pr_iucv->message_receive(path, msg,
+					      msg->flags & IUCV_IPRMDATA,
+					      skb->data, len, NULL);
 		if (rc) {
 			kfree_skb(skb);
 			return;
@@ -1110,7 +1114,7 @@ static void iucv_process_message(struct 
 			kfree_skb(skb);
 			skb = NULL;
 			if (rc) {
-				iucv_path_sever(path, NULL);
+				pr_iucv->path_sever(path, NULL);
 				return;
 			}
 			skb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q);
@@ -1327,8 +1331,8 @@ static int iucv_sock_shutdown(struct soc
 	if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
 		txmsg.class = 0;
 		txmsg.tag = 0;
-		err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0,
-					(void *) iprm_shutdown, 8);
+		err = pr_iucv->message_send(iucv->path, &txmsg, IUCV_IPRMDATA,
+					0, (void *) iprm_shutdown, 8);
 		if (err) {
 			switch (err) {
 			case 1:
@@ -1345,7 +1349,7 @@ static int iucv_sock_shutdown(struct soc
 	}
 
 	if (how == RCV_SHUTDOWN || how == SHUTDOWN_MASK) {
-		err = iucv_path_quiesce(iucv_sk(sk)->path, NULL);
+		err = pr_iucv->path_quiesce(iucv->path, NULL);
 		if (err)
 			err = -ENOTCONN;
 
@@ -1372,7 +1376,7 @@ static int iucv_sock_release(struct sock
 
 	/* Unregister with IUCV base support */
 	if (iucv_sk(sk)->path) {
-		iucv_path_sever(iucv_sk(sk)->path, NULL);
+		pr_iucv->path_sever(iucv_sk(sk)->path, NULL);
 		iucv_path_free(iucv_sk(sk)->path);
 		iucv_sk(sk)->path = NULL;
 	}
@@ -1514,14 +1518,14 @@ static int iucv_callback_connreq(struct 
 	high_nmcpy(user_data, iucv->dst_name);
 	ASCEBC(user_data, sizeof(user_data));
 	if (sk->sk_state != IUCV_LISTEN) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		goto fail;
 	}
 
 	/* Check for backlog size */
 	if (sk_acceptq_is_full(sk)) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		goto fail;
 	}
@@ -1529,7 +1533,7 @@ static int iucv_callback_connreq(struct 
 	/* Create the new socket */
 	nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
 	if (!nsk) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		goto fail;
 	}
@@ -1553,9 +1557,9 @@ static int iucv_callback_connreq(struct 
 	/* set message limit for path based on msglimit of accepting socket */
 	niucv->msglimit = iucv->msglimit;
 	path->msglim = iucv->msglimit;
-	err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk);
+	err = pr_iucv->path_accept(path, &af_iucv_handler, nuser_data, nsk);
 	if (err) {
-		err = iucv_path_sever(path, user_data);
+		err = pr_iucv->path_sever(path, user_data);
 		iucv_path_free(path);
 		iucv_sock_kill(nsk);
 		goto fail;
@@ -1589,7 +1593,7 @@ static void iucv_callback_rx(struct iucv
 	int len;
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN) {
-		iucv_message_reject(path, msg);
+		pr_iucv->message_reject(path, msg);
 		return;
 	}
 
@@ -1718,6 +1722,41 @@ static const struct net_proto_family iuc
 	.create	= iucv_sock_create,
 };
 
+static int __init afiucv_iucv_init(void)
+{
+	int err;
+
+	err = pr_iucv->iucv_register(&af_iucv_handler, 0);
+	if (err)
+		goto out;
+	/* establish dummy device */
+	af_iucv_driver.bus = pr_iucv->bus;
+	err = driver_register(&af_iucv_driver);
+	if (err)
+		goto out_iucv;
+	af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!af_iucv_dev) {
+		err = -ENOMEM;
+		goto out_driver;
+	}
+	dev_set_name(af_iucv_dev, "af_iucv");
+	af_iucv_dev->bus = pr_iucv->bus;
+	af_iucv_dev->parent = pr_iucv->root;
+	af_iucv_dev->release = (void (*)(struct device *))kfree;
+	af_iucv_dev->driver = &af_iucv_driver;
+	err = device_register(af_iucv_dev);
+	if (err)
+		goto out_driver;
+	return 0;
+
+out_driver:
+	driver_unregister(&af_iucv_driver);
+out_iucv:
+	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+out:
+	return err;
+}
+
 static int __init afiucv_init(void)
 {
 	int err;
@@ -1735,44 +1774,33 @@ static int __init afiucv_init(void)
 		goto out;
 	}
 
-	err = iucv_register(&af_iucv_handler, 0);
-	if (err)
+	pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
+	if (!pr_iucv) {
+		printk(KERN_WARNING "iucv_if lookup failed\n");
+		err = -EPROTONOSUPPORT;
 		goto out;
+	}
+
 	err = proto_register(&iucv_proto, 0);
 	if (err)
-		goto out_iucv;
+		goto out;
 	err = sock_register(&iucv_sock_family_ops);
 	if (err)
 		goto out_proto;
-	/* establish dummy device */
-	err = driver_register(&af_iucv_driver);
+
+	err = afiucv_iucv_init();
 	if (err)
 		goto out_sock;
-	af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!af_iucv_dev) {
-		err = -ENOMEM;
-		goto out_driver;
-	}
-	dev_set_name(af_iucv_dev, "af_iucv");
-	af_iucv_dev->bus = &iucv_bus;
-	af_iucv_dev->parent = iucv_root;
-	af_iucv_dev->release = (void (*)(struct device *))kfree;
-	af_iucv_dev->driver = &af_iucv_driver;
-	err = device_register(af_iucv_dev);
-	if (err)
-		goto out_driver;
 
 	return 0;
 
-out_driver:
-	driver_unregister(&af_iucv_driver);
 out_sock:
 	sock_unregister(PF_IUCV);
 out_proto:
 	proto_unregister(&iucv_proto);
-out_iucv:
-	iucv_unregister(&af_iucv_handler, 0);
 out:
+	if (pr_iucv)
+		symbol_put(iucv_if);
 	return err;
 }
 
@@ -1780,9 +1808,10 @@ static void __exit afiucv_exit(void)
 {
 	device_unregister(af_iucv_dev);
 	driver_unregister(&af_iucv_driver);
+	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
+	symbol_put(iucv_if);
 	sock_unregister(PF_IUCV);
 	proto_unregister(&iucv_proto);
-	iucv_unregister(&af_iucv_handler, 0);
 }
 
 module_init(afiucv_init);


^ permalink raw reply

* [patch 08/11] [PATCH] qdio: support forced signal adapter indications
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck, Jan Glauber
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qdio_siga_r.patch --]
[-- Type: text/plain, Size: 2398 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch ensures that signal adapter commands are issued if they are 
indicated to be required.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 arch/s390/include/asm/qdio.h |   10 ++++++++++
 drivers/s390/cio/qdio.h      |    9 ---------
 drivers/s390/cio/qdio_main.c |    5 +----
 3 files changed, 11 insertions(+), 13 deletions(-)

--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -279,6 +279,16 @@ struct qdio_outbuf_state {
 
 #define CHSC_AC1_INITIATE_INPUTQ	0x80
 
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
+#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
+
 #define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
 #define CHSC_AC2_DATA_DIV_ENABLED	0x0002
 
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -85,15 +85,6 @@ enum qdio_irq_states {
 #define CHSC_FLAG_QDIO_CAPABILITY	0x80
 #define CHSC_FLAG_VALIDITY		0x40
 
-/* qdio adapter-characteristics-1 flag */
-#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
-#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
-#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
-#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
-#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
-#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
-#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
-
 /* SIGA flags */
 #define QDIO_SIGA_WRITE		0x00
 #define QDIO_SIGA_READ		0x01
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -1577,12 +1577,9 @@ set:
 	used = atomic_add_return(count, &q->nr_buf_used) - count;
 	BUG_ON(used + count > QDIO_MAX_BUFFERS_PER_Q);
 
-	/* no need to signal as long as the adapter had free buffers */
-	if (used)
-		return 0;
-
 	if (need_siga_in(q))
 		return qdio_siga_input(q);
+
 	return 0;
 }
 


^ permalink raw reply

* [patch 09/11] [PATCH] qeth: support forced signal adapter indications
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Einar Lueck
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117_qeth_siga_r.patch --]
[-- Type: text/plain, Size: 907 bytes --]

From: Einar Lueck <elelueck@de.ibm.com>

This patch ensures that signal adapter commands are issued if they are
indicated to be required.

Signed-off-by: Einar Lueck <elelueck@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 drivers/s390/net/qeth_core.h |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -231,7 +231,8 @@ static inline int qeth_is_ipa_enabled(st
 #define QETH_IN_BUF_COUNT_MAX 128
 #define QETH_MAX_BUFFER_ELEMENTS(card) ((card)->qdio.in_buf_size >> 12)
 #define QETH_IN_BUF_REQUEUE_THRESHOLD(card) \
-		((card)->qdio.in_buf_pool.buf_count / 2)
+		((card)->ssqd.qdioac1 & AC1_SIGA_INPUT_NEEDED ? 1 : \
+		 ((card)->qdio.in_buf_pool.buf_count / 2))
 
 /* buffers we have to be behind before we get a PCI */
 #define QETH_PCI_THRESHOLD_A(card) ((card)->qdio.in_buf_pool.buf_count+1)


^ permalink raw reply

* [patch 05/11] [PATCH] if_ether: add new Ethernet Protocol ID for af_iucv
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-new-protocol-id.patch --]
[-- Type: text/plain, Size: 823 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

Add a new ethertype for af_iucv over s/390 HiperSockets transport. Since
HiperSockets is not a real ethernet hw this is not an officially registered ID.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 include/linux/if_ether.h |    1 +
 1 file changed, 1 insertion(+)

--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -88,6 +88,7 @@
 #define ETH_P_QINQ2	0x9200		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_QINQ3	0x9300		/* deprecated QinQ VLAN [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
+#define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
 /*
  *	Non DIX types. Won't clash for 1500 types.


^ permalink raw reply

* [patch 01/11] [PATCH] iucv: introduce loadable iucv interface
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-iucv-introduce-loadable-iucv-interface.patch --]
[-- Type: text/plain, Size: 3456 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

This patch adds a symbol to dynamically load iucv functions.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 include/net/iucv/iucv.h |   36 +++++++++++++++++++++++++++++++++++-
 net/iucv/iucv.c         |   23 +++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -120,7 +120,7 @@ struct iucv_message {
 	u32 reply_size;
 	u8  rmmsg[8];
 	u8  flags;
-};
+} __packed;
 
 /*
  * struct iucv_handler
@@ -459,3 +459,37 @@ int __iucv_message_send(struct iucv_path
 int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg,
 			  u8 flags, u32 srccls, void *buffer, size_t size,
 			  void *answer, size_t asize, size_t *residual);
+
+struct iucv_interface {
+	int (*message_receive)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, void *buffer, size_t size, size_t *residual);
+	int (*__message_receive)(struct iucv_path *path,
+		struct iucv_message *msg, u8 flags, void *buffer, size_t size,
+		size_t *residual);
+	int (*message_reply)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, void *reply, size_t size);
+	int (*message_reject)(struct iucv_path *path, struct iucv_message *msg);
+	int (*message_send)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, u32 srccls, void *buffer, size_t size);
+	int (*__message_send)(struct iucv_path *path, struct iucv_message *msg,
+		u8 flags, u32 srccls, void *buffer, size_t size);
+	int (*message_send2way)(struct iucv_path *path,
+		struct iucv_message *msg, u8 flags, u32 srccls, void *buffer,
+		size_t size, void *answer, size_t asize, size_t *residual);
+	int (*message_purge)(struct iucv_path *path, struct iucv_message *msg,
+		u32 srccls);
+	int (*path_accept)(struct iucv_path *path, struct iucv_handler *handler,
+		u8 userdata[16], void *private);
+	int (*path_connect)(struct iucv_path *path,
+		struct iucv_handler *handler,
+		u8 userid[8], u8 system[8], u8 userdata[16], void *private);
+	int (*path_quiesce)(struct iucv_path *path, u8 userdata[16]);
+	int (*path_resume)(struct iucv_path *path, u8 userdata[16]);
+	int (*path_sever)(struct iucv_path *path, u8 userdata[16]);
+	int (*iucv_register)(struct iucv_handler *handler, int smp);
+	void (*iucv_unregister)(struct iucv_handler *handler, int smp);
+	struct bus_type *bus;
+	struct device *root;
+};
+
+extern struct iucv_interface iucv_if;
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1974,6 +1974,27 @@ out:
 	return rc;
 }
 
+struct iucv_interface iucv_if = {
+	.message_receive = iucv_message_receive,
+	.__message_receive = __iucv_message_receive,
+	.message_reply = iucv_message_reply,
+	.message_reject = iucv_message_reject,
+	.message_send = iucv_message_send,
+	.__message_send = __iucv_message_send,
+	.message_send2way = iucv_message_send2way,
+	.message_purge = iucv_message_purge,
+	.path_accept = iucv_path_accept,
+	.path_connect = iucv_path_connect,
+	.path_quiesce = iucv_path_quiesce,
+	.path_resume = iucv_path_resume,
+	.path_sever = iucv_path_sever,
+	.iucv_register = iucv_register,
+	.iucv_unregister = iucv_unregister,
+	.bus = NULL,
+	.root = NULL,
+};
+EXPORT_SYMBOL(iucv_if);
+
 /**
  * iucv_init
  *
@@ -2037,6 +2058,8 @@ static int __init iucv_init(void)
 	rc = bus_register(&iucv_bus);
 	if (rc)
 		goto out_reboot;
+	iucv_if.root = iucv_root;
+	iucv_if.bus = &iucv_bus;
 	return 0;
 
 out_reboot:


^ permalink raw reply

* [patch 02/11] [PATCH] iucv: kernel option for z/VM IUCV and HiperSockets
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390; +Cc: Ursula Braun
In-Reply-To: <20110727161242.018577444@de.ibm.com>

[-- Attachment #1: net1117-af_iucv-kernel-option-for-z-VM-IUCV-and-HiperSockets.patch transports --]
[-- Type: text/plain, Size: 1357 bytes --]

From: Ursula Braun <ursula.braun@de.ibm.com>

When adding HiperSockets transport to AF_IUCV Sockets, af_iucv either
depends on IUCV or QETH_L3 (or both). This patch introduces the
necessary changes for kernel configuration.

Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
---
 net/iucv/Kconfig |   14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

--- a/net/iucv/Kconfig
+++ b/net/iucv/Kconfig
@@ -1,15 +1,15 @@
 config IUCV
-	tristate "IUCV support (S390 - z/VM only)"
-	depends on S390
+	def_tristate y if S390
+	prompt "IUCV support (S390 - z/VM only)"
 	help
 	  Select this option if you want to use inter-user communication
 	  under VM or VIF. If you run on z/VM, say "Y" to enable a fast
 	  communication link between VM guests.
 
 config AFIUCV
-	tristate "AF_IUCV support (S390 - z/VM only)"
-	depends on IUCV
+	def_tristate m if QETH_L3 || IUCV
+	prompt "AF_IUCV Socket support (S390 - z/VM and HiperSockets transport)"
 	help
-	  Select this option if you want to use inter-user communication under
-	  VM or VIF sockets. If you run on z/VM, say "Y" to enable a fast
-	  communication link between VM guests.
+	  Select this option if you want to use AF_IUCV socket applications
+	  based on z/VM inter-user communication vehicle or based on
+	  HiperSockets.


^ permalink raw reply

* [patch 00/11] [RFC] s390: af_iucv traffic over HiperSockets transport
From: frank.blaschka @ 2011-07-27 16:12 UTC (permalink / raw)
  To: netdev, linux-s390

Following patch set implements a new feature to use HiperSockets
as a transport mechanism for the af_iucv address family. Basic
design idea is: The current transport mechanism for af_iucv (iucv)
is only available on VM. HiperSockets provide similar capabilities
as iucv and are available on LPAR. There is guaranty for data
integrity, in order delivery and delivery acknowledgment. A new
HiperSockets feature is exploited to support
flow control/congestion management. For this it is nearby to extend
af_iucv address family to use HiperSockets transport.

shortlog:

Einar Lueck (4)
qdio: support asynchronous delivery of storage blocks
qdio: support forced signal adapter indications
qeth: exploit asynchronous delivery of storage blocks
qeth: support forced signal adapter indications

Ursula Braun (3)
iucv: kernel option for z/VM IUCV and HiperSockets
af_iucv: cleanup - use iucv_sk(sk) early
af_iucv: add HiperSockets transport

Frank Blaschka (4)
iucv: introduce loadable iucv interface
af_iucv: use loadable iucv interface
if_ether: add new Ethernet Protocol ID for af_iucv
qeth: add support for af_iucv HiperSockets transport

Thanks for review and comments,
        Frank

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox