Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH net-next] rxrpc: Make IPv6 support conditional on CONFIG_IPV6
From: David Miller @ 2016-09-17  7:59 UTC (permalink / raw)
  To: dhowells; +Cc: netdev, linux-afs, linux-kernel
In-Reply-To: <147409356125.30214.18028040340873709660.stgit@warthog.procyon.org.uk>

From: David Howells <dhowells@redhat.com>
Date: Sat, 17 Sep 2016 07:26:01 +0100

> Add CONFIG_AF_RXRPC_IPV6 and make the IPv6 support code conditional on it.
> This is then made conditional on CONFIG_IPV6.
> 
> Without this, the following can be seen:
> 
>    net/built-in.o: In function `rxrpc_init_peer':
>>> peer_object.c:(.text+0x18c3c8): undefined reference to `ip6_route_output_flags'
> 
> Reported-by: kbuild test robot <fengguang.wu@intel.com>
> Signed-off-by: David Howells <dhowells@redhat.com>

Applied.

^ permalink raw reply

* [PATCH net-next] rxrpc: Make IPv6 support conditional on CONFIG_IPV6
From: David Howells @ 2016-09-17  6:26 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel

Add CONFIG_AF_RXRPC_IPV6 and make the IPv6 support code conditional on it.
This is then made conditional on CONFIG_IPV6.

Without this, the following can be seen:

   net/built-in.o: In function `rxrpc_init_peer':
>> peer_object.c:(.text+0x18c3c8): undefined reference to `ip6_route_output_flags'

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---

 net/rxrpc/Kconfig        |    7 +++++++
 net/rxrpc/af_rxrpc.c     |    7 ++++++-
 net/rxrpc/conn_object.c  |    2 ++
 net/rxrpc/local_object.c |    2 ++
 net/rxrpc/output.c       |    2 ++
 net/rxrpc/peer_event.c   |    4 +++-
 net/rxrpc/peer_object.c  |   10 ++++++++++
 net/rxrpc/utils.c        |    2 ++
 8 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 784c53163b7b..13396c74b5c1 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -19,6 +19,13 @@ config AF_RXRPC
 
 	  See Documentation/networking/rxrpc.txt.
 
+config AF_RXRPC_IPV6
+	bool "IPv6 support for RxRPC"
+	depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
+	help
+	  Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as
+	  its network transport.
+
 
 config AF_RXRPC_DEBUG
 	bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index f61f7b2d1ca4..09f81befc705 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -109,12 +109,14 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
 		tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
 		break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		if (srx->transport_len < sizeof(struct sockaddr_in6))
 			return -EINVAL;
 		tail = offsetof(struct sockaddr_rxrpc, transport) +
 			sizeof(struct sockaddr_in6);
 		break;
+#endif
 
 	default:
 		return -EAFNOSUPPORT;
@@ -413,9 +415,11 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
 		case AF_INET:
 			rx->srx.transport_len = sizeof(struct sockaddr_in);
 			break;
+#ifdef CONFIG_AF_RXRPC_IPV6
 		case AF_INET6:
 			rx->srx.transport_len = sizeof(struct sockaddr_in6);
 			break;
+#endif
 		default:
 			ret = -EAFNOSUPPORT;
 			goto error_unlock;
@@ -570,7 +574,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
 		return -EAFNOSUPPORT;
 
 	/* we support transport protocol UDP/UDP6 only */
-	if (protocol != PF_INET && protocol != PF_INET6)
+	if (protocol != PF_INET &&
+	    IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6)
 		return -EPROTONOSUPPORT;
 
 	if (sock->type != SOCK_DGRAM)
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index c0ddba787fd4..bb1f29280aea 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -134,6 +134,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
 			    srx.transport.sin.sin_addr.s_addr)
 				goto not_found;
 			break;
+#ifdef CONFIG_AF_RXRPC_IPV6
 		case AF_INET6:
 			if (peer->srx.transport.sin6.sin6_port !=
 			    srx.transport.sin6.sin6_port ||
@@ -142,6 +143,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
 				   sizeof(struct in6_addr)) != 0)
 				goto not_found;
 			break;
+#endif
 		default:
 			BUG();
 		}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index f5b9bb0d3f98..e3fad80b0795 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -58,6 +58,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
 			memcmp(&local->srx.transport.sin.sin_addr,
 			       &srx->transport.sin.sin_addr,
 			       sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		/* If the choice of UDP6 port is left up to the transport, then
 		 * the endpoint record doesn't match.
@@ -67,6 +68,7 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
 			memcmp(&local->srx.transport.sin6.sin6_addr,
 			       &srx->transport.sin6.sin6_addr,
 			       sizeof(struct in6_addr));
+#endif
 	default:
 		BUG();
 	}
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index d7cd87f17f0d..06a9aca739d1 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -259,6 +259,7 @@ send_fragmentable:
 		}
 		break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		opt = IPV6_PMTUDISC_DONT;
 		ret = kernel_setsockopt(conn->params.local->socket,
@@ -274,6 +275,7 @@ send_fragmentable:
 					  (char *)&opt, sizeof(opt));
 		}
 		break;
+#endif
 	}
 
 	up_write(&conn->params.local->defrag_sem);
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 74217589cf44..9e0725f5652b 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -66,6 +66,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 		}
 		break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		srx.transport.sin6.sin6_port = serr->port;
 		srx.transport_len = sizeof(struct sockaddr_in6);
@@ -78,7 +79,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 			break;
 		case SO_EE_ORIGIN_ICMP:
 			_net("Rx ICMP on v6 sock");
-			memcpy(&srx.transport.sin6.sin6_addr.s6_addr + 12,
+			memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
 			       skb_network_header(skb) + serr->addr_offset,
 			       sizeof(struct in_addr));
 			break;
@@ -89,6 +90,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
 			break;
 		}
 		break;
+#endif
 
 	default:
 		BUG();
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index dfc07b41a472..f3e5766910fd 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -52,11 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local,
 		size = sizeof(srx->transport.sin.sin_addr);
 		p = (u16 *)&srx->transport.sin.sin_addr;
 		break;
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		hash_key += (u16 __force)srx->transport.sin.sin_port;
 		size = sizeof(srx->transport.sin6.sin6_addr);
 		p = (u16 *)&srx->transport.sin6.sin6_addr;
 		break;
+#endif
 	default:
 		WARN(1, "AF_RXRPC: Unsupported transport address family\n");
 		return 0;
@@ -100,12 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer,
 			memcmp(&peer->srx.transport.sin.sin_addr,
 			       &srx->transport.sin.sin_addr,
 			       sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		return ((u16 __force)peer->srx.transport.sin6.sin6_port -
 			(u16 __force)srx->transport.sin6.sin6_port) ?:
 			memcmp(&peer->srx.transport.sin6.sin6_addr,
 			       &srx->transport.sin6.sin6_addr,
 			       sizeof(struct in6_addr));
+#endif
 	default:
 		BUG();
 	}
@@ -159,7 +163,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 	struct rtable *rt;
 	struct flowi fl;
 	struct flowi4 *fl4 = &fl.u.ip4;
+#ifdef CONFIG_AF_RXRPC_IPV6
 	struct flowi6 *fl6 = &fl.u.ip6;
+#endif
 
 	peer->if_mtu = 1500;
 
@@ -177,6 +183,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		dst = &rt->dst;
 		break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		fl6->flowi6_iif = LOOPBACK_IFINDEX;
 		fl6->flowi6_scope = RT_SCOPE_UNIVERSE;
@@ -191,6 +198,7 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 			return;
 		}
 		break;
+#endif
 
 	default:
 		BUG();
@@ -241,9 +249,11 @@ static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
 	case AF_INET:
 		peer->hdrsize = sizeof(struct iphdr);
 		break;
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
 		peer->hdrsize = sizeof(struct ipv6hdr);
 		break;
+#endif
 	default:
 		BUG();
 	}
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index b88914d53ca5..ff7af71c4b49 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
 		srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
 		return 0;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case ETH_P_IPV6:
 		srx->transport_type = SOCK_DGRAM;
 		srx->transport_len = sizeof(srx->transport.sin6);
@@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
 		srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
 		srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
 		return 0;
+#endif
 
 	default:
 		pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",

^ permalink raw reply related

* [PATCH net-next] chcr/cxgb4i/cxgbit/RDMA/cxgb4: Allocate resources dynamically for all cxgb4 ULD's
From: Hariprasad Shenai @ 2016-09-17  2:42 UTC (permalink / raw)
  To: netdev, linux-scsi, target-devel, linux-rdma, linux-crypto
  Cc: davem, nab, jejb, martin.petersen, dledford, herbert, varun,
	nirranjan, swise, atul.gupta, rahul.lakkireddy, Hariprasad Shenai

Allocate resources dynamically to cxgb4's Upper layer driver's(ULD) like
cxgbit, iw_cxgb4 and cxgb4i. Allocate resources when they register with
cxgb4 driver and free them while unregistering. All the queues and the
interrupts for them will be allocated during ULD probe only and freed
during remove.

Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
---
 drivers/crypto/chelsio/chcr_core.c                 |   10 +-
 drivers/infiniband/hw/cxgb4/device.c               |    4 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |   47 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |  127 +----
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  613 +++++---------------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c     |  223 ++++++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h     |   31 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c           |   18 +-
 drivers/scsi/cxgbi/cxgb4i/cxgb4i.c                 |    3 +
 drivers/target/iscsi/cxgbit/cxgbit_main.c          |    3 +
 10 files changed, 385 insertions(+), 694 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index 2f6156b..fb5f9bb 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -39,12 +39,10 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = {
 	[CPL_FW6_PLD] = cpl_fw6_pld_handler,
 };
 
-static struct cxgb4_pci_uld_info chcr_uld_info = {
+static struct cxgb4_uld_info chcr_uld_info = {
 	.name = DRV_MODULE_NAME,
-	.nrxq = 4,
+	.nrxq = MAX_ULD_QSETS,
 	.rxq_size = 1024,
-	.nciq = 0,
-	.ciq_size = 0,
 	.add = chcr_uld_add,
 	.state_change = chcr_uld_state_change,
 	.rx_handler = chcr_uld_rx_handler,
@@ -205,7 +203,7 @@ static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
 
 static int __init chcr_crypto_init(void)
 {
-	if (cxgb4_register_pci_uld(CXGB4_PCI_ULD1, &chcr_uld_info)) {
+	if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) {
 		pr_err("ULD register fail: No chcr crypto support in cxgb4");
 		return -1;
 	}
@@ -228,7 +226,7 @@ static void __exit chcr_crypto_exit(void)
 		kfree(u_ctx);
 	}
 	mutex_unlock(&dev_mutex);
-	cxgb4_unregister_pci_uld(CXGB4_PCI_ULD1);
+	cxgb4_unregister_uld(CXGB4_ULD_CRYPTO);
 }
 
 module_init(chcr_crypto_init);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 071d733..f170b63 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -1475,6 +1475,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
 
 static struct cxgb4_uld_info c4iw_uld_info = {
 	.name = DRV_NAME,
+	.nrxq = MAX_ULD_QSETS,
+	.rxq_size = 511,
+	.ciq = true,
+	.lro = false,
 	.add = c4iw_uld_add,
 	.rx_handler = c4iw_uld_rx_handler,
 	.state_change = c4iw_uld_state_change,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 4595569..1f9867d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -437,11 +437,6 @@ enum {
 	MAX_ETH_QSETS = 32,           /* # of Ethernet Tx/Rx queue sets */
 	MAX_OFLD_QSETS = 16,          /* # of offload Tx, iscsi Rx queue sets */
 	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
-	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
-	MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
-
-	/* # of streaming iSCSIT Rx queues */
-	MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
 };
 
 enum {
@@ -458,8 +453,7 @@ enum {
 enum {
 	INGQ_EXTRAS = 2,        /* firmware event queue and */
 				/*   forwarded interrupts */
-	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
-		   MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
+	MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
 };
 
 struct adapter;
@@ -704,10 +698,6 @@ struct sge {
 	struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
 
 	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
-	struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
-	struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
-	struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
-	struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
 	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
 	struct sge_uld_rxq_info **uld_rxq_info;
 
@@ -717,15 +707,8 @@ struct sge {
 	u16 max_ethqsets;           /* # of available Ethernet queue sets */
 	u16 ethqsets;               /* # of active Ethernet queue sets */
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
-	u16 iscsiqsets;              /* # of active iSCSI queue sets */
-	u16 niscsitq;               /* # of available iSCST Rx queues */
-	u16 rdmaqs;                 /* # of available RDMA Rx queues */
-	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
+	u16 ofldqsets;              /* # of active ofld queue sets */
 	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
-	u16 iscsi_rxq[MAX_OFLD_QSETS];
-	u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
-	u16 rdma_rxq[MAX_RDMA_QUEUES];
-	u16 rdma_ciq[MAX_RDMA_CIQS];
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
 	u32 fl_pg_order;            /* large page allocation size */
@@ -749,10 +732,7 @@ struct sge {
 };
 
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
-#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
-#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
-#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
-#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
+#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++)
 
 struct l2t_data;
 
@@ -786,6 +766,7 @@ struct uld_msix_bmap {
 struct uld_msix_info {
 	unsigned short vec;
 	char desc[IFNAMSIZ + 10];
+	unsigned int idx;
 };
 
 struct vf_info {
@@ -818,7 +799,7 @@ struct adapter {
 	} msix_info[MAX_INGQ + 1];
 	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
 	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
-	unsigned int msi_idx;
+	int msi_idx;
 
 	struct doorbell_stats db_stats;
 	struct sge sge;
@@ -836,9 +817,10 @@ struct adapter {
 	unsigned int clipt_start;
 	unsigned int clipt_end;
 	struct clip_tbl *clipt;
-	struct cxgb4_pci_uld_info *uld;
+	struct cxgb4_uld_info *uld;
 	void *uld_handle[CXGB4_ULD_MAX];
 	unsigned int num_uld;
+	unsigned int num_ofld_uld;
 	struct list_head list_node;
 	struct list_head rcu_node;
 	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
@@ -858,6 +840,8 @@ struct adapter {
 #define T4_OS_LOG_MBOX_CMDS 256
 	struct mbox_cmd_log *mbox_log;
 
+	struct mutex uld_mutex;
+
 	struct dentry *debugfs_root;
 	bool use_bd;     /* Use SGE Back Door intfc for reading SGE Contexts */
 	bool trace_rss;	/* 1 implies that different RSS flit per filter is
@@ -1051,6 +1035,11 @@ static inline int is_pci_uld(const struct adapter *adap)
 	return adap->params.crypto;
 }
 
+static inline int is_uld(const struct adapter *adap)
+{
+	return (adap->params.offload || adap->params.crypto);
+}
+
 static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
 {
 	return readl(adap->regs + reg_addr);
@@ -1277,6 +1266,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 			  struct net_device *dev, unsigned int iqid,
 			  unsigned int cmplqid);
+int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
+			unsigned int cmplqid);
 int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 			  struct net_device *dev, unsigned int iqid);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
@@ -1635,7 +1626,9 @@ void t4_idma_monitor(struct adapter *adapter,
 		     int hz, int ticks);
 int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
 		      unsigned int naddr, u8 *addr);
-void uld_mem_free(struct adapter *adap);
-int uld_mem_alloc(struct adapter *adap);
+void t4_uld_mem_free(struct adapter *adap);
+int t4_uld_mem_alloc(struct adapter *adap);
+void t4_uld_clean_up(struct adapter *adap);
+void t4_register_netevent_notifier(void);
 void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 91fb508..52be9a4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2432,17 +2432,11 @@ static int sge_qinfo_show(struct seq_file *seq, void *v)
 {
 	struct adapter *adap = seq->private;
 	int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
-	int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4);
-	int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4);
-	int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4);
-	int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4);
+	int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4);
 	int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
 	int i, r = (uintptr_t)v - 1;
-	int iscsi_idx = r - eth_entries;
-	int iscsit_idx = iscsi_idx - iscsi_entries;
-	int rdma_idx = iscsit_idx - iscsit_entries;
-	int ciq_idx = rdma_idx - rdma_entries;
-	int ctrl_idx =  ciq_idx - ciq_entries;
+	int ofld_idx = r - eth_entries;
+	int ctrl_idx =  ofld_idx - ofld_entries;
 	int fq_idx =  ctrl_idx - ctrl_entries;
 
 	if (r)
@@ -2518,119 +2512,17 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-	} else if (iscsi_idx < iscsi_entries) {
-		const struct sge_ofld_rxq *rx =
-			&adap->sge.iscsirxq[iscsi_idx * 4];
+	} else if (ofld_idx < ofld_entries) {
 		const struct sge_ofld_txq *tx =
-			&adap->sge.ofldtxq[iscsi_idx * 4];
-		int n = min(4, adap->sge.iscsiqsets - 4 * iscsi_idx);
+			&adap->sge.ofldtxq[ofld_idx * 4];
+		int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx);
 
-		S("QType:", "iSCSI");
+		S("QType:", "OFLD-Txq");
 		T("TxQ ID:", q.cntxt_id);
 		T("TxQ size:", q.size);
 		T("TxQ inuse:", q.in_use);
 		T("TxQ CIDX:", q.cidx);
 		T("TxQ PIDX:", q.pidx);
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		R("FL ID:", fl.cntxt_id);
-		R("FL size:", fl.size - 8);
-		R("FL pend:", fl.pend_cred);
-		R("FL avail:", fl.avail);
-		R("FL PIDX:", fl.pidx);
-		R("FL CIDX:", fl.cidx);
-		RL("RxPackets:", stats.pkts);
-		RL("RxImmPkts:", stats.imm);
-		RL("RxNoMem:", stats.nomem);
-		RL("FLAllocErr:", fl.alloc_failed);
-		RL("FLLrgAlcErr:", fl.large_alloc_failed);
-		RL("FLMapErr:", fl.mapping_err);
-		RL("FLLow:", fl.low);
-		RL("FLStarving:", fl.starving);
-
-	} else if (iscsit_idx < iscsit_entries) {
-		const struct sge_ofld_rxq *rx =
-			&adap->sge.iscsitrxq[iscsit_idx * 4];
-		int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx);
-
-		S("QType:", "iSCSIT");
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		R("FL ID:", fl.cntxt_id);
-		R("FL size:", fl.size - 8);
-		R("FL pend:", fl.pend_cred);
-		R("FL avail:", fl.avail);
-		R("FL PIDX:", fl.pidx);
-		R("FL CIDX:", fl.cidx);
-		RL("RxPackets:", stats.pkts);
-		RL("RxImmPkts:", stats.imm);
-		RL("RxNoMem:", stats.nomem);
-		RL("FLAllocErr:", fl.alloc_failed);
-		RL("FLLrgAlcErr:", fl.large_alloc_failed);
-		RL("FLMapErr:", fl.mapping_err);
-		RL("FLLow:", fl.low);
-		RL("FLStarving:", fl.starving);
-
-	} else if (rdma_idx < rdma_entries) {
-		const struct sge_ofld_rxq *rx =
-				&adap->sge.rdmarxq[rdma_idx * 4];
-		int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx);
-
-		S("QType:", "RDMA-CPL");
-		S("Interface:",
-		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		R("FL ID:", fl.cntxt_id);
-		R("FL size:", fl.size - 8);
-		R("FL pend:", fl.pend_cred);
-		R("FL avail:", fl.avail);
-		R("FL PIDX:", fl.pidx);
-		R("FL CIDX:", fl.cidx);
-		RL("RxPackets:", stats.pkts);
-		RL("RxImmPkts:", stats.imm);
-		RL("RxNoMem:", stats.nomem);
-		RL("FLAllocErr:", fl.alloc_failed);
-		RL("FLLrgAlcErr:", fl.large_alloc_failed);
-		RL("FLMapErr:", fl.mapping_err);
-		RL("FLLow:", fl.low);
-		RL("FLStarving:", fl.starving);
-
-	} else if (ciq_idx < ciq_entries) {
-		const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4];
-		int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx);
-
-		S("QType:", "RDMA-CIQ");
-		S("Interface:",
-		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		RL("RxAN:", stats.an);
-		RL("RxNoMem:", stats.nomem);
 
 	} else if (ctrl_idx < ctrl_entries) {
 		const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4];
@@ -2672,10 +2564,7 @@ do { \
 static int sge_queue_entries(const struct adapter *adap)
 {
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
-	       DIV_ROUND_UP(adap->sge.iscsiqsets, 4) +
-	       DIV_ROUND_UP(adap->sge.niscsitq, 4) +
-	       DIV_ROUND_UP(adap->sge.rdmaqs, 4) +
-	       DIV_ROUND_UP(adap->sge.rdmaciqs, 4) +
+	       DIV_ROUND_UP(adap->sge.ofldqsets, 4) +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 44cc976..d1ebb84 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -226,11 +226,6 @@ static struct dentry *cxgb4_debugfs_root;
 
 LIST_HEAD(adapter_list);
 DEFINE_MUTEX(uld_mutex);
-/* Adapter list to be accessed from atomic context */
-static LIST_HEAD(adap_rcu_list);
-static DEFINE_SPINLOCK(adap_rcu_lock);
-static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
-static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
 
 static void link_report(struct net_device *dev)
 {
@@ -678,56 +673,6 @@ out:
 	return 0;
 }
 
-/* Flush the aggregated lro sessions */
-static void uldrx_flush_handler(struct sge_rspq *q)
-{
-	if (ulds[q->uld].lro_flush)
-		ulds[q->uld].lro_flush(&q->lro_mgr);
-}
-
-/**
- *	uldrx_handler - response queue handler for ULD queues
- *	@q: the response queue that received the packet
- *	@rsp: the response queue descriptor holding the offload message
- *	@gl: the gather list of packet fragments
- *
- *	Deliver an ingress offload packet to a ULD.  All processing is done by
- *	the ULD, we just maintain statistics.
- */
-static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
-			 const struct pkt_gl *gl)
-{
-	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
-	int ret;
-
-	/* FW can send CPLs encapsulated in a CPL_FW4_MSG.
-	 */
-	if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
-	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
-		rsp += 2;
-
-	if (q->flush_handler)
-		ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
-						  rsp, gl, &q->lro_mgr,
-						  &q->napi);
-	else
-		ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
-					      rsp, gl);
-
-	if (ret) {
-		rxq->stats.nomem++;
-		return -1;
-	}
-
-	if (gl == NULL)
-		rxq->stats.imm++;
-	else if (gl == CXGB4_MSG_AN)
-		rxq->stats.an++;
-	else
-		rxq->stats.pkts++;
-	return 0;
-}
-
 static void disable_msi(struct adapter *adapter)
 {
 	if (adapter->flags & USING_MSIX) {
@@ -779,30 +724,12 @@ static void name_msix_vecs(struct adapter *adap)
 			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
 				 d->name, i);
 	}
-
-	/* offload queues */
-	for_each_iscsirxq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
-			 adap->port[0]->name, i);
-
-	for_each_iscsitrxq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
-			 adap->port[0]->name, i);
-
-	for_each_rdmarxq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
-			 adap->port[0]->name, i);
-
-	for_each_rdmaciq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
-			 adap->port[0]->name, i);
 }
 
 static int request_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
-	int iscsitqidx = 0;
+	int err, ethqidx;
 	int msi_index = 2;
 
 	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
@@ -819,57 +746,9 @@ static int request_msix_queue_irqs(struct adapter *adap)
 			goto unwind;
 		msi_index++;
 	}
-	for_each_iscsirxq(s, iscsiqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->iscsirxq[iscsiqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
-	for_each_iscsitrxq(s, iscsitqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->iscsitrxq[iscsitqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
-	for_each_rdmarxq(s, rdmaqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->rdmarxq[rdmaqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
-	for_each_rdmaciq(s, rdmaciqqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->rdmaciq[rdmaciqqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
 	return 0;
 
 unwind:
-	while (--rdmaciqqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->rdmaciq[rdmaciqqidx].rspq);
-	while (--rdmaqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->rdmarxq[rdmaqidx].rspq);
-	while (--iscsitqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->iscsitrxq[iscsitqidx].rspq);
-	while (--iscsiqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->iscsirxq[iscsiqidx].rspq);
 	while (--ethqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->ethrxq[ethqidx].rspq);
@@ -885,16 +764,6 @@ static void free_msix_queue_irqs(struct adapter *adap)
 	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
 	for_each_ethrxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
-	for_each_iscsirxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec,
-			 &s->iscsirxq[i].rspq);
-	for_each_iscsitrxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec,
-			 &s->iscsitrxq[i].rspq);
-	for_each_rdmarxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
-	for_each_rdmaciq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
 }
 
 /**
@@ -1033,42 +902,11 @@ static void enable_rx(struct adapter *adap)
 	}
 }
 
-static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
-			   unsigned int nq, unsigned int per_chan, int msi_idx,
-			   u16 *ids, bool lro)
-{
-	int i, err;
-
-	for (i = 0; i < nq; i++, q++) {
-		if (msi_idx > 0)
-			msi_idx++;
-		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
-				       adap->port[i / per_chan],
-				       msi_idx, q->fl.size ? &q->fl : NULL,
-				       uldrx_handler,
-				       lro ? uldrx_flush_handler : NULL,
-				       0);
-		if (err)
-			return err;
-		memset(&q->stats, 0, sizeof(q->stats));
-		if (ids)
-			ids[i] = q->rspq.abs_id;
-	}
-	return 0;
-}
 
-/**
- *	setup_sge_queues - configure SGE Tx/Rx/response queues
- *	@adap: the adapter
- *
- *	Determines how many sets of SGE queues to use and initializes them.
- *	We support multiple queue sets per port if we have MSI-X, otherwise
- *	just one queue set per port.
- */
-static int setup_sge_queues(struct adapter *adap)
+static int setup_fw_sge_queues(struct adapter *adap)
 {
-	int err, i, j;
 	struct sge *s = &adap->sge;
+	int err = 0;
 
 	bitmap_zero(s->starving_fl, s->egr_sz);
 	bitmap_zero(s->txq_maperr, s->egr_sz);
@@ -1083,25 +921,27 @@ static int setup_sge_queues(struct adapter *adap)
 		adap->msi_idx = -((int)s->intrq.abs_id + 1);
 	}
 
-	/* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
-	 * don't forget to update the following which need to be
-	 * synchronized to and changes here.
-	 *
-	 * 1. The calculations of MAX_INGQ in cxgb4.h.
-	 *
-	 * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
-	 *    to accommodate any new/deleted Ingress Queues
-	 *    which need MSI-X Vectors.
-	 *
-	 * 3. Update sge_qinfo_show() to include information on the
-	 *    new/deleted queues.
-	 */
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
 			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
-	if (err) {
-freeout:	t4_free_sge_resources(adap);
-		return err;
-	}
+	if (err)
+		t4_free_sge_resources(adap);
+	return err;
+}
+
+/**
+ *	setup_sge_queues - configure SGE Tx/Rx/response queues
+ *	@adap: the adapter
+ *
+ *	Determines how many sets of SGE queues to use and initializes them.
+ *	We support multiple queue sets per port if we have MSI-X, otherwise
+ *	just one queue set per port.
+ */
+static int setup_sge_queues(struct adapter *adap)
+{
+	int err, i, j;
+	struct sge *s = &adap->sge;
+	struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
+	unsigned int cmplqid = 0;
 
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
@@ -1132,8 +972,8 @@ freeout:	t4_free_sge_resources(adap);
 		}
 	}
 
-	j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */
-	for_each_iscsirxq(s, i) {
+	j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */
+	for_each_ofldtxq(s, i) {
 		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
 					    adap->port[i / j],
 					    s->fw_evtq.cntxt_id);
@@ -1141,30 +981,15 @@ freeout:	t4_free_sge_resources(adap);
 			goto freeout;
 	}
 
-#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
-	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, adap->msi_idx, ids, lro); \
-	if (err) \
-		goto freeout; \
-	if (adap->msi_idx > 0) \
-		adap->msi_idx += nq; \
-} while (0)
-
-	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
-	ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
-	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
-	j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
-	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
-
-#undef ALLOC_OFLD_RXQS
-
 	for_each_port(adap, i) {
-		/*
-		 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
+		/* Note that cmplqid below is 0 if we don't
 		 * have RDMA queues, and that's the right value.
 		 */
+		if (rxq_info)
+			cmplqid	= rxq_info->uldrxq[i].rspq.cntxt_id;
+
 		err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
-					    s->fw_evtq.cntxt_id,
-					    s->rdmarxq[i].rspq.cntxt_id);
+					    s->fw_evtq.cntxt_id, cmplqid);
 		if (err)
 			goto freeout;
 	}
@@ -1175,6 +1000,9 @@ freeout:	t4_free_sge_resources(adap);
 		     RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
 		     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
 	return 0;
+freeout:
+	t4_free_sge_resources(adap);
+	return err;
 }
 
 /*
@@ -2317,7 +2145,7 @@ static void disable_dbs(struct adapter *adap)
 
 	for_each_ethrxq(&adap->sge, i)
 		disable_txq_db(&adap->sge.ethtxq[i].q);
-	for_each_iscsirxq(&adap->sge, i)
+	for_each_ofldtxq(&adap->sge, i)
 		disable_txq_db(&adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
 		disable_txq_db(&adap->sge.ctrlq[i].q);
@@ -2329,7 +2157,7 @@ static void enable_dbs(struct adapter *adap)
 
 	for_each_ethrxq(&adap->sge, i)
 		enable_txq_db(adap, &adap->sge.ethtxq[i].q);
-	for_each_iscsirxq(&adap->sge, i)
+	for_each_ofldtxq(&adap->sge, i)
 		enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
 		enable_txq_db(adap, &adap->sge.ctrlq[i].q);
@@ -2337,9 +2165,10 @@ static void enable_dbs(struct adapter *adap)
 
 static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
 {
-	if (adap->uld_handle[CXGB4_ULD_RDMA])
-		ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
-				cmd);
+	enum cxgb4_uld type = CXGB4_ULD_RDMA;
+
+	if (adap->uld && adap->uld[type].handle)
+		adap->uld[type].control(adap->uld[type].handle, cmd);
 }
 
 static void process_db_full(struct work_struct *work)
@@ -2393,13 +2222,14 @@ out:
 	if (ret)
 		CH_WARN(adap, "DB drop recovery failed.\n");
 }
+
 static void recover_all_queues(struct adapter *adap)
 {
 	int i;
 
 	for_each_ethrxq(&adap->sge, i)
 		sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
-	for_each_iscsirxq(&adap->sge, i)
+	for_each_ofldtxq(&adap->sge, i)
 		sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
 		sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
@@ -2464,94 +2294,12 @@ void t4_db_dropped(struct adapter *adap)
 	queue_work(adap->workq, &adap->db_drop_task);
 }
 
-static void uld_attach(struct adapter *adap, unsigned int uld)
-{
-	void *handle;
-	struct cxgb4_lld_info lli;
-	unsigned short i;
-
-	lli.pdev = adap->pdev;
-	lli.pf = adap->pf;
-	lli.l2t = adap->l2t;
-	lli.tids = &adap->tids;
-	lli.ports = adap->port;
-	lli.vr = &adap->vres;
-	lli.mtus = adap->params.mtus;
-	if (uld == CXGB4_ULD_RDMA) {
-		lli.rxq_ids = adap->sge.rdma_rxq;
-		lli.ciq_ids = adap->sge.rdma_ciq;
-		lli.nrxq = adap->sge.rdmaqs;
-		lli.nciq = adap->sge.rdmaciqs;
-	} else if (uld == CXGB4_ULD_ISCSI) {
-		lli.rxq_ids = adap->sge.iscsi_rxq;
-		lli.nrxq = adap->sge.iscsiqsets;
-	} else if (uld == CXGB4_ULD_ISCSIT) {
-		lli.rxq_ids = adap->sge.iscsit_rxq;
-		lli.nrxq = adap->sge.niscsitq;
-	}
-	lli.ntxq = adap->sge.iscsiqsets;
-	lli.nchan = adap->params.nports;
-	lli.nports = adap->params.nports;
-	lli.wr_cred = adap->params.ofldq_wr_cred;
-	lli.adapter_type = adap->params.chip;
-	lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
-	lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
-	lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
-	lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
-	lli.iscsi_ppm = &adap->iscsi_ppm;
-	lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
-	lli.udb_density = 1 << adap->params.sge.eq_qpp;
-	lli.ucq_density = 1 << adap->params.sge.iq_qpp;
-	lli.filt_mode = adap->params.tp.vlan_pri_map;
-	/* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
-	for (i = 0; i < NCHAN; i++)
-		lli.tx_modq[i] = i;
-	lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
-	lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
-	lli.fw_vers = adap->params.fw_vers;
-	lli.dbfifo_int_thresh = dbfifo_int_thresh;
-	lli.sge_ingpadboundary = adap->sge.fl_align;
-	lli.sge_egrstatuspagesize = adap->sge.stat_len;
-	lli.sge_pktshift = adap->sge.pktshift;
-	lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
-	lli.max_ordird_qp = adap->params.max_ordird_qp;
-	lli.max_ird_adapter = adap->params.max_ird_adapter;
-	lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
-	lli.nodeid = dev_to_node(adap->pdev_dev);
-
-	handle = ulds[uld].add(&lli);
-	if (IS_ERR(handle)) {
-		dev_warn(adap->pdev_dev,
-			 "could not attach to the %s driver, error %ld\n",
-			 uld_str[uld], PTR_ERR(handle));
-		return;
-	}
-
-	adap->uld_handle[uld] = handle;
-
+void t4_register_netevent_notifier(void)
+{
 	if (!netevent_registered) {
 		register_netevent_notifier(&cxgb4_netevent_nb);
 		netevent_registered = true;
 	}
-
-	if (adap->flags & FULL_INIT_DONE)
-		ulds[uld].state_change(handle, CXGB4_STATE_UP);
-}
-
-static void attach_ulds(struct adapter *adap)
-{
-	unsigned int i;
-
-	spin_lock(&adap_rcu_lock);
-	list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
-	spin_unlock(&adap_rcu_lock);
-
-	mutex_lock(&uld_mutex);
-	list_add_tail(&adap->list_node, &adapter_list);
-	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (ulds[i].add)
-			uld_attach(adap, i);
-	mutex_unlock(&uld_mutex);
 }
 
 static void detach_ulds(struct adapter *adap)
@@ -2561,12 +2309,6 @@ static void detach_ulds(struct adapter *adap)
 	mutex_lock(&uld_mutex);
 	list_del(&adap->list_node);
 	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (adap->uld_handle[i]) {
-			ulds[i].state_change(adap->uld_handle[i],
-					     CXGB4_STATE_DETACH);
-			adap->uld_handle[i] = NULL;
-		}
-	for (i = 0; i < CXGB4_PCI_ULD_MAX; i++)
 		if (adap->uld && adap->uld[i].handle) {
 			adap->uld[i].state_change(adap->uld[i].handle,
 					     CXGB4_STATE_DETACH);
@@ -2577,10 +2319,6 @@ static void detach_ulds(struct adapter *adap)
 		netevent_registered = false;
 	}
 	mutex_unlock(&uld_mutex);
-
-	spin_lock(&adap_rcu_lock);
-	list_del_rcu(&adap->rcu_node);
-	spin_unlock(&adap_rcu_lock);
 }
 
 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
@@ -2589,65 +2327,12 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
 
 	mutex_lock(&uld_mutex);
 	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (adap->uld_handle[i])
-			ulds[i].state_change(adap->uld_handle[i], new_state);
-	for (i = 0; i < CXGB4_PCI_ULD_MAX; i++)
 		if (adap->uld && adap->uld[i].handle)
 			adap->uld[i].state_change(adap->uld[i].handle,
 						  new_state);
 	mutex_unlock(&uld_mutex);
 }
 
-/**
- *	cxgb4_register_uld - register an upper-layer driver
- *	@type: the ULD type
- *	@p: the ULD methods
- *
- *	Registers an upper-layer driver with this driver and notifies the ULD
- *	about any presently available devices that support its type.  Returns
- *	%-EBUSY if a ULD of the same type is already registered.
- */
-int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
-{
-	int ret = 0;
-	struct adapter *adap;
-
-	if (type >= CXGB4_ULD_MAX)
-		return -EINVAL;
-	mutex_lock(&uld_mutex);
-	if (ulds[type].add) {
-		ret = -EBUSY;
-		goto out;
-	}
-	ulds[type] = *p;
-	list_for_each_entry(adap, &adapter_list, list_node)
-		uld_attach(adap, type);
-out:	mutex_unlock(&uld_mutex);
-	return ret;
-}
-EXPORT_SYMBOL(cxgb4_register_uld);
-
-/**
- *	cxgb4_unregister_uld - unregister an upper-layer driver
- *	@type: the ULD type
- *
- *	Unregisters an existing upper-layer driver.
- */
-int cxgb4_unregister_uld(enum cxgb4_uld type)
-{
-	struct adapter *adap;
-
-	if (type >= CXGB4_ULD_MAX)
-		return -EINVAL;
-	mutex_lock(&uld_mutex);
-	list_for_each_entry(adap, &adapter_list, list_node)
-		adap->uld_handle[type] = NULL;
-	ulds[type].add = NULL;
-	mutex_unlock(&uld_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(cxgb4_unregister_uld);
-
 #if IS_ENABLED(CONFIG_IPV6)
 static int cxgb4_inet6addr_handler(struct notifier_block *this,
 				   unsigned long event, void *data)
@@ -2752,7 +2437,6 @@ static int cxgb_up(struct adapter *adap)
 				  adap->msix_info[0].desc, adap);
 		if (err)
 			goto irq_err;
-
 		err = request_msix_queue_irqs(adap);
 		if (err) {
 			free_irq(adap->msix_info[0].vec, adap);
@@ -4262,6 +3946,7 @@ static int adap_init0(struct adapter *adap)
 		adap->params.ofldq_wr_cred = val[5];
 
 		adap->params.offload = 1;
+		adap->num_ofld_uld += 1;
 	}
 	if (caps_cmd.rdmacaps) {
 		params[0] = FW_PARAM_PFVF(STAG_START);
@@ -4314,6 +3999,7 @@ static int adap_init0(struct adapter *adap)
 			 "max_ordird_qp %d max_ird_adapter %d\n",
 			 adap->params.max_ordird_qp,
 			 adap->params.max_ird_adapter);
+		adap->num_ofld_uld += 2;
 	}
 	if (caps_cmd.iscsicaps) {
 		params[0] = FW_PARAM_PFVF(ISCSI_START);
@@ -4324,6 +4010,8 @@ static int adap_init0(struct adapter *adap)
 			goto bye;
 		adap->vres.iscsi.start = val[0];
 		adap->vres.iscsi.size = val[1] - val[0] + 1;
+		/* LIO target and cxgb4i initiaitor */
+		adap->num_ofld_uld += 2;
 	}
 	if (caps_cmd.cryptocaps) {
 		/* Should query params here...TODO */
@@ -4523,14 +4211,14 @@ static void cfg_queues(struct adapter *adap)
 #ifndef CONFIG_CHELSIO_T4_DCB
 	int q10g = 0;
 #endif
-	int ciq_size;
 
 	/* Reduce memory usage in kdump environment, disable all offload.
 	 */
 	if (is_kdump_kernel()) {
 		adap->params.offload = 0;
 		adap->params.crypto = 0;
-	} else if (adap->num_uld && uld_mem_alloc(adap)) {
+	} else if (is_uld(adap) && t4_uld_mem_alloc(adap)) {
+		adap->params.offload = 0;
 		adap->params.crypto = 0;
 	}
 
@@ -4576,33 +4264,18 @@ static void cfg_queues(struct adapter *adap)
 	s->ethqsets = qidx;
 	s->max_ethqsets = qidx;   /* MSI-X may lower it later */
 
-	if (is_offload(adap)) {
+	if (is_uld(adap)) {
 		/*
 		 * For offload we use 1 queue/channel if all ports are up to 1G,
 		 * otherwise we divide all available queues amongst the channels
 		 * capped by the number of available cores.
 		 */
 		if (n10g) {
-			i = min_t(int, ARRAY_SIZE(s->iscsirxq),
-				  num_online_cpus());
-			s->iscsiqsets = roundup(i, adap->params.nports);
-		} else
-			s->iscsiqsets = adap->params.nports;
-		/* For RDMA one Rx queue per channel suffices */
-		s->rdmaqs = adap->params.nports;
-		/* Try and allow at least 1 CIQ per cpu rounding down
-		 * to the number of ports, with a minimum of 1 per port.
-		 * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
-		 * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
-		 * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
-		 */
-		s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
-		s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
-				adap->params.nports;
-		s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
-
-		if (!is_t4(adap->params.chip))
-			s->niscsitq = s->iscsiqsets;
+			i = num_online_cpus();
+			s->ofldqsets = roundup(i, adap->params.nports);
+		} else {
+			s->ofldqsets = adap->params.nports;
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -4621,47 +4294,8 @@ static void cfg_queues(struct adapter *adap)
 	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
 		s->ofldtxq[i].q.size = 1024;
 
-	for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) {
-		struct sge_ofld_rxq *r = &s->iscsirxq[i];
-
-		init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
-		r->rspq.uld = CXGB4_ULD_ISCSI;
-		r->fl.size = 72;
-	}
-
-	if (!is_t4(adap->params.chip)) {
-		for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
-			struct sge_ofld_rxq *r = &s->iscsitrxq[i];
-
-			init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
-			r->rspq.uld = CXGB4_ULD_ISCSIT;
-			r->fl.size = 72;
-		}
-	}
-
-	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
-		struct sge_ofld_rxq *r = &s->rdmarxq[i];
-
-		init_rspq(adap, &r->rspq, 5, 1, 511, 64);
-		r->rspq.uld = CXGB4_ULD_RDMA;
-		r->fl.size = 72;
-	}
-
-	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
-	if (ciq_size > SGE_MAX_IQ_SIZE) {
-		CH_WARN(adap, "CIQ size too small for available IQs\n");
-		ciq_size = SGE_MAX_IQ_SIZE;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
-		struct sge_ofld_rxq *r = &s->rdmaciq[i];
-
-		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
-		r->rspq.uld = CXGB4_ULD_RDMA;
-	}
-
 	init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
-	init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
+	init_rspq(adap, &s->intrq, 0, 1, 512, 64);
 }
 
 /*
@@ -4695,7 +4329,15 @@ static void reduce_ethqs(struct adapter *adap, int n)
 static int get_msix_info(struct adapter *adap)
 {
 	struct uld_msix_info *msix_info;
-	int max_ingq = (MAX_OFLD_QSETS * adap->num_uld);
+	unsigned int max_ingq = 0;
+
+	if (is_offload(adap))
+		max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
+	if (is_pci_uld(adap))
+		max_ingq += MAX_OFLD_QSETS * adap->num_uld;
+
+	if (!max_ingq)
+		goto out;
 
 	msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
 	if (!msix_info)
@@ -4709,12 +4351,13 @@ static int get_msix_info(struct adapter *adap)
 	}
 	spin_lock_init(&adap->msix_bmap_ulds.lock);
 	adap->msix_info_ulds = msix_info;
+out:
 	return 0;
 }
 
 static void free_msix_info(struct adapter *adap)
 {
-	if (!adap->num_uld)
+	if (!(adap->num_uld && adap->num_ofld_uld))
 		return;
 
 	kfree(adap->msix_info_ulds);
@@ -4733,32 +4376,32 @@ static int enable_msix(struct adapter *adap)
 	struct msix_entry *entries;
 	int max_ingq = MAX_INGQ;
 
-	max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
+	if (is_pci_uld(adap))
+		max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
+	if (is_offload(adap))
+		max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
 	entries = kmalloc(sizeof(*entries) * (max_ingq + 1),
 			  GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
 
 	/* map for msix */
-	if (is_pci_uld(adap) && get_msix_info(adap))
+	if (get_msix_info(adap)) {
+		adap->params.offload = 0;
 		adap->params.crypto = 0;
+	}
 
 	for (i = 0; i < max_ingq + 1; ++i)
 		entries[i].entry = i;
 
 	want = s->max_ethqsets + EXTRA_VECS;
 	if (is_offload(adap)) {
-		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets	+
-			s->niscsitq;
-		/* need nchan for each possible ULD */
-		if (is_t4(adap->params.chip))
-			ofld_need = 3 * nchan;
-		else
-			ofld_need = 4 * nchan;
+		want += adap->num_ofld_uld * s->ofldqsets;
+		ofld_need = adap->num_ofld_uld * nchan;
 	}
 	if (is_pci_uld(adap)) {
-		want += netif_get_num_default_rss_queues() * nchan;
-		uld_need = nchan;
+		want += adap->num_uld * s->ofldqsets;
+		uld_need = adap->num_uld * nchan;
 	}
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
@@ -4786,43 +4429,25 @@ static int enable_msix(struct adapter *adap)
 		if (i < s->ethqsets)
 			reduce_ethqs(adap, i);
 	}
-	if (is_pci_uld(adap)) {
+	if (is_uld(adap)) {
 		if (allocated < want)
 			s->nqs_per_uld = nchan;
 		else
-			s->nqs_per_uld = netif_get_num_default_rss_queues() *
-					nchan;
+			s->nqs_per_uld = s->ofldqsets;
 	}
 
-	if (is_offload(adap)) {
-		if (allocated < want) {
-			s->rdmaqs = nchan;
-			s->rdmaciqs = nchan;
-
-			if (!is_t4(adap->params.chip))
-				s->niscsitq = nchan;
-		}
-
-		/* leftovers go to OFLD */
-		i = allocated - EXTRA_VECS - s->max_ethqsets -
-			s->rdmaqs - s->rdmaciqs - s->niscsitq;
-		if (is_pci_uld(adap))
-			i -= s->nqs_per_uld * adap->num_uld;
-		s->iscsiqsets = (i / nchan) * nchan;  /* round down */
-
-	}
-
-	for (i = 0; i < (allocated - (s->nqs_per_uld * adap->num_uld)); ++i)
+	for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
 		adap->msix_info[i].vec = entries[i].vector;
-	if (is_pci_uld(adap)) {
-		for (j = 0 ; i < allocated; ++i, j++)
+	if (is_uld(adap)) {
+		for (j = 0 ; i < allocated; ++i, j++) {
 			adap->msix_info_ulds[j].vec = entries[i].vector;
+			adap->msix_info_ulds[j].idx = i;
+		}
 		adap->msix_bmap_ulds.mapsize = j;
 	}
 	dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
-		 "nic %d iscsi %d rdma cpl %d rdma ciq %d uld %d\n",
-		 allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs,
-		 s->rdmaciqs, s->nqs_per_uld);
+		 "nic %d per uld %d\n",
+		 allocated, s->max_ethqsets, s->nqs_per_uld);
 
 	kfree(entries);
 	return 0;
@@ -5535,10 +5160,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
 	pdev->needs_freset = 1;
 
-	if (is_offload(adapter))
-		attach_ulds(adapter);
+	if (is_uld(adapter)) {
+		mutex_lock(&uld_mutex);
+		list_add_tail(&adapter->list_node, &adapter_list);
+		mutex_unlock(&uld_mutex);
+	}
 
 	print_adapter_info(adapter);
+	setup_fw_sge_queues(adapter);
 	return 0;
 
 sriov:
@@ -5593,8 +5222,8 @@ sriov:
 	free_some_resources(adapter);
 	if (adapter->flags & USING_MSIX)
 		free_msix_info(adapter);
-	if (adapter->num_uld)
-		uld_mem_free(adapter);
+	if (adapter->num_uld || adapter->num_ofld_uld)
+		t4_uld_mem_free(adapter);
  out_unmap_bar:
 	if (!is_t4(adapter->params.chip))
 		iounmap(adapter->bar2);
@@ -5631,7 +5260,7 @@ static void remove_one(struct pci_dev *pdev)
 		 */
 		destroy_workqueue(adapter->workq);
 
-		if (is_offload(adapter))
+		if (is_uld(adapter))
 			detach_ulds(adapter);
 
 		disable_interrupts(adapter);
@@ -5658,8 +5287,8 @@ static void remove_one(struct pci_dev *pdev)
 
 		if (adapter->flags & USING_MSIX)
 			free_msix_info(adapter);
-		if (adapter->num_uld)
-			uld_mem_free(adapter);
+		if (adapter->num_uld || adapter->num_ofld_uld)
+			t4_uld_mem_free(adapter);
 		free_some_resources(adapter);
 #if IS_ENABLED(CONFIG_IPV6)
 		t4_cleanup_clip_tbl(adapter);
@@ -5690,12 +5319,58 @@ static void remove_one(struct pci_dev *pdev)
 #endif
 }
 
+/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
+ * delivery.  This is essentially a stripped down version of the PCI remove()
+ * function where we do the minimal amount of work necessary to shutdown any
+ * further activity.
+ */
+static void shutdown_one(struct pci_dev *pdev)
+{
+	struct adapter *adapter = pci_get_drvdata(pdev);
+
+	/* As with remove_one() above (see extended comment), we only want do
+	 * do cleanup on PCI Devices which went all the way through init_one()
+	 * ...
+	 */
+	if (!adapter) {
+		pci_release_regions(pdev);
+		return;
+	}
+
+	if (adapter->pf == 4) {
+		int i;
+
+		for_each_port(adapter, i)
+			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+				cxgb_close(adapter->port[i]);
+
+		t4_uld_clean_up(adapter);
+		disable_interrupts(adapter);
+		disable_msi(adapter);
+
+		t4_sge_stop(adapter);
+		if (adapter->flags & FW_OK)
+			t4_fw_bye(adapter, adapter->mbox);
+	}
+#ifdef CONFIG_PCI_IOV
+	else {
+		if (adapter->port[0])
+			unregister_netdev(adapter->port[0]);
+		iounmap(adapter->regs);
+		kfree(adapter->vfinfo);
+		kfree(adapter);
+		pci_disable_sriov(pdev);
+		pci_release_regions(pdev);
+	}
+#endif
+}
+
 static struct pci_driver cxgb4_driver = {
 	.name     = KBUILD_MODNAME,
 	.id_table = cxgb4_pci_tbl,
 	.probe    = init_one,
 	.remove   = remove_one,
-	.shutdown = remove_one,
+	.shutdown = shutdown_one,
 #ifdef CONFIG_PCI_IOV
 	.sriov_configure = cxgb4_iov_configure,
 #endif
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 5d402ba..fc04e3b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -82,6 +82,24 @@ static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx)
 	spin_unlock_irqrestore(&bmap->lock, flags);
 }
 
+/* Flush the aggregated lro sessions */
+static void uldrx_flush_handler(struct sge_rspq *q)
+{
+	struct adapter *adap = q->adap;
+
+	if (adap->uld[q->uld].lro_flush)
+		adap->uld[q->uld].lro_flush(&q->lro_mgr);
+}
+
+/**
+ *	uldrx_handler - response queue handler for ULD queues
+ *	@q: the response queue that received the packet
+ *	@rsp: the response queue descriptor holding the offload message
+ *	@gl: the gather list of packet fragments
+ *
+ *	Deliver an ingress offload packet to a ULD.  All processing is done by
+ *	the ULD, we just maintain statistics.
+ */
 static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
 			 const struct pkt_gl *gl)
 {
@@ -124,8 +142,8 @@ static int alloc_uld_rxqs(struct adapter *adap,
 	struct sge_ofld_rxq *q = rxq_info->uldrxq + offset;
 	unsigned short *ids = rxq_info->rspq_id + offset;
 	unsigned int per_chan = nq / adap->params.nports;
-	unsigned int msi_idx, bmap_idx;
-	int i, err;
+	unsigned int bmap_idx = 0;
+	int i, err, msi_idx;
 
 	if (adap->flags & USING_MSIX)
 		msi_idx = 1;
@@ -135,14 +153,14 @@ static int alloc_uld_rxqs(struct adapter *adap,
 	for (i = 0; i < nq; i++, q++) {
 		if (msi_idx >= 0) {
 			bmap_idx = get_msix_idx_from_bmap(adap);
-			adap->msi_idx++;
+			msi_idx = adap->msix_info_ulds[bmap_idx].idx;
 		}
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
 				       adap->port[i / per_chan],
-				       adap->msi_idx,
+				       msi_idx,
 				       q->fl.size ? &q->fl : NULL,
 				       uldrx_handler,
-				       NULL,
+				       lro ? uldrx_flush_handler : NULL,
 				       0);
 		if (err)
 			goto freeout;
@@ -159,7 +177,6 @@ freeout:
 		if (q->rspq.desc)
 			free_rspq_fl(adap, &q->rspq,
 				     q->fl.size ? &q->fl : NULL);
-		adap->msi_idx--;
 	}
 
 	/* We need to free rxq also in case of ciq allocation failure */
@@ -169,7 +186,6 @@ freeout:
 			if (q->rspq.desc)
 				free_rspq_fl(adap, &q->rspq,
 					     q->fl.size ? &q->fl : NULL);
-			adap->msi_idx--;
 		}
 	}
 	return err;
@@ -178,17 +194,38 @@ freeout:
 int setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int i, ret = 0;
 
 	if (adap->flags & USING_MSIX) {
-		rxq_info->msix_tbl = kzalloc(rxq_info->nrxq + rxq_info->nciq,
+		rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq),
+					     sizeof(unsigned short),
 					     GFP_KERNEL);
 		if (!rxq_info->msix_tbl)
 			return -ENOMEM;
 	}
 
-	return !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
+	ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
 		 !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq,
 				 rxq_info->nrxq, lro));
+
+	/* Tell uP to route control queue completions to rdma rspq */
+	if (adap->flags & FULL_INIT_DONE &&
+	    !ret && uld_type == CXGB4_ULD_RDMA) {
+		struct sge *s = &adap->sge;
+		unsigned int cmplqid;
+		u32 param, cmdop;
+
+		cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL;
+		for_each_port(adap, i) {
+			cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id;
+			param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+				 FW_PARAMS_PARAM_X_V(cmdop) |
+				 FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id));
+			ret = t4_set_params(adap, adap->mbox, adap->pf,
+					    0, 1, &param, &cmplqid);
+		}
+	}
+	return ret;
 }
 
 static void t4_free_uld_rxqs(struct adapter *adap, int n,
@@ -198,7 +235,6 @@ static void t4_free_uld_rxqs(struct adapter *adap, int n,
 		if (q->rspq.desc)
 			free_rspq_fl(adap, &q->rspq,
 				     q->fl.size ? &q->fl : NULL);
-		adap->msi_idx--;
 	}
 }
 
@@ -206,6 +242,21 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 
+	if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) {
+		struct sge *s = &adap->sge;
+		u32 param, cmdop, cmplqid = 0;
+		int i;
+
+		cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL;
+		for_each_port(adap, i) {
+			param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+				 FW_PARAMS_PARAM_X_V(cmdop) |
+				 FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id));
+			t4_set_params(adap, adap->mbox, adap->pf,
+				      0, 1, &param, &cmplqid);
+		}
+	}
+
 	if (rxq_info->nciq)
 		t4_free_uld_rxqs(adap, rxq_info->nciq,
 				 rxq_info->uldrxq + rxq_info->nrxq);
@@ -215,26 +266,38 @@ void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
 }
 
 int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
-		   const struct cxgb4_pci_uld_info *uld_info)
+		   const struct cxgb4_uld_info *uld_info)
 {
 	struct sge *s = &adap->sge;
 	struct sge_uld_rxq_info *rxq_info;
-	int i, nrxq;
+	int i, nrxq, ciq_size;
 
 	rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL);
 	if (!rxq_info)
 		return -ENOMEM;
 
-	if (uld_info->nrxq > s->nqs_per_uld)
-		rxq_info->nrxq = s->nqs_per_uld;
-	else
-		rxq_info->nrxq = uld_info->nrxq;
-	if (!uld_info->nciq)
+	if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) {
+		i = s->nqs_per_uld;
+		rxq_info->nrxq = roundup(i, adap->params.nports);
+	} else {
+		i = min_t(int, uld_info->nrxq,
+			  num_online_cpus());
+		rxq_info->nrxq = roundup(i, adap->params.nports);
+	}
+	if (!uld_info->ciq) {
 		rxq_info->nciq = 0;
-	else if (uld_info->nciq && uld_info->nciq > s->nqs_per_uld)
-		rxq_info->nciq = s->nqs_per_uld;
-	else
-		rxq_info->nciq = uld_info->nciq;
+	} else  {
+		if (adap->flags & USING_MSIX)
+			rxq_info->nciq = min_t(int, s->nqs_per_uld,
+					       num_online_cpus());
+		else
+			rxq_info->nciq = min_t(int, MAX_OFLD_QSETS,
+					       num_online_cpus());
+		rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) *
+				  adap->params.nports);
+		rxq_info->nciq = max_t(int, rxq_info->nciq,
+				       adap->params.nports);
+	}
 
 	nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */
 	rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq),
@@ -259,12 +322,17 @@ int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
 		r->fl.size = 72;
 	}
 
+	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
+	if (ciq_size > SGE_MAX_IQ_SIZE) {
+		dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n");
+		ciq_size = SGE_MAX_IQ_SIZE;
+	}
+
 	for (i = rxq_info->nrxq; i < nrxq; i++) {
 		struct sge_ofld_rxq *r = &rxq_info->uldrxq[i];
 
-		init_rspq(adap, &r->rspq, 5, 1, uld_info->ciq_size, 64);
+		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
 		r->rspq.uld = uld_type;
-		r->fl.size = 72;
 	}
 
 	memcpy(rxq_info->name, uld_info->name, IFNAMSIZ);
@@ -285,7 +353,8 @@ void free_queues_uld(struct adapter *adap, unsigned int uld_type)
 int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int idx, bmap_idx, err = 0;
+	int err = 0;
+	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
 		bmap_idx = rxq_info->msix_tbl[idx];
@@ -310,10 +379,10 @@ unwind:
 void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int idx;
+	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		unsigned int bmap_idx = rxq_info->msix_tbl[idx];
+		bmap_idx = rxq_info->msix_tbl[idx];
 
 		free_msix_idx_in_bmap(adap, bmap_idx);
 		free_irq(adap->msix_info_ulds[bmap_idx].vec,
@@ -325,10 +394,10 @@ void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
 	int n = sizeof(adap->msix_info_ulds[0].desc);
-	int idx;
+	unsigned int idx, bmap_idx;
 
 	for_each_uldrxq(rxq_info, idx) {
-		unsigned int bmap_idx = rxq_info->msix_tbl[idx];
+		bmap_idx = rxq_info->msix_tbl[idx];
 
 		snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d",
 			 adap->port[0]->name, rxq_info->name, idx);
@@ -390,15 +459,15 @@ static void uld_queue_init(struct adapter *adap, unsigned int uld_type,
 	lli->nciq = rxq_info->nciq;
 }
 
-int uld_mem_alloc(struct adapter *adap)
+int t4_uld_mem_alloc(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
 
-	adap->uld = kcalloc(adap->num_uld, sizeof(*adap->uld), GFP_KERNEL);
+	adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL);
 	if (!adap->uld)
 		return -ENOMEM;
 
-	s->uld_rxq_info = kzalloc(adap->num_uld *
+	s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX *
 				  sizeof(struct sge_uld_rxq_info *),
 				  GFP_KERNEL);
 	if (!s->uld_rxq_info)
@@ -410,7 +479,7 @@ err_uld:
 	return -ENOMEM;
 }
 
-void uld_mem_free(struct adapter *adap)
+void t4_uld_mem_free(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
 
@@ -418,6 +487,26 @@ void uld_mem_free(struct adapter *adap)
 	kfree(adap->uld);
 }
 
+void t4_uld_clean_up(struct adapter *adap)
+{
+	struct sge_uld_rxq_info *rxq_info;
+	unsigned int i;
+
+	if (!adap->uld)
+		return;
+	for (i = 0; i < CXGB4_ULD_MAX; i++) {
+		if (!adap->uld[i].handle)
+			continue;
+		rxq_info = adap->sge.uld_rxq_info[i];
+		if (adap->flags & FULL_INIT_DONE)
+			quiesce_rx_uld(adap, i);
+		if (adap->flags & USING_MSIX)
+			free_msix_queue_irqs_uld(adap, i);
+		free_sge_queues_uld(adap, i);
+		free_queues_uld(adap, i);
+	}
+}
+
 static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 {
 	int i;
@@ -429,10 +518,15 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
 	lld->ports = adap->port;
 	lld->vr = &adap->vres;
 	lld->mtus = adap->params.mtus;
-	lld->ntxq = adap->sge.iscsiqsets;
+	lld->ntxq = adap->sge.ofldqsets;
 	lld->nchan = adap->params.nports;
 	lld->nports = adap->params.nports;
 	lld->wr_cred = adap->params.ofldq_wr_cred;
+	lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
+	lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
+	lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
+	lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
+	lld->iscsi_ppm = &adap->iscsi_ppm;
 	lld->adapter_type = adap->params.chip;
 	lld->cclk_ps = 1000000000 / adap->params.vpd.cclk;
 	lld->udb_density = 1 << adap->params.sge.eq_qpp;
@@ -472,23 +566,37 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
 	}
 
 	adap->uld[uld].handle = handle;
+	t4_register_netevent_notifier();
 
 	if (adap->flags & FULL_INIT_DONE)
 		adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
 }
 
-int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
-			   struct cxgb4_pci_uld_info *p)
+/**
+ *	cxgb4_register_uld - register an upper-layer driver
+ *	@type: the ULD type
+ *	@p: the ULD methods
+ *
+ *	Registers an upper-layer driver with this driver and notifies the ULD
+ *	about any presently available devices that support its type.  Returns
+ *	%-EBUSY if a ULD of the same type is already registered.
+ */
+int cxgb4_register_uld(enum cxgb4_uld type,
+		       const struct cxgb4_uld_info *p)
 {
 	int ret = 0;
+	unsigned int adap_idx = 0;
 	struct adapter *adap;
 
-	if (type >= CXGB4_PCI_ULD_MAX)
+	if (type >= CXGB4_ULD_MAX)
 		return -EINVAL;
 
 	mutex_lock(&uld_mutex);
 	list_for_each_entry(adap, &adapter_list, list_node) {
-		if (!is_pci_uld(adap))
+		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+			continue;
+		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
 			continue;
 		ret = cfg_queues_uld(adap, type, p);
 		if (ret)
@@ -510,11 +618,14 @@ int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
 		}
 		adap->uld[type] = *p;
 		uld_attach(adap, type);
+		adap_idx++;
 	}
 	mutex_unlock(&uld_mutex);
 	return 0;
 
 free_irq:
+	if (adap->flags & FULL_INIT_DONE)
+		quiesce_rx_uld(adap, type);
 	if (adap->flags & USING_MSIX)
 		free_msix_queue_irqs_uld(adap, type);
 free_rxq:
@@ -522,21 +633,49 @@ free_rxq:
 free_queues:
 	free_queues_uld(adap, type);
 out:
+
+	list_for_each_entry(adap, &adapter_list, list_node) {
+		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+			continue;
+		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
+			continue;
+		if (!adap_idx)
+			break;
+		adap->uld[type].handle = NULL;
+		adap->uld[type].add = NULL;
+		if (adap->flags & FULL_INIT_DONE)
+			quiesce_rx_uld(adap, type);
+		if (adap->flags & USING_MSIX)
+			free_msix_queue_irqs_uld(adap, type);
+		free_sge_queues_uld(adap, type);
+		free_queues_uld(adap, type);
+		adap_idx--;
+	}
 	mutex_unlock(&uld_mutex);
 	return ret;
 }
-EXPORT_SYMBOL(cxgb4_register_pci_uld);
+EXPORT_SYMBOL(cxgb4_register_uld);
 
-int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type)
+/**
+ *	cxgb4_unregister_uld - unregister an upper-layer driver
+ *	@type: the ULD type
+ *
+ *	Unregisters an existing upper-layer driver.
+ */
+int cxgb4_unregister_uld(enum cxgb4_uld type)
 {
 	struct adapter *adap;
 
-	if (type >= CXGB4_PCI_ULD_MAX)
+	if (type >= CXGB4_ULD_MAX)
 		return -EINVAL;
 
 	mutex_lock(&uld_mutex);
 	list_for_each_entry(adap, &adapter_list, list_node) {
-		if (!is_pci_uld(adap))
+		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+			continue;
+		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
 			continue;
 		adap->uld[type].handle = NULL;
 		adap->uld[type].add = NULL;
@@ -551,4 +690,4 @@ int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type)
 
 	return 0;
 }
-EXPORT_SYMBOL(cxgb4_unregister_pci_uld);
+EXPORT_SYMBOL(cxgb4_unregister_uld);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index ab40372..b3544f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -42,6 +42,8 @@
 #include <linux/atomic.h>
 #include "cxgb4.h"
 
+#define MAX_ULD_QSETS 16
+
 /* CPL message priority levels */
 enum {
 	CPL_PRIORITY_DATA     = 0,  /* data messages */
@@ -189,9 +191,11 @@ static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
 }
 
 enum cxgb4_uld {
+	CXGB4_ULD_INIT,
 	CXGB4_ULD_RDMA,
 	CXGB4_ULD_ISCSI,
 	CXGB4_ULD_ISCSIT,
+	CXGB4_ULD_CRYPTO,
 	CXGB4_ULD_MAX
 };
 
@@ -284,31 +288,11 @@ struct cxgb4_lld_info {
 
 struct cxgb4_uld_info {
 	const char *name;
-	void *(*add)(const struct cxgb4_lld_info *p);
-	int (*rx_handler)(void *handle, const __be64 *rsp,
-			  const struct pkt_gl *gl);
-	int (*state_change)(void *handle, enum cxgb4_state new_state);
-	int (*control)(void *handle, enum cxgb4_control control, ...);
-	int (*lro_rx_handler)(void *handle, const __be64 *rsp,
-			      const struct pkt_gl *gl,
-			      struct t4_lro_mgr *lro_mgr,
-			      struct napi_struct *napi);
-	void (*lro_flush)(struct t4_lro_mgr *);
-};
-
-enum cxgb4_pci_uld {
-	CXGB4_PCI_ULD1,
-	CXGB4_PCI_ULD_MAX
-};
-
-struct cxgb4_pci_uld_info {
-	const char *name;
-	bool lro;
 	void *handle;
 	unsigned int nrxq;
-	unsigned int nciq;
 	unsigned int rxq_size;
-	unsigned int ciq_size;
+	bool ciq;
+	bool lro;
 	void *(*add)(const struct cxgb4_lld_info *p);
 	int (*rx_handler)(void *handle, const __be64 *rsp,
 			  const struct pkt_gl *gl);
@@ -323,9 +307,6 @@ struct cxgb4_pci_uld_info {
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
-int cxgb4_register_pci_uld(enum cxgb4_pci_uld type,
-			   struct cxgb4_pci_uld_info *p);
-int cxgb4_unregister_pci_uld(enum cxgb4_pci_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 9a607db..1e74fd6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2860,6 +2860,18 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 	return 0;
 }
 
+int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
+			unsigned int cmplqid)
+{
+	u32 param, val;
+
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) |
+		 FW_PARAMS_PARAM_YZ_V(eqid));
+	val = cmplqid;
+	return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+}
+
 int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 			  struct net_device *dev, unsigned int iqid)
 {
@@ -3014,12 +3026,6 @@ void t4_free_sge_resources(struct adapter *adap)
 		}
 	}
 
-	/* clean up RDMA and iSCSI Rx queues */
-	t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq);
-	t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq);
-	t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq);
-	t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq);
-
 	/* clean up offload Tx queues */
 	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
 		struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index e4ba2d2..7c0d7af 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -84,6 +84,9 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *);
 
 static const struct cxgb4_uld_info cxgb4i_uld_info = {
 	.name = DRV_MODULE_NAME,
+	.nrxq = MAX_ULD_QSETS,
+	.rxq_size = 1024,
+	.lro = false,
 	.add = t4_uld_add,
 	.rx_handler = t4_uld_rx_handler,
 	.state_change = t4_uld_state_change,
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c
index 27dd11a..ad26b93 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_main.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c
@@ -652,6 +652,9 @@ static struct iscsit_transport cxgbit_transport = {
 
 static struct cxgb4_uld_info cxgbit_uld_info = {
 	.name		= DRV_NAME,
+	.nrxq		= MAX_ULD_QSETS,
+	.rxq_size	= 1024,
+	.lro		= true,
 	.add		= cxgbit_uld_add,
 	.state_change	= cxgbit_uld_state_change,
 	.lro_rx_handler = cxgbit_uld_lro_rx_handler,
-- 
1.7.3

^ permalink raw reply related

* Re: [PATCHv5 net-next 05/15] bpf: expose internal verfier structures
From: Daniel Borkmann @ 2016-09-16 22:47 UTC (permalink / raw)
  To: Jakub Kicinski, netdev; +Cc: ast
In-Reply-To: <1474018622-10653-6-git-send-email-jakub.kicinski@netronome.com>

On 09/16/2016 11:36 AM, Jakub Kicinski wrote:
> Move verifier's internal structures to a header file and
> prefix their names with bpf_ to avoid potential namespace
> conflicts.  Those structures will soon be used by external
> analyzers.
>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Acked-by: Alexei Starovoitov <ast@kernel.org>

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* Re: [PATCHv5 net-next 06/15] bpf: enable non-core use of the verfier
From: Daniel Borkmann @ 2016-09-16 22:31 UTC (permalink / raw)
  To: Jakub Kicinski, netdev; +Cc: ast
In-Reply-To: <1474018622-10653-7-git-send-email-jakub.kicinski@netronome.com>

On 09/16/2016 11:36 AM, Jakub Kicinski wrote:
> Advanced JIT compilers and translators may want to use
> eBPF verifier as a base for parsers or to perform custom
> checks and validations.
>
> Add ability for external users to invoke the verifier
> and provide callbacks to be invoked for every intruction
> checked.  For now only add most basic callback for
> per-instruction pre-interpretation checks is added.  More
> advanced users may also like to have per-instruction post
> callback and state comparison callback.
>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Acked-by: Alexei Starovoitov <ast@kernel.org>
> ---
> v4:
>   - separate from the header split patch.
> ---
>   include/linux/bpf_verifier.h | 11 +++++++
>   kernel/bpf/verifier.c        | 68 ++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 79 insertions(+)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 9056117b4a81..925359e1d9a1 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -59,6 +59,12 @@ struct bpf_insn_aux_data {
>
>   #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
>
> +struct bpf_verifier_env;
> +struct bpf_ext_analyzer_ops {
> +	int (*insn_hook)(struct bpf_verifier_env *env,
> +			 int insn_idx, int prev_insn_idx);
> +};
> +
>   /* single container for all structs
>    * one verifier_env per bpf_check() call
>    */
> @@ -68,6 +74,8 @@ struct bpf_verifier_env {
>   	int stack_size;			/* number of states to be processed */
>   	struct bpf_verifier_state cur_state; /* current verifier state */
>   	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
> +	const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
> +	void *analyzer_priv; /* pointer to external analyzer's private data */
>   	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
>   	u32 used_map_cnt;		/* number of used maps */
>   	u32 id_gen;			/* used to generate unique reg IDs */
> @@ -75,4 +83,7 @@ struct bpf_verifier_env {
>   	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
>   };
>
> +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
> +		 void *priv);
> +
>   #endif /* _LINUX_BPF_VERIFIER_H */
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 6e126a417290..d93e78331b90 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -624,6 +624,10 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
>   static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
>   			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
>   {
> +	/* for analyzer ctx accesses are already validated and converted */
> +	if (env->analyzer_ops)
> +		return 0;
> +
>   	if (env->prog->aux->ops->is_valid_access &&
>   	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
>   		/* remember the offset of last byte accessed in ctx */
> @@ -2222,6 +2226,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
>   	return 0;
>   }
>
> +static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
> +				  int insn_idx, int prev_insn_idx)
> +{
> +	if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
> +		return 0;
> +
> +	return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
> +}
> +
>   static int do_check(struct bpf_verifier_env *env)
>   {
>   	struct bpf_verifier_state *state = &env->cur_state;
> @@ -2280,6 +2293,10 @@ static int do_check(struct bpf_verifier_env *env)
>   			print_bpf_insn(insn);
>   		}
>
> +		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
> +		if (err)
> +			return err;
> +

Looking at this and the nfp code translator (patch 8/15): Did you check this
also with JIT hardening enabled? Presumably nfp sees this after it got JITed
through the normal load via syscall. Then, when this gets rewritten using the
BPF_REG_AX helper before the image gets locked, and you later on push this
through bpf_analyzer() again, where the hook is called before re-verification
of insns, it's still assumed to be MAX_BPF_REG in your hook callbacks, right?
So, I was wondering wrt out of bounds in nfp_verify_insn() -> nfp_bpf_check_ctx_ptr()
for things like BPF_STX? If that's the case, it would make sense to just reject
any prog with reg that is BPF_REG_AX in nfp_verify_insn() upfront. Alternative
would be to use MAX_BPF_JIT_REG in nfp and let bpf_analyzer() fail this via
check_reg_arg() test.

>   		if (class == BPF_ALU || class == BPF_ALU64) {
>   			err = check_alu_op(env, insn);
>   			if (err)
> @@ -2829,3 +2846,54 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
>   	kfree(env);
>   	return ret;
>   }
> +
> +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
> +		 void *priv)
> +{
> +	struct bpf_verifier_env *env;
> +	int ret;
> +
> +	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
> +	if (!env)
> +		return -ENOMEM;
> +
> +	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
> +				     prog->len);
> +	ret = -ENOMEM;
> +	if (!env->insn_aux_data)
> +		goto err_free_env;
> +	env->prog = prog;
> +	env->analyzer_ops = ops;
> +	env->analyzer_priv = priv;
> +
> +	/* grab the mutex to protect few globals used by verifier */
> +	mutex_lock(&bpf_verifier_lock);
> +
> +	log_level = 0;
> +
> +	env->explored_states = kcalloc(env->prog->len,
> +				       sizeof(struct bpf_verifier_state_list *),
> +				       GFP_KERNEL);
> +	ret = -ENOMEM;
> +	if (!env->explored_states)
> +		goto skip_full_check;
> +
> +	ret = check_cfg(env);
> +	if (ret < 0)
> +		goto skip_full_check;
> +
> +	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
> +
> +	ret = do_check(env);
> +
> +skip_full_check:
> +	while (pop_stack(env, NULL) >= 0);
> +	free_states(env);
> +
> +	mutex_unlock(&bpf_verifier_lock);
> +	vfree(env->insn_aux_data);
> +err_free_env:
> +	kfree(env);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(bpf_analyzer);
>

^ permalink raw reply

* Re: Modification to skb->queue_mapping affecting performance
From: Michael Ma @ 2016-09-16 22:00 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1474055639.22679.74.camel@edumazet-glaptop3.roam.corp.google.com>

2016-09-16 12:53 GMT-07:00 Eric Dumazet <eric.dumazet@gmail.com>:
> On Fri, 2016-09-16 at 10:57 -0700, Michael Ma wrote:
>
>> This is actually the problem - if flows from different RX queues are
>> switched to the same RX queue in IFB, they'll use different processor
>> context with the same tasklet, and the processor context of different
>> tasklets might be the same. So multiple tasklets in IFB competes for
>> the same core when queue is switched.
>>
>> The following simple fix proved this - with this change even switching
>> the queue won't affect small packet bandwidth/latency anymore:
>>
>> in ifb.c:
>>
>> -       struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
>> +       struct ifb_q_private *txp = dp->tx_private +
>> (smp_processor_id() % dev->num_tx_queues);
>>
>> This should be more efficient since we're not sending the task to a
>> different processor, instead we try to queue the packet to an
>> appropriate tasklet based on the processor ID. Will this cause any
>> packet out-of-order problem? If packets from the same flow are queued
>> to the same RX queue due to RSS, and processor affinity is set for RX
>> queues, I assume packets from the same flow will end up in the same
>> core when tasklet is scheduled. But I might have missed some uncommon
>> cases here... Would appreciate if anyone can provide more insights.
>
> Wait, don't you have proper smp affinity for the RX queues on your NIC ?
>
> ( Documentation/networking/scaling.txt RSS IRQ Configuration )
>
Yes - what I was trying to say is that this change will be more
efficient than using smp_call_function_single() to schedule the
tasklet to a different processor.

RSS IRQ should be set properly already. The issue here is that I'll
need to switch the queue mapping for NIC RX to a different TXQ on IFB,
which allows me to classify the flows at the IFB TXQ layer and avoid
qdisc lock contention.

When that switch happens, ideally processor core shouldn't be switched
because all the thread context isn't changed. The work in tasklet
should be scheduled to the same processor as well. That's why I tried
this change. Also conceptually IFB is a software device which should
be able to schedule its workload independent from how NIC is
configured for the interrupt handling.

> A driver ndo_start_xmit() MUST use skb_get_queue_mapping(skb), because
> the driver queue is locked before ndo_start_xmit())  (for non
> NETIF_F_LLTX drivers at least)
>

Thanks a lot for pointing out this! I was expecting this kind of
guidance... Then the options would be:

1. Use smp_call_function_single() to schedule the tasklet to a core
statically mapped to the IFB TXQ, which is very similar to how TX/RX
IRQ is configured.
2. As you suggested below add some additional action to do the
rescheduling before entering IFB - for example when receiving the
packet we could just use RSS to redirect to the desired RXQ, however
this doesn't seem to be easy, especially compared with the way how
mqprio chooses the queue. The challenge here is that IFB queue
selection is based on queue_mapping when skb arrives at IFB and core
selection is based on RXQ on NIC and so it's also based on
queue_mapping when skb arrives at NIC. Then these two queue_mappings
must be the same so that there is no core conflict of processing two
TXQs of IFB. Then this essentially means we have to change queue
mapping of the NIC on the receiver side which can't be achieved using
TC.

> In case of ifb, __skb_queue_tail(&txp->rq, skb); could corrupt the skb
> list.
>
> In any case, you could have an action to do this before reaching IFB.
>
>
>

^ permalink raw reply

* [PATCH net-next 2/2] bnx2x: allocate mac filtering pending list in PAGE_SIZE increments
From: Jason Baron @ 2016-09-16 21:30 UTC (permalink / raw)
  To: davem, Ariel.Elior; +Cc: Yuval.Mintz, netdev
In-Reply-To: <cover.1474060371.git.jbaron@akamai.com>

Currently, we can have high order page allocations that specify
GFP_ATOMIC when configuring multicast MAC address filters.

For example, we have seen order 2 page allocation failures with
~500 multicast addresses configured.

Convert the allocation for the pending list to be done in PAGE_SIZE
increments.

Signed-off-by: Jason Baron <jbaron@akamai.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 131 ++++++++++++++++++-------
 1 file changed, 94 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index d468380c2a23..6db8dd252d7c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -2606,8 +2606,23 @@ struct bnx2x_mcast_bin_elem {
 	int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */
 };
 
+union bnx2x_mcast_elem {
+	struct bnx2x_mcast_bin_elem bin_elem;
+	struct bnx2x_mcast_mac_elem mac_elem;
+};
+
+struct bnx2x_mcast_elem_group {
+	struct list_head mcast_group_link;
+	union bnx2x_mcast_elem mcast_elems[];
+};
+
+#define MCAST_MAC_ELEMS_PER_PG \
+	((PAGE_SIZE - sizeof(struct bnx2x_mcast_elem_group)) / \
+	sizeof(union bnx2x_mcast_elem))
+
 struct bnx2x_pending_mcast_cmd {
 	struct list_head link;
+	struct list_head group_head;
 	int type; /* BNX2X_MCAST_CMD_X */
 	union {
 		struct list_head macs_head;
@@ -2638,16 +2653,30 @@ static int bnx2x_mcast_wait(struct bnx2x *bp,
 	return 0;
 }
 
+static void bnx2x_free_groups(struct list_head *mcast_group_list)
+{
+	struct bnx2x_mcast_elem_group *current_mcast_group;
+
+	while (!list_empty(mcast_group_list)) {
+		current_mcast_group = list_first_entry(mcast_group_list,
+				      struct bnx2x_mcast_elem_group,
+				      mcast_group_link);
+		list_del(&current_mcast_group->mcast_group_link);
+		kfree(current_mcast_group);
+	}
+}
+
 static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp,
 				   struct bnx2x_mcast_obj *o,
 				   struct bnx2x_mcast_ramrod_params *p,
 				   enum bnx2x_mcast_cmd cmd)
 {
-	int total_sz;
 	struct bnx2x_pending_mcast_cmd *new_cmd;
-	struct bnx2x_mcast_mac_elem *cur_mac = NULL;
 	struct bnx2x_mcast_list_elem *pos;
-	int macs_list_len = 0, macs_list_len_size;
+	struct bnx2x_mcast_elem_group *elem_group;
+	struct bnx2x_mcast_mac_elem *mac_elem;
+	int i = 0, offset = 0, macs_list_len = 0;
+	int total_elems, alloc_size;
 
 	/* When adding MACs we'll need to store their values */
 	if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET)
@@ -2657,50 +2686,68 @@ static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp,
 	if (!p->mcast_list_len)
 		return 0;
 
-	/* For a set command, we need to allocate sufficient memory for all
-	 * the bins, since we can't analyze at this point how much memory would
-	 * be required.
-	 */
-	macs_list_len_size = macs_list_len *
-			     sizeof(struct bnx2x_mcast_mac_elem);
-	if (cmd == BNX2X_MCAST_CMD_SET) {
-		int bin_size = BNX2X_MCAST_BINS_NUM *
-			       sizeof(struct bnx2x_mcast_bin_elem);
-
-		if (bin_size > macs_list_len_size)
-			macs_list_len_size = bin_size;
-	}
-	total_sz = sizeof(*new_cmd) + macs_list_len_size;
-
 	/* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */
-	new_cmd = kzalloc(total_sz, GFP_ATOMIC);
-
+	new_cmd = kzalloc(sizeof(*new_cmd), GFP_ATOMIC);
 	if (!new_cmd)
 		return -ENOMEM;
 
-	DP(BNX2X_MSG_SP, "About to enqueue a new %d command. macs_list_len=%d\n",
-	   cmd, macs_list_len);
-
 	INIT_LIST_HEAD(&new_cmd->data.macs_head);
-
+	INIT_LIST_HEAD(&new_cmd->group_head);
 	new_cmd->type = cmd;
 	new_cmd->done = false;
 
+	DP(BNX2X_MSG_SP, "About to enqueue a new %d command. macs_list_len=%d\n",
+	   cmd, macs_list_len);
+
 	switch (cmd) {
 	case BNX2X_MCAST_CMD_ADD:
 	case BNX2X_MCAST_CMD_SET:
-		cur_mac = (struct bnx2x_mcast_mac_elem *)
-			  ((u8 *)new_cmd + sizeof(*new_cmd));
-
-		/* Push the MACs of the current command into the pending command
-		 * MACs list: FIFO
+		/* For a set command, we need to allocate sufficient memory for
+		 * all the bins, since we can't analyze at this point how much
+		 * memory would be required.
 		 */
+		total_elems = macs_list_len;
+		if (cmd == BNX2X_MCAST_CMD_SET) {
+			if (total_elems < BNX2X_MCAST_BINS_NUM)
+				total_elems = BNX2X_MCAST_BINS_NUM;
+		}
+		i = total_elems;
+		while (i) {
+			if (i < MCAST_MAC_ELEMS_PER_PG) {
+				alloc_size = sizeof(struct list_head) +
+					     i * sizeof(union bnx2x_mcast_elem);
+				i = 0;
+			} else {
+				alloc_size = PAGE_SIZE;
+				i -= MCAST_MAC_ELEMS_PER_PG;
+			}
+			elem_group = kzalloc(alloc_size, GFP_ATOMIC);
+			if (!elem_group) {
+				bnx2x_free_groups(&new_cmd->group_head);
+				kfree(new_cmd);
+				return -ENOMEM;
+			}
+			list_add_tail(&elem_group->mcast_group_link,
+				      &new_cmd->group_head);
+		}
+		elem_group = list_first_entry(&new_cmd->group_head,
+					      struct bnx2x_mcast_elem_group,
+					      mcast_group_link);
 		list_for_each_entry(pos, &p->mcast_list, link) {
-			memcpy(cur_mac->mac, pos->mac, ETH_ALEN);
-			list_add_tail(&cur_mac->link, &new_cmd->data.macs_head);
-			cur_mac++;
+			mac_elem = &elem_group->mcast_elems[offset].mac_elem;
+			memcpy(mac_elem->mac, pos->mac, ETH_ALEN);
+			/* Push the MACs of the current command into the pending
+			 * command MACs list: FIFO
+			 */
+			list_add_tail(&mac_elem->link,
+				      &new_cmd->data.macs_head);
+			offset++;
+			if (offset == MCAST_MAC_ELEMS_PER_PG) {
+				offset = 0;
+				elem_group = (struct bnx2x_mcast_elem_group *)
+					     elem_group->mcast_group_link.next;
+			}
 		}
-
 		break;
 
 	case BNX2X_MCAST_CMD_DEL:
@@ -2978,7 +3025,8 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp,
 	u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ];
 	struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n;
 	struct bnx2x_mcast_bin_elem *p_item;
-	int i, cnt = 0, mac_cnt = 0;
+	struct bnx2x_mcast_elem_group *elem_group;
+	int cnt = 0, mac_cnt = 0, offset = 0, i;
 
 	memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ);
 	memcpy(cur, o->registry.aprox_match.vec,
@@ -3001,9 +3049,10 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp,
 	 * a list that will be used to configure bins.
 	 */
 	cmd_pos->set_convert = true;
-	p_item = (struct bnx2x_mcast_bin_elem *)(cmd_pos + 1);
 	INIT_LIST_HEAD(&cmd_pos->data.macs_head);
-
+	elem_group = list_first_entry(&cmd_pos->group_head,
+				      struct bnx2x_mcast_elem_group,
+				      mcast_group_link);
 	for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) {
 		bool b_current = !!BIT_VEC64_TEST_BIT(cur, i);
 		bool b_required = !!BIT_VEC64_TEST_BIT(req, i);
@@ -3011,12 +3060,18 @@ bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp,
 		if (b_current == b_required)
 			continue;
 
+		p_item = &elem_group->mcast_elems[offset].bin_elem;
 		p_item->bin = i;
 		p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD
 					  : BNX2X_MCAST_CMD_SET_DEL;
 		list_add_tail(&p_item->link , &cmd_pos->data.macs_head);
-		p_item++;
 		cnt++;
+		offset++;
+		if (offset == MCAST_MAC_ELEMS_PER_PG) {
+			offset = 0;
+			elem_group = (struct bnx2x_mcast_elem_group *)
+				     elem_group->mcast_group_link.next;
+		}
 	}
 
 	/* We now definitely know how many commands are hiding here.
@@ -3103,6 +3158,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp,
 		 */
 		if (cmd_pos->done) {
 			list_del(&cmd_pos->link);
+			bnx2x_free_groups(&cmd_pos->group_head);
 			kfree(cmd_pos);
 		}
 
@@ -3741,6 +3797,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e1(
 	}
 
 	list_del(&cmd_pos->link);
+	bnx2x_free_groups(&cmd_pos->group_head);
 	kfree(cmd_pos);
 
 	return cnt;
-- 
2.6.1

^ permalink raw reply related

* Re: [PATCH net-next 07/14] tcp: export data delivery rate
From: kbuild test robot @ 2016-09-16 21:38 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: kbuild-all, David Miller, netdev, Yuchung Cheng, Van Jacobson,
	Neal Cardwell, Nandita Dukkipati, Eric Dumazet,
	Soheil Hassas Yeganeh
In-Reply-To: <1474051743-13311-8-git-send-email-ncardwell@google.com>

[-- Attachment #1: Type: text/plain, Size: 4388 bytes --]

Hi Yuchung,

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Neal-Cardwell/tcp-BBR-congestion-control-algorithm/20160917-025323
config: arm-simpad_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm 

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:142:0,
                    from include/linux/crypto.h:21,
                    from include/crypto/hash.h:16,
                    from net/ipv4/tcp.c:250:
   net/ipv4/tcp.c: In function 'tcp_get_info':
>> arch/arm/include/asm/div64.h:59:36: error: passing argument 1 of '__div64_32' from incompatible pointer type [-Werror=incompatible-pointer-types]
    #define do_div(n, base) __div64_32(&(n), base)
                                       ^
   net/ipv4/tcp.c:2794:3: note: in expansion of macro 'do_div'
      do_div(rate, intv);
      ^~~~~~
   arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t * {aka long long unsigned int *}' but argument is of type 'u32 * {aka unsigned int *}'
    static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
                           ^~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/__div64_32 +59 arch/arm/include/asm/div64.h

040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  43  		: "=r" (__rem), "=r" (__res)
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  44  		: "r" (__n), "r" (__base)
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  45  		: "ip", "lr", "cc");
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  46  	*n = __res;
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  47  	return __rem;
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  48  }
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  49  #define __div64_32 __div64_32
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  50  
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  51  #if !defined(CONFIG_AEABI)
fa4adc614 include/asm-arm/div64.h      Nicolas Pitre 2006-12-06  52  
fa4adc614 include/asm-arm/div64.h      Nicolas Pitre 2006-12-06  53  /*
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  54   * In OABI configurations, some uses of the do_div function
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  55   * cause gcc to run out of registers. To work around that,
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  56   * we can force the use of the out-of-line version for
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  57   * configurations that build a OABI kernel.
fa4adc614 include/asm-arm/div64.h      Nicolas Pitre 2006-12-06  58   */
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02 @59  #define do_div(n, base) __div64_32(&(n), base)
fa4adc614 include/asm-arm/div64.h      Nicolas Pitre 2006-12-06  60  
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  61  #else
fa4adc614 include/asm-arm/div64.h      Nicolas Pitre 2006-12-06  62  
fa4adc614 include/asm-arm/div64.h      Nicolas Pitre 2006-12-06  63  /*
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  64   * gcc versions earlier than 4.0 are simply too problematic for the
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  65   * __div64_const32() code in asm-generic/div64.h. First there is
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  66   * gcc PR 15089 that tend to trig on more complex constructs, spurious
040b323b5 arch/arm/include/asm/div64.h Nicolas Pitre 2015-11-02  67   * .global __udivsi3 are inserted even if none of those symbols are

:::::: The code at line 59 was first introduced by commit
:::::: 040b323b5012b5503561ec7fe15cccd6a4bcaec2 ARM: asm/div64.h: adjust to generic codde

:::::: TO: Nicolas Pitre <nicolas.pitre@linaro.org>
:::::: CC: Nicolas Pitre <nicolas.pitre@linaro.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 12411 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 05/14] tcp: track data delivery rate for a TCP connection
From: kbuild test robot @ 2016-09-16 21:38 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: kbuild-all, David Miller, netdev, Yuchung Cheng, Van Jacobson,
	Neal Cardwell, Nandita Dukkipati, Eric Dumazet,
	Soheil Hassas Yeganeh
In-Reply-To: <1474051743-13311-6-git-send-email-ncardwell@google.com>

[-- Attachment #1: Type: text/plain, Size: 2325 bytes --]

Hi Yuchung,

[auto build test WARNING on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Neal-Cardwell/tcp-BBR-congestion-control-algorithm/20160917-025323
config: cris-etrax-100lx_v2_defconfig (attached as .config)
compiler: cris-linux-gcc (GCC) 6.2.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=cris 

All warnings (new ones prefixed by >>):

   In file included from net/ipv4/route.c:103:0:
>> include/net/tcp.h:769:11: warning: 'packed' attribute ignored for field of type 'struct skb_mstamp' [-Wattributes]
       struct skb_mstamp first_tx_mstamp __packed;
              ^~~~~~~~~~
   include/net/tcp.h:771:11: warning: 'packed' attribute ignored for field of type 'struct skb_mstamp' [-Wattributes]
       struct skb_mstamp delivered_mstamp __packed;
              ^~~~~~~~~~

vim +769 include/net/tcp.h

   753	#define TCPCB_TAGBITS		0x07	/* All tag bits			*/
   754	#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp)	*/
   755	#define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
   756	#define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
   757					TCPCB_REPAIRED)
   758	
   759		__u8		ip_dsfield;	/* IPv4 tos or IPv6 dsfield	*/
   760		__u8		txstamp_ack:1,	/* Record TX timestamp for ack? */
   761				eor:1,		/* Is skb MSG_EOR marked? */
   762				unused:6;
   763		__u32		ack_seq;	/* Sequence number ACK'd	*/
   764		union {
   765			struct {
   766				/* There is space for up to 24 bytes */
   767				__u32 in_flight;/* Bytes in flight when packet sent */
   768				/* start of send pipeline phase */
 > 769				struct skb_mstamp first_tx_mstamp __packed;
   770				/* when we reached the "delivered" count */
   771				struct skb_mstamp delivered_mstamp __packed;
   772				/* pkts S/ACKed so far upon tx of skb, incl retrans: */
   773				__u32 delivered;
   774			} tx;   /* only used for outgoing skbs */
   775			union {
   776				struct inet_skb_parm	h4;
   777	#if IS_ENABLED(CONFIG_IPV6)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 8333 bytes --]

^ permalink raw reply

* Re: [PATCHv5 net-next 07/15] bpf: recognize 64bit immediate loads as consts
From: Daniel Borkmann @ 2016-09-16 21:37 UTC (permalink / raw)
  To: Jakub Kicinski, netdev; +Cc: ast
In-Reply-To: <1474018622-10653-8-git-send-email-jakub.kicinski@netronome.com>

On 09/16/2016 11:36 AM, Jakub Kicinski wrote:
> When running as parser interpret BPF_LD | BPF_IMM | BPF_DW
> instructions as loading CONST_IMM with the value stored
> in imm.  The verifier will continue not recognizing those
> due to concerns about search space/program complexity
> increase.
>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Acked-by: Alexei Starovoitov <ast@kernel.org>

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* [PATCH net-next 1/2] bnx2x: allocate mac filtering 'mcast_list' in PAGE_SIZE increments
From: Jason Baron @ 2016-09-16 21:30 UTC (permalink / raw)
  To: davem, Ariel.Elior; +Cc: Yuval.Mintz, netdev
In-Reply-To: <cover.1474060371.git.jbaron@akamai.com>

Currently, we can have high order page allocations that specify
GFP_ATOMIC when configuring multicast MAC address filters.

For example, we have seen order 2 page allocation failures with
~500 multicast addresses configured.

Convert the allocation for 'mcast_list' to be done in PAGE_SIZE
increments.

Signed-off-by: Jason Baron <jbaron@akamai.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 85 ++++++++++++++++--------
 1 file changed, 57 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index dab61a81a3ba..9f5b2d94e4df 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12563,43 +12563,70 @@ static int bnx2x_close(struct net_device *dev)
 	return 0;
 }
 
-static int bnx2x_init_mcast_macs_list(struct bnx2x *bp,
-				      struct bnx2x_mcast_ramrod_params *p)
+struct bnx2x_mcast_list_elem_group
 {
-	int mc_count = netdev_mc_count(bp->dev);
-	struct bnx2x_mcast_list_elem *mc_mac =
-		kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC);
-	struct netdev_hw_addr *ha;
+	struct list_head mcast_group_link;
+	struct bnx2x_mcast_list_elem mcast_elems[];
+};
 
-	if (!mc_mac) {
-		BNX2X_ERR("Failed to allocate mc MAC list\n");
-		return -ENOMEM;
+#define MCAST_ELEMS_PER_PG \
+	((PAGE_SIZE - sizeof(struct bnx2x_mcast_list_elem_group)) / \
+	sizeof(struct bnx2x_mcast_list_elem))
+
+static void bnx2x_free_mcast_macs_list(struct list_head *mcast_group_list)
+{
+	struct bnx2x_mcast_list_elem_group *current_mcast_group;
+
+	while (!list_empty(mcast_group_list)) {
+		current_mcast_group = list_first_entry(mcast_group_list,
+				      struct bnx2x_mcast_list_elem_group,
+				      mcast_group_link);
+		list_del(&current_mcast_group->mcast_group_link);
+		kfree(current_mcast_group);
 	}
+}
 
-	INIT_LIST_HEAD(&p->mcast_list);
+static int bnx2x_init_mcast_macs_list(struct bnx2x *bp,
+				      struct bnx2x_mcast_ramrod_params *p,
+				      struct list_head *mcast_group_list)
+{
+	struct bnx2x_mcast_list_elem *mc_mac;
+	struct netdev_hw_addr *ha;
+	struct bnx2x_mcast_list_elem_group *current_mcast_group = NULL;
+	int mc_count = netdev_mc_count(bp->dev);
+	int i = 0, offset = 0, alloc_size;
 
+	INIT_LIST_HEAD(&p->mcast_list);
 	netdev_for_each_mc_addr(ha, bp->dev) {
+		if (!offset) {
+			if ((mc_count - i) < MCAST_ELEMS_PER_PG)
+				alloc_size = sizeof(struct list_head) +
+				(sizeof(struct bnx2x_mcast_list_elem) *
+				(mc_count - i));
+			else
+				alloc_size = PAGE_SIZE;
+			current_mcast_group = kmalloc(alloc_size, GFP_ATOMIC);
+			if (!current_mcast_group) {
+				bnx2x_free_mcast_macs_list(mcast_group_list);
+				BNX2X_ERR("Failed to allocate mc MAC list\n");
+				return -ENOMEM;
+			}
+			list_add(&current_mcast_group->mcast_group_link,
+				 mcast_group_list);
+		}
+		mc_mac = &current_mcast_group->mcast_elems[offset];
 		mc_mac->mac = bnx2x_mc_addr(ha);
 		list_add_tail(&mc_mac->link, &p->mcast_list);
-		mc_mac++;
+		offset++;
+		if (offset == MCAST_ELEMS_PER_PG) {
+			i += offset;
+			offset = 0;
+		}
 	}
-
 	p->mcast_list_len = mc_count;
-
 	return 0;
 }
 
-static void bnx2x_free_mcast_macs_list(
-	struct bnx2x_mcast_ramrod_params *p)
-{
-	struct bnx2x_mcast_list_elem *mc_mac =
-		list_first_entry(&p->mcast_list, struct bnx2x_mcast_list_elem,
-				 link);
-
-	WARN_ON(!mc_mac);
-	kfree(mc_mac);
-}
-
 /**
  * bnx2x_set_uc_list - configure a new unicast MACs list.
  *
@@ -12647,6 +12674,7 @@ static int bnx2x_set_uc_list(struct bnx2x *bp)
 
 static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
 {
+	LIST_HEAD(mcast_group_list);
 	struct net_device *dev = bp->dev;
 	struct bnx2x_mcast_ramrod_params rparam = {NULL};
 	int rc = 0;
@@ -12662,7 +12690,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
 
 	/* then, configure a new MACs list */
 	if (netdev_mc_count(dev)) {
-		rc = bnx2x_init_mcast_macs_list(bp, &rparam);
+		rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list);
 		if (rc)
 			return rc;
 
@@ -12673,7 +12701,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
 			BNX2X_ERR("Failed to set a new multicast configuration: %d\n",
 				  rc);
 
-		bnx2x_free_mcast_macs_list(&rparam);
+		bnx2x_free_mcast_macs_list(&mcast_group_list);
 	}
 
 	return rc;
@@ -12681,6 +12709,7 @@ static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
 
 static int bnx2x_set_mc_list(struct bnx2x *bp)
 {
+	LIST_HEAD(mcast_group_list);
 	struct bnx2x_mcast_ramrod_params rparam = {NULL};
 	struct net_device *dev = bp->dev;
 	int rc = 0;
@@ -12692,7 +12721,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
 	rparam.mcast_obj = &bp->mcast_obj;
 
 	if (netdev_mc_count(dev)) {
-		rc = bnx2x_init_mcast_macs_list(bp, &rparam);
+		rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list);
 		if (rc)
 			return rc;
 
@@ -12703,7 +12732,7 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
 			BNX2X_ERR("Failed to set a new multicast configuration: %d\n",
 				  rc);
 
-		bnx2x_free_mcast_macs_list(&rparam);
+		bnx2x_free_mcast_macs_list(&mcast_group_list);
 	} else {
 		/* If no mc addresses are required, flush the configuration */
 		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
-- 
2.6.1

^ permalink raw reply related

* [PATCH net-next 0/2] bnx2x: page allocation failure
From: Jason Baron @ 2016-09-16 21:30 UTC (permalink / raw)
  To: davem, Ariel.Elior; +Cc: Yuval.Mintz, netdev

Hi,

While configuring ~500 multicast addrs, we ran into high order
page allocation failures. They don't need to be high order, and
thus I'm proposing to split them into at most PAGE_SIZE allocations.

Below is a sample failure.

Thanks,

-Jason

[1201902.617882] bnx2x: [bnx2x_set_mc_list:12374(eth0)]Failed to create multicast MACs list: -12
[1207325.695021] kworker/1:0: page allocation failure: order:2, mode:0xc020
[1207325.702059] CPU: 1 PID: 15805 Comm: kworker/1:0 Tainted: G        W
[1207325.712940] Hardware name: SYNNEX CORPORATION 1x8-X4i SSD 10GE/S5512LE, BIOS V8.810 05/16/2013
[1207325.722284] Workqueue: events bnx2x_sp_rtnl_task [bnx2x]
[1207325.728206]  0000000000000000 ffff88012d873a78 ffffffff8267f7c7 000000000000c020
[1207325.736754]  0000000000000000 ffff88012d873b08 ffffffff8212f8e0 fffffffc00000003
[1207325.745301]  ffff88041ffecd80 ffff880400000030 0000000000000002 0000c0206800da13
[1207325.753846] Call Trace:
[1207325.756789]  [<ffffffff8267f7c7>] dump_stack+0x4d/0x63
[1207325.762426]  [<ffffffff8212f8e0>] warn_alloc_failed+0xe0/0x130
[1207325.768756]  [<ffffffff8213c898>] ? wakeup_kswapd+0x48/0x140
[1207325.774914]  [<ffffffff82132afc>] __alloc_pages_nodemask+0x2bc/0x970
[1207325.781761]  [<ffffffff82173691>] alloc_pages_current+0x91/0x100
[1207325.788260]  [<ffffffff8212fa1e>] alloc_kmem_pages+0xe/0x10
[1207325.794329]  [<ffffffff8214c9c8>] kmalloc_order+0x18/0x50
[1207325.800227]  [<ffffffff8214ca26>] kmalloc_order_trace+0x26/0xb0
[1207325.806642]  [<ffffffff82451c68>] ? _xfer_secondary_pool+0xa8/0x1a0
[1207325.813404]  [<ffffffff8217cfda>] __kmalloc+0x19a/0x1b0
[1207325.819142]  [<ffffffffa02fe975>] bnx2x_set_rx_mode_inner+0x3d5/0x590 [bnx2x]
[1207325.827000]  [<ffffffffa02ff52d>] bnx2x_sp_rtnl_task+0x28d/0x760 [bnx2x]
[1207325.834197]  [<ffffffff820695d4>] process_one_work+0x134/0x3c0
[1207325.840522]  [<ffffffff82069981>] worker_thread+0x121/0x460
[1207325.846585]  [<ffffffff82069860>] ? process_one_work+0x3c0/0x3c0
[1207325.853089]  [<ffffffff8206f039>] kthread+0xc9/0xe0
[1207325.858459]  [<ffffffff82070000>] ? notify_die+0x10/0x40
[1207325.864263]  [<ffffffff8206ef70>] ? kthread_create_on_node+0x180/0x180
[1207325.871288]  [<ffffffff826852d2>] ret_from_fork+0x42/0x70
[1207325.877183]  [<ffffffff8206ef70>] ? kthread_create_on_node+0x180/0x180


Jason Baron (2):
  bnx2x: allocate mac filtering 'mcast_list' in PAGE_SIZE increments
  bnx2x: allocate mac filtering pending list in PAGE_SIZE increments

 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |  85 ++++++++++-----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c   | 131 ++++++++++++++++-------
 2 files changed, 151 insertions(+), 65 deletions(-)

-- 
2.6.1

^ permalink raw reply

* Re: [PATCHv5 net-next 04/15] bpf: don't (ab)use instructions to store state
From: Daniel Borkmann @ 2016-09-16 21:22 UTC (permalink / raw)
  To: Jakub Kicinski, netdev; +Cc: ast
In-Reply-To: <1474018622-10653-5-git-send-email-jakub.kicinski@netronome.com>

On 09/16/2016 11:36 AM, Jakub Kicinski wrote:
> Storing state in reserved fields of instructions makes
> it impossible to run verifier on programs already
> marked as read-only. Allocate and use an array of
> per-instruction state instead.
>
> While touching the error path rename and move existing
> jump target.
>
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Acked-by: Alexei Starovoitov <ast@kernel.org>

LGMT

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply

* [PATCH resend] sctp: Remove some redundant code
From: Christophe JAILLET @ 2016-09-16 21:05 UTC (permalink / raw)
  To: marcelo.leitner, vyasevich, nhorman, davem
  Cc: linux-sctp, netdev, linux-kernel, kernel-janitors,
	Christophe JAILLET

In commit 311b21774f13 ("sctp: simplify sk_receive_queue locking"), a call
to 'skb_queue_splice_tail_init()' has been made explicit. Previously it was
hidden in 'sctp_skb_list_tail()'

Now, the code around it looks redundant. The '_init()' part of
'skb_queue_splice_tail_init()' should already do the same.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
---
Resent because netdev@ was not in copy
Acked-by tags added
---
 net/sctp/ulpqueue.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 877e55066f89..84d0fdaf7de9 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -140,11 +140,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
 		 * we can go ahead and clear out the lobby in one shot
 		 */
 		if (!skb_queue_empty(&sp->pd_lobby)) {
-			struct list_head *list;
 			skb_queue_splice_tail_init(&sp->pd_lobby,
 						   &sk->sk_receive_queue);
-			list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
-			INIT_LIST_HEAD(list);
 			return 1;
 		}
 	} else {
-- 
2.7.4

^ permalink raw reply related

* Re: [net PATCH] mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full
From: Brenden Blanco @ 2016-09-16 20:43 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, tariqt, Eric Dumazet, tom, rana.shahot, David S. Miller
In-Reply-To: <20160916203535.20016.45146.stgit@firesoul>

On Fri, Sep 16, 2016 at 10:36:12PM +0200, Jesper Dangaard Brouer wrote:
> The XDP_TX action can fail transmitting the frame in case the TX ring
> is full or port is down.  In case of TX failure it should drop the
> frame, and not as now call 'break' which is the same as XDP_PASS.
> 
> Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support")
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

You could in theory have also tried to recycle the page instead of
dropping it, but that's probably not worth optimizing when tx is backed
up, as you'll only save a handful of page_put's. The code to do so
wouldn't have been pretty.

Reviewed-by: Brenden Blanco <bblanco@plumgrid.com>

^ permalink raw reply

* Re: [net PATCH] mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full
From: Jesper Dangaard Brouer @ 2016-09-16 20:37 UTC (permalink / raw)
  To: netdev, tariqt
  Cc: Eric Dumazet, tom, bblanco, rana.shahot, David S. Miller, brouer
In-Reply-To: <20160916203535.20016.45146.stgit@firesoul>

On Fri, 16 Sep 2016 22:36:12 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> The XDP_TX action can fail transmitting the frame in case the TX ring
> is full or port is down.  In case of TX failure it should drop the
> frame, and not as now call 'break' which is the same as XDP_PASS.

Ups, forgot to add the V2 subject tag... Dave let me know if I should
resend with V2 in subj.?

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* [net PATCH] mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full
From: Jesper Dangaard Brouer @ 2016-09-16 20:36 UTC (permalink / raw)
  To: netdev, tariqt
  Cc: Eric Dumazet, tom, bblanco, Jesper Dangaard Brouer, rana.shahot,
	David S. Miller

The XDP_TX action can fail transmitting the frame in case the TX ring
is full or port is down.  In case of TX failure it should drop the
frame, and not as now call 'break' which is the same as XDP_PASS.

Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

---
Note, this fix have nothing to do with the page-refcnt bug I just reported.

 drivers/net/ethernet/mellanox/mlx4/en_rx.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 2040dad8611d..adcd55c655db 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -906,7 +906,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 							length, tx_index,
 							&doorbell_pending))
 					goto consumed;
-				break;
+				goto next; /* Drop on xmit failure */
 			default:
 				bpf_warn_invalid_xdp_action(act);
 			case XDP_ABORTED:

^ permalink raw reply related

* [v2] net: ipv6: fallback to full lookup if table lookup is unsuitable
From: Vincent Bernat @ 2016-09-16 20:33 UTC (permalink / raw)
  To: David Ahern, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev
  Cc: Vincent Bernat
In-Reply-To: <0e4071d6-a3cc-37a7-6a32-2169e0e3b305@cumulusnetworks.com>

Commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop
lookups") introduced a regression: insertion of an IPv6 route in a table
not containing the appropriate connected route for the gateway but which
contained a non-connected route (like a default gateway) fails while it
was previously working:

    $ ip link add eth0 type dummy
    $ ip link set up dev eth0
    $ ip addr add 2001:db8::1/64 dev eth0
    $ ip route add ::/0 via 2001:db8::5 dev eth0 table 20
    $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20
    RTNETLINK answers: No route to host
    $ ip -6 route show table 20
    default via 2001:db8::5 dev eth0  metric 1024  pref medium

After this patch, we get:

    $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20
    $ ip -6 route show table 20
    2001:db8:cafe::1 via 2001:db8::6 dev eth0  metric 1024  pref medium
    default via 2001:db8::5 dev eth0  metric 1024  pref medium

Signed-off-by: Vincent Bernat <vincent@bernat.im>
---
 net/ipv6/route.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ad4a7ff301fc..2c6c7257ff75 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1994,6 +1994,14 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
 			if (cfg->fc_table)
 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
 
+			if (grt) {
+				if (grt->rt6i_flags & RTF_GATEWAY ||
+				    (dev && dev != grt->dst.dev)) {
+					ip6_rt_put(grt);
+					grt = NULL;
+				}
+			}
+
 			if (!grt)
 				grt = rt6_lookup(net, gw_addr, NULL,
 						 cfg->fc_ifindex, 1);
-- 
2.9.3

^ permalink raw reply related

* Re: [net PATCH] mlx4: fix XDP_TX is acting like XDP_PASS on TX ring full
From: Jesper Dangaard Brouer @ 2016-09-16 20:29 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, tariqt, tom, bblanco, rana.shahot,
	"David S. Miller\" <davem
In-Reply-To: <1474056050.22679.75.camel@edumazet-glaptop3.roam.corp.google.com>

On Fri, 16 Sep 2016 13:00:50 -0700
Eric Dumazet <eric.dumazet@gmail.com> wrote:

> > diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> > index 2040dad8611d..d414c67dfd12 100644
> > --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> > +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
> > @@ -906,6 +906,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> >  							length, tx_index,
> >  							&doorbell_pending))
> >  					goto consumed;
> > +				goto next;
> >  				break;  
> 
> Why keeping this break; then ? ;)

I'll send a V2


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* [PATCH v4 net 1/1] net sched actions: fix GETing actions
From: Jamal Hadi Salim @ 2016-09-16 20:27 UTC (permalink / raw)
  To: davem; +Cc: netdev, xiyou.wangcong, Jamal Hadi Salim

From: Jamal Hadi Salim <jhs@mojatatu.com>

With the batch changes that translated transient actions into
a temporary list lost in the translation was the fact that
tcf_action_destroy() will eventually delete the action from
the permanent location if the refcount is zero.

Example of what broke:
...add a gact action to drop
sudo $TC actions add action drop index 10
...now retrieve it, looks good
sudo $TC actions get action gact index 10
...retrieve it again and find it is gone!
sudo $TC actions get action gact index 10

Fixes:
commit 22dc13c837c3 ("net_sched: convert tcf_exts from list to pointer array"),
commit 824a7e8863b3 ("net_sched: remove an unnecessary list_del()")
commit f07fed82ad79 ("net_sched: remove the leftover cleanup_a()")

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/sched/act_api.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d09d068..e1c0ce1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -592,6 +592,16 @@ err_out:
 	return ERR_PTR(err);
 }
 
+static void cleanup_a(struct list_head *actions, int ovr)
+{
+	struct tc_action *a;
+
+	list_for_each_entry(a, actions, list) {
+		if (ovr)
+			a->tcfa_refcnt--;
+	}
+}
+
 int tcf_action_init(struct net *net, struct nlattr *nla,
 				  struct nlattr *est, char *name, int ovr,
 				  int bind, struct list_head *actions)
@@ -612,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla,
 			goto err;
 		}
 		act->order = i;
+		if (ovr)
+			act->tcfa_refcnt++;
 		list_add_tail(&act->list, actions);
 	}
+
+	/* Remove the temp refcnt which was necessary to protect against
+	 * destroying an existing action which was being replaced
+	 */
+	cleanup_a(actions, ovr);
 	return 0;
 
 err:
@@ -883,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 			goto err;
 		}
 		act->order = i;
+		if (event == RTM_GETACTION)
+			act->tcfa_refcnt++;
 		list_add_tail(&act->list, &actions);
 	}
 
-- 
1.9.1

^ permalink raw reply related

* Re: net/bluetooth: workqueue destruction WARNING in hci_unregister_dev
From: Tejun Heo @ 2016-09-16 20:24 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: Dmitry Vyukov, Marcel Holtmann, Gustavo Padovan, Johan Hedberg,
	David S. Miller, linux-bluetooth, netdev, LKML, syzkaller,
	Kostya Serebryany, Alexander Potapenko, Eric Dumazet,
	Takashi Iwai
In-Reply-To: <c70c2e6f-1451-0705-db14-71bc022102c6@suse.cz>

Hello,

On Tue, Sep 13, 2016 at 08:14:40PM +0200, Jiri Slaby wrote:
> I assume Dmitry sees the same what I am still seeing, so I reported this
> some time ago:
> https://lkml.org/lkml/2016/3/21/492
> 
> This warning is trigerred there and still occurs with "HEAD":
>   (pwq != wq->dfl_pwq) && (pwq->refcnt > 1)
> and the state dump is in the log empty too:
> destroy_workqueue: name='hci0' pwq=ffff88006b5c8f00
> wq->dfl_pwq=ffff88006b5c9b00 pwq->refcnt=2 pwq->nr_active=0 delayed_works:
>   pwq 13:
>  cpus=2-3 node=1 flags=0x4 nice=-20 active=0/1
>     in-flight: 2669:wq_barrier_func

Hmmm... I think it could be from rescuer holding reference on the pwq.
Both cases have WQ_MEM_RECLAIM and it could be that rescuer was still
in flight (even without work items pending) when the sanity checks
were done.  The following patch moves the sanity checks after rescuer
destruction.  Dmitry, Jiri, can you please see whether the warning
goes away with this patch?

Thanks.

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 984f6ff..e8046a1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4042,8 +4042,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 			}
 		}
 
-		if (WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt > 1)) ||
-		    WARN_ON(pwq->nr_active) ||
+		if (WARN_ON(pwq->nr_active) ||
 		    WARN_ON(!list_empty(&pwq->delayed_works))) {
 			mutex_unlock(&wq->mutex);
 			show_workqueue_state();
@@ -4080,6 +4079,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		for_each_node(node) {
 			pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
 			RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
+			WARN_ON((pwq != wq->dfl_pwq) && (pwq->refcnt != 1));
 			put_pwq_unlocked(pwq);
 		}
 
@@ -4089,6 +4089,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		 */
 		pwq = wq->dfl_pwq;
 		wq->dfl_pwq = NULL;
+		WARN_ON(pwq->refcnt != 1);
 		put_pwq_unlocked(pwq);
 	}
 }

^ permalink raw reply related

* Re: [PATCH net-next 07/14] tcp: export data delivery rate
From: Eric Dumazet @ 2016-09-16 20:11 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: kbuild test robot, kbuild-all, David Miller, Netdev,
	Yuchung Cheng, Van Jacobson, Nandita Dukkipati,
	Soheil Hassas Yeganeh
In-Reply-To: <CADVnQymVmhX86St6aeTh7asW68cC8d3qOZEBrV-ajp1okbx6Zw@mail.gmail.com>

On Fri, Sep 16, 2016 at 1:03 PM, Neal Cardwell <ncardwell@google.com> wrote:

>
> Looks like 'rate' should be 'rate64'. I will include this fix in the
> next version of the patch series.
>
> neal


Oh, right you are !

^ permalink raw reply

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
From: Cyrill Gorcunov @ 2016-09-16 20:07 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet, jmorris,
	yoshfuji, kaber, avagin, stephen
In-Reply-To: <6464ac16-a15b-384e-ffb1-3ee84cdce313@cumulusnetworks.com>

On Fri, Sep 16, 2016 at 01:55:42PM -0600, David Ahern wrote:
> >> Since the display is showing sockets in addition to IPPROTO_RAW:
> >>
> >> $ ss -A raw
> >> State      Recv-Q Send-Q        Local Address:Port                         Peer Address:Port
> >> UNCONN     0      0                    *%eth0:icmp                                    *:*
> >>
> >> It is going to be confusing if only ipproto-255 sockets can be killed.
> > 
> > OK, gimme some time to implement it. Hopefully on the weekend or monday.
> > Thanks a huge for feedback!
> > 
> 
> It may well be a ss bug / problem. As I mentioned I am always seeing 255 for the protocol which

It is rather not addressed in ss. I mean, look, when we send out a diag packet
the kernel look ups for a handler, which for raw protocol we register as

static const struct inet_diag_handler raw_diag_handler = {
	.dump= raw_diag_dump,
	.dump_one= raw_diag_dump_one,
	.idiag_get_info= raw_diag_get_info,
	.idiag_type= IPPROTO_RAW,
	.idiag_info_size= 0,
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy= raw_diag_destroy,
#endif
};

so if we patch ss and ask for IPPROTO_ICMP in netlink packet the
kernel simply won't find anything. Thus I think we need (well, I need)
to extend the patch and register IPPROTO_ICMP diag type, then
extend ss as well. (If only I didn't miss somethin obvious).

> is odd since ss does a dump and takes the matches and invokes the kill. Thanks for taking
> the time to do the kill piece.

Sure!

^ permalink raw reply

* Re: [PATCH net-next 07/14] tcp: export data delivery rate
From: Neal Cardwell @ 2016-09-16 20:03 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, David Miller, Netdev, Yuchung Cheng, Van Jacobson,
	Nandita Dukkipati, Eric Dumazet, Soheil Hassas Yeganeh
In-Reply-To: <201609171104.7NaqQo41%fengguang.wu@intel.com>

On Fri, Sep 16, 2016 at 11:56 PM, kbuild test robot <lkp@intel.com> wrote:
>
>>> net/ipv4/tcp.c:2794:3: note: in expansion of macro 'do_div'
>       do_div(rate, intv);
>       ^~~~~~
>    In file included from arch/arm/include/asm/div64.h:126:0,
>                     from include/linux/kernel.h:142,
>                     from include/linux/crypto.h:21,
>                     from include/crypto/hash.h:16,
>                     from net/ipv4/tcp.c:250:
>>> include/asm-generic/div64.h:224:22: error: passing argument 1 of '__div64_32' from incompatible pointer type [-Werror=incompatible-pointer-types]
>       __rem = __div64_32(&(n), __base); \
...
> > 2794                  do_div(rate, intv);

Looks like 'rate' should be 'rate64'. I will include this fix in the
next version of the patch series.

neal

^ permalink raw reply

* Re: [PATCH net-next 07/14] tcp: export data delivery rate
From: Eric Dumazet @ 2016-09-16 20:04 UTC (permalink / raw)
  To: kbuild test robot
  Cc: Neal Cardwell, kbuild-all, David Miller, netdev, Yuchung Cheng,
	Van Jacobson, Nandita Dukkipati, Eric Dumazet,
	Soheil Hassas Yeganeh
In-Reply-To: <201609171104.7NaqQo41%fengguang.wu@intel.com>

On Sat, 2016-09-17 at 11:56 +0800, kbuild test robot wrote:
> Hi Yuchung,
> 
> [auto build test ERROR on net-next/master]
> 
> url:    https://github.com/0day-ci/linux/commits/Neal-Cardwell/tcp-BBR-congestion-control-algorithm/20160917-025323
> config: arm-nhk8815_defconfig (attached as .config)
> compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
> reproduce:
>         wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         make.cross ARCH=arm 

Right, we need to include <asm/div64.h>  for some arches.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox