[PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer
@ 2022-09-28 21:43 Nick Child
  2022-09-28 21:43 ` [PATCH v2 net-next 2/3] ibmveth: Implement multi queue on xmit Nick Child
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Nick Child @ 2022-09-28 21:43 UTC (permalink / raw)
  To: netdev; +Cc: Nick Child

Rather than DMA mapping and unmapping every outgoing skb, copy the skb
into a buffer that was mapped during the drivers open function. Copying
the skb and its frags have proven to be more time efficient than
mapping and unmapping. As an effect, performance increases by 3-5
Gbits/s.

Allocate and DMA map one continuous 64KB buffer at `ndo_open`. This
buffer is maintained until `ibmveth_close` is called. This buffer is
large enough to hold the largest possible linnear skb. During
`ndo_start_xmit`, copy the skb and all of it's frags into the continuous
buffer. By manually linnearizing all the socket buffers, time is saved
during memcpy as well as more efficient handling in FW.
As a result, we no longer need to worry about the firmware limitation
of handling a max of 6 frags. So, we only need to maintain 1 descriptor
instead of 6 and can hardcode 0 for the other 5 descriptors during
h_send_logical_lan.

Since, DMA allocation/mapping issues can no longer arise in xmit
functions, we can further reduce code size by removing the need for a
bounce buffer on DMA errors.

Signed-off-by: Nick Child <nnac123@linux.ibm.com>
---

Changes in v2:
 - Respond to feedback from Jakub Kicinski:
   - use min function in ibmveth_real_max_tx_queues
   - assume unused channels are initialized in `get_channel`
   - allow the core ethtool functions to perform argument validity checks
     in set_channel
   - rename 'fallback' variable to 'old' in set_channel
   - skip allocation steps if the device is not UP in set_channel
   - simplify error handling by making logical assumptions in set_channel
   - set IBMVETH_MAX_QUEUES to 16 instead of 8

 drivers/net/ethernet/ibm/ibmveth.c | 185 ++++++++++-------------------
 drivers/net/ethernet/ibm/ibmveth.h |  22 ++--
 2 files changed, 74 insertions(+), 133 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index ee4548e08446..675eaeed7a7b 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -538,6 +538,22 @@ static int ibmveth_open(struct net_device *netdev)
 		goto out_unmap_buffer_list;
 	}
 
+	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
+	adapter->tx_ltb_ptr = kzalloc(adapter->tx_ltb_size, GFP_KERNEL);
+	if (!adapter->tx_ltb_ptr) {
+		netdev_err(netdev,
+			   "unable to allocate transmit long term buffer\n");
+		goto out_unmap_buffer_list;
+	}
+	adapter->tx_ltb_dma = dma_map_single(dev, adapter->tx_ltb_ptr,
+					     adapter->tx_ltb_size,
+					     DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, adapter->tx_ltb_dma)) {
+		netdev_err(netdev,
+			   "unable to DMA map transmit long term buffer\n");
+		goto out_unmap_tx_dma;
+	}
+
 	adapter->rx_queue.index = 0;
 	adapter->rx_queue.num_slots = rxq_entries;
 	adapter->rx_queue.toggle = 1;
@@ -595,14 +611,6 @@ static int ibmveth_open(struct net_device *netdev)
 
 	rc = -ENOMEM;
 
-	adapter->bounce_buffer = dma_alloc_coherent(&adapter->vdev->dev,
-						    netdev->mtu + IBMVETH_BUFF_OH,
-						    &adapter->bounce_buffer_dma, GFP_KERNEL);
-	if (!adapter->bounce_buffer) {
-		netdev_err(netdev, "unable to alloc bounce buffer\n");
-		goto out_free_irq;
-	}
-
 	netdev_dbg(netdev, "initial replenish cycle\n");
 	ibmveth_interrupt(netdev->irq, netdev);
 
@@ -612,8 +620,6 @@ static int ibmveth_open(struct net_device *netdev)
 
 	return 0;
 
-out_free_irq:
-	free_irq(netdev->irq, netdev);
 out_free_buffer_pools:
 	while (--i >= 0) {
 		if (adapter->rx_buff_pool[i].active)
@@ -623,6 +629,10 @@ static int ibmveth_open(struct net_device *netdev)
 out_unmap_filter_list:
 	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
 			 DMA_BIDIRECTIONAL);
+
+out_unmap_tx_dma:
+	kfree(adapter->tx_ltb_ptr);
+
 out_unmap_buffer_list:
 	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
 			 DMA_BIDIRECTIONAL);
@@ -685,9 +695,9 @@ static int ibmveth_close(struct net_device *netdev)
 			ibmveth_free_buffer_pool(adapter,
 						 &adapter->rx_buff_pool[i]);
 
-	dma_free_coherent(&adapter->vdev->dev,
-			  adapter->netdev->mtu + IBMVETH_BUFF_OH,
-			  adapter->bounce_buffer, adapter->bounce_buffer_dma);
+	dma_unmap_single(dev, adapter->tx_ltb_dma, adapter->tx_ltb_size,
+			 DMA_TO_DEVICE);
+	kfree(adapter->tx_ltb_ptr);
 
 	netdev_dbg(netdev, "close complete\n");
 
@@ -969,7 +979,7 @@ static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 static int ibmveth_send(struct ibmveth_adapter *adapter,
-			union ibmveth_buf_desc *descs, unsigned long mss)
+			unsigned long desc, unsigned long mss)
 {
 	unsigned long correlator;
 	unsigned int retry_count;
@@ -982,12 +992,9 @@ static int ibmveth_send(struct ibmveth_adapter *adapter,
 	retry_count = 1024;
 	correlator = 0;
 	do {
-		ret = h_send_logical_lan(adapter->vdev->unit_address,
-					     descs[0].desc, descs[1].desc,
-					     descs[2].desc, descs[3].desc,
-					     descs[4].desc, descs[5].desc,
-					     correlator, &correlator, mss,
-					     adapter->fw_large_send_support);
+		ret = h_send_logical_lan(adapter->vdev->unit_address, desc,
+					 correlator, &correlator, mss,
+					 adapter->fw_large_send_support);
 	} while ((ret == H_BUSY) && (retry_count--));
 
 	if (ret != H_SUCCESS && ret != H_DROPPED) {
@@ -1021,33 +1028,14 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 {
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
 	unsigned int desc_flags;
-	union ibmveth_buf_desc descs[6];
-	int last, i;
-	int force_bounce = 0;
-	dma_addr_t dma_addr;
+	union ibmveth_buf_desc desc;
+	int i;
 	unsigned long mss = 0;
+	size_t total_bytes;
 
 	if (ibmveth_is_packet_unsupported(skb, netdev))
 		goto out;
 
-	/* veth doesn't handle frag_list, so linearize the skb.
-	 * When GRO is enabled SKB's can have frag_list.
-	 */
-	if (adapter->is_active_trunk &&
-	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
-		netdev->stats.tx_dropped++;
-		goto out;
-	}
-
-	/*
-	 * veth handles a maximum of 6 segments including the header, so
-	 * we have to linearize the skb if there are more than this.
-	 */
-	if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
-		netdev->stats.tx_dropped++;
-		goto out;
-	}
-
 	/* veth can't checksum offload UDP */
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    ((skb->protocol == htons(ETH_P_IP) &&
@@ -1077,56 +1065,6 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 			desc_flags |= IBMVETH_BUF_LRG_SND;
 	}
 
-retry_bounce:
-	memset(descs, 0, sizeof(descs));
-
-	/*
-	 * If a linear packet is below the rx threshold then
-	 * copy it into the static bounce buffer. This avoids the
-	 * cost of a TCE insert and remove.
-	 */
-	if (force_bounce || (!skb_is_nonlinear(skb) &&
-				(skb->len < tx_copybreak))) {
-		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
-					  skb->len);
-
-		descs[0].fields.flags_len = desc_flags | skb->len;
-		descs[0].fields.address = adapter->bounce_buffer_dma;
-
-		if (ibmveth_send(adapter, descs, 0)) {
-			adapter->tx_send_failed++;
-			netdev->stats.tx_dropped++;
-		} else {
-			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += skb->len;
-		}
-
-		goto out;
-	}
-
-	/* Map the header */
-	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
-				  skb_headlen(skb), DMA_TO_DEVICE);
-	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
-		goto map_failed;
-
-	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
-	descs[0].fields.address = dma_addr;
-
-	/* Map the frags */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0,
-					    skb_frag_size(frag), DMA_TO_DEVICE);
-
-		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
-			goto map_failed_frags;
-
-		descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag);
-		descs[i+1].fields.address = dma_addr;
-	}
-
 	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 		if (adapter->fw_large_send_support) {
 			mss = (unsigned long)skb_shinfo(skb)->gso_size;
@@ -1143,7 +1081,36 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		}
 	}
 
-	if (ibmveth_send(adapter, descs, mss)) {
+	/* Copy header into mapped buffer */
+	if (unlikely(skb->len > adapter->tx_ltb_size)) {
+		netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n",
+			   skb->len, adapter->tx_ltb_size);
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
+	memcpy(adapter->tx_ltb_ptr, skb->data, skb_headlen(skb));
+	total_bytes = skb_headlen(skb);
+	/* Copy frags into mapped buffers */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		memcpy(adapter->tx_ltb_ptr + total_bytes, skb_frag_address_safe(frag),
+		       skb_frag_size(frag));
+		total_bytes += skb_frag_size(frag);
+	}
+
+	if (unlikely(total_bytes != skb->len)) {
+		netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n",
+			   skb->len, total_bytes);
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
+	desc.fields.flags_len = desc_flags | skb->len;
+	desc.fields.address = adapter->tx_ltb_dma;
+	/* finish writing to long_term_buff before VIOS accessing it */
+	dma_wmb();
+
+	if (ibmveth_send(adapter, desc.desc, mss)) {
 		adapter->tx_send_failed++;
 		netdev->stats.tx_dropped++;
 	} else {
@@ -1151,41 +1118,11 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		netdev->stats.tx_bytes += skb->len;
 	}
 
-	dma_unmap_single(&adapter->vdev->dev,
-			 descs[0].fields.address,
-			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			 DMA_TO_DEVICE);
-
-	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
-		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
-			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			       DMA_TO_DEVICE);
-
 out:
 	dev_consume_skb_any(skb);
 	return NETDEV_TX_OK;
 
-map_failed_frags:
-	last = i+1;
-	for (i = 1; i < last; i++)
-		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
-			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			       DMA_TO_DEVICE);
 
-	dma_unmap_single(&adapter->vdev->dev,
-			 descs[0].fields.address,
-			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			 DMA_TO_DEVICE);
-map_failed:
-	if (!firmware_has_feature(FW_FEATURE_CMO))
-		netdev_err(netdev, "tx: unable to map xmit buffer\n");
-	adapter->tx_map_failed++;
-	if (skb_linearize(skb)) {
-		netdev->stats.tx_dropped++;
-		goto out;
-	}
-	force_bounce = 1;
-	goto retry_bounce;
 }
 
 static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
@@ -1568,6 +1505,8 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
 
 	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
 	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
+	/* add size of mapped tx buffers */
+	ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl);
 
 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
 		/* add the size of the active receive buffers */
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 27dfff200166..a46ead9b31de 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -46,23 +46,23 @@
 #define h_add_logical_lan_buffer(ua, buf) \
   plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
 
+/* FW allows us to send 6 descriptors but we only use one so mark
+ * the other 5 as unused (0)
+ */
 static inline long h_send_logical_lan(unsigned long unit_address,
-		unsigned long desc1, unsigned long desc2, unsigned long desc3,
-		unsigned long desc4, unsigned long desc5, unsigned long desc6,
-		unsigned long corellator_in, unsigned long *corellator_out,
-		unsigned long mss, unsigned long large_send_support)
+		unsigned long desc, unsigned long corellator_in,
+		unsigned long *corellator_out, unsigned long mss,
+		unsigned long large_send_support)
 {
 	long rc;
 	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
 
 	if (large_send_support)
 		rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address,
-				  desc1, desc2, desc3, desc4, desc5, desc6,
-				  corellator_in, mss);
+				  desc, 0, 0, 0, 0, 0, corellator_in, mss);
 	else
 		rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address,
-				  desc1, desc2, desc3, desc4, desc5, desc6,
-				  corellator_in);
+				  desc, 0, 0, 0, 0, 0, corellator_in);
 
 	*corellator_out = retbuf[0];
 
@@ -98,6 +98,7 @@ static inline long h_illan_attributes(unsigned long unit_address,
 #define IBMVETH_BUFF_LIST_SIZE 4096
 #define IBMVETH_FILT_LIST_SIZE 4096
 #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
+#define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64)
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
 static int pool_count[] = { 256, 512, 256, 256, 256 };
@@ -137,6 +138,9 @@ struct ibmveth_adapter {
     unsigned int mcastFilterSize;
     void * buffer_list_addr;
     void * filter_list_addr;
+    void *tx_ltb_ptr;
+    unsigned int tx_ltb_size;
+    dma_addr_t tx_ltb_dma;
     dma_addr_t buffer_list_dma;
     dma_addr_t filter_list_dma;
     struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS];
@@ -145,8 +149,6 @@ struct ibmveth_adapter {
     int rx_csum;
     int large_send;
     bool is_active_trunk;
-    void *bounce_buffer;
-    dma_addr_t bounce_buffer_dma;
 
     u64 fw_ipv6_csum_support;
     u64 fw_ipv4_csum_support;
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 net-next 2/3] ibmveth: Implement multi queue on xmit
  2022-09-28 21:43 [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer Nick Child
@ 2022-09-28 21:43 ` Nick Child
  2022-09-28 21:43 ` [PATCH v2 net-next 3/3] ibmveth: Ethtool set queue support Nick Child
  2022-09-30 11:50 ` [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer patchwork-bot+netdevbpf
  2 siblings, 0 replies; 4+ messages in thread
From: Nick Child @ 2022-09-28 21:43 UTC (permalink / raw)
  To: netdev; +Cc: Nick Child

The `ndo_start_xmit` function is protected by a spinlock on the tx queue
being used to transmit the skb. Allow concurrent calls to
`ndo_start_xmit` by using more than one tx queue. This allows for
greater throughput when several jobs are trying to transmit data.

Introduce 16 tx queues (leave single rx queue as is) which each
correspond to one DMA mapped long term buffer.

Signed-off-by: Nick Child <nnac123@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmveth.c | 69 +++++++++++++++++-------------
 drivers/net/ethernet/ibm/ibmveth.h |  5 ++-
 2 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 675eaeed7a7b..7abd67c2336e 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -538,20 +538,22 @@ static int ibmveth_open(struct net_device *netdev)
 		goto out_unmap_buffer_list;
 	}
 
-	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
-	adapter->tx_ltb_ptr = kzalloc(adapter->tx_ltb_size, GFP_KERNEL);
-	if (!adapter->tx_ltb_ptr) {
-		netdev_err(netdev,
-			   "unable to allocate transmit long term buffer\n");
-		goto out_unmap_buffer_list;
-	}
-	adapter->tx_ltb_dma = dma_map_single(dev, adapter->tx_ltb_ptr,
-					     adapter->tx_ltb_size,
-					     DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, adapter->tx_ltb_dma)) {
-		netdev_err(netdev,
-			   "unable to DMA map transmit long term buffer\n");
-		goto out_unmap_tx_dma;
+	for (i = 0; i < IBMVETH_MAX_QUEUES; i++) {
+		adapter->tx_ltb_ptr[i] = kzalloc(adapter->tx_ltb_size,
+						 GFP_KERNEL);
+		if (!adapter->tx_ltb_ptr[i]) {
+			netdev_err(netdev,
+				   "unable to allocate transmit long term buffer\n");
+			goto out_free_tx_ltb_ptrs;
+		}
+		adapter->tx_ltb_dma[i] = dma_map_single(dev,
+							adapter->tx_ltb_ptr[i],
+							adapter->tx_ltb_size,
+							DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, adapter->tx_ltb_dma[i])) {
+			netdev_err(netdev, "unable to DMA map transmit long term buffer\n");
+			goto out_unmap_tx_dma;
+		}
 	}
 
 	adapter->rx_queue.index = 0;
@@ -614,7 +616,7 @@ static int ibmveth_open(struct net_device *netdev)
 	netdev_dbg(netdev, "initial replenish cycle\n");
 	ibmveth_interrupt(netdev->irq, netdev);
 
-	netif_start_queue(netdev);
+	netif_tx_start_all_queues(netdev);
 
 	netdev_dbg(netdev, "open complete\n");
 
@@ -631,7 +633,14 @@ static int ibmveth_open(struct net_device *netdev)
 			 DMA_BIDIRECTIONAL);
 
 out_unmap_tx_dma:
-	kfree(adapter->tx_ltb_ptr);
+	kfree(adapter->tx_ltb_ptr[i]);
+
+out_free_tx_ltb_ptrs:
+	while (--i >= 0) {
+		dma_unmap_single(dev, adapter->tx_ltb_dma[i],
+				 adapter->tx_ltb_size, DMA_TO_DEVICE);
+		kfree(adapter->tx_ltb_ptr[i]);
+	}
 
 out_unmap_buffer_list:
 	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
@@ -661,7 +670,7 @@ static int ibmveth_close(struct net_device *netdev)
 	napi_disable(&adapter->napi);
 
 	if (!adapter->pool_config)
-		netif_stop_queue(netdev);
+		netif_tx_stop_all_queues(netdev);
 
 	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
 
@@ -695,9 +704,11 @@ static int ibmveth_close(struct net_device *netdev)
 			ibmveth_free_buffer_pool(adapter,
 						 &adapter->rx_buff_pool[i]);
 
-	dma_unmap_single(dev, adapter->tx_ltb_dma, adapter->tx_ltb_size,
-			 DMA_TO_DEVICE);
-	kfree(adapter->tx_ltb_ptr);
+	for (i = 0; i < IBMVETH_MAX_QUEUES; i++) {
+		dma_unmap_single(dev, adapter->tx_ltb_dma[i],
+				 adapter->tx_ltb_size, DMA_TO_DEVICE);
+		kfree(adapter->tx_ltb_ptr[i]);
+	}
 
 	netdev_dbg(netdev, "close complete\n");
 
@@ -1027,15 +1038,13 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 				      struct net_device *netdev)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
-	unsigned int desc_flags;
+	unsigned int desc_flags, total_bytes;
 	union ibmveth_buf_desc desc;
-	int i;
+	int i, queue_num = skb_get_queue_mapping(skb);
 	unsigned long mss = 0;
-	size_t total_bytes;
 
 	if (ibmveth_is_packet_unsupported(skb, netdev))
 		goto out;
-
 	/* veth can't checksum offload UDP */
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    ((skb->protocol == htons(ETH_P_IP) &&
@@ -1088,14 +1097,14 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		netdev->stats.tx_dropped++;
 		goto out;
 	}
-	memcpy(adapter->tx_ltb_ptr, skb->data, skb_headlen(skb));
+	memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
 	total_bytes = skb_headlen(skb);
 	/* Copy frags into mapped buffers */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-		memcpy(adapter->tx_ltb_ptr + total_bytes, skb_frag_address_safe(frag),
-		       skb_frag_size(frag));
+		memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes,
+		       skb_frag_address_safe(frag), skb_frag_size(frag));
 		total_bytes += skb_frag_size(frag);
 	}
 
@@ -1106,7 +1115,7 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		goto out;
 	}
 	desc.fields.flags_len = desc_flags | skb->len;
-	desc.fields.address = adapter->tx_ltb_dma;
+	desc.fields.address = adapter->tx_ltb_dma[queue_num];
 	/* finish writing to long_term_buff before VIOS accessing it */
 	dma_wmb();
 
@@ -1599,7 +1608,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		return -EINVAL;
 	}
 
-	netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));
+	netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
 
 	if (!netdev)
 		return -ENOMEM;
@@ -1666,6 +1675,8 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 			kobject_uevent(kobj, KOBJ_ADD);
 	}
 
+	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
+
 	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
 	netdev_dbg(netdev, "registering netdev...\n");
 
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index a46ead9b31de..daf6f615c03f 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -99,6 +99,7 @@ static inline long h_illan_attributes(unsigned long unit_address,
 #define IBMVETH_FILT_LIST_SIZE 4096
 #define IBMVETH_MAX_BUF_SIZE (1024 * 128)
 #define IBMVETH_MAX_TX_BUF_SIZE (1024 * 64)
+#define IBMVETH_MAX_QUEUES 16U
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
 static int pool_count[] = { 256, 512, 256, 256, 256 };
@@ -138,9 +139,9 @@ struct ibmveth_adapter {
     unsigned int mcastFilterSize;
     void * buffer_list_addr;
     void * filter_list_addr;
-    void *tx_ltb_ptr;
+    void *tx_ltb_ptr[IBMVETH_MAX_QUEUES];
     unsigned int tx_ltb_size;
-    dma_addr_t tx_ltb_dma;
+    dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES];
     dma_addr_t buffer_list_dma;
     dma_addr_t filter_list_dma;
     struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS];
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2 net-next 3/3] ibmveth: Ethtool set queue support
  2022-09-28 21:43 [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer Nick Child
  2022-09-28 21:43 ` [PATCH v2 net-next 2/3] ibmveth: Implement multi queue on xmit Nick Child
@ 2022-09-28 21:43 ` Nick Child
  2022-09-30 11:50 ` [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer patchwork-bot+netdevbpf
  2 siblings, 0 replies; 4+ messages in thread
From: Nick Child @ 2022-09-28 21:43 UTC (permalink / raw)
  To: netdev; +Cc: Nick Child

Implement channel management functions to allow dynamic addition and
removal of transmit queues. The `ethtool --show-channels` and
`ethtool --set-channels` commands can be used to get and set the
number of queues, respectively. Allow the ability to add as many
transmit queues as available processors but never allow more than the
hard maximum of 16. The number of receive queues is one and cannot be
modified.

Depending on whether the requested number of queues is larger or
smaller than the current value, either allocate or free long term
buffers. Since long term buffer construction and destruction can
occur in two different areas, from either channel set requests or
device open/close, define functions for performing this work. If
allocation of a new buffer fails, then attempt to revert back to the
previous number of queues.

Signed-off-by: Nick Child <nnac123@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmveth.c | 149 +++++++++++++++++++++++------
 1 file changed, 120 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 7abd67c2336e..3b14dc93f59d 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -141,6 +141,13 @@ static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
 }
 
+static unsigned int ibmveth_real_max_tx_queues(void)
+{
+	unsigned int n_cpu = num_online_cpus();
+
+	return min(n_cpu, IBMVETH_MAX_QUEUES);
+}
+
 /* setup the initial settings for a buffer pool */
 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
 				     u32 pool_index, u32 pool_size,
@@ -456,6 +463,38 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
 	}
 }
 
+static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
+{
+	dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
+			 adapter->tx_ltb_size, DMA_TO_DEVICE);
+	kfree(adapter->tx_ltb_ptr[idx]);
+	adapter->tx_ltb_ptr[idx] = NULL;
+}
+
+static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
+{
+	adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
+					   GFP_KERNEL);
+	if (!adapter->tx_ltb_ptr[idx]) {
+		netdev_err(adapter->netdev,
+			   "unable to allocate tx long term buffer\n");
+		return -ENOMEM;
+	}
+	adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
+						  adapter->tx_ltb_ptr[idx],
+						  adapter->tx_ltb_size,
+						  DMA_TO_DEVICE);
+	if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
+		netdev_err(adapter->netdev,
+			   "unable to DMA map tx long term buffer\n");
+		kfree(adapter->tx_ltb_ptr[idx]);
+		adapter->tx_ltb_ptr[idx] = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
         union ibmveth_buf_desc rxq_desc, u64 mac_address)
 {
@@ -538,22 +577,9 @@ static int ibmveth_open(struct net_device *netdev)
 		goto out_unmap_buffer_list;
 	}
 
-	for (i = 0; i < IBMVETH_MAX_QUEUES; i++) {
-		adapter->tx_ltb_ptr[i] = kzalloc(adapter->tx_ltb_size,
-						 GFP_KERNEL);
-		if (!adapter->tx_ltb_ptr[i]) {
-			netdev_err(netdev,
-				   "unable to allocate transmit long term buffer\n");
-			goto out_free_tx_ltb_ptrs;
-		}
-		adapter->tx_ltb_dma[i] = dma_map_single(dev,
-							adapter->tx_ltb_ptr[i],
-							adapter->tx_ltb_size,
-							DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, adapter->tx_ltb_dma[i])) {
-			netdev_err(netdev, "unable to DMA map transmit long term buffer\n");
-			goto out_unmap_tx_dma;
-		}
+	for (i = 0; i < netdev->real_num_tx_queues; i++) {
+		if (ibmveth_allocate_tx_ltb(adapter, i))
+			goto out_free_tx_ltb;
 	}
 
 	adapter->rx_queue.index = 0;
@@ -632,14 +658,9 @@ static int ibmveth_open(struct net_device *netdev)
 	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
 			 DMA_BIDIRECTIONAL);
 
-out_unmap_tx_dma:
-	kfree(adapter->tx_ltb_ptr[i]);
-
-out_free_tx_ltb_ptrs:
+out_free_tx_ltb:
 	while (--i >= 0) {
-		dma_unmap_single(dev, adapter->tx_ltb_dma[i],
-				 adapter->tx_ltb_size, DMA_TO_DEVICE);
-		kfree(adapter->tx_ltb_ptr[i]);
+		ibmveth_free_tx_ltb(adapter, i);
 	}
 
 out_unmap_buffer_list:
@@ -704,11 +725,8 @@ static int ibmveth_close(struct net_device *netdev)
 			ibmveth_free_buffer_pool(adapter,
 						 &adapter->rx_buff_pool[i]);
 
-	for (i = 0; i < IBMVETH_MAX_QUEUES; i++) {
-		dma_unmap_single(dev, adapter->tx_ltb_dma[i],
-				 adapter->tx_ltb_size, DMA_TO_DEVICE);
-		kfree(adapter->tx_ltb_ptr[i]);
-	}
+	for (i = 0; i < netdev->real_num_tx_queues; i++)
+		ibmveth_free_tx_ltb(adapter, i);
 
 	netdev_dbg(netdev, "close complete\n");
 
@@ -974,6 +992,69 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev,
 		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
 }
 
+static void ibmveth_get_channels(struct net_device *netdev,
+				 struct ethtool_channels *channels)
+{
+	channels->max_tx = ibmveth_real_max_tx_queues();
+	channels->tx_count = netdev->real_num_tx_queues;
+
+	channels->max_rx = netdev->real_num_rx_queues;
+	channels->rx_count = netdev->real_num_rx_queues;
+}
+
+static int ibmveth_set_channels(struct net_device *netdev,
+				struct ethtool_channels *channels)
+{
+	struct ibmveth_adapter *adapter = netdev_priv(netdev);
+	unsigned int old = netdev->real_num_tx_queues,
+		     goal = channels->tx_count;
+	int rc, i;
+
+	/* If ndo_open has not been called yet then don't allocate, just set
+	 * desired netdev_queue's and return
+	 */
+	if (!(netdev->flags & IFF_UP))
+		return netif_set_real_num_tx_queues(netdev, goal);
+
+	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
+	 * but we may need to alloc/free the ltb's.
+	 */
+	netif_tx_stop_all_queues(netdev);
+
+	/* Allocate any queue that we need */
+	for (i = old; i < goal; i++) {
+		if (adapter->tx_ltb_ptr[i])
+			continue;
+
+		rc = ibmveth_allocate_tx_ltb(adapter, i);
+		if (!rc)
+			continue;
+
+		/* if something goes wrong, free everything we just allocated */
+		netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
+			   old);
+		goal = old;
+		old = i;
+		break;
+	}
+	rc = netif_set_real_num_tx_queues(netdev, goal);
+	if (rc) {
+		netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
+			   old);
+		goal = old;
+		old = i;
+	}
+	/* Free any that are no longer needed */
+	for (i = old; i > goal; i--) {
+		if (adapter->tx_ltb_ptr[i - 1])
+			ibmveth_free_tx_ltb(adapter, i - 1);
+	}
+
+	netif_tx_wake_all_queues(netdev);
+
+	return rc;
+}
+
 static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_drvinfo		         = netdev_get_drvinfo,
 	.get_link		         = ethtool_op_get_link,
@@ -982,6 +1063,8 @@ static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_ethtool_stats	         = ibmveth_get_ethtool_stats,
 	.get_link_ksettings	         = ibmveth_get_link_ksettings,
 	.set_link_ksettings              = ibmveth_set_link_ksettings,
+	.get_channels			 = ibmveth_get_channels,
+	.set_channels			 = ibmveth_set_channels
 };
 
 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -1609,7 +1692,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	}
 
 	netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
-
 	if (!netdev)
 		return -ENOMEM;
 
@@ -1675,7 +1757,16 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 			kobject_uevent(kobj, KOBJ_ADD);
 	}
 
+	rc = netif_set_real_num_tx_queues(netdev, ibmveth_real_max_tx_queues());
+	if (rc) {
+		netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
+			   rc);
+		free_netdev(netdev);
+		return rc;
+	}
 	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
+	for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
+		adapter->tx_ltb_ptr[i] = NULL;
 
 	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
 	netdev_dbg(netdev, "registering netdev...\n");
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer
  2022-09-28 21:43 [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer Nick Child
  2022-09-28 21:43 ` [PATCH v2 net-next 2/3] ibmveth: Implement multi queue on xmit Nick Child
  2022-09-28 21:43 ` [PATCH v2 net-next 3/3] ibmveth: Ethtool set queue support Nick Child
@ 2022-09-30 11:50 ` patchwork-bot+netdevbpf
  2 siblings, 0 replies; 4+ messages in thread
From: patchwork-bot+netdevbpf @ 2022-09-30 11:50 UTC (permalink / raw)
  To: Nick Child; +Cc: netdev

Hello:

This series was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:

On Wed, 28 Sep 2022 16:43:48 -0500 you wrote:
> Rather than DMA mapping and unmapping every outgoing skb, copy the skb
> into a buffer that was mapped during the drivers open function. Copying
> the skb and its frags have proven to be more time efficient than
> mapping and unmapping. As an effect, performance increases by 3-5
> Gbits/s.
> 
> Allocate and DMA map one continuous 64KB buffer at `ndo_open`. This
> buffer is maintained until `ibmveth_close` is called. This buffer is
> large enough to hold the largest possible linnear skb. During
> `ndo_start_xmit`, copy the skb and all of it's frags into the continuous
> buffer. By manually linnearizing all the socket buffers, time is saved
> during memcpy as well as more efficient handling in FW.
> As a result, we no longer need to worry about the firmware limitation
> of handling a max of 6 frags. So, we only need to maintain 1 descriptor
> instead of 6 and can hardcode 0 for the other 5 descriptors during
> h_send_logical_lan.
> 
> [...]

Here is the summary with links:
  - [v2,net-next,1/3] ibmveth: Copy tx skbs into a premapped buffer
    https://git.kernel.org/netdev/net-next/c/d6832ca48d8a
  - [v2,net-next,2/3] ibmveth: Implement multi queue on xmit
    https://git.kernel.org/netdev/net-next/c/d926793c1de9
  - [v2,net-next,3/3] ibmveth: Ethtool set queue support
    https://git.kernel.org/netdev/net-next/c/10c2aba89cc0

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-09-30 11:52 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-09-28 21:43 [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer Nick Child
2022-09-28 21:43 ` [PATCH v2 net-next 2/3] ibmveth: Implement multi queue on xmit Nick Child
2022-09-28 21:43 ` [PATCH v2 net-next 3/3] ibmveth: Ethtool set queue support Nick Child
2022-09-30 11:50 ` [PATCH v2 net-next 1/3] ibmveth: Copy tx skbs into a premapped buffer patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).