Netdev List
 help / color / mirror / Atom feed
* [PATCH] rtw88: pci: Use general byte arrays as the elements of RX ring
From: Jian-Hong Pan @ 2019-07-25  8:09 UTC (permalink / raw)
  To: Yan-Hsuan Chuang, Kalle Valo, David S . Miller, David Laight
  Cc: linux-wireless, netdev, linux-kernel, linux, Jian-Hong Pan,
	stable

Each skb as the element in RX ring was expected with sized buffer 8216
(RTK_PCI_RX_BUF_SIZE) bytes. However, the skb buffer's true size is
16640 bytes for alignment after allocated, x86_64 for example. And, the
difference will be enlarged 512 times (RTK_MAX_RX_DESC_NUM).
To prevent that much wasted memory, this patch follows David's
suggestion [1] and uses general buffer arrays, instead of skbs as the
elements in RX ring.

[1] https://www.spinics.net/lists/linux-wireless/msg187870.html

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Cc: <stable@vger.kernel.org>
---
 drivers/net/wireless/realtek/rtw88/pci.c | 132 +++++++++++++----------
 drivers/net/wireless/realtek/rtw88/pci.h |   2 +-
 2 files changed, 75 insertions(+), 59 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 23dd06afef3d..e953010f0179 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -111,25 +111,49 @@ static void rtw_pci_free_tx_ring(struct rtw_dev *rtwdev,
 	tx_ring->r.head = NULL;
 }
 
+static struct rtw_pci_rx_buffer_desc *rtw_pci_get_rx_desc(
+					struct rtw_pci_rx_ring *rx_ring,
+					u32 idx)
+{
+	struct rtw_pci_rx_buffer_desc *buf_desc;
+	u32 desc_sz = rx_ring->r.desc_size;
+
+	buf_desc = (struct rtw_pci_rx_buffer_desc *)(rx_ring->r.head +
+						     idx * desc_sz);
+	return buf_desc;
+}
+
+static dma_addr_t rtw_pci_get_rx_bufdma(struct rtw_pci_rx_ring *rx_ring,
+					u32 idx)
+{
+	struct rtw_pci_rx_buffer_desc *buf_desc;
+	dma_addr_t dma;
+
+	buf_desc = rtw_pci_get_rx_desc(rx_ring, idx);
+	dma = le32_to_cpu(buf_desc->dma);
+
+	return dma;
+}
+
 static void rtw_pci_free_rx_ring(struct rtw_dev *rtwdev,
 				 struct rtw_pci_rx_ring *rx_ring)
 {
 	struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
-	struct sk_buff *skb;
+	u8 *buf;
 	dma_addr_t dma;
 	u8 *head = rx_ring->r.head;
 	int buf_sz = RTK_PCI_RX_BUF_SIZE;
 	int ring_sz = rx_ring->r.desc_size * rx_ring->r.len;
-	int i;
+	u32 i;
 
 	for (i = 0; i < rx_ring->r.len; i++) {
-		skb = rx_ring->buf[i];
-		if (!skb)
+		buf = rx_ring->buf[i];
+		if (!buf)
 			continue;
 
-		dma = *((dma_addr_t *)skb->cb);
-		pci_unmap_single(pdev, dma, buf_sz, PCI_DMA_FROMDEVICE);
-		dev_kfree_skb(skb);
+		dma = rtw_pci_get_rx_bufdma(rx_ring, i);
+		pci_unmap_single(pdev, dma, buf_sz, DMA_FROM_DEVICE);
+		devm_kfree(rtwdev->dev, buf);
 		rx_ring->buf[i] = NULL;
 	}
 
@@ -180,27 +204,24 @@ static int rtw_pci_init_tx_ring(struct rtw_dev *rtwdev,
 	return 0;
 }
 
-static int rtw_pci_reset_rx_desc(struct rtw_dev *rtwdev, struct sk_buff *skb,
-				 struct rtw_pci_rx_ring *rx_ring,
-				 u32 idx, u32 desc_sz)
+static int rtw_pci_reset_rx_desc(struct rtw_dev *rtwdev, u8 *buf,
+				 struct rtw_pci_rx_ring *rx_ring, u32 idx)
 {
 	struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
 	struct rtw_pci_rx_buffer_desc *buf_desc;
 	int buf_sz = RTK_PCI_RX_BUF_SIZE;
 	dma_addr_t dma;
 
-	if (!skb)
+	if (!buf)
 		return -EINVAL;
 
-	dma = pci_map_single(pdev, skb->data, buf_sz, PCI_DMA_FROMDEVICE);
+	dma = pci_map_single(pdev, buf, buf_sz, DMA_FROM_DEVICE);
 	if (pci_dma_mapping_error(pdev, dma))
 		return -EBUSY;
 
-	*((dma_addr_t *)skb->cb) = dma;
-	buf_desc = (struct rtw_pci_rx_buffer_desc *)(rx_ring->r.head +
-						     idx * desc_sz);
-	memset(buf_desc, 0, sizeof(*buf_desc));
+	buf_desc = rtw_pci_get_rx_desc(rx_ring, idx);
 	buf_desc->buf_size = cpu_to_le16(RTK_PCI_RX_BUF_SIZE);
+	buf_desc->total_pkt_size = cpu_to_le16(0);
 	buf_desc->dma = cpu_to_le32(dma);
 
 	return 0;
@@ -208,7 +229,7 @@ static int rtw_pci_reset_rx_desc(struct rtw_dev *rtwdev, struct sk_buff *skb,
 
 static void rtw_pci_sync_rx_desc_device(struct rtw_dev *rtwdev, dma_addr_t dma,
 					struct rtw_pci_rx_ring *rx_ring,
-					u32 idx, u32 desc_sz)
+					u32 idx)
 {
 	struct device *dev = rtwdev->dev;
 	struct rtw_pci_rx_buffer_desc *buf_desc;
@@ -216,10 +237,9 @@ static void rtw_pci_sync_rx_desc_device(struct rtw_dev *rtwdev, dma_addr_t dma,
 
 	dma_sync_single_for_device(dev, dma, buf_sz, DMA_FROM_DEVICE);
 
-	buf_desc = (struct rtw_pci_rx_buffer_desc *)(rx_ring->r.head +
-						     idx * desc_sz);
-	memset(buf_desc, 0, sizeof(*buf_desc));
+	buf_desc = rtw_pci_get_rx_desc(rx_ring, idx);
 	buf_desc->buf_size = cpu_to_le16(RTK_PCI_RX_BUF_SIZE);
+	buf_desc->total_pkt_size = cpu_to_le16(0);
 	buf_desc->dma = cpu_to_le32(dma);
 }
 
@@ -228,12 +248,12 @@ static int rtw_pci_init_rx_ring(struct rtw_dev *rtwdev,
 				u8 desc_size, u32 len)
 {
 	struct pci_dev *pdev = to_pci_dev(rtwdev->dev);
-	struct sk_buff *skb = NULL;
+	u8 *buf = NULL;
 	dma_addr_t dma;
 	u8 *head;
 	int ring_sz = desc_size * len;
 	int buf_sz = RTK_PCI_RX_BUF_SIZE;
-	int i, allocated;
+	u32 i, allocated;
 	int ret = 0;
 
 	head = pci_zalloc_consistent(pdev, ring_sz, &dma);
@@ -242,41 +262,39 @@ static int rtw_pci_init_rx_ring(struct rtw_dev *rtwdev,
 		return -ENOMEM;
 	}
 	rx_ring->r.head = head;
+	rx_ring->r.dma = dma;
+	rx_ring->r.len = len;
+	rx_ring->r.desc_size = desc_size;
+	rx_ring->r.wp = 0;
+	rx_ring->r.rp = 0;
 
 	for (i = 0; i < len; i++) {
-		skb = dev_alloc_skb(buf_sz);
-		if (!skb) {
+		buf = devm_kzalloc(rtwdev->dev, buf_sz, GFP_ATOMIC);
+		if (!buf) {
 			allocated = i;
 			ret = -ENOMEM;
 			goto err_out;
 		}
 
-		memset(skb->data, 0, buf_sz);
-		rx_ring->buf[i] = skb;
-		ret = rtw_pci_reset_rx_desc(rtwdev, skb, rx_ring, i, desc_size);
+		rx_ring->buf[i] = buf;
+		ret = rtw_pci_reset_rx_desc(rtwdev, buf, rx_ring, i);
 		if (ret) {
 			allocated = i;
-			dev_kfree_skb_any(skb);
+			devm_kfree(rtwdev->dev, buf);
 			goto err_out;
 		}
 	}
 
-	rx_ring->r.dma = dma;
-	rx_ring->r.len = len;
-	rx_ring->r.desc_size = desc_size;
-	rx_ring->r.wp = 0;
-	rx_ring->r.rp = 0;
-
 	return 0;
 
 err_out:
 	for (i = 0; i < allocated; i++) {
-		skb = rx_ring->buf[i];
-		if (!skb)
+		buf = rx_ring->buf[i];
+		if (!buf)
 			continue;
-		dma = *((dma_addr_t *)skb->cb);
-		pci_unmap_single(pdev, dma, buf_sz, PCI_DMA_FROMDEVICE);
-		dev_kfree_skb_any(skb);
+		dma = rtw_pci_get_rx_bufdma(rx_ring, i);
+		pci_unmap_single(pdev, dma, buf_sz, DMA_FROM_DEVICE);
+		devm_kfree(rtwdev->dev, buf);
 		rx_ring->buf[i] = NULL;
 	}
 	pci_free_consistent(pdev, ring_sz, head, dma);
@@ -776,13 +794,12 @@ static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	struct rtw_pci_rx_ring *ring;
 	struct rtw_rx_pkt_stat pkt_stat;
 	struct ieee80211_rx_status rx_status;
-	struct sk_buff *skb, *new;
+	struct sk_buff *skb;
 	u32 cur_wp, cur_rp, tmp;
 	u32 count;
 	u32 pkt_offset;
 	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
-	u32 buf_desc_sz = chip->rx_buf_desc_sz;
-	u32 new_len;
+	u32 len;
 	u8 *rx_desc;
 	dma_addr_t dma;
 
@@ -799,11 +816,11 @@ static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	cur_rp = ring->r.rp;
 	while (count--) {
 		rtw_pci_dma_check(rtwdev, ring, cur_rp);
-		skb = ring->buf[cur_rp];
-		dma = *((dma_addr_t *)skb->cb);
+		/* buffer is already filled as rx_desc */
+		rx_desc = ring->buf[cur_rp];
+		dma = rtw_pci_get_rx_bufdma(ring, cur_rp);
 		dma_sync_single_for_cpu(rtwdev->dev, dma, RTK_PCI_RX_BUF_SIZE,
 					DMA_FROM_DEVICE);
-		rx_desc = skb->data;
 		chip->ops->query_rx_desc(rtwdev, rx_desc, &pkt_stat, &rx_status);
 
 		/* offset from rx_desc to payload */
@@ -813,32 +830,31 @@ static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 		/* allocate a new skb for this frame,
 		 * discard the frame if none available
 		 */
-		new_len = pkt_stat.pkt_len + pkt_offset;
-		new = dev_alloc_skb(new_len);
-		if (WARN_ONCE(!new, "rx routine starvation\n"))
+		len = pkt_stat.pkt_len + pkt_offset;
+		skb = dev_alloc_skb(len);
+		if (WARN_ONCE(!skb, "rx routine starvation\n"))
 			goto next_rp;
 
 		/* put the DMA data including rx_desc from phy to new skb */
-		skb_put_data(new, skb->data, new_len);
+		skb_put_data(skb, rx_desc, len);
 
 		if (pkt_stat.is_c2h) {
 			 /* pass rx_desc & offset for further operation */
-			*((u32 *)new->cb) = pkt_offset;
-			skb_queue_tail(&rtwdev->c2h_queue, new);
+			*((u32 *)skb->cb) = pkt_offset;
+			skb_queue_tail(&rtwdev->c2h_queue, skb);
 			ieee80211_queue_work(rtwdev->hw, &rtwdev->c2h_work);
 		} else {
 			/* remove rx_desc */
-			skb_pull(new, pkt_offset);
+			skb_pull(skb, pkt_offset);
 
-			rtw_rx_stats(rtwdev, pkt_stat.vif, new);
-			memcpy(new->cb, &rx_status, sizeof(rx_status));
-			ieee80211_rx_irqsafe(rtwdev->hw, new);
+			rtw_rx_stats(rtwdev, pkt_stat.vif, skb);
+			memcpy(skb->cb, &rx_status, sizeof(rx_status));
+			ieee80211_rx_irqsafe(rtwdev->hw, skb);
 		}
 
 next_rp:
-		/* new skb delivered to mac80211, re-enable original skb DMA */
-		rtw_pci_sync_rx_desc_device(rtwdev, dma, ring, cur_rp,
-					    buf_desc_sz);
+		/* new skb delivered to mac80211, re-enable original buf DMA */
+		rtw_pci_sync_rx_desc_device(rtwdev, dma, ring, cur_rp);
 
 		/* host read next element in ring */
 		if (++cur_rp >= ring->r.len)
diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
index 87824a4caba9..283685421a64 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.h
+++ b/drivers/net/wireless/realtek/rtw88/pci.h
@@ -174,7 +174,7 @@ struct rtw_pci_rx_buffer_desc {
 
 struct rtw_pci_rx_ring {
 	struct rtw_pci_ring r;
-	struct sk_buff *buf[RTK_MAX_RX_DESC_NUM];
+	u8 *buf[RTK_MAX_RX_DESC_NUM];
 };
 
 #define RX_TAG_MAX	8192
-- 
2.22.0


^ permalink raw reply related

* Re: [PATCH net-next] can: ti_hecc: remove set but not used variable 'mbx_mask'
From: Jeroen Hofstee @ 2019-07-25  8:19 UTC (permalink / raw)
  To: YueHaibing, Wolfgang Grandegger, Marc Kleine-Budde
  Cc: linux-can@vger.kernel.org, netdev@vger.kernel.org,
	kernel-janitors@vger.kernel.org, Hulk Robot
In-Reply-To: <20190725070044.2692-1-yuehaibing@huawei.com>


On 7/25/19 9:00 AM, YueHaibing wrote:
> Fixes gcc '-Wunused-but-set-variable' warning:
>
> drivers/net/can/ti_hecc.c: In function 'ti_hecc_mailbox_read':
> drivers/net/can/ti_hecc.c:533:12: warning:
>   variable 'mbx_mask' set but not used [-Wunused-but-set-variable]
>
> It is never used so can be removed.
>
> Reported-by: Hulk Robot <hulkci@huawei.com>
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> ---
>   drivers/net/can/ti_hecc.c | 3 +--
>   1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
> index b62f75fa03f0..e63e2f86c289 100644
> --- a/drivers/net/can/ti_hecc.c
> +++ b/drivers/net/can/ti_hecc.c
> @@ -530,9 +530,8 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload,
>   					 u32 *timestamp, unsigned int mbxno)
>   {
>   	struct ti_hecc_priv *priv = rx_offload_to_priv(offload);
> -	u32 data, mbx_mask;
> +	u32 data;
>   
> -	mbx_mask = BIT(mbxno);
>   	data = hecc_read_mbx(priv, mbxno, HECC_CANMID);
>   	if (data & HECC_CANMID_IDE)
>   		cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
>
>

Indeed, mbx_mask is no longer used after including
"can: ti_hecc: use timestamp based rx-offloading".

Reviewed-by: Jeroen Hofstee <jhofstee@victronenergy.com>


^ permalink raw reply

* Re: [RFC] performance regression with commit-id<adb03115f459> ("net: get rid of an signed integer overflow in ip_idents_reserve()")
From: Zhangshaokun @ 2019-07-25  9:05 UTC (permalink / raw)
  To: Eric Dumazet, Eric Dumazet, Jiri Pirko, netdev, linux-kernel
  Cc: David S. Miller, guoyang (C), zhudacai@hisilicon.com
In-Reply-To: <3d77a08a-22e9-16e8-4091-c5ba4851ff13@gmail.com>

Hi Eric,

Thanks your quick reply.

On 2019/7/24 16:56, Eric Dumazet wrote:
> 
> 
> On 7/24/19 10:38 AM, Zhangshaokun wrote:
>> Hi,
>>
>> I've observed an significant performance regression with the following commit-id <adb03115f459>
>> ("net: get rid of an signed integer overflow in ip_idents_reserve()").
> 
> Yes this UBSAN false positive has been painful
> 
> 
> 
>>
>> Here are my test scenes:
>> ----Server----
>> Cmd: iperf3 -s xxx.xxx.xxxx.xxx -p 10000 -i 0 -A 0
>> Kenel: 4.19.34
>> Server number: 32
>> Port: 10000 – 10032
>> CPU affinity: 0 – 32
>> CPU architecture: aarch64
>> NUMA node0 CPU(s): 0-23
>> NUMA node1 CPU(s): 24-47
>>
>> ----Client----
>> Cmd: iperf3 -u -c xxx.xxx.xxxx.xxx -p 10000 -l 16 -b 0 -t 0 -i 0 -A 8
>> Kenel: 4.19.34
>> Client number: 32
>> Port: 10000 – 10032
>> CPU affinity: 0 – 32
>> CPU architecture: aarch64
>> NUMA node0 CPU(s): 0-23
>> NUMA node1 CPU(s): 24-47
>>
>> Firstly, With patch <adb03115f459> ("net: get rid of an signed integer overflow in ip_idents_reserve()") ,
>> client’s cpu is 100%, and function ip_idents_reserve() cpu usage is very high, but the result is not good.
>> 03:08:32 AM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
>> 03:08:33 AM      eth0      0.00      0.00      0.00      0.00      0.00      0.00      0.00      0.00
>> 03:08:33 AM      eth1      0.00 3461296.00      0.00 196049.97      0.00      0.00      0.00      0.00
>> 03:08:33 AM        lo      0.00      0.00      0.00      0.00      0.00      0.00      0.00      0.00
>>
>> Secondly, revert that patch, use atomic_add_return() instead, the result is better, as below:
>> 03:23:24 AM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
>> 03:23:25 AM        lo      0.00      0.00      0.00      0.00      0.00      0.00      0.00      0.00
>> 03:23:25 AM      eth1      0.00 12834590.00      0.00 726959.20      0.00      0.00      0.00      0.00
>> 03:23:25 AM      eth0      7.00     11.00      0.40      2.95      0.00      0.00      0.00      0.00
>>
>> Thirdly, atomic is not used in ip_idents_reserve() completely ,while each cpu core allocates its own ID segment,
>> Such as: cpu core0 allocate ID 0 – 1023, cpu core1 allocate 1024 – 2047, …,etc
>> the result is the best:
> 
> Not sure what you mean.
> 
> Less entropy in IPv4 ID is not going to help when fragments _are_ needed.
> 
> Send 40,000 datagrams of 2000 bytes each, add delays, reorders, and boom, most of the packets will be lost.
> 
> This is not because your use case does not need proper IP ID that we can mess with them.
> 

Got it, thanks your more explanation.

> If you need to send packets very fast,  maybe use AF_PACKET ?
> 

Ok, I will try it later.

>> 03:27:06 AM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
>> 03:27:07 AM        lo      0.00      0.00      0.00      0.00      0.00      0.00      0.00      0.00
>> 03:27:07 AM      eth1      0.00 14275505.00      0.00 808573.53      0.00      0.00      0.00      0.00
>> 03:27:07 AM      eth0      0.00      2.00      0.00      0.18      0.00      0.00      0.00      0.00
>>
>> Because atomic operation performance is bottleneck when cpu core number increase, Can we revert the patch or
>> use ID segment for each cpu core instead?
> 
> 
> This has been discussed in the past.
> 
> https://lore.kernel.org/lkml/b0160f4b-b996-b0ee-405a-3d5f1866272e@gmail.com/
> 
> We can revert now UBSAN has been fixed.
> 
> Or even use Peter patch : https://lore.kernel.org/lkml/20181101172739.GA3196@hirez.programming.kicks-ass.net/
> 

I have tried this patch under the condition that I remove try_cmpxchg because there is no this API in arm64 :
09:21:16 PM     IFACE   rxpck/s   txpck/s    rxkB/s    txkB/s   rxcmp/s   txcmp/s  rxmcst/s   %ifutil
09:21:17 PM        lo      0.00      0.00      0.00      0.00      0.00      0.00      0.00      0.00
09:21:17 PM      eth1      0.00 10434613.00      0.00 591023.00      0.00      0.00      0.00      0.00
09:21:17 PM      eth0      1.00      0.00      0.12      0.00      0.00      0.00      0.00      0.00

The result is 10434613.00 pps and it is less than the atomic_add_return(12834590.00 pps).
Any thoughts?

Thanks,
Shaokun

> However, you will still hit badly a shared cache line, not matter what.
> 
> Some arches are known to have terrible LL/SC implementation :/
> 
> 
> .
> 


^ permalink raw reply

* [PATCH] net: tipc: Fix a possible null-pointer dereference in tipc_publ_purge()
From: Jia-Ju Bai @ 2019-07-25  9:20 UTC (permalink / raw)
  To: jon.maloy, ying.xue, davem
  Cc: netdev, tipc-discussion, linux-kernel, Jia-Ju Bai

In tipc_publ_purge(), there is an if statement on 215 to 
check whether p is NULL: 
    if (p)

When p is NULL, it is used on line 226:
    kfree_rcu(p, rcu);

Thus, a possible null-pointer dereference may occur.

To fix this bug, p is checked before being used.

This bug is found by a static analysis tool STCheck written by us.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
 net/tipc/name_distr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 44abc8e9c990..241ed2274473 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -223,7 +223,8 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr)
 		       publ->key);
 	}
 
-	kfree_rcu(p, rcu);
+	if (p)
+		kfree_rcu(p, rcu);
 }
 
 /**
-- 
2.17.0


^ permalink raw reply related

* RE: [PATCH] rtw88: pci: Use general byte arrays as the elements of RX ring
From: David Laight @ 2019-07-25  9:21 UTC (permalink / raw)
  To: 'Jian-Hong Pan', Yan-Hsuan Chuang, Kalle Valo,
	David S . Miller
  Cc: linux-wireless@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux@endlessm.com,
	stable@vger.kernel.org
In-Reply-To: <20190725080925.6575-1-jian-hong@endlessm.com>

From: Jian-Hong Pan
> Sent: 25 July 2019 09:09
> Each skb as the element in RX ring was expected with sized buffer 8216
> (RTK_PCI_RX_BUF_SIZE) bytes. However, the skb buffer's true size is
> 16640 bytes for alignment after allocated, x86_64 for example. And, the
> difference will be enlarged 512 times (RTK_MAX_RX_DESC_NUM).
> To prevent that much wasted memory, this patch follows David's
> suggestion [1] and uses general buffer arrays, instead of skbs as the
> elements in RX ring.
...
>  	for (i = 0; i < len; i++) {
> -		skb = dev_alloc_skb(buf_sz);
> -		if (!skb) {
> +		buf = devm_kzalloc(rtwdev->dev, buf_sz, GFP_ATOMIC);

You should do this allocation somewhere than can sleep.
So you don't need GFP_ATOMIC, making the allocate (and dma map)
much less likely to fail.
If they do fail using a smaller ring might be better than failing
completely.

I suspect that buf_sz gets rounded up somewhat.
Also you almost certainly want 'buf' to be cache-line aligned.
I don't think devm_kzalloc() guarantees that at all.

While allocating all 512 buffers in one block (just over 4MB)
is probably not a good idea, you may need to allocated (and dma map)
then in groups.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply

* [PATCH] net: bluetooth: hci_sock: Fix a possible null-pointer dereference in hci_mgmt_cmd()
From: Jia-Ju Bai @ 2019-07-25  9:22 UTC (permalink / raw)
  To: marcel, johan.hedberg, davem
  Cc: linux-bluetooth, netdev, linux-kernel, Jia-Ju Bai

In hci_mgmt_cmd(), there is an if statement on line 1570 to check
whether hdev is NULL:
    if (hdev && chan->hdev_init)

When hdev is NULL, it is used on line 1575:
    err = handler->func(sk, hdev, cp, len);

Some called functions of handler->func use hdev, such as:
set_appearance(), add_device() and remove_device() in mgmt.c.

Thus, a possible null-pointer dereference may occur.

To fix this bug, hdev is checked before calling handler->func().

This bug is found by a static analysis tool STCheck written by us.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
 net/bluetooth/hci_sock.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index d32077b28433..18ea1e47ea48 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1570,11 +1570,12 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
 	if (hdev && chan->hdev_init)
 		chan->hdev_init(sk, hdev);
 
-	cp = buf + sizeof(*hdr);
-
-	err = handler->func(sk, hdev, cp, len);
-	if (err < 0)
-		goto done;
+	if (hdev) {
+		cp = buf + sizeof(*hdr);
+		err = handler->func(sk, hdev, cp, len);
+		if (err < 0)
+			goto done;
+	}
 
 	err = msglen;
 
-- 
2.17.0


^ permalink raw reply related

* Re: [PATCH bpf-next v3 03/11] xsk: add support to allow unaligned chunk placement
From: Maxim Mikityanskiy @ 2019-07-25  9:27 UTC (permalink / raw)
  To: Kevin Laatz
  Cc: netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	bjorn.topel@intel.com, magnus.karlsson@intel.com,
	jakub.kicinski@netronome.com, jonathan.lemon@gmail.com,
	Saeed Mahameed, stephen@networkplumber.org,
	bruce.richardson@intel.com, ciara.loftus@intel.com,
	bpf@vger.kernel.org, intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190724051043.14348-4-kevin.laatz@intel.com>

On 2019-07-24 08:10, Kevin Laatz wrote:
> Currently, addresses are chunk size aligned. This means, we are very
> restricted in terms of where we can place chunk within the umem. For
> example, if we have a chunk size of 2k, then our chunks can only be placed
> at 0,2k,4k,6k,8k... and so on (ie. every 2k starting from 0).
> 
> This patch introduces the ability to use unaligned chunks. With these
> changes, we are no longer bound to having to place chunks at a 2k (or
> whatever your chunk size is) interval. Since we are no longer dealing with
> aligned chunks, they can now cross page boundaries. Checks for page
> contiguity have been added in order to keep track of which pages are
> followed by a physically contiguous page.
> 
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> ---
> v2:
>    - Add checks for the flags coming from userspace
>    - Fix how we get chunk_size in xsk_diag.c
>    - Add defines for masking the new descriptor format
>    - Modified the rx functions to use new descriptor format
>    - Modified the tx functions to use new descriptor format
> 
> v3:
>    - Add helper function to do address/offset masking/addition
> ---
>   include/net/xdp_sock.h      | 17 ++++++++
>   include/uapi/linux/if_xdp.h |  9 ++++
>   net/xdp/xdp_umem.c          | 18 +++++---
>   net/xdp/xsk.c               | 86 ++++++++++++++++++++++++++++++-------
>   net/xdp/xsk_diag.c          |  2 +-
>   net/xdp/xsk_queue.h         | 68 +++++++++++++++++++++++++----
>   6 files changed, 170 insertions(+), 30 deletions(-)
> 

<...>

> +/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
> + * each page. This is only required in copy mode.
> + */
> +static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
> +			     u32 len, u32 metalen)
> +{
> +	void *to_buf = xdp_umem_get_data(umem, addr);
> +
> +	if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
> +		void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
> +		u64 page_start = addr & (PAGE_SIZE - 1);
> +		u64 first_len = PAGE_SIZE - (addr - page_start);

Let addr = 0x12345, PAGE_SIZE = 0x1000, len = 0x1000. Your calculations 
lead to page_start = 0x345, first_len = 0x1000 - 0x12000, which is 
negative. I think page_start is calculated incorrectly (is ~ missing?).

> +
> +		memcpy(to_buf, from_buf, first_len + metalen);
> +		memcpy(next_pg_addr, from_buf + first_len, len - first_len);
> +
> +		return;
> +	}
> +
> +	memcpy(to_buf, from_buf, len + metalen);
> +}
> +

<...>

> +static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
> +						u64 length,
> +						struct xdp_umem *umem)
> +{
> +	addr += addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> +	addr &= XSK_UNALIGNED_BUF_ADDR_MASK;
> +	if (addr >= q->size ||

Addresses like 0x00aaffffffffffff will pass the validation (0xaa + 
0xffffffffffff will overflow mod 2^48 and become a small number), 
whereas such addresses don't look valid for me.

> +	    xskq_crosses_non_contig_pg(umem, addr, length)) {

If the region is not contiguous, we cant RX into it - that's clear. 
However, how can the userspace determine whether these two pages of UMEM 
are mapped contiguously in the DMA space? Are we going to silently drop 
descriptors to non-contiguous frames and leak them? Please explain how 
to use it correctly from the application side.

> +		q->invalid_descs++;
> +		return false;
> +	}
> +
> +	return true;
> +}

<...>


^ permalink raw reply

* linux-next: build failure after merge of the net-next tree
From: Stephen Rothwell @ 2019-07-25  9:37 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux Next Mailing List, Linux Kernel Mailing List,
	Matthew Wilcox (Oracle)

[-- Attachment #1: Type: text/plain, Size: 412 bytes --]

Hi all,

After merging the net-next tree, today's linux-next build (mips
cavium_octeon_defconfig) failed like this:

drivers/staging/octeon/ethernet-tx.c:287:23: error: implicit declaration of function 'skb_drag_size'; did you mean 'skb_frag_size'? [-Werror=implicit-function-declaration]

Caused by commit

  92493a2f8a8d ("Build fixes for skb_frag_size conversion")

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH] net: bluetooth: hci_sock: Fix a possible null-pointer dereference in hci_mgmt_cmd()
From: Marcel Holtmann @ 2019-07-25  9:39 UTC (permalink / raw)
  To: Jia-Ju Bai
  Cc: Johan Hedberg, David S. Miller, linux-bluetooth, netdev,
	linux-kernel
In-Reply-To: <20190725092253.15912-1-baijiaju1990@gmail.com>

Hi Jia-Ju,

> In hci_mgmt_cmd(), there is an if statement on line 1570 to check
> whether hdev is NULL:
>    if (hdev && chan->hdev_init)
> 
> When hdev is NULL, it is used on line 1575:
>    err = handler->func(sk, hdev, cp, len);
> 
> Some called functions of handler->func use hdev, such as:
> set_appearance(), add_device() and remove_device() in mgmt.c.
> 
> Thus, a possible null-pointer dereference may occur.
> 
> To fix this bug, hdev is checked before calling handler->func().
> 
> This bug is found by a static analysis tool STCheck written by us.
> 
> Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
> ---
> net/bluetooth/hci_sock.c | 11 ++++++-----
> 1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
> index d32077b28433..18ea1e47ea48 100644
> --- a/net/bluetooth/hci_sock.c
> +++ b/net/bluetooth/hci_sock.c
> @@ -1570,11 +1570,12 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
> 	if (hdev && chan->hdev_init)
> 		chan->hdev_init(sk, hdev);
> 
> -	cp = buf + sizeof(*hdr);
> -
> -	err = handler->func(sk, hdev, cp, len);
> -	if (err < 0)
> -		goto done;
> +	if (hdev) {
> +		cp = buf + sizeof(*hdr);
> +		err = handler->func(sk, hdev, cp, len);
> +		if (err < 0)
> +			goto done;
> +	}
> 
> 	err = msglen;

have you evaluated the statement above:

        no_hdev = (handler->flags & HCI_MGMT_NO_HDEV);                           
        if (no_hdev != !hdev) {                                                  
                err = mgmt_cmd_status(sk, index, opcode,                         
                                      MGMT_STATUS_INVALID_INDEX);                
                goto done;                                                       
        }

I think that code is just overly complex and can be simplified, but I doubt you get to the situation where hdev is NULL for any function that requires it. Only the handler->func marked with HCI_MGMT_NO_HDEV will get hdev == NULL and these are not using it.

So we might can make this easier code to really check the index != MGMT_INDEX_NONE check above to cover all cases to ensure that hdev is either valid or set to NULL before proceeding any further.

And since we have a full set of unit tests in tools/mgmt-tester, I assume we would have had a chance to catch an issue like this. But we can add a test case to it to explicitly call the functions with either MGMT_INDEX_NONE used or not.

Regards

Marcel


^ permalink raw reply

* Re: [PATCH bpf-next v3 08/11] samples/bpf: add unaligned chunks mode support to xdpsock
From: Maxim Mikityanskiy @ 2019-07-25  9:43 UTC (permalink / raw)
  To: Kevin Laatz
  Cc: netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	bjorn.topel@intel.com, magnus.karlsson@intel.com,
	jakub.kicinski@netronome.com, jonathan.lemon@gmail.com,
	Saeed Mahameed, stephen@networkplumber.org,
	bruce.richardson@intel.com, ciara.loftus@intel.com,
	bpf@vger.kernel.org, intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190724051043.14348-9-kevin.laatz@intel.com>

On 2019-07-24 08:10, Kevin Laatz wrote:
> This patch adds support for the unaligned chunks mode. The addition of the
> unaligned chunks option will allow users to run the application with more
> relaxed chunk placement in the XDP umem.
> 
> Unaligned chunks mode can be used with the '-u' or '--unaligned' command
> line options.
> 
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> ---
>   samples/bpf/xdpsock_user.c | 17 +++++++++++++++--
>   1 file changed, 15 insertions(+), 2 deletions(-)

<...>

> @@ -372,6 +378,7 @@ static void usage(const char *prog)
>   		"  -z, --zero-copy      Force zero-copy mode.\n"
>   		"  -c, --copy           Force copy mode.\n"
>   		"  -f, --frame-size=n   Set the frame size (must be a power of two, default is %d).\n"

Help text for -f has to be updated, it doesn't have to be a power of two 
if -u is specified.

> +		"  -u, --unaligned	Enable unaligned chunk placement\n"

<...>

>   
> -	if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
> +	if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
> +			!opt_unaligned_chunks) {
>   		fprintf(stderr, "--frame-size=%d is not a power of two\n",
>   			opt_xsk_frame_size);
>   		usage(basename(argv[0]));
> 


^ permalink raw reply

* Re: [PATCH net-next 3/3] net: stmmac: Introducing support for Page Pool
From: Jon Hunter @ 2019-07-25  9:45 UTC (permalink / raw)
  To: Jose Abreu, Ilias Apalodimas
  Cc: David Miller, robin.murphy@arm.com, lists@bofh.nu,
	Joao.Pinto@synopsys.com, alexandre.torgue@st.com,
	maxime.ripard@bootlin.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-stm32@st-md-mailman.stormreply.com, wens@csie.org,
	mcoquelin.stm32@gmail.com, linux-tegra@vger.kernel.org,
	peppe.cavallaro@st.com, linux-arm-kernel@lists.infradead.org
In-Reply-To: <BYAPR12MB3269F4E62B64484B08F90998D3C10@BYAPR12MB3269.namprd12.prod.outlook.com>


On 25/07/2019 08:44, Jose Abreu wrote:

...

> OK. Can you please test what Ilias mentioned ?
> 
> Basically you can hard-code the order to 0 in 
> alloc_dma_rx_desc_resources():
> - pp_params.order = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE);
> + pp_params.order = 0;
> 
> Unless you use a MTU > PAGE_SIZE.

I made the change but unfortunately the issue persists.

Jon

-- 
nvpublic

^ permalink raw reply

* [PATCH net-next 2/3] flow_offload: Support get tcf block immediately
From: wenxu @ 2019-07-25  9:55 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1564048533-27283-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

It provide a callback to find the tcf block in
the flow_indr_block_dev_get

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 include/net/flow_offload.h |  4 ++++
 net/core/flow_offload.c    | 12 ++++++++++++
 net/sched/cls_api.c        | 31 +++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 373028e..0ebb7e1 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -371,6 +371,10 @@ struct flow_indr_block_dev {
 	void *block;
 };
 
+typedef void flow_indr_get_default_block_t(struct flow_indr_block_dev *indr_dev);
+
+void flow_indr_set_default_block_cb(flow_indr_get_default_block_t *cb);
+
 struct flow_indr_block_dev *flow_indr_block_dev_lookup(struct net_device *dev);
 
 int flow_indr_rhashtable_init(void);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 77e18dc..6aa02b5 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -298,6 +298,14 @@ struct flow_indr_block_dev *
 }
 EXPORT_SYMBOL(flow_indr_block_dev_lookup);
 
+static flow_indr_get_default_block_t *flow_indr_get_default_block;
+
+void flow_indr_set_default_block_cb(flow_indr_get_default_block_t *cb)
+{
+	flow_indr_get_default_block = cb;
+}
+EXPORT_SYMBOL(flow_indr_set_default_block_cb);
+
 static struct flow_indr_block_dev *flow_indr_block_dev_get(struct net_device *dev)
 {
 	struct flow_indr_block_dev *indr_dev;
@@ -312,6 +320,10 @@ static struct flow_indr_block_dev *flow_indr_block_dev_get(struct net_device *de
 
 	INIT_LIST_HEAD(&indr_dev->cb_list);
 	indr_dev->dev = dev;
+
+	if (flow_indr_get_default_block)
+		flow_indr_get_default_block(indr_dev);
+
 	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
 				   flow_indr_setup_block_ht_params)) {
 		kfree(indr_dev);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 359d92f..e64a0d2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -571,6 +571,35 @@ static void tc_indr_block_ing_cmd(struct net_device *dev, void *block,
 	tcf_block_setup(t_block, &bo);
 }
 
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
+{
+	const struct Qdisc_class_ops *cops;
+	struct Qdisc *qdisc;
+
+	if (!dev_ingress_queue(dev))
+		return NULL;
+
+	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+	if (!qdisc)
+		return NULL;
+
+	cops = qdisc->ops->cl_ops;
+	if (!cops)
+		return NULL;
+
+	if (!cops->tcf_block)
+		return NULL;
+
+	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+}
+
+static void tc_indr_get_default_block(struct flow_indr_block_dev *indr_dev)
+{
+	indr_dev->block = tc_dev_ingress_block(indr_dev->dev);
+	if (indr_dev->block)
+		indr_dev->cmd_cb = tc_indr_block_ing_cmd;
+}
+
 static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 			       struct tcf_block_ext_info *ei,
 			       enum flow_block_command command,
@@ -3143,6 +3172,8 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_rhash_setup_block_ht;
 
+	flow_indr_set_default_block_cb(tc_indr_get_default_block);
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next 1/3] flow_offload: move tc indirect block to flow offload
From: wenxu @ 2019-07-25  9:55 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev

From: wenxu <wenxu@ucloud.cn>

move tc indirect block to flow_offload.c. The nf_tables
can use the indr block architecture.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  10 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    |  10 +-
 include/net/flow_offload.h                         |  40 ++++
 include/net/pkt_cls.h                              |  35 ----
 include/net/sch_generic.h                          |   3 -
 net/core/flow_offload.c                            | 189 +++++++++++++++++
 net/sched/cls_api.c                                | 225 ++-------------------
 7 files changed, 254 insertions(+), 258 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7f747cb..074573b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -785,9 +785,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
 {
 	int err;
 
-	err = __tc_indr_block_cb_register(netdev, rpriv,
-					  mlx5e_rep_indr_setup_tc_cb,
-					  rpriv);
+	err = __flow_indr_block_cb_register(netdev, rpriv,
+					    mlx5e_rep_indr_setup_tc_cb,
+					    rpriv);
 	if (err) {
 		struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 
@@ -800,8 +800,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
 static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
 					    struct net_device *netdev)
 {
-	__tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
-				      rpriv);
+	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+					rpriv);
 }
 
 static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index e209f15..6a0f034 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1479,16 +1479,16 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
-		err = __tc_indr_block_cb_register(netdev, app,
-						  nfp_flower_indr_setup_tc_cb,
-						  app);
+		err = __flow_indr_block_cb_register(netdev, app,
+						    nfp_flower_indr_setup_tc_cb,
+						    app);
 		if (err)
 			nfp_flower_cmsg_warn(app,
 					     "Indirect block reg failed - %s\n",
 					     netdev->name);
 	} else if (event == NETDEV_UNREGISTER) {
-		__tc_indr_block_cb_unregister(netdev,
-					      nfp_flower_indr_setup_tc_cb, app);
+		__flow_indr_block_cb_unregister(netdev,
+						nfp_flower_indr_setup_tc_cb, app);
 	}
 
 	return NOTIFY_OK;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index b16d216..373028e 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <net/flow_dissector.h>
+#include <linux/rhashtable.h>
 
 struct flow_match {
 	struct flow_dissector	*dissector;
@@ -347,4 +348,43 @@ static inline void flow_block_init(struct flow_block *flow_block)
 	INIT_LIST_HEAD(&flow_block->cb_list);
 }
 
+typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
+				      enum tc_setup_type type, void *type_data);
+
+struct flow_indr_block_cb {
+	struct list_head list;
+	void *cb_priv;
+	flow_indr_block_bind_cb_t *cb;
+	void *cb_ident;
+};
+
+typedef void flow_indr_block_ing_cmd_t(struct net_device *dev, void *block,
+				       struct flow_indr_block_cb *indr_block_cb,
+				       enum flow_block_command command);
+
+struct flow_indr_block_dev {
+	struct rhash_head ht_node;
+	struct net_device *dev;
+	unsigned int refcnt;
+	struct list_head cb_list;
+	flow_indr_block_ing_cmd_t *cmd_cb;
+	void *block;
+};
+
+struct flow_indr_block_dev *flow_indr_block_dev_lookup(struct net_device *dev);
+
+int flow_indr_rhashtable_init(void);
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
 #endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e429809..0790a4e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -70,15 +70,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 	return block->q;
 }
 
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident);
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident);
-
 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		 struct tcf_result *res, bool compat_mode);
 
@@ -137,32 +128,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
 {
 }
 
-static inline
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	return 0;
-}
-
-static inline
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	return 0;
-}
-
-static inline
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
-static inline
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
 static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			       struct tcf_result *res, bool compat_mode)
 {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6b6b012..d9f359a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,9 +23,6 @@
 struct module;
 struct bpf_flow_keys;
 
-typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
-				    enum tc_setup_type type, void *type_data);
-
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index d63b970..77e18dc 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <net/flow_offload.h>
+#include <linux/rtnetlink.h>
 
 struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
@@ -280,3 +281,191 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 	}
 }
 EXPORT_SYMBOL(flow_block_cb_setup_simple);
+
+static struct rhashtable indr_setup_block_ht;
+
+static const struct rhashtable_params flow_indr_setup_block_ht_params = {
+	.key_offset	= offsetof(struct flow_indr_block_dev, dev),
+	.head_offset	= offsetof(struct flow_indr_block_dev, ht_node),
+	.key_len	= sizeof(struct net_device *),
+};
+
+struct flow_indr_block_dev *
+flow_indr_block_dev_lookup(struct net_device *dev)
+{
+	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+				      flow_indr_setup_block_ht_params);
+}
+EXPORT_SYMBOL(flow_indr_block_dev_lookup);
+
+static struct flow_indr_block_dev *flow_indr_block_dev_get(struct net_device *dev)
+{
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (indr_dev)
+		goto inc_ref;
+
+	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+	if (!indr_dev)
+		return NULL;
+
+	INIT_LIST_HEAD(&indr_dev->cb_list);
+	indr_dev->dev = dev;
+	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+				   flow_indr_setup_block_ht_params)) {
+		kfree(indr_dev);
+		return NULL;
+	}
+
+inc_ref:
+	indr_dev->refcnt++;
+	return indr_dev;
+}
+
+static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
+{
+	if (--indr_dev->refcnt)
+		return;
+
+	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+			       flow_indr_setup_block_ht_params);
+	kfree(indr_dev);
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
+			  flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		if (indr_block_cb->cb == cb &&
+		    indr_block_cb->cb_ident == cb_ident)
+			return indr_block_cb;
+	return NULL;
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
+		       flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (indr_block_cb)
+		return ERR_PTR(-EEXIST);
+
+	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+	if (!indr_block_cb)
+		return ERR_PTR(-ENOMEM);
+
+	indr_block_cb->cb_priv = cb_priv;
+	indr_block_cb->cb = cb;
+	indr_block_cb->cb_ident = cb_ident;
+	list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+	return indr_block_cb;
+}
+
+static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
+{
+	list_del(&indr_block_cb->list);
+	kfree(indr_block_cb);
+}
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+	int err;
+
+	indr_dev = flow_indr_block_dev_get(dev);
+	if (!indr_dev)
+		return -ENOMEM;
+
+	indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+	err = PTR_ERR_OR_ZERO(indr_block_cb);
+	if (err)
+		goto err_dev_put;
+
+	if (indr_dev->cmd_cb)
+		indr_dev->cmd_cb(indr_dev->dev, indr_dev->block, indr_block_cb,
+				 FLOW_BLOCK_BIND);
+
+	return 0;
+
+err_dev_put:
+	flow_indr_block_dev_put(indr_dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb,
+				void *cb_ident)
+{
+	int err;
+
+	rtnl_lock();
+	err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+	rtnl_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb,
+				     void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (!indr_block_cb)
+		return;
+
+	/* Send unbind message if required to free any block cbs. */
+	if (indr_dev->cmd_cb)
+		indr_dev->cmd_cb(indr_dev->dev, indr_dev->block,
+				 indr_block_cb,
+				 FLOW_BLOCK_UNBIND);
+
+	flow_indr_block_cb_del(indr_block_cb);
+	flow_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_ident)
+{
+	rtnl_lock();
+	__flow_indr_block_cb_unregister(dev, cb, cb_ident);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
+
+static bool rhash_table_init;
+int flow_indr_rhashtable_init(void)
+{
+	int err = 0;
+
+	if (!rhash_table_init) {
+		err = rhashtable_init(&indr_setup_block_ht,
+				      &flow_indr_setup_block_ht_params);
+
+		if (!err)
+			rhash_table_init = true;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_rhashtable_init);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index efd3cfb..359d92f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -36,6 +36,7 @@
 #include <net/tc_act/tc_sample.h>
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_ct.h>
+#include <net/flow_offload.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -544,235 +545,39 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 	}
 }
 
-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
-{
-	const struct Qdisc_class_ops *cops;
-	struct Qdisc *qdisc;
-
-	if (!dev_ingress_queue(dev))
-		return NULL;
-
-	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
-	if (!qdisc)
-		return NULL;
-
-	cops = qdisc->ops->cl_ops;
-	if (!cops)
-		return NULL;
-
-	if (!cops->tcf_block)
-		return NULL;
-
-	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
-}
-
-static struct rhashtable indr_setup_block_ht;
-
-struct tc_indr_block_dev {
-	struct rhash_head ht_node;
-	struct net_device *dev;
-	unsigned int refcnt;
-	struct list_head cb_list;
-	struct tcf_block *block;
-};
-
-struct tc_indr_block_cb {
-	struct list_head list;
-	void *cb_priv;
-	tc_indr_block_bind_cb_t *cb;
-	void *cb_ident;
-};
-
-static const struct rhashtable_params tc_indr_setup_block_ht_params = {
-	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
-	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
-	.key_len	= sizeof(struct net_device *),
-};
-
-static struct tc_indr_block_dev *
-tc_indr_block_dev_lookup(struct net_device *dev)
-{
-	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
-				      tc_indr_setup_block_ht_params);
-}
-
-static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
-{
-	struct tc_indr_block_dev *indr_dev;
-
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (indr_dev)
-		goto inc_ref;
-
-	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
-	if (!indr_dev)
-		return NULL;
-
-	INIT_LIST_HEAD(&indr_dev->cb_list);
-	indr_dev->dev = dev;
-	indr_dev->block = tc_dev_ingress_block(dev);
-	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-				   tc_indr_setup_block_ht_params)) {
-		kfree(indr_dev);
-		return NULL;
-	}
-
-inc_ref:
-	indr_dev->refcnt++;
-	return indr_dev;
-}
-
-static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
-{
-	if (--indr_dev->refcnt)
-		return;
-
-	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-			       tc_indr_setup_block_ht_params);
-	kfree(indr_dev);
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
-			tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		if (indr_block_cb->cb == cb &&
-		    indr_block_cb->cb_ident == cb_ident)
-			return indr_block_cb;
-	return NULL;
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
-		     tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-
-	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (indr_block_cb)
-		return ERR_PTR(-EEXIST);
-
-	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
-	if (!indr_block_cb)
-		return ERR_PTR(-ENOMEM);
-
-	indr_block_cb->cb_priv = cb_priv;
-	indr_block_cb->cb = cb;
-	indr_block_cb->cb_ident = cb_ident;
-	list_add(&indr_block_cb->list, &indr_dev->cb_list);
-
-	return indr_block_cb;
-}
-
-static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
-{
-	list_del(&indr_block_cb->list);
-	kfree(indr_block_cb);
-}
-
 static int tcf_block_setup(struct tcf_block *block,
 			   struct flow_block_offload *bo);
 
-static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
-				  struct tc_indr_block_cb *indr_block_cb,
+static void tc_indr_block_ing_cmd(struct net_device *dev, void *block,
+				  struct flow_indr_block_cb *indr_block_cb,
 				  enum flow_block_command command)
 {
+	struct tcf_block *t_block = (struct tcf_block *)block;
 	struct flow_block_offload bo = {
 		.command	= command,
 		.binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
-		.net		= dev_net(indr_dev->dev),
-		.block_shared	= tcf_block_non_null_shared(indr_dev->block),
+		.net		= dev_net(dev),
+		.block_shared	= tcf_block_non_null_shared(t_block),
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	if (!indr_dev->block)
-		return;
-
-	bo.block = &indr_dev->block->flow_block;
-
-	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
-			  &bo);
-	tcf_block_setup(indr_dev->block, &bo);
-}
-
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-	int err;
-
-	indr_dev = tc_indr_block_dev_get(dev);
-	if (!indr_dev)
-		return -ENOMEM;
-
-	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
-	err = PTR_ERR_OR_ZERO(indr_block_cb);
-	if (err)
-		goto err_dev_put;
-
-	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND);
-	return 0;
-
-err_dev_put:
-	tc_indr_block_dev_put(indr_dev);
-	return err;
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
-
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	int err;
-
-	rtnl_lock();
-	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
-	rtnl_unlock();
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
-
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (!indr_dev)
+	if (!t_block)
 		return;
 
-	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (!indr_block_cb)
-		return;
+	bo.block = &t_block->flow_block;
 
-	/* Send unbind message if required to free any block cbs. */
-	tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND);
-	tc_indr_block_cb_del(indr_block_cb);
-	tc_indr_block_dev_put(indr_dev);
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
+	indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, &bo);
 
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	rtnl_lock();
-	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
-	rtnl_unlock();
+	tcf_block_setup(t_block, &bo);
 }
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
 
 static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 			       struct tcf_block_ext_info *ei,
 			       enum flow_block_command command,
 			       struct netlink_ext_ack *extack)
 {
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
 	struct flow_block_offload bo = {
 		.command	= command,
 		.binder_type	= ei->binder_type,
@@ -783,11 +588,12 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	indr_dev = tc_indr_block_dev_lookup(dev);
+	indr_dev = flow_indr_block_dev_lookup(dev);
 	if (!indr_dev)
 		return;
 
 	indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL;
+	indr_dev->cmd_cb = command == FLOW_BLOCK_BIND ? tc_indr_block_ing_cmd : NULL;
 
 	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
 		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
@@ -3333,8 +3139,7 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
-	err = rhashtable_init(&indr_setup_block_ht,
-			      &tc_indr_setup_block_ht_params);
+	err = flow_indr_rhashtable_init();
 	if (err)
 		goto err_rhash_setup_block_ht;
 
-- 
1.8.3.1


^ permalink raw reply related

* [PATCH net-next 3/3] netfilter: nf_tables_offload: support indr block call
From: wenxu @ 2019-07-25  9:55 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1564048533-27283-1-git-send-email-wenxu@ucloud.cn>

From: wenxu <wenxu@ucloud.cn>

nftable support indr-block call. It makes nftable an offload vlan
and tunnel device

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 net/netfilter/nf_tables_api.c     |   6 ++
 net/netfilter/nf_tables_offload.c | 137 ++++++++++++++++++++++++++++++--------
 2 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c6dc173..20daf87 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7623,8 +7623,14 @@ static int __init nf_tables_module_init(void)
 	if (err < 0)
 		goto err5;
 
+	err = flow_indr_rhashtable_init();
+	if (err)
+		goto err6;
+
 	nft_chain_route_init();
 	return err;
+err6:
+	nfnetlink_subsys_unregister(&nf_tables_subsys);
 err5:
 	rhltable_destroy(&nft_objname_ht);
 err4:
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 3e1a1a8..be050f4 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -176,24 +176,125 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
 	return 0;
 }
 
+static int nft_block_setup(struct nft_base_chain *basechain,
+			   struct flow_block_offload *bo,
+			   enum flow_block_command cmd)
+{
+	int err;
+
+	switch (cmd) {
+	case FLOW_BLOCK_BIND:
+		err = nft_flow_offload_bind(bo, basechain);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = nft_flow_offload_unbind(bo, basechain);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+				 struct net_device *dev,
+				 enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+	int err;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	rtnl_lock();
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0) {
+		rtnl_unlock();
+		return err;
+	}
+	rtnl_unlock();
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev, void *block,
+				   struct flow_indr_block_cb *indr_block_cb,
+				   enum flow_block_command cmd)
+{
+	struct nft_base_chain *chain = (struct nft_base_chain *)block;
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	if (block)
+		return;
+
+	rtnl_lock();
+	indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, &bo);
+	rtnl_unlock();
+
+	nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+				      struct net_device *dev,
+				      enum flow_block_command cmd)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+	struct flow_block_offload bo = {};
+	struct netlink_ext_ack extack = {};
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return -EOPNOTSUPP;
+
+	indr_dev->block = cmd == FLOW_BLOCK_BIND ? chain : NULL;
+	indr_dev->cmd_cb = cmd == FLOW_BLOCK_BIND ? nft_indr_block_ing_cmd : NULL;
+
+	rtnl_lock();
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+				  &bo);
+	rtnl_unlock();
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
 #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
 
 static int nft_flow_offload_chain(struct nft_trans *trans,
 				  enum flow_block_command cmd)
 {
 	struct nft_chain *chain = trans->ctx.chain;
-	struct netlink_ext_ack extack = {};
-	struct flow_block_offload bo = {};
 	struct nft_base_chain *basechain;
 	struct net_device *dev;
-	int err;
 
 	if (!nft_is_base_chain(chain))
 		return -EOPNOTSUPP;
 
 	basechain = nft_base_chain(chain);
 	dev = basechain->ops.dev;
-	if (!dev || !dev->netdev_ops->ndo_setup_tc)
+	if (!dev)
 		return -EOPNOTSUPP;
 
 	/* Only default policy to accept is supported for now. */
@@ -202,30 +303,10 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
 	    nft_trans_chain_policy(trans) != NF_ACCEPT)
 		return -EOPNOTSUPP;
 
-	bo.command = cmd;
-	bo.block = &basechain->flow_block;
-	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
-	bo.extack = &extack;
-	INIT_LIST_HEAD(&bo.cb_list);
-
-	rtnl_lock();
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
-	if (err < 0)
-		goto out;
-
-	switch (cmd) {
-	case FLOW_BLOCK_BIND:
-		err = nft_flow_offload_bind(&bo, basechain);
-		break;
-	case FLOW_BLOCK_UNBIND:
-		err = nft_flow_offload_unbind(&bo, basechain);
-		break;
-	}
-
-out:
-	rtnl_unlock();
-	return err;
+	if (dev->netdev_ops->ndo_setup_tc)
+		return nft_block_offload_cmd(basechain, dev, cmd);
+	else
+		return nft_indr_block_offload_cmd(basechain, dev, cmd);
 }
 
 int nft_flow_rule_offload_commit(struct net *net)
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH 2/3] serial: remove netx serial driver
From: Greg Kroah-Hartman @ 2019-07-25 10:04 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Linus Walleij, netdev, linux-serial, Thomas Gleixner,
	David S. Miller, Sascha Hauer, Michael Trensch, Robert Schwebel,
	Jiri Slaby, linux-kernel@vger.kernel.org
In-Reply-To: <CAK8P3a1=Bnsxg-3RztGEL-c6muQjam-egyrsZfqc7_yjBzcGXA@mail.gmail.com>

On Tue, Jul 23, 2019 at 10:43:05AM +0200, Arnd Bergmann wrote:
> On Tue, Jul 23, 2019 at 10:26 AM Linus Walleij <linus.walleij@linaro.org> wrote:
> >
> > On Mon, Jul 22, 2019 at 9:16 PM Arnd Bergmann <arnd@arndb.de> wrote:
> >
> > > The netx platform got removed, so this driver is now
> > > useless.
> > >
> > > Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> >
> > We seem so overlap :)
> > https://marc.info/?l=linux-serial&m=156377843325488&w=2
> >
> > Anyways, the patches are identical except here:
> >
> > > -/* Hilscher netx */
> > > +/* Hilscher netx (removed) */
> > >  #define PORT_NETX      71
> >
> > Is there some reason for keeping the magical number around?
> > When I looked over the file there seemed to be more "holes"
> > in the list.
> 
> I looked at the same list and though I saw more obsolete entries
> than holes. The last ones that I saw getting removed were
> PORT_MFD in 2017 and PORT_V850E_UART in 2008.
> 
> It probably doesn't matter as we have precedence for both.

I want to just get rid of that whole list as I don't think it's ever
needed, but haven't spent the time digging through userspace code to
verify it.

I'll take Linus's patch as it came first, thanks.

greg k-h

^ permalink raw reply

* Re: [PATCH bpf-next v3 03/11] xsk: add support to allow unaligned chunk placement
From: Maxim Mikityanskiy @ 2019-07-25 10:08 UTC (permalink / raw)
  To: Kevin Laatz
  Cc: netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	bjorn.topel@intel.com, magnus.karlsson@intel.com,
	jakub.kicinski@netronome.com, jonathan.lemon@gmail.com,
	Saeed Mahameed, stephen@networkplumber.org,
	bruce.richardson@intel.com, ciara.loftus@intel.com,
	bpf@vger.kernel.org, intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190724051043.14348-4-kevin.laatz@intel.com>

On 2019-07-24 08:10, Kevin Laatz wrote:
> Currently, addresses are chunk size aligned. This means, we are very
> restricted in terms of where we can place chunk within the umem. For
> example, if we have a chunk size of 2k, then our chunks can only be placed
> at 0,2k,4k,6k,8k... and so on (ie. every 2k starting from 0).
> 
> This patch introduces the ability to use unaligned chunks. With these
> changes, we are no longer bound to having to place chunks at a 2k (or
> whatever your chunk size is) interval. Since we are no longer dealing with
> aligned chunks, they can now cross page boundaries. Checks for page
> contiguity have been added in order to keep track of which pages are
> followed by a physically contiguous page.
> 
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> 
> ---
> v2:
>    - Add checks for the flags coming from userspace
>    - Fix how we get chunk_size in xsk_diag.c
>    - Add defines for masking the new descriptor format
>    - Modified the rx functions to use new descriptor format
>    - Modified the tx functions to use new descriptor format
> 
> v3:
>    - Add helper function to do address/offset masking/addition
> ---
>   include/net/xdp_sock.h      | 17 ++++++++
>   include/uapi/linux/if_xdp.h |  9 ++++
>   net/xdp/xdp_umem.c          | 18 +++++---
>   net/xdp/xsk.c               | 86 ++++++++++++++++++++++++++++++-------
>   net/xdp/xsk_diag.c          |  2 +-
>   net/xdp/xsk_queue.h         | 68 +++++++++++++++++++++++++----
>   6 files changed, 170 insertions(+), 30 deletions(-)

<...>

> +
> +static inline u64 xsk_umem_handle_offset(struct xdp_umem *umem, u64 handle,
> +					 u64 offset)
> +{
> +	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
> +		return handle |= (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
> +	else
> +		return handle += offset;
> +}

|= and += are not necessary here, | and + are enough.

In the unaligned mode, it's not supported to run xsk_umem_handle_offset 
multiple times, and it's assumed that offset is zero. I'll explain the 
need in my following comment to patch 6.

<...>

^ permalink raw reply

* Re: [PATCH bpf-next v3 06/11] mlx5e: modify driver for handling offsets
From: Maxim Mikityanskiy @ 2019-07-25 10:15 UTC (permalink / raw)
  To: Kevin Laatz
  Cc: netdev@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
	bjorn.topel@intel.com, magnus.karlsson@intel.com,
	jakub.kicinski@netronome.com, jonathan.lemon@gmail.com,
	Saeed Mahameed, stephen@networkplumber.org,
	bruce.richardson@intel.com, ciara.loftus@intel.com,
	bpf@vger.kernel.org, intel-wired-lan@lists.osuosl.org
In-Reply-To: <20190724051043.14348-7-kevin.laatz@intel.com>

On 2019-07-24 08:10, Kevin Laatz wrote:
> With the addition of the unaligned chunks option, we need to make sure we
> handle the offsets accordingly based on the mode we are currently running
> in. This patch modifies the driver to appropriately mask the address for
> each case.
> 
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> 
> ---
> v3:
>    - Use new helper function to handle offset
> ---
>   drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c    | 8 ++++++--
>   drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 9 +++++++--
>   2 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> index b0b982cf69bb..d5245893d2c8 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
> @@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
>   		      void *va, u16 *rx_headroom, u32 *len, bool xsk)
>   {
>   	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
> +	struct xdp_umem *umem = rq->umem;
>   	struct xdp_buff xdp;
>   	u32 act;
>   	int err;
> @@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
>   	xdp.rxq = &rq->xdp_rxq;
>   
>   	act = bpf_prog_run_xdp(prog, &xdp);
> -	if (xsk)
> -		xdp.handle += xdp.data - xdp.data_hard_start;
> +	if (xsk) {
> +		u64 off = xdp.data - xdp.data_hard_start;
> +
> +		xdp.handle = xsk_umem_handle_offset(umem, xdp.handle, off);
> +	}

What's missed is that umem_headroom is added to handle directly in 
mlx5e_xsk_page_alloc_umem. In my understanding umem_headroom should go 
to the offset part (high 16 bits) of the handle, to 
xsk_umem_handle_offset has to support increasing the offset.

>   	switch (act) {
>   	case XDP_PASS:
>   		*rx_headroom = xdp.data - xdp.data_hard_start;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
> index 35e188cf4ea4..f596e63cba00 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
> @@ -61,6 +61,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
>   	struct mlx5e_xdp_xmit_data xdptxd;
>   	bool work_done = true;
>   	bool flush = false;
> +	u64 addr, offset;
>   
>   	xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
>   
> @@ -82,8 +83,12 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
>   			break;
>   		}
>   
> -		xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
> -		xdptxd.data = xdp_umem_get_data(umem, desc.addr);
> +		/* for unaligned chunks need to take offset from upper bits */
> +		offset = (desc.addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
> +		addr = (desc.addr & XSK_UNALIGNED_BUF_ADDR_MASK);
> +
> +		xdptxd.dma_addr = xdp_umem_get_dma(umem, addr + offset);
> +		xdptxd.data = xdp_umem_get_data(umem, addr + offset);

Why can't these calculations be encapsulated into 
xdp_umem_get_{dma,data}? I think they are common for all drivers, aren't 
they?

Even if there is some reason not to put this bitshifting stuff into 
xdp_umem_get_* functions, I suggest to encapsulate it into a function 
anyway, because it's a good idea to keep those calculations in a single 
place.

>   		xdptxd.len = desc.len;
>   
>   		dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
> 


^ permalink raw reply

* Re: [PATCH net-next 1/3] flow_offload: move tc indirect block to flow offload
From: Pablo Neira Ayuso @ 2019-07-25 10:20 UTC (permalink / raw)
  To: wenxu; +Cc: netfilter-devel, netdev
In-Reply-To: <1564048533-27283-1-git-send-email-wenxu@ucloud.cn>

On Thu, Jul 25, 2019 at 05:55:31PM +0800, wenxu@ucloud.cn wrote:
> +struct flow_indr_block_dev {
> +	struct rhash_head ht_node;
> +	struct net_device *dev;
> +	unsigned int refcnt;
> +	struct list_head cb_list;
> +	flow_indr_block_ing_cmd_t *cmd_cb;
> +	void *block;

There's now a flow_block object in order to avoid void * which is
prone to bugs later on since the compiler cannot make type validation
anymore.

^ permalink raw reply

* Re: [PATCH net-next 1/3] flow_offload: move tc indirect block to flow offload
From: Florian Westphal @ 2019-07-25 10:22 UTC (permalink / raw)
  To: wenxu; +Cc: pablo, netfilter-devel, netdev
In-Reply-To: <1564048533-27283-1-git-send-email-wenxu@ucloud.cn>

wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
> From: wenxu <wenxu@ucloud.cn>
> 
> move tc indirect block to flow_offload.c. The nf_tables
> can use the indr block architecture.

... to do what?  Can you please illustrate how this is going to be
used/useful?

^ permalink raw reply

* Re: [PATCH net-next 2/3] flow_offload: Support get tcf block immediately
From: Florian Westphal @ 2019-07-25 10:24 UTC (permalink / raw)
  To: wenxu; +Cc: pablo, netfilter-devel, netdev
In-Reply-To: <1564048533-27283-2-git-send-email-wenxu@ucloud.cn>

wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
> From: wenxu <wenxu@ucloud.cn>
> 
> It provide a callback to find the tcf block in
> the flow_indr_block_dev_get

Can you explain why you're making this change?
This will help us understand the concept/idea of your series.

The above describes what the patch does, but it should
explain why this is callback is added.

^ permalink raw reply

* Re: [PATCH bpf-next v4 3/6] xdp: Add devmap_hash map type for looking up devices by hashed index
From: Toke Høiland-Jørgensen @ 2019-07-25 10:32 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: Daniel Borkmann, Alexei Starovoitov, netdev, David Miller,
	Jakub Kicinski, Björn Töpel, Yonghong Song, brouer
In-Reply-To: <20190725100717.0c4e8265@carbon>

Jesper Dangaard Brouer <brouer@redhat.com> writes:

> On Mon, 22 Jul 2019 13:52:48 +0200
> Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
>> +static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
>> +						    int idx)
>> +{
>> +	return &dtab->dev_index_head[idx & (NETDEV_HASHENTRIES - 1)];
>> +}
>
> It is good for performance that our "hash" function is simply an AND
> operation on the idx.  We want to keep it this way.
>
> I don't like that you are using NETDEV_HASHENTRIES, because the BPF map
> infrastructure already have a way to specify the map size (struct
> bpf_map_def .max_entries).  BUT for performance reasons, to keep the
> AND operation, we would need to round up the hash-array size to nearest
> power of 2 (or reject if user didn't specify a power of 2, if we want
> to "expose" this limit to users).

But do we really want the number of hash buckets to be equal to the max
number of entries? The values are not likely to be evenly distributed,
so we'll end up with big buckets if the number is small, meaning we'll
blow performance on walking long lists in each bucket.

Also, if the size is dynamic the size needs to be loaded from memory
instead of being a compile-time constant, which will presumably hurt
performance (though not sure by how much)?

-Toke

^ permalink raw reply

* Re: [PATCH 0/8] can: flexcan: add CAN FD support for NXP Flexcan
From: Marc Kleine-Budde @ 2019-07-25 10:37 UTC (permalink / raw)
  To: Joakim Zhang, linux-can@vger.kernel.org
  Cc: wg@grandegger.com, dl-linux-imx, netdev@vger.kernel.org
In-Reply-To: <22ca8787-fa99-50bb-af1a-098866542e42@pengutronix.de>


[-- Attachment #1.1: Type: text/plain, Size: 1736 bytes --]

On 7/25/19 9:53 AM, Marc Kleine-Budde wrote:
> On 7/25/19 9:38 AM, Joakim Zhang wrote:
>> Kindly pinging...
>>
>> After you git pull request for linux-can-next-for-5.4-20190724, some patches are missing from linux-can-next/testing.
>> can: flexcan: flexcan_mailbox_read() make use of flexcan_write64() to mark the mailbox as read
>> can: flexcan: flexcan_irq(): add support for TX mailbox in iflag1
>> can: flexcan: flexcan_read_reg_iflag_rx(): optimize reading
>> can: flexcan: introduce struct flexcan_priv::tx_mask and make use of it
>> can: flexcan: convert struct flexcan_priv::rx_mask{1,2} to rx_mask
>> can: flexcan: remove TX mailbox bit from struct flexcan_priv::rx_mask{1,2}
>> can: flexcan: rename struct flexcan_priv::reg_imask{1,2}_default to rx_mask{1,2}
>> can: flexcan: flexcan_irq(): rename variable reg_iflag -> reg_iflag_rx
>> can: flexcan: rename macro FLEXCAN_IFLAG_MB() -> FLEXCAN_IFLAG2_MB()
>>
>> You can refer to below link for the reason of adding above patches:
>> https://www.spinics.net/lists/linux-can/msg00777.html
>> https://www.spinics.net/lists/linux-can/msg01150.html
>>
>> Are you prepared to add back these patches as they are necessary for
>> Flexcan CAN FD? And this Flexcan CAN FD patch set is based on these
>> patches.
> 
> Yes, these patches will be added back.

I've cleaned up the first patch a bit, and pushed everything to the
testing branch. Can you give it a test.

regards,
Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 1/3] flow_offload: move tc indirect block to flow offload
From: wenxu @ 2019-07-25 10:53 UTC (permalink / raw)
  To: Florian Westphal; +Cc: pablo, netfilter-devel, netdev
In-Reply-To: <20190725102217.zmkpmsnyt7xnz2vu@breakpoint.cc>


On 7/25/2019 6:22 PM, Florian Westphal wrote:
> wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
>> From: wenxu <wenxu@ucloud.cn>
>>
>> move tc indirect block to flow_offload.c. The nf_tables
>> can use the indr block architecture.
> ... to do what?  Can you please illustrate how this is going to be
> used/useful?
This is used to offload the tunnel packet. The decap rule is set on the tunnel device, but not the hardware device.

^ permalink raw reply

* Re: [PATCH net-next 2/3] flow_offload: Support get tcf block immediately
From: wenxu @ 2019-07-25 10:59 UTC (permalink / raw)
  To: Florian Westphal; +Cc: pablo, netfilter-devel, netdev
In-Reply-To: <20190725102434.c72m32tpsjwf7nff@breakpoint.cc>


@tc_indr_block_dev_get funcion,

static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
{
    struct tc_indr_block_dev *indr_dev;

    indr_dev = tc_indr_block_dev_lookup(dev);
    if (indr_dev)
        goto inc_ref;

    indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
    if (!indr_dev)
        return NULL;

    INIT_LIST_HEAD(&indr_dev->cb_list);
    indr_dev->dev = dev;
    indr_dev->block = tc_dev_ingress_block(dev);

when the indr device register. It will call __tc_indr_block_cb_register-->tc_indr_block_dev_get,

It can get the indr_dev->block immediately through tc_dev_ingress_block,

But when the indr_block_dev_get put in the common flow_offload.  It can not direct access 

tc_dev_ingress_block.



On 7/25/2019 6:24 PM, Florian Westphal wrote:
> wenxu@ucloud.cn <wenxu@ucloud.cn> wrote:
>> From: wenxu <wenxu@ucloud.cn>
>>
>> It provide a callback to find the tcf block in
>> the flow_indr_block_dev_get
> Can you explain why you're making this change?
> This will help us understand the concept/idea of your series.
>
> The above describes what the patch does, but it should
> explain why this is callback is added.
>

^ permalink raw reply

* [PATCH] qed: RDMA - Fix the hw_ver returned in device attributes
From: Michal Kalderon @ 2019-07-25 10:59 UTC (permalink / raw)
  To: michal.kalderon, ariel.elior, davem; +Cc: netdev

The hw_ver field was initialized to zero. Return the chip revision.
This is relevant for rdma driver.

Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
---
 drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index f900fde448db..7110cae384ee 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -442,7 +442,7 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
 	/* Vendor specific information */
 	dev->vendor_id = cdev->vendor_id;
 	dev->vendor_part_id = cdev->device_id;
-	dev->hw_ver = 0;
+	dev->hw_ver = cdev->chip_rev;
 	dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) |
 		      (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION);
 
-- 
2.14.5


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox