Netdev List
 help / color / mirror / Atom feed
* [PATCH net v2 1/4] net:ethernet:aquantia: Setup max_mtu in ndev to enable jumbo frames
From: Igor Russkikh @ 2017-09-25  7:48 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
	Simon Edelhaus, Igor Russkikh, Pavel Belous
In-Reply-To: <cover.1506324091.git.igor.russkikh@aquantia.com>

Although hardware is capable for almost 16K MTU, without max_mtu field
correctly set it only allows standard MTU to be used.
This patch enables max MTU, calculating it from hardware maximum frame size
of 16352 octets (including FCS).

Fixes: 5513e16421cb ("net: ethernet: aquantia: Fixes for aq_ndev_change_mtu")

Signed-off-by: Pavel Belous <Pavel.Belous@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c               | 11 ++---------
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h    |  2 +-
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 6ac9e26..bf26a59 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -214,7 +214,6 @@ struct aq_nic_s *aq_nic_alloc_cold(const struct net_device_ops *ndev_ops,
 	SET_NETDEV_DEV(ndev, dev);
 
 	ndev->if_port = port;
-	ndev->min_mtu = ETH_MIN_MTU;
 	self->ndev = ndev;
 
 	self->aq_pci_func = aq_pci_func;
@@ -283,6 +282,7 @@ int aq_nic_ndev_init(struct aq_nic_s *self)
 	self->ndev->features = aq_hw_caps->hw_features;
 	self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
 	self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN;
+	self->ndev->max_mtu = self->aq_hw_caps.mtu - ETH_FCS_LEN - ETH_HLEN;
 
 	return 0;
 }
@@ -693,16 +693,9 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
 
 int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu)
 {
-	int err = 0;
-
-	if (new_mtu > self->aq_hw_caps.mtu) {
-		err = -EINVAL;
-		goto err_exit;
-	}
 	self->aq_nic_cfg.mtu = new_mtu;
 
-err_exit:
-	return err;
+	return 0;
 }
 
 int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index f3957e93..fcf89e2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -16,7 +16,7 @@
 
 #include "../aq_common.h"
 
-#define HW_ATL_B0_MTU_JUMBO (16000U)
+#define HW_ATL_B0_MTU_JUMBO  16352U
 #define HW_ATL_B0_MTU        1514U
 
 #define HW_ATL_B0_TX_RINGS 4U
-- 
2.7.4

^ permalink raw reply related

* [PATCH net v2 0/4] net:ethernet:aquantia: Atlantic driver bugfixes und improvements
From: Igor Russkikh @ 2017-09-25  7:48 UTC (permalink / raw)
  To: David S . Miller
  Cc: netdev, David Arcari, Pavel Belous, Nadezhda Krupnina,
	Simon Edelhaus, Igor Russkikh

This series contains bugfixes for aQuantia Atlantic driver.

Changes in v2:
Review comments applied:
- min_mtu set removed
- extra mtu range check is removed
- err codes handling improved

Igor Russkikh (3):
  net:ethernet:aquantia: Setup max_mtu in ndev to enable jumbo frames
  net:ethernet:aquantia: Fix Tx queue hangups
  net:ethernet:aquantia: Fix transient invalid link down/up indications

Pavel Belous (1):
  net:ethernet:atlantic: fix iommu errors

 drivers/net/ethernet/aquantia/atlantic/aq_cfg.h    |   4 +
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 145 ++++++++++-----------
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |   2 -
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c   |  53 ++++++--
 drivers/net/ethernet/aquantia/atlantic/aq_ring.h   |  10 +-
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c    |   8 +-
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h  |   2 +-
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        |   3 +-
 8 files changed, 129 insertions(+), 98 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH v2 4/4] net: nfc: llcp_core: use setup_timer() helper.
From: Allen Pais @ 2017-09-25  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506324605-10160-1-git-send-email-allen.lkml@gmail.com>

Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.

 net/nfc/llcp_core.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5c..7988185 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 	INIT_LIST_HEAD(&local->list);
 	kref_init(&local->ref);
 	mutex_init(&local->sdp_lock);
-	init_timer(&local->link_timer);
-	local->link_timer.data = (unsigned long) local;
-	local->link_timer.function = nfc_llcp_symm_timer;
+	setup_timer(&local->link_timer, nfc_llcp_symm_timer,
+		    (unsigned long)local);
 
 	skb_queue_head_init(&local->tx_queue);
 	INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 
 	mutex_init(&local->sdreq_lock);
 	INIT_HLIST_HEAD(&local->pending_sdreqs);
-	init_timer(&local->sdreq_timer);
-	local->sdreq_timer.data = (unsigned long) local;
-	local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+	setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer,
+		    (unsigned long)local);
 	INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
 
 	list_add(&local->list, &llcp_devices);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v2 3/4] net: nfc: hci: llc_shdlc: use setup_timer() helper.
From: Allen Pais @ 2017-09-25  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506324605-10160-1-git-send-email-allen.lkml@gmail.com>

Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.

 net/nfc/hci/llc_shdlc.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a0..58df37e 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
 	mutex_init(&shdlc->state_mutex);
 	shdlc->state = SHDLC_DISCONNECTED;
 
-	init_timer(&shdlc->connect_timer);
-	shdlc->connect_timer.data = (unsigned long)shdlc;
-	shdlc->connect_timer.function = llc_shdlc_connect_timeout;
+	setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout,
+		    (unsigned long)shdlc);
 
-	init_timer(&shdlc->t1_timer);
-	shdlc->t1_timer.data = (unsigned long)shdlc;
-	shdlc->t1_timer.function = llc_shdlc_t1_timeout;
+	setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout,
+		    (unsigned long)shdlc);
 
-	init_timer(&shdlc->t2_timer);
-	shdlc->t2_timer.data = (unsigned long)shdlc;
-	shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+	setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout,
+		    (unsigned long)shdlc);
 
 	shdlc->w = SHDLC_MAX_WINDOW;
 	shdlc->srej_support = SHDLC_SREJ_SUPPORT;
-- 
2.7.4

^ permalink raw reply related

* [PATCH v2 2/4] net: nfc: hci: use setup_timer() helper.
From: Allen Pais @ 2017-09-25  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais
In-Reply-To: <1506324605-10160-1-git-send-email-allen.lkml@gmail.com>

Use setup_timer function instead of initializing timer with the
   function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.

 net/nfc/hci/core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef..a8a6e78 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
 
 	INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
 
-	init_timer(&hdev->cmd_timer);
-	hdev->cmd_timer.data = (unsigned long)hdev;
-	hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+	setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout,
+		    (unsigned long)hdev);
 
 	skb_queue_head_init(&hdev->rx_hcp_frags);
 
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH 0/5] use setup_timer() helper function.
From: Allen @ 2017-09-25  7:30 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, sameo
In-Reply-To: <20170922.182231.828352137786459100.davem@davemloft.net>

>
> There was a recent change to the nfc code in net-next which causes
> your patches to not apply.
>
> Please repsin against net-next, thanks.

Sent out V2.

Thanks.

^ permalink raw reply

* [PATCH v2 1/4] net: af_packet: use setup_timer() helper.
From: Allen Pais @ 2017-09-25  7:30 UTC (permalink / raw)
  To: netdev; +Cc: davem, sameo, Allen Pais

Use setup_timer function instead of initializing timer with the
function and data fields.

Signed-off-by: Allen Pais <allen.lkml@gmail.com>
---
v2: rebased to latest net-next.

 net/packet/af_packet.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d288f52..b0f2218 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po,
 		struct tpacket_kbdq_core *pkc,
 		void (*func) (unsigned long))
 {
-	init_timer(&pkc->retire_blk_timer);
-	pkc->retire_blk_timer.data = (long)po;
-	pkc->retire_blk_timer.function = func;
+	setup_timer(&pkc->retire_blk_timer, func, (long)po);
 	pkc->retire_blk_timer.expires = jiffies;
 }
 
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH net-next 10/10] net: hns3: Add mqprio support when interacting with network stack
From: Yunsheng Lin @ 2017-09-25  7:22 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: davem@davemloft.net, huangdaode, xuwei (O), Liguozhu (Kenneth),
	Zhuangyuzeng (Yisen), Gabriele Paoloni, John Garry, Linuxarm,
	Salil Mehta, lipeng (Y), netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20170925065740.GB1899@nanopsycho>

Hi, Jiri

On 2017/9/25 14:57, Jiri Pirko wrote:
> Mon, Sep 25, 2017 at 02:45:08AM CEST, linyunsheng@huawei.com wrote:
>> Hi, Jiri
>>
>> On 2017/9/24 19:37, Jiri Pirko wrote:
>>> Sat, Sep 23, 2017 at 02:47:20AM CEST, linyunsheng@huawei.com wrote:
>>>> Hi, Jiri
>>>>
>>>> On 2017/9/23 0:03, Jiri Pirko wrote:
>>>>> Fri, Sep 22, 2017 at 04:11:51PM CEST, linyunsheng@huawei.com wrote:
>>>>>> Hi, Jiri
>>>>>>
>>>>>>>> - if (!tc) {
>>>>>>>> + if (if_running) {
>>>>>>>> + (void)hns3_nic_net_stop(netdev);
>>>>>>>> + msleep(100);
>>>>>>>> + }
>>>>>>>> +
>>>>>>>> + ret = (kinfo->dcb_ops && kinfo->dcb_ops->>setup_tc) ?
>>>>>>>> + kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : ->EOPNOTSUPP;
>>>>>>
>>>>>>> This is most odd. Why do you call dcb_ops from >ndo_setup_tc callback?
>>>>>>> Why are you mixing this together? prio->tc mapping >can be done
>>>>>>> directly in dcbnl
>>>>>>
>>>>>> Here is what we do in dcb_ops->setup_tc:
>>>>>> Firstly, if current tc num is different from the tc num
>>>>>> that user provide, then we setup the queues for each
>>>>>> tc.
>>>>>>
>>>>>> Secondly, we tell hardware the pri to tc mapping that
>>>>>> the stack is using. In rx direction, our hardware need
>>>>>> that mapping to put different packet into different tc'
>>>>>> queues according to the priority of the packet, then
>>>>>> rss decides which specific queue in the tc should the
>>>>>> packet goto.
>>>>>>
>>>>>> By mixing, I suppose you meant why we need the
>>>>>> pri to tc infomation?
>>>>>
>>>>> by mixing, I mean what I wrote. You are calling dcb_ops callback from
>>>>> ndo_setup_tc callback. So you are mixing DCBNL subsystem and TC
>>>>> subsystem. Why? Why do you need sch_mqprio? Why DCBNL is not enough for
>>>>> all?
>>>>
>>>> When using lldptool, dcbnl is involved.
>>>>
>>>> But when using tc qdisc, dcbbl is not involved, below is the a few key
>>>> call graph in the kernel when tc qdisc cmd is executed.
>>>>
>>>> cmd:
>>>> tc qdisc add dev eth0 root handle 1:0 mqprio num_tc 4 map 1 2 3 3 1 3 1 1 hw 1
>>>>
>>>> call graph:
>>>> rtnetlink_rcv_msg -> tc_modify_qdisc -> qdisc_create -> mqprio_init ->
>>>> hns3_nic_setup_tc
>>>>
>>>> When hns3_nic_setup_tc is called, we need to know how many tc num and
>>>> prio_tc mapping from the tc_mqprio_qopt which is provided in the paramter
>>>> in the ndo_setup_tc function, and dcb_ops is the our hardware specific
>>>> method to setup the tc related parameter to the hardware, so this is why
>>>> we call dcb_ops callback in ndo_setup_tc callback.
>>>>
>>>> I hope this will answer your question, thanks for your time.
>>>
>>> Okay. I understand that you have a usecase for mqprio mapping offload
>>> without lldptool being involved. Ok. I believe it is wrong to call dcb_ops
>>> from tc callback. You should have a generic layer inside the driver and
>>> call it from both dcb_ops and tc callbacks.
>>
>> Actually, dcb_ops is our generic layer inside the driver.
>> Below is high level architecture:
>>
>>       [ tc qdisc ]	       [ lldpad ]
>>             |                     |
>>             |                     |
>>             |                     |
>>       [ hns3_enet ]        [ hns3_dcbnl ]
>>             \                    /
>>                \              /
>>                   \        /
>>                 [ hclge_dcb ]
>>                   /      \
>>                /            \
>>             /                  \
>>     [ hclgc_main ]        [ hclge_tm ]
>>
>> hns3_enet.c implements the ndo_setup_tc callback.
>> hns3_dcbnl.c implements the dcbnl_rtnl_ops for stack's DCBNL system.
>> hclge_dcb implements the dcb_ops.
>> So we already have a generic layer that tc and dcbnl all call from.
>>
>>>
>>> Also, what happens If I run lldptool concurrently with mqprio? Who wins
>>> and is going to configure the mapping?
>>
>> Both lldptool and tc qdisc cmd use rtnl interface provided by stack, so
>> they are both protected by rtnl_lock, so we do not have to do the locking
>> in the driver.
> 
> I was not asking about locking, which is obvious, I was asking about the
> behaviour. Like for example:
> If I use tc to configure some mapping, later on I run lldptool and change
> the mapping. Does the tc dump show the updated values or the original
> ones?

If it is that case, tc dump show the updated values.
Normally, we use tc qdisc to configure the netdev to use mqprio, and
then use the lldptool the tc_prio mapping, tc schedule mode, tc bandwidth
and pfc option.

if lldptool change the tc num and tc_prio mapping, it will tell the tc
system by the following function which is called in client_ops->setup_tc:
netdev_set_tc_queue
netdev_set_prio_tc_map

So lldptool and tc qdisc can work together.



> 
>>
>> The locking is in rtnetlink_rcv_msg:
>>
>> 	rtnl_lock();
>> 	handlers = rtnl_dereference(rtnl_msg_handlers[family]);
>> 	if (handlers) {
>> 		doit = READ_ONCE(handlers[type].doit);
>> 		if (doit)
>> 			err = doit(skb, nlh, extack);
>> 	}
>> 	rtnl_unlock();
>>
>> Thanks.
>>
>>>
>>>
>>>>
>>>>>
>>>>>
>>>>>
>>>>>> I hope I did not misunderstand your question, thanks
>>>>>> for your time reviewing.
>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>> .
>>>
>>
> 
> .
> 

^ permalink raw reply

* Re: [2/2] ath9k: Avoid a potential deadlock
From: Kalle Valo @ 2017-09-25  7:18 UTC (permalink / raw)
  To: Ville Syrjälä
  Cc: linux-wireless, QCA ath9k Development, Kalle Valo, netdev,
	Ville Syrjälä
In-Reply-To: <20170918195919.15860-2-ville.syrjala@linux.intel.com>

Ville Syrjälä wrote:

> Lockdep warns us that sc_pm_lock and cc_lock can cause a deadlock when
> cc_lock is acquired by itself with interrupts enabled. Disable irqs
> whenever taking cc_lock to avoid this.
> 
> [   19.094524] kworker/u2:0/5 just changed the state of lock:
> [   19.094578]  (&(&sc->sc_pm_lock)->rlock){-.-...}, at: [<f836c00e>] ath_isr+0x15e/0x200 [ath9k]
> [   19.094674] but this lock took another, HARDIRQ-unsafe lock in the past:
> [   19.094731]  (&(&common->cc_lock)->rlock){+.-...}
> [   19.094741]
> 
>                and interrupts could create inverse lock ordering between them.
> 
> [   19.094866]
>                other info that might help us debug this:
> [   19.094926]  Possible interrupt unsafe locking scenario:
> 
> [   19.094985]        CPU0                    CPU1
> [   19.095036]        ----                    ----
> [   19.095086]   lock(&(&common->cc_lock)->rlock);
> [   19.095197]                                local_irq_disable();
> [   19.095305]                                lock(&(&sc->sc_pm_lock)->rlock);
> [   19.095423]                                lock(&(&common->cc_lock)->rlock);
> [   19.095539]   <Interrupt>
> [   19.095636]     lock(&(&sc->sc_pm_lock)->rlock);
> [   19.095745]
>                 *** DEADLOCK ***
> 
> [   19.095965] 3 locks held by kworker/u2:0/5:
> [   19.096067]  #0:  ("%s"wiphy_name(local->hw.wiphy)){.+.+.+}, at: [<c1067f37>] process_one_work+0x127/0x580
> [   19.096260]  #1:  ((&local->dynamic_ps_enable_work)){+.+...}, at: [<c1067f37>] process_one_work+0x127/0x580
> [   19.096447]  #2:  (&sc->mutex){+.+...}, at: [<f836b8b0>] ath9k_config+0x30/0x1d0 [ath9k]
> [   19.096639]
>                the shortest dependencies between 2nd lock and 1st lock:
> [   19.096813]  -> (&(&common->cc_lock)->rlock){+.-...} ops: 38 {
> [   19.096816]     HARDIRQ-ON-W at:
> [   19.096816]                       __lock_acquire+0x57e/0x1260
> [   19.096816]                       lock_acquire+0xb1/0x1c0
> [   19.096816]                       _raw_spin_lock_bh+0x3f/0x50
> [   19.096816]                       ath_chanctx_set_channel+0xb6/0x2c0 [ath9k]
> [   19.096816]                       ath9k_config+0xa8/0x1d0 [ath9k]
> [   19.096816]                       ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [   19.096816]                       ieee80211_do_open+0x67a/0x920 [mac80211]
> [   19.096816]                       ieee80211_open+0x41/0x50 [mac80211]
> [   19.096816]                       __dev_open+0xab/0x140
> [   19.096816]                       __dev_change_flags+0x89/0x150
> [   19.096816]                       dev_change_flags+0x28/0x60
> [   19.096816]                       do_setlink+0x290/0x890
> [   19.096816]                       rtnl_newlink+0x7cf/0x8e0
> [   19.096816]                       rtnetlink_rcv_msg+0xbf/0x1f0
> [   19.096816]                       netlink_rcv_skb+0xb9/0xe0
> [   19.096816]                       rtnetlink_rcv+0x1e/0x30
> [   19.096816]                       netlink_unicast+0x13a/0x2c0
> [   19.096816]                       netlink_sendmsg+0x290/0x380
> [   19.096816]                       ___sys_sendmsg+0x1e2/0x280
> [   19.096816]                       __sys_sendmsg+0x3f/0x80
> [   19.096816]                       SyS_socketcall+0x58c/0x6b0
> [   19.096816]                       do_fast_syscall_32+0x96/0x1d0
> [   19.096816]                       entry_SYSENTER_32+0x4c/0x7b
> [   19.096816]     IN-SOFTIRQ-W at:
> [   19.096816]                       __lock_acquire+0x55a/0x1260
> [   19.096816]                       lock_acquire+0xb1/0x1c0
> [   19.096816]                       _raw_spin_lock+0x3c/0x50
> [   19.096816]                       ath_ps_full_sleep+0x24/0x70 [ath9k]
> [   19.096816]                       call_timer_fn+0xa4/0x300
> [   19.096816]                       run_timer_softirq+0x1b1/0x560
> [   19.096816]                       __do_softirq+0xb0/0x430
> [   19.096816]                       do_softirq_own_stack+0x33/0x40
> [   19.096816]                       irq_exit+0xad/0xc0
> [   19.096816]                       smp_apic_timer_interrupt+0x31/0x40
> [   19.096816]                       apic_timer_interrupt+0x37/0x3c
> [   19.096816]                       wp_page_copy+0xb8/0x580
> [   19.096816]                       do_wp_page+0x64/0x420
> [   19.096816]                       handle_mm_fault+0x430/0x990
> [   19.096816]                       __do_page_fault+0x18b/0x430
> [   19.096816]                       do_page_fault+0xb/0x10
> [   19.096816]                       common_exception+0x62/0x6a
> [   19.096816]     INITIAL USE at:
> [   19.096816]                      __lock_acquire+0x204/0x1260
> [   19.096816]                      lock_acquire+0xb1/0x1c0
> [   19.096816]                      _raw_spin_lock_bh+0x3f/0x50
> [   19.096816]                      ath_chanctx_set_channel+0xb6/0x2c0 [ath9k]
> [   19.096816]                      ath9k_config+0xa8/0x1d0 [ath9k]
> [   19.096816]                      ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [   19.096816]                      ieee80211_do_open+0x67a/0x920 [mac80211]
> [   19.096816]                      ieee80211_open+0x41/0x50 [mac80211]
> [   19.096816]                      __dev_open+0xab/0x140
> [   19.096816]                      __dev_change_flags+0x89/0x150
> [   19.096816]                      dev_change_flags+0x28/0x60
> [   19.096816]                      do_setlink+0x290/0x890
> [   19.096816]                      rtnl_newlink+0x7cf/0x8e0
> [   19.096816]                      rtnetlink_rcv_msg+0xbf/0x1f0
> [   19.096816]                      netlink_rcv_skb+0xb9/0xe0
> [   19.096816]                      rtnetlink_rcv+0x1e/0x30
> [   19.096816]                      netlink_unicast+0x13a/0x2c0
> [   19.096816]                      netlink_sendmsg+0x290/0x380
> [   19.096816]                      ___sys_sendmsg+0x1e2/0x280
> [   19.096816]                      __sys_sendmsg+0x3f/0x80
> [   19.096816]                      SyS_socketcall+0x58c/0x6b0
> [   19.096816]                      do_fast_syscall_32+0x96/0x1d0
> [   19.096816]                      entry_SYSENTER_32+0x4c/0x7b
> [   19.096816]   }
> [   19.096816]   ... key      at: [<f837b694>] __key.61991+0x0/0xffffc96c [ath9k]
> [   19.096816]   ... acquired at:
> [   19.096816]    lock_acquire+0xb1/0x1c0
> [   19.096816]    _raw_spin_lock+0x3c/0x50
> [   19.096816]    ath9k_ps_wakeup+0x85/0xe0 [ath9k]
> [   19.096816]    ath9k_bss_info_changed+0x2a/0x1b0 [ath9k]
> [   19.096816]    ieee80211_bss_info_change_notify+0xf3/0x360 [mac80211]
> [   19.096816]    ieee80211_recalc_txpower+0x33/0x40 [mac80211]
> [   19.096816]    ieee80211_set_tx_power+0x45/0x1d0 [mac80211]
> [   19.096816]    cfg80211_wext_siwtxpower+0xd3/0x350 [cfg80211]
> [   19.096816]    ioctl_standard_call+0x4e/0x400
> [   19.096816]    wext_handle_ioctl+0xf4/0x190
> [   19.096816]    dev_ioctl+0xb7/0x630
> [   19.096816]    sock_ioctl+0x13e/0x2d0
> [   19.096816]    do_vfs_ioctl+0x84/0x750
> [   19.096816]    SyS_ioctl+0x34/0x60
> [   19.096816]    do_fast_syscall_32+0x96/0x1d0
> [   19.096816]    entry_SYSENTER_32+0x4c/0x7b
> 
> [   19.096816] -> (&(&sc->sc_pm_lock)->rlock){-.-...} ops: 597 {
> [   19.096816]    IN-HARDIRQ-W at:
> [   19.096816]                     __lock_acquire+0x6ae/0x1260
> [   19.096816]                     lock_acquire+0xb1/0x1c0
> [   19.096816]                     _raw_spin_lock_irqsave+0x45/0x60
> [   19.096816]                     ath_isr+0x15e/0x200 [ath9k]
> [   19.096816]                     __handle_irq_event_percpu+0x44/0x340
> [   19.096816]                     handle_irq_event_percpu+0x1d/0x50
> [   19.096816]                     handle_irq_event+0x32/0x60
> [   19.096816]                     handle_level_irq+0x81/0x100
> [   19.096816]                     handle_irq+0x9c/0xd0
> [   19.096816]                     do_IRQ+0x5c/0x120
> [   19.096816]                     common_interrupt+0x36/0x3c
> [   19.096816]                     _raw_spin_unlock_irqrestore+0x57/0x70
> [   19.096816]                     ath9k_config+0x16a/0x1d0 [ath9k]
> [   19.096816]                     ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [   19.096816]                     ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211]
> [   19.096816]                     process_one_work+0x1d1/0x580
> [   19.096816]                     worker_thread+0x31/0x380
> [   19.096816]                     kthread+0xd9/0x110
> [   19.096816]                     ret_from_fork+0x19/0x24
> [   19.096816]    IN-SOFTIRQ-W at:
> [   19.096816]                     __lock_acquire+0x55a/0x1260
> [   19.096816]                     lock_acquire+0xb1/0x1c0
> [   19.096816]                     _raw_spin_lock_irqsave+0x45/0x60
> [   19.096816]                     ath9k_ps_wakeup+0x24/0xe0 [ath9k]
> [   19.096816]                     ath9k_tasklet+0x42/0x260 [ath9k]
> [   19.096816]                     tasklet_action+0x196/0x1e0
> [   19.096816]                     __do_softirq+0xb0/0x430
> [   19.096816]                     do_softirq_own_stack+0x33/0x40
> [   19.096816]                     irq_exit+0xad/0xc0
> [   19.096816]                     do_IRQ+0x65/0x120
> [   19.096816]                     common_interrupt+0x36/0x3c
> [   19.096816]                     get_page_from_freelist+0x20a/0x970
> [   19.096816]                     __alloc_pages_nodemask+0xca/0xed0
> [   19.096816]                     __get_free_pages+0x14/0x30
> [   19.096816]                     pgd_alloc+0x1d/0x160
> [   19.096816]                     mm_init.isra.47+0x13a/0x1b0
> [   19.096816]                     copy_process.part.54+0xb55/0x1700
> [   19.096816]                     _do_fork+0xd4/0x6a0
> [   19.096816]                     SyS_clone+0x27/0x30
> [   19.096816]                     do_fast_syscall_32+0x96/0x1d0
> [   19.096816]                     entry_SYSENTER_32+0x4c/0x7b
> [   19.096816]    INITIAL USE at:
> [   19.096816]                    __lock_acquire+0x204/0x1260
> [   19.096816]                    lock_acquire+0xb1/0x1c0
> [   19.096816]                    _raw_spin_lock_irqsave+0x45/0x60
> [   19.096816]                    ath9k_ps_wakeup+0x24/0xe0 [ath9k]
> [   19.096816]                    ath9k_start+0x29/0x1f0 [ath9k]
> [   19.096816]                    drv_start+0x71/0x270 [mac80211]
> [   19.096816]                    ieee80211_do_open+0x31f/0x920 [mac80211]
> [   19.096816]                    ieee80211_open+0x41/0x50 [mac80211]
> [   19.096816]                    __dev_open+0xab/0x140
> [   19.096816]                    __dev_change_flags+0x89/0x150
> [   19.096816]                    dev_change_flags+0x28/0x60
> [   19.096816]                    do_setlink+0x290/0x890
> [   19.096816]                    rtnl_newlink+0x7cf/0x8e0
> [   19.096816]                    rtnetlink_rcv_msg+0xbf/0x1f0
> [   19.096816]                    netlink_rcv_skb+0xb9/0xe0
> [   19.096816]                    rtnetlink_rcv+0x1e/0x30
> [   19.096816]                    netlink_unicast+0x13a/0x2c0
> [   19.096816]                    netlink_sendmsg+0x290/0x380
> [   19.096816]                    ___sys_sendmsg+0x1e2/0x280
> [   19.096816]                    __sys_sendmsg+0x3f/0x80
> [   19.096816]                    SyS_socketcall+0x58c/0x6b0
> [   19.096816]                    do_fast_syscall_32+0x96/0x1d0
> [   19.096816]                    entry_SYSENTER_32+0x4c/0x7b
> [   19.096816]  }
> [   19.096816]  ... key      at: [<f837b67c>] __key.61994+0x0/0xffffc984 [ath9k]
> [   19.096816]  ... acquired at:
> [   19.096816]    check_usage_forwards+0x118/0x120
> [   19.096816]    mark_lock+0x2e4/0x590
> [   19.096816]    __lock_acquire+0x6ae/0x1260
> [   19.096816]    lock_acquire+0xb1/0x1c0
> [   19.096816]    _raw_spin_lock_irqsave+0x45/0x60
> [   19.096816]    ath_isr+0x15e/0x200 [ath9k]
> [   19.096816]    __handle_irq_event_percpu+0x44/0x340
> [   19.096816]    handle_irq_event_percpu+0x1d/0x50
> [   19.096816]    handle_irq_event+0x32/0x60
> [   19.096816]    handle_level_irq+0x81/0x100
> [   19.096816]    handle_irq+0x9c/0xd0
> [   19.096816]    do_IRQ+0x5c/0x120
> [   19.096816]    common_interrupt+0x36/0x3c
> [   19.096816]    _raw_spin_unlock_irqrestore+0x57/0x70
> [   19.096816]    ath9k_config+0x16a/0x1d0 [ath9k]
> [   19.096816]    ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [   19.096816]    ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211]
> [   19.096816]    process_one_work+0x1d1/0x580
> [   19.096816]    worker_thread+0x31/0x380
> [   19.096816]    kthread+0xd9/0x110
> [   19.096816]    ret_from_fork+0x19/0x24
> 
> [   19.096816]
>                stack backtrace:
> [   19.096816] CPU: 0 PID: 5 Comm: kworker/u2:0 Not tainted 4.13.0-mgm-ovl+ #51
> [   19.096816] Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26  05/10/2004
> [   19.096816] Workqueue: phy0 ieee80211_dynamic_ps_enable_work [mac80211]
> [   19.096816] Call Trace:
> [   19.096816]  <IRQ>
> [   19.096816]  dump_stack+0x16/0x19
> [   19.096816]  print_irq_inversion_bug.part.37+0x16c/0x179
> [   19.096816]  check_usage_forwards+0x118/0x120
> [   19.096816]  ? ret_from_fork+0x19/0x24
> [   19.096816]  ? print_shortest_lock_dependencies+0x1a0/0x1a0
> [   19.096816]  mark_lock+0x2e4/0x590
> [   19.096816]  ? print_shortest_lock_dependencies+0x1a0/0x1a0
> [   19.096816]  __lock_acquire+0x6ae/0x1260
> [   19.096816]  lock_acquire+0xb1/0x1c0
> [   19.096816]  ? ath_isr+0x15e/0x200 [ath9k]
> [   19.096816]  _raw_spin_lock_irqsave+0x45/0x60
> [   19.096816]  ? ath_isr+0x15e/0x200 [ath9k]
> [   19.096816]  ath_isr+0x15e/0x200 [ath9k]
> [   19.096816]  __handle_irq_event_percpu+0x44/0x340
> [   19.096816]  handle_irq_event_percpu+0x1d/0x50
> [   19.096816]  handle_irq_event+0x32/0x60
> [   19.096816]  ? handle_nested_irq+0x100/0x100
> [   19.096816]  handle_level_irq+0x81/0x100
> [   19.096816]  handle_irq+0x9c/0xd0
> [   19.096816]  </IRQ>
> [   19.096816]  do_IRQ+0x5c/0x120
> [   19.096816]  common_interrupt+0x36/0x3c
> [   19.096816] EIP: _raw_spin_unlock_irqrestore+0x57/0x70
> [   19.096816] EFLAGS: 00000286 CPU: 0
> [   19.096816] EAX: f60a3600 EBX: 00000286 ECX: 00000006 EDX: 00000001
> [   19.096816] ESI: f46c9e68 EDI: f46c8620 EBP: f60b5e8c ESP: f60b5e84
> [   19.096816]  DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> [   19.096816]  ath9k_config+0x16a/0x1d0 [ath9k]
> [   19.096816]  ieee80211_hw_config+0xa8/0x5f0 [mac80211]
> [   19.096816]  ? ieee80211_hw_config+0x1db/0x5f0 [mac80211]
> [   19.096816]  ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211]
> [   19.096816]  ? process_one_work+0x127/0x580
> [   19.096816]  ? process_one_work+0x127/0x580
> [   19.096816]  process_one_work+0x1d1/0x580
> [   19.096816]  ? process_one_work+0x127/0x580
> [   19.096816]  worker_thread+0x31/0x380
> [   19.096816]  kthread+0xd9/0x110
> [   19.096816]  ? process_one_work+0x580/0x580
> [   19.096816]  ? kthread_create_on_node+0x30/0x30
> [   19.096816]  ret_from_fork+0x19/0x24
> 
> Cc: QCA ath9k Development <ath9k-devel@qca.qualcomm.com>
> Cc: Kalle Valo <kvalo@codeaurora.org>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

Patch applied to ath-next branch of ath.git, thanks.

ba24d63dd374 ath9k: Avoid a potential deadlock

-- 
https://patchwork.kernel.org/patch/9957575/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: ath10k: make ath10k_hw_ce_regs const
From: Kalle Valo @ 2017-09-25  7:16 UTC (permalink / raw)
  To: Bhumika Goyal
  Cc: julia.lawall, ath10k, linux-wireless, netdev, linux-kernel,
	Bhumika Goyal
In-Reply-To: <1505329312-26432-1-git-send-email-bhumirks@gmail.com>

Bhumika Goyal <bhumirks@gmail.com> wrote:

> Make them const as they are not modified in the file referencing
> them. They are only stored in the const field 'hw_ce_reg' of an ath10k
> structure. Also, make the declarations in the header const.
> 
> Signed-off-by: Bhumika Goyal <bhumirks@gmail.com>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

Patch applied to ath-next branch of ath.git, thanks.

496cbf3ebb6b ath10k: make ath10k_hw_ce_regs const

-- 
https://patchwork.kernel.org/patch/9951901/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: [8/10] ath9k: Use ARRAY_SIZE macro
From: Kalle Valo @ 2017-09-25  7:15 UTC (permalink / raw)
  To: Thomas Meyer; +Cc: kvalo, linux-wireless, netdev, linux-kernel
In-Reply-To: <1504439110050-1887263060-8-diffsplit-thomas@m3y3r.de>

Thomas Meyer <thomas@m3y3r.de> wrote:

> Use ARRAY_SIZE macro, rather than explicitly coding some variant of it
> yourself.
> Found with: find -type f -name "*.c" -o -name "*.h" | xargs perl -p -i -e
> 's/\bsizeof\s*\(\s*(\w+)\s*\)\s*\ /\s*sizeof\s*\(\s*\1\s*\[\s*0\s*\]\s*\)
> /ARRAY_SIZE(\1)/g' and manual check/verification.
> 
> Signed-off-by: Thomas Meyer <thomas@m3y3r.de>
> Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

Patch applied to ath-next branch of ath.git, thanks.

896cbefadf62 ath9k: Use ARRAY_SIZE macro

-- 
https://patchwork.kernel.org/patch/9936271/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: [PATCH net-next 10/10] net: hns3: Add mqprio support when interacting with network stack
From: Jiri Pirko @ 2017-09-25  6:57 UTC (permalink / raw)
  To: Yunsheng Lin
  Cc: davem@davemloft.net, huangdaode, xuwei (O), Liguozhu (Kenneth),
	Zhuangyuzeng (Yisen), Gabriele Paoloni, John Garry, Linuxarm,
	Salil Mehta, lipeng (Y), netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <290b0679-bfc2-c23c-00ee-43768c1c2327@huawei.com>

Mon, Sep 25, 2017 at 02:45:08AM CEST, linyunsheng@huawei.com wrote:
>Hi, Jiri
>
>On 2017/9/24 19:37, Jiri Pirko wrote:
>> Sat, Sep 23, 2017 at 02:47:20AM CEST, linyunsheng@huawei.com wrote:
>>> Hi, Jiri
>>>
>>> On 2017/9/23 0:03, Jiri Pirko wrote:
>>>> Fri, Sep 22, 2017 at 04:11:51PM CEST, linyunsheng@huawei.com wrote:
>>>>> Hi, Jiri
>>>>>
>>>>>>> - if (!tc) {
>>>>>>> + if (if_running) {
>>>>>>> + (void)hns3_nic_net_stop(netdev);
>>>>>>> + msleep(100);
>>>>>>> + }
>>>>>>> +
>>>>>>> + ret = (kinfo->dcb_ops && kinfo->dcb_ops->>setup_tc) ?
>>>>>>> + kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : ->EOPNOTSUPP;
>>>>>
>>>>>> This is most odd. Why do you call dcb_ops from >ndo_setup_tc callback?
>>>>>> Why are you mixing this together? prio->tc mapping >can be done
>>>>>> directly in dcbnl
>>>>>
>>>>> Here is what we do in dcb_ops->setup_tc:
>>>>> Firstly, if current tc num is different from the tc num
>>>>> that user provide, then we setup the queues for each
>>>>> tc.
>>>>>
>>>>> Secondly, we tell hardware the pri to tc mapping that
>>>>> the stack is using. In rx direction, our hardware need
>>>>> that mapping to put different packet into different tc'
>>>>> queues according to the priority of the packet, then
>>>>> rss decides which specific queue in the tc should the
>>>>> packet goto.
>>>>>
>>>>> By mixing, I suppose you meant why we need the
>>>>> pri to tc infomation?
>>>>
>>>> by mixing, I mean what I wrote. You are calling dcb_ops callback from
>>>> ndo_setup_tc callback. So you are mixing DCBNL subsystem and TC
>>>> subsystem. Why? Why do you need sch_mqprio? Why DCBNL is not enough for
>>>> all?
>>>
>>> When using lldptool, dcbnl is involved.
>>>
>>> But when using tc qdisc, dcbbl is not involved, below is the a few key
>>> call graph in the kernel when tc qdisc cmd is executed.
>>>
>>> cmd:
>>> tc qdisc add dev eth0 root handle 1:0 mqprio num_tc 4 map 1 2 3 3 1 3 1 1 hw 1
>>>
>>> call graph:
>>> rtnetlink_rcv_msg -> tc_modify_qdisc -> qdisc_create -> mqprio_init ->
>>> hns3_nic_setup_tc
>>>
>>> When hns3_nic_setup_tc is called, we need to know how many tc num and
>>> prio_tc mapping from the tc_mqprio_qopt which is provided in the paramter
>>> in the ndo_setup_tc function, and dcb_ops is the our hardware specific
>>> method to setup the tc related parameter to the hardware, so this is why
>>> we call dcb_ops callback in ndo_setup_tc callback.
>>>
>>> I hope this will answer your question, thanks for your time.
>> 
>> Okay. I understand that you have a usecase for mqprio mapping offload
>> without lldptool being involved. Ok. I believe it is wrong to call dcb_ops
>> from tc callback. You should have a generic layer inside the driver and
>> call it from both dcb_ops and tc callbacks.
>
>Actually, dcb_ops is our generic layer inside the driver.
>Below is high level architecture:
>
>       [ tc qdisc ]	       [ lldpad ]
>             |                     |
>             |                     |
>             |                     |
>       [ hns3_enet ]        [ hns3_dcbnl ]
>             \                    /
>                \              /
>                   \        /
>                 [ hclge_dcb ]
>                   /      \
>                /            \
>             /                  \
>     [ hclgc_main ]        [ hclge_tm ]
>
>hns3_enet.c implements the ndo_setup_tc callback.
>hns3_dcbnl.c implements the dcbnl_rtnl_ops for stack's DCBNL system.
>hclge_dcb implements the dcb_ops.
>So we already have a generic layer that tc and dcbnl all call from.
>
>> 
>> Also, what happens If I run lldptool concurrently with mqprio? Who wins
>> and is going to configure the mapping?
>
>Both lldptool and tc qdisc cmd use rtnl interface provided by stack, so
>they are both protected by rtnl_lock, so we do not have to do the locking
>in the driver.

I was not asking about locking, which is obvious, I was asking about the
behaviour. Like for example:
If I use tc to configure some mapping, later on I run lldptool and change
the mapping. Does the tc dump show the updated values or the original
ones?

>
>The locking is in rtnetlink_rcv_msg:
>
>	rtnl_lock();
>	handlers = rtnl_dereference(rtnl_msg_handlers[family]);
>	if (handlers) {
>		doit = READ_ONCE(handlers[type].doit);
>		if (doit)
>			err = doit(skb, nlh, extack);
>	}
>	rtnl_unlock();
>
>Thanks.
>
>> 
>> 
>>>
>>>>
>>>>
>>>>
>>>>> I hope I did not misunderstand your question, thanks
>>>>> for your time reviewing.
>>>>
>>>> .
>>>>
>>>
>> 
>> .
>> 
>

^ permalink raw reply

* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Yunsheng Lin @ 2017-09-25  6:32 UTC (permalink / raw)
  To: Yotam Gigi, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <54bef87c-8b7a-2512-3763-bac3469db421@mellanox.com>

Hi, Yotam

On 2017/9/25 13:38, Yotam Gigi wrote:
> On 09/25/2017 04:19 AM, Yunsheng Lin wrote:
>> Hi, Jiri
>>
>> On 2017/9/25 1:22, Jiri Pirko wrote:
>>> From: Yotam Gigi <yotamg@mellanox.com>
>>>
>>> Make the ipmr module register as a FIB notifier. To do that, implement both
>>> the ipmr_seq_read and ipmr_dump ops.
>>>
>>> The ipmr_seq_read op returns a sequence counter that is incremented on
>>> every notification related operation done by the ipmr. To implement that,
>>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>>> new MFC route or VIF are added or deleted. The sequence operations are
>>> protected by the RTNL lock.
>>>
>>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>>> and sends notifications about them. The entries dump is done under RCU
>>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>>> under RCU.
>>>
>>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>> ---
>>> v1->v2:
>>>  - Take the mrt_lock when dumping VIF entries.
>>> ---
>>>  include/linux/mroute.h   |  15 ++++++
>>>  include/net/netns/ipv4.h |   3 ++
>>>  net/ipv4/ipmr.c          | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>>>  3 files changed, 153 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>>> index 10028f2..54c5cb8 100644
>>> --- a/include/linux/mroute.h
>>> +++ b/include/linux/mroute.h
>>> @@ -5,6 +5,7 @@
>>>  #include <linux/pim.h>
>>>  #include <linux/rhashtable.h>
>>>  #include <net/sock.h>
>>> +#include <net/fib_notifier.h>
>>>  #include <uapi/linux/mroute.h>
>>>  
>>>  #ifdef CONFIG_IP_MROUTE
>>> @@ -58,6 +59,14 @@ struct vif_device {
>>>  	int		link;			/* Physical interface index	*/
>>>  };
>>>  
>>> +struct vif_entry_notifier_info {
>>> +	struct fib_notifier_info info;
>>> +	struct net_device *dev;
>>> +	vifi_t vif_index;
>>> +	unsigned short vif_flags;
>>> +	u32 tb_id;
>>> +};
>>> +
>>>  #define VIFF_STATIC 0x8000
>>>  
>>>  #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
>>> @@ -146,6 +155,12 @@ struct mfc_cache {
>>>  	struct rcu_head	rcu;
>>>  };
>>>  
>>> +struct mfc_entry_notifier_info {
>>> +	struct fib_notifier_info info;
>>> +	struct mfc_cache *mfc;
>>> +	u32 tb_id;
>>> +};
>>> +
>>>  struct rtmsg;
>>>  int ipmr_get_route(struct net *net, struct sk_buff *skb,
>>>  		   __be32 saddr, __be32 daddr,
>>> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
>>> index 8387f09..abc84d9 100644
>>> --- a/include/net/netns/ipv4.h
>>> +++ b/include/net/netns/ipv4.h
>>> @@ -163,6 +163,9 @@ struct netns_ipv4 {
>>>  	struct fib_notifier_ops	*notifier_ops;
>>>  	unsigned int	fib_seq;	/* protected by rtnl_mutex */
>>>  
>>> +	struct fib_notifier_ops	*ipmr_notifier_ops;
>> Can we add a const here?
> 
> It cannot be const as it get initialized it in ipmr_notifier_init.
> 
>>
>>> +	unsigned int	ipmr_seq;	/* protected by rtnl_mutex */
>>> +
>>>  	atomic_t	rt_genid;
>>>  };
>>>  #endif
>>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>>> index 86dc5f9..49879c3 100644
>>> --- a/net/ipv4/ipmr.c
>>> +++ b/net/ipv4/ipmr.c
>>> @@ -264,6 +264,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>>>  	fib_rules_unregister(net->ipv4.mr_rules_ops);
>>>  	rtnl_unlock();
>>>  }
>>> +
>>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>>> +{
>>> +	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
>>> +}
>>> +
>>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>>> +{
>>> +	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
>>> +}
>>>  #else
>>>  #define ipmr_for_each_table(mrt, net) \
>>>  	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
>>> @@ -298,6 +308,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>>>  	net->ipv4.mrt = NULL;
>>>  	rtnl_unlock();
>>>  }
>>> +
>>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>>> +{
>>> +	return 0;
>>> +}
>>> +
>>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>>> +{
>>> +	return 0;
>>> +}
>>>  #endif
>>>  
>>>  static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>>> @@ -587,6 +607,43 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
>>>  }
>>>  #endif
>>>  
>>> +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
>>> +					struct net *net,
>>> +					enum fib_event_type event_type,
>>> +					struct vif_device *vif,
>>> +					vifi_t vif_index, u32 tb_id)
>>> +{
>>> +	struct vif_entry_notifier_info info = {
>>> +		.info = {
>>> +			.family = RTNL_FAMILY_IPMR,
>>> +			.net = net,
>>> +		},
>>> +		.dev = vif->dev,
>>> +		.vif_index = vif_index,
>>> +		.vif_flags = vif->flags,
>>> +		.tb_id = tb_id,
>>> +	};
>> We only use info.info which is fib_notifier_info, the
>> vif_entry_notifier_info seems to be not needed, why not just
>> use fib_notifier_info?
> 
> No, that's not true.
> 
> The driver gets the notification with a pointer to a fib_notifier_info struct,
> and according to the type field uses container_of to get to the parent struct,
> which in this case is vif_entry_notifier_info. All the fields here are needed.
> You can see this code in patch 10.
> 
> By the way, this function is completely symmetric to fib4 (which is in
> fib_trie.c +88) and fib6 (which is in ip6_fib +336) notify functions, who uses
> the exact same process.

Thanks for clarifying, I am not familar with ipmr, so only checking
coding style.


> 
>>
>>> +
>>> +	return call_fib_notifier(nb, net, event_type, &info.info);
>>> +}
>>> +
>>> +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>>> +					struct net *net,
>>> +					enum fib_event_type event_type,
>>> +					struct mfc_cache *mfc, u32 tb_id)
>>> +{
>>> +	struct mfc_entry_notifier_info info = {
>>> +		.info = {
>>> +			.family = RTNL_FAMILY_IPMR,
>>> +			.net = net,
>>> +		},
>>> +		.mfc = mfc,
>>> +		.tb_id = tb_id
>>> +	};
>>> +
>> As above.
> 
> 
> As above.
> 
> 
>>
>>> +	return call_fib_notifier(nb, net, event_type, &info.info);
>>> +}
>>> +
>>>  /**
>>>   *	vif_delete - Delete a VIF entry
>>>   *	@notify: Set to 1, if the caller is a notifier_call
>>> @@ -3050,14 +3107,87 @@ static const struct net_protocol pim_protocol = {
>>>  };
>>>  #endif
>>>  
>>> +static unsigned int ipmr_seq_read(struct net *net)
>>> +{
>>> +	ASSERT_RTNL();
>>> +
>>> +	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
>>> +}
>>> +
>>> +static int ipmr_dump(struct net *net, struct notifier_block *nb)
>>> +{
>>> +	struct mr_table *mrt;
>>> +	int err;
>>> +
>>> +	err = ipmr_rules_dump(net, nb);
>>> +	if (err)
>>> +		return err;
>>> +
>>> +	ipmr_for_each_table(mrt, net) {
>>> +		struct vif_device *v = &mrt->vif_table[0];
>>> +		struct mfc_cache *mfc;
>>> +		int vifi;
>>> +
>>> +		/* Notifiy on table VIF entries */
>>> +		read_lock(&mrt_lock);
>>> +		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
>>> +			if (!v->dev)
>>> +				continue;
>>> +
>>> +			call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
>>> +						     v, vifi, mrt->id);
>>> +		}
>>> +		read_unlock(&mrt_lock);
>>> +
>>> +		/* Notify on table MFC entries */
>>> +		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
>>> +			call_ipmr_mfc_entry_notifier(nb, net,
>>> +						     FIB_EVENT_ENTRY_ADD, mfc,
>>> +						     mrt->id);
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static const struct fib_notifier_ops ipmr_notifier_ops_template = {
>>> +	.family		= RTNL_FAMILY_IPMR,
>>> +	.fib_seq_read	= ipmr_seq_read,
>>> +	.fib_dump	= ipmr_dump,
>>> +	.owner		= THIS_MODULE,
>>> +};
>>> +
>>> +int __net_init ipmr_notifier_init(struct net *net)
>>> +{
>>> +	struct fib_notifier_ops *ops;
>>> +
>>> +	net->ipv4.ipmr_seq = 0;
>>> +
>>> +	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
>>> +	if (IS_ERR(ops))
>>> +		return PTR_ERR(ops);
>>> +	net->ipv4.ipmr_notifier_ops = ops;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void __net_exit ipmr_notifier_exit(struct net *net)
>>> +{
>>> +	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
>>> +	net->ipv4.ipmr_notifier_ops = NULL;
>>> +}
>>> +
>>>  /* Setup for IP multicast routing */
>>>  static int __net_init ipmr_net_init(struct net *net)
>>>  {
>>>  	int err;
>>>  
>>> +	err = ipmr_notifier_init(net);
>>> +	if (err)
>>> +		goto ipmr_notifier_fail;
>>> +
>>>  	err = ipmr_rules_init(net);
>>>  	if (err < 0)
>>> -		goto fail;
>>> +		goto ipmr_rules_fail;
>>>  
>>>  #ifdef CONFIG_PROC_FS
>>>  	err = -ENOMEM;
>>> @@ -3074,7 +3204,9 @@ static int __net_init ipmr_net_init(struct net *net)
>>>  proc_vif_fail:
>>>  	ipmr_rules_exit(net);
>>>  #endif
>>> -fail:
>>> +ipmr_rules_fail:
>>> +	ipmr_notifier_exit(net);
>>> +ipmr_notifier_fail:
>>>  	return err;
>>>  }
>>>  
>>> @@ -3084,6 +3216,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
>>>  	remove_proc_entry("ip_mr_cache", net->proc_net);
>>>  	remove_proc_entry("ip_mr_vif", net->proc_net);
>>>  #endif
>>> +	ipmr_notifier_exit(net);
>>>  	ipmr_rules_exit(net);
>>>  }
>>>  
>>>
> 
> 
> .
> 

^ permalink raw reply

* Re: [PATCH] mac80211: aead api to reduce redundancy
From: Herbert Xu @ 2017-09-25  6:14 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Xiang Gao, David S. Miller, linux-crypto, linux-kernel,
	linux-wireless, netdev
In-Reply-To: <1506316946.2909.8.camel@sipsolutions.net>

On Mon, Sep 25, 2017 at 07:22:26AM +0200, Johannes Berg wrote:
> 
> The code moves to crypto/ though, and I'm not even sure I can vouch for
> the Makefile choice there.

Thanks, I missed that.  I don't think this belongs in crypto.
This proposed helper is only useful for wireless so it should
stay there.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [patch net-next v2 07/12] mlxsw: spectrum: Add the multicast routing offloading logic
From: Yotam Gigi @ 2017-09-25  5:55 UTC (permalink / raw)
  To: Yunsheng Lin, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <fb5d1fcc-ad70-d656-f860-7b05cdd2834a@huawei.com>

On 09/25/2017 04:48 AM, Yunsheng Lin wrote:
> Hi, Jiri
>
> On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> Add the multicast router offloading logic, which is in charge of handling
>> the VIF and MFC notifications and translating it to the hardware logic API.
>>
>> The offloading logic has to overcome several obstacles in order to safely
>> comply with the kernel multicast router user API:
>>  - It must keep track of the mapping between VIFs to netdevices. The user
>>    can add an MFC cache entry pointing to a VIF, delete the VIF and add
>>    re-add it with a different netdevice. The offloading logic has to handle
>>    this in order to be compatible with the kernel logic.
>>  - It must keep track of the mapping between netdevices to spectrum RIFs,
>>    as the current hardware implementation assume having a RIF for every
>>    port in a multicast router.
>>  - It must handle routes pointing to pimreg device to be trapped to the
>>    kernel, as the packet should be delivered to userspace.
>>  - It must handle routes pointing tunnel VIFs. The current implementation
>>    does not support multicast forwarding to tunnels, thus routes that point
>>    to a tunnel should be trapped to the kernel.
>>  - It must be aware of proxy multicast routes, which include both (*,*)
>>    routes and duplicate routes. Currently proxy routes are not offloaded
>>    and trigger the abort mechanism: removal of all routes from hardware and
>>    triggering the traffic to go through the kernel.
>>
>> The multicast routing offloading logic also updates the counters of the
>> offloaded MFC routes in a periodic work.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> v1->v2:
>>  - Update the lastuse MFC entry field too, in addition to packets an bytes.
>> ---
>>  drivers/net/ethernet/mellanox/mlxsw/Makefile      |    3 +-
>>  drivers/net/ethernet/mellanox/mlxsw/spectrum.h    |    1 +
>>  drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 1014 +++++++++++++++++++++
>>  drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h |  133 +++
>>  4 files changed, 1150 insertions(+), 1 deletion(-)
>>  create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>>  create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
>> index 4b88158..9b29764 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
>> @@ -17,7 +17,8 @@ mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
>>  				   spectrum_kvdl.o spectrum_acl_tcam.o \
>>  				   spectrum_acl.o spectrum_flower.o \
>>  				   spectrum_cnt.o spectrum_fid.o \
>> -				   spectrum_ipip.o spectrum_acl_flex_actions.o
>> +				   spectrum_ipip.o spectrum_acl_flex_actions.o \
>> +				   spectrum_mr.o
>>  mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
>>  mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
>>  obj-$(CONFIG_MLXSW_MINIMAL)	+= mlxsw_minimal.o
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> index e907ec4..51d8b9f 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
>> @@ -153,6 +153,7 @@ struct mlxsw_sp {
>>  	struct mlxsw_sp_sb *sb;
>>  	struct mlxsw_sp_bridge *bridge;
>>  	struct mlxsw_sp_router *router;
>> +	struct mlxsw_sp_mr *mr;
>>  	struct mlxsw_afa *afa;
>>  	struct mlxsw_sp_acl *acl;
>>  	struct mlxsw_sp_fid_core *fid_core;
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> new file mode 100644
>> index 0000000..89b2e60
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> @@ -0,0 +1,1014 @@
>> +/*
>> + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
>> + * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
>> + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in the
>> + *    documentation and/or other materials provided with the distribution.
>> + * 3. Neither the names of the copyright holders nor the names of its
>> + *    contributors may be used to endorse or promote products derived from
>> + *    this software without specific prior written permission.
>> + *
>> + * Alternatively, this software may be distributed under the terms of the
>> + * GNU General Public License ("GPL") version 2 as published by the Free
>> + * Software Foundation.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
>> + * POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#include <linux/rhashtable.h>
>> +
>> +#include "spectrum_mr.h"
>> +#include "spectrum_router.h"
>> +
>> +struct mlxsw_sp_mr {
>> +	const struct mlxsw_sp_mr_ops *mr_ops;
>> +	void *catchall_route_priv;
>> +	struct delayed_work stats_update_dw;
>> +	struct list_head table_list;
>> +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
>> +	unsigned long priv[0];
>> +	/* priv has to be always the last item */
>> +};
>> +
>> +struct mlxsw_sp_mr_vif {
>> +	struct net_device *dev;
>> +	const struct mlxsw_sp_rif *rif;
>> +	unsigned long vif_flags;
>> +
>> +	/* A list of route_vif_entry structs that point to routes that the VIF
>> +	 * instance is used as one of the egress VIFs
>> +	 */
>> +	struct list_head route_evif_list;
>> +
>> +	/* A list of route_vif_entry structs that point to routes that the VIF
>> +	 * instance is used as an ingress VIF
>> +	 */
>> +	struct list_head route_ivif_list;
>> +};
>> +
>> +struct mlxsw_sp_mr_route_vif_entry {
>> +	struct list_head vif_node;
>> +	struct list_head route_node;
>> +	struct mlxsw_sp_mr_vif *mr_vif;
>> +	struct mlxsw_sp_mr_route *mr_route;
>> +};
>> +
>> +struct mlxsw_sp_mr_table {
>> +	struct list_head node;
>> +	enum mlxsw_sp_l3proto proto;
>> +	struct mlxsw_sp *mlxsw_sp;
>> +	u32 vr_id;
>> +	struct mlxsw_sp_mr_vif vifs[MAXVIFS];
>> +	struct list_head route_list;
>> +	struct rhashtable route_ht;
>> +	char catchall_route_priv[0];
>> +	/* catchall_route_priv has to be always the last item */
>> +};
>> +
>> +struct mlxsw_sp_mr_route {
>> +	struct list_head node;
>> +	struct rhash_head ht_node;
>> +	struct mlxsw_sp_mr_route_key key;
>> +	enum mlxsw_sp_mr_route_action route_action;
>> +	u16 min_mtu;
>> +	struct mfc_cache *mfc4;
>> +	void *route_priv;
>> +	const struct mlxsw_sp_mr_table *mr_table;
>> +	/* A list of route_vif_entry structs that point to the egress VIFs */
>> +	struct list_head evif_list;
>> +	/* A route_vif_entry struct that point to the ingress VIF */
>> +	struct mlxsw_sp_mr_route_vif_entry ivif;
>> +};
>> +
>> +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
>> +	.key_len = sizeof(struct mlxsw_sp_mr_route_key),
>> +	.key_offset = offsetof(struct mlxsw_sp_mr_route, key),
>> +	.head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node),
>> +	.automatic_shrinking = true,
>> +};
>> +
>> +static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif)
>> +{
>> +	return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
>> +}
>> +
>> +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
>> +{
>> +	return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif;
>> +}
>> +
>> +static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif)
>> +{
>> +	return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif;
>> +}
>> +
>> +static bool
>> +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	vifi_t ivif;
>> +
>> +	switch (mr_route->mr_table->proto) {
>> +	case MLXSW_SP_L3_PROTO_IPV4:
>> +		ivif = mr_route->mfc4->mfc_parent;
>> +		return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
>> +	case MLXSW_SP_L3_PROTO_IPV6:
>> +		/* fall through */
>> +	default:
>> +		WARN_ON_ONCE(1);
>> +	}
>> +	return false;
>> +}
>> +
>> +static int
>> +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve;
>> +	int valid_evifs = 0;
>> +
>> +	valid_evifs = 0;
> you are doing valid_evifs = 0 twice.

Didn't notice. I will fix and send v3.

Thanks.

>
>> +	list_for_each_entry(rve, &mr_route->evif_list, route_node)
>> +		if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
>> +			valid_evifs++;
>> +	return valid_evifs;
>> +}
>> +
>> +static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	switch (mr_route->mr_table->proto) {
>> +	case MLXSW_SP_L3_PROTO_IPV4:
>> +		return mr_route->key.source_mask.addr4 == INADDR_ANY;
>> +	case MLXSW_SP_L3_PROTO_IPV6:
>> +		/* fall through */
>> +	default:
>> +		WARN_ON_ONCE(1);
>> +	}
>> +	return false;
>> +}
>> +
>> +static enum mlxsw_sp_mr_route_action
>> +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve;
>> +
>> +	/* If the ingress port is not regular and resolved, trap the route */
>> +	if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
>> +		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +
>> +	/* The kernel does not match a (*,G) route that the ingress interface is
>> +	 * not one of the egress interfaces, so trap these kind of routes.
>> +	 */
>> +	if (mlxsw_sp_mr_route_starg(mr_route) &&
>> +	    !mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
>> +		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +
>> +	/* If the route has no valid eVIFs, trap it. */
>> +	if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route))
>> +		return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +
>> +	/* If either one of the eVIFs is not regular (VIF of type pimreg or
>> +	 * tunnel) or one of the VIFs has no matching RIF, trap the packet.
>> +	 */
>> +	list_for_each_entry(rve, &mr_route->evif_list, route_node) {
>> +		if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) ||
>> +		    mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif))
>> +			return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +	}
>> +	return MLXSW_SP_MR_ROUTE_ACTION_FORWARD;
>> +}
>> +
>> +static enum mlxsw_sp_mr_route_prio
>> +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	return mlxsw_sp_mr_route_starg(mr_route) ?
>> +		MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
>> +}
>> +
>> +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
>> +				   struct mlxsw_sp_mr_route_key *key,
>> +				   const struct mfc_cache *mfc)
>> +{
>> +	bool starg = (mfc->mfc_origin == INADDR_ANY);
>> +
>> +	memset(key, 0, sizeof(*key));
>> +	key->vrid = mr_table->vr_id;
>> +	key->proto = mr_table->proto;
>> +	key->group.addr4 = mfc->mfc_mcastgrp;
>> +	key->group_mask.addr4 = 0xffffffff;
>> +	key->source.addr4 = mfc->mfc_origin;
>> +	key->source_mask.addr4 = starg ? 0 : 0xffffffff;
>> +}
>> +
>> +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
>> +				       struct mlxsw_sp_mr_vif *mr_vif)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve;
>> +
>> +	rve = kzalloc(sizeof(*rve), GFP_KERNEL);
>> +	if (!rve)
>> +		return -ENOMEM;
>> +	rve->mr_route = mr_route;
>> +	rve->mr_vif = mr_vif;
>> +	list_add_tail(&rve->route_node, &mr_route->evif_list);
>> +	list_add_tail(&rve->vif_node, &mr_vif->route_evif_list);
>> +	return 0;
>> +}
>> +
>> +static void
>> +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> +	list_del(&rve->route_node);
>> +	list_del(&rve->vif_node);
>> +	kfree(rve);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route,
>> +					struct mlxsw_sp_mr_vif *mr_vif)
>> +{
>> +	mr_route->ivif.mr_route = mr_route;
>> +	mr_route->ivif.mr_vif = mr_vif;
>> +	list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	list_del(&mr_route->ivif.vif_node);
>> +}
>> +
>> +static int
>> +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table,
>> +			      struct mlxsw_sp_mr_route *mr_route,
>> +			      struct mlxsw_sp_mr_route_info *route_info)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve;
>> +	u16 *erif_indices;
>> +	u16 irif_index;
>> +	u16 erif = 0;
>> +
>> +	erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices),
>> +				     GFP_KERNEL);
>> +	if (!erif_indices)
>> +		return -ENOMEM;
>> +
>> +	list_for_each_entry(rve, &mr_route->evif_list, route_node) {
>> +		if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
>> +			u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> +
>> +			erif_indices[erif++] = rifi;
>> +		}
>> +	}
>> +
>> +	if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
>> +		irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif);
>> +	else
>> +		irif_index = 0;
>> +
>> +	route_info->irif_index = irif_index;
>> +	route_info->erif_indices = erif_indices;
>> +	route_info->min_mtu = mr_route->min_mtu;
>> +	route_info->route_action = mr_route->route_action;
>> +	route_info->erif_num = erif;
>> +	return 0;
>> +}
>> +
>> +static void
>> +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info)
>> +{
>> +	kfree(route_info->erif_indices);
>> +}
>> +
>> +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table,
>> +				   struct mlxsw_sp_mr_route *mr_route,
>> +				   bool replace)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	struct mlxsw_sp_mr_route_info route_info;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	int err;
>> +
>> +	err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info);
>> +	if (err)
>> +		return err;
>> +
>> +	if (!replace) {
>> +		struct mlxsw_sp_mr_route_params route_params;
>> +
>> +		mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size,
>> +					       GFP_KERNEL);
>> +		if (!mr_route->route_priv) {
>> +			err = -ENOMEM;
>> +			goto out;
>> +		}
>> +
>> +		route_params.key = mr_route->key;
>> +		route_params.value = route_info;
>> +		route_params.prio = mlxsw_sp_mr_route_prio(mr_route);
>> +		err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
>> +					       mr_route->route_priv,
>> +					       &route_params);
>> +		if (err)
>> +			kfree(mr_route->route_priv);
>> +	} else {
>> +		err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv,
>> +					       &route_info);
>> +	}
>> +out:
>> +	mlxsw_sp_mr_route_info_destroy(&route_info);
>> +	return err;
>> +}
>> +
>> +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
>> +				    struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> +	mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv);
>> +	kfree(mr_route->route_priv);
>> +}
>> +
>> +static struct mlxsw_sp_mr_route *
>> +mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
>> +			  struct mfc_cache *mfc)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
>> +	struct mlxsw_sp_mr_route *mr_route;
>> +	int err;
>> +	int i;
>> +
>> +	/* Allocate and init a new route and fill it with parameters */
>> +	mr_route = kzalloc(sizeof(*mr_table), GFP_KERNEL);
>> +	if (!mr_route)
>> +		return ERR_PTR(-ENOMEM);
>> +	INIT_LIST_HEAD(&mr_route->evif_list);
>> +	mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc);
>> +
>> +	/* Find min_mtu and link iVIF and eVIFs */
>> +	mr_route->min_mtu = ETH_MAX_MTU;
>> +	ipmr_cache_hold(mfc);
>> +	mr_route->mfc4 = mfc;
>> +	mr_route->mr_table = mr_table;
>> +	for (i = 0; i < MAXVIFS; i++) {
>> +		if (mfc->mfc_un.res.ttls[i] != 255) {
>> +			err = mlxsw_sp_mr_route_evif_link(mr_route,
>> +							  &mr_table->vifs[i]);
>> +			if (err)
>> +				goto err;
>> +			if (mr_table->vifs[i].dev &&
>> +			    mr_table->vifs[i].dev->mtu < mr_route->min_mtu)
>> +				mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
>> +		}
>> +	}
>> +	mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
>> +	if (err)
>> +		goto err;
>> +
>> +	mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
>> +	return mr_route;
>> +err:
>> +	ipmr_cache_put(mfc);
>> +	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
>> +		mlxsw_sp_mr_route_evif_unlink(rve);
>> +	kfree(mr_route);
>> +	return ERR_PTR(err);
>> +}
>> +
>> +static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
>> +				       struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
>> +
>> +	mlxsw_sp_mr_route_ivif_unlink(mr_route);
>> +	ipmr_cache_put(mr_route->mfc4);
>> +	list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
>> +		mlxsw_sp_mr_route_evif_unlink(rve);
>> +	kfree(mr_route);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
>> +				      struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	switch (mr_table->proto) {
>> +	case MLXSW_SP_L3_PROTO_IPV4:
>> +		mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
>> +		break;
>> +	case MLXSW_SP_L3_PROTO_IPV6:
>> +		/* fall through */
>> +	default:
>> +		WARN_ON_ONCE(1);
>> +	}
>> +}
>> +
>> +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
>> +					bool offload)
>> +{
>> +	switch (mr_route->mr_table->proto) {
>> +	case MLXSW_SP_L3_PROTO_IPV4:
>> +		if (offload)
>> +			mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
>> +		else
>> +			mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
>> +		break;
>> +	case MLXSW_SP_L3_PROTO_IPV6:
>> +		/* fall through */
>> +	default:
>> +		WARN_ON_ONCE(1);
>> +	}
>> +}
>> +
>> +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	bool offload;
>> +
>> +	offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +	mlxsw_sp_mr_mfc_offload_set(mr_route, offload);
>> +}
>> +
>> +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
>> +				    struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	mlxsw_sp_mr_mfc_offload_set(mr_route, false);
>> +	mlxsw_sp_mr_route_erase(mr_table, mr_route);
>> +	rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
>> +			       mlxsw_sp_mr_route_ht_params);
>> +	list_del(&mr_route->node);
>> +	mlxsw_sp_mr_route_destroy(mr_table, mr_route);
>> +}
>> +
>> +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
>> +			   struct mfc_cache *mfc, bool replace)
>> +{
>> +	struct mlxsw_sp_mr_route *mr_orig_route = NULL;
>> +	struct mlxsw_sp_mr_route *mr_route;
>> +	int err;
>> +
>> +	/* If the route is a (*,*) route, abort, as these kind of routes are
>> +	 * used for proxy routes.
>> +	 */
>> +	if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) {
>> +		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
>> +			 "Offloading proxy routes is not supported.\n");
> You are return err, why not use dev_err?


This err return value is properly handled by the caller (spectrum_router.c),
which will trigger the driver abort mechanism. The kernel will still be
functional, but the driver will stop offloading and eject all current offloaded
routes.

It is totally valid for a user to add a proxy route on machine with Spectrum,
but he should be warned that from now on, the routes go through slowpath. This
is why it is only a warning print.

Again, this is symmetric to the case of a failure in ipv4 and ipv6 route
offloading, which does not use dev_err either and only print with dev_warn.


>
>> +		return -EINVAL;
>> +	}
>> +
>> +	/* Create a new route */
>> +	mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
>> +	if (IS_ERR(mr_route))
>> +		return PTR_ERR(mr_route);
>> +
>> +	/* Find any route with a matching key */
>> +	mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht,
>> +					       &mr_route->key,
>> +					       mlxsw_sp_mr_route_ht_params);
>> +	if (replace) {
>> +		/* On replace case, make the route point to the new route_priv.
>> +		 */
>> +		if (WARN_ON(!mr_orig_route)) {
>> +			err = -ENOENT;
>> +			goto err_no_orig_route;
>> +		}
>> +		mr_route->route_priv = mr_orig_route->route_priv;
>> +	} else if (mr_orig_route) {
>> +		/* On non replace case, if another route with the same key was
>> +		 * found, abort, as duplicate routes are used for proxy routes.
>> +		 */
>> +		dev_warn(mr_table->mlxsw_sp->bus_info->dev,
>> +			 "Offloading proxy routes is not supported.\n");
> Same as here.

Same as above.

>
>> +		err = -EINVAL;
>> +		goto err_duplicate_route;
>> +	}
>> +
>> +	/* Put it in the table data-structures */
>> +	list_add_tail(&mr_route->node, &mr_table->route_list);
>> +	err = rhashtable_insert_fast(&mr_table->route_ht,
>> +				     &mr_route->ht_node,
>> +				     mlxsw_sp_mr_route_ht_params);
>> +	if (err)
>> +		goto err_rhashtable_insert;
>> +
>> +	/* Write the route to the hardware */
>> +	err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
>> +	if (err)
>> +		goto err_mr_route_write;
>> +
>> +	/* Destroy the original route */
>> +	if (replace) {
>> +		rhashtable_remove_fast(&mr_table->route_ht,
>> +				       &mr_orig_route->ht_node,
>> +				       mlxsw_sp_mr_route_ht_params);
>> +		list_del(&mr_orig_route->node);
>> +		mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route);
>> +	}
>> +
>> +	mlxsw_sp_mr_mfc_offload_update(mr_route);
>> +	return 0;
>> +
>> +err_mr_route_write:
>> +	rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
>> +			       mlxsw_sp_mr_route_ht_params);
>> +err_rhashtable_insert:
>> +	list_del(&mr_route->node);
>> +err_no_orig_route:
>> +err_duplicate_route:
>> +	mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
>> +	return err;
>> +}
>> +
>> +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
>> +			    struct mfc_cache *mfc)
>> +{
>> +	struct mlxsw_sp_mr_route *mr_route;
>> +	struct mlxsw_sp_mr_route_key key;
>> +
>> +	mlxsw_sp_mr_route4_key(mr_table, &key, mfc);
>> +	mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
>> +					  mlxsw_sp_mr_route_ht_params);
>> +	if (mr_route)
>> +		__mlxsw_sp_mr_route_del(mr_table, mr_route);
>> +}
>> +
>> +/* Should be called after the VIF struct is updated */
>> +static int
>> +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table,
>> +			       struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	enum mlxsw_sp_mr_route_action route_action;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	u16 irif_index;
>> +	int err;
>> +
>> +	route_action = mlxsw_sp_mr_route_action(rve->mr_route);
>> +	if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
>> +		return 0;
>> +
>> +	/* rve->mr_vif->rif is guaranteed to be valid at this stage */
>> +	irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> +	err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv,
>> +					    irif_index);
>> +	if (err)
>> +		return err;
>> +
>> +	err = mr->mr_ops->route_action_update(mlxsw_sp,
>> +					      rve->mr_route->route_priv,
>> +					      route_action);
>> +	if (err)
>> +		/* No need to rollback here because the iRIF change only takes
>> +		 * place after the action has been updated.
>> +		 */
>> +		return err;
>> +
>> +	rve->mr_route->route_action = route_action;
>> +	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> +	return 0;
>> +}
>> +
>> +static void
>> +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table,
>> +				 struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> +	mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv,
>> +					MLXSW_SP_MR_ROUTE_ACTION_TRAP);
>> +	rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> +}
>> +
>> +/* Should be called after the RIF struct is updated */
>> +static int
>> +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
>> +			       struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	enum mlxsw_sp_mr_route_action route_action;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	u16 erif_index = 0;
>> +	int err;
>> +
>> +	/* Update the route action, as the new eVIF can be a tunnel or a pimreg
>> +	 * device which will require updating the action.
>> +	 */
>> +	route_action = mlxsw_sp_mr_route_action(rve->mr_route);
>> +	if (route_action != rve->mr_route->route_action) {
>> +		err = mr->mr_ops->route_action_update(mlxsw_sp,
>> +						      rve->mr_route->route_priv,
>> +						      route_action);
>> +		if (err)
>> +			return err;
>> +	}
>> +
>> +	/* Add the eRIF */
>> +	if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
>> +		erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> +		err = mr->mr_ops->route_erif_add(mlxsw_sp,
>> +						 rve->mr_route->route_priv,
>> +						 erif_index);
>> +		if (err)
>> +			goto err_route_erif_add;
>> +	}
>> +
>> +	/* Update the minimum MTU */
>> +	if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) {
>> +		rve->mr_route->min_mtu = rve->mr_vif->dev->mtu;
>> +		err = mr->mr_ops->route_min_mtu_update(mlxsw_sp,
>> +						       rve->mr_route->route_priv,
>> +						       rve->mr_route->min_mtu);
>> +		if (err)
>> +			goto err_route_min_mtu_update;
>> +	}
>> +
>> +	rve->mr_route->route_action = route_action;
>> +	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> +	return 0;
>> +
>> +err_route_min_mtu_update:
>> +	if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
>> +		mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
>> +					   erif_index);
>> +err_route_erif_add:
>> +	if (route_action != rve->mr_route->route_action)
>> +		mr->mr_ops->route_action_update(mlxsw_sp,
>> +						rve->mr_route->route_priv,
>> +						rve->mr_route->route_action);
>> +	return err;
>> +}
>> +
>> +/* Should be called before the RIF struct is updated */
>> +static void
>> +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table,
>> +				 struct mlxsw_sp_mr_route_vif_entry *rve)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	enum mlxsw_sp_mr_route_action route_action;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	u16 rifi;
>> +
>> +	/* If the unresolved RIF was not valid, no need to delete it */
>> +	if (!mlxsw_sp_mr_vif_valid(rve->mr_vif))
>> +		return;
>> +
>> +	/* Update the route action: if there is only one valid eVIF in the
>> +	 * route, set the action to trap as the VIF deletion will lead to zero
>> +	 * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to
>> +	 * determine the route action.
>> +	 */
>> +	if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1)
>> +		route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
>> +	else
>> +		route_action = mlxsw_sp_mr_route_action(rve->mr_route);
>> +	if (route_action != rve->mr_route->route_action)
>> +		mr->mr_ops->route_action_update(mlxsw_sp,
>> +						rve->mr_route->route_priv,
>> +						route_action);
>> +
>> +	/* Delete the erif from the route */
>> +	rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
>> +	mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi);
>> +	rve->mr_route->route_action = route_action;
>> +	mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
>> +}
>> +
>> +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
>> +				   struct net_device *dev,
>> +				   struct mlxsw_sp_mr_vif *mr_vif,
>> +				   unsigned long vif_flags,
>> +				   const struct mlxsw_sp_rif *rif)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *irve, *erve;
>> +	int err;
>> +
>> +	/* Update the VIF */
>> +	mr_vif->dev = dev;
>> +	mr_vif->rif = rif;
>> +	mr_vif->vif_flags = vif_flags;
>> +
>> +	/* Update all routes where this VIF is used as an unresolved iRIF */
>> +	list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) {
>> +		err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve);
>> +		if (err)
>> +			goto err_irif_unresolve;
>> +	}
>> +
>> +	/* Update all routes where this VIF is used as an unresolved eRIF */
>> +	list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) {
>> +		err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve);
>> +		if (err)
>> +			goto err_erif_unresolve;
>> +	}
>> +	return 0;
>> +
>> +err_erif_unresolve:
>> +	list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list,
>> +					 vif_node)
>> +		mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
>> +err_irif_unresolve:
>> +	list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list,
>> +					 vif_node)
>> +		mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
>> +	mr_vif->rif = NULL;
>> +	return err;
>> +}
>> +
>> +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table,
>> +				      struct net_device *dev,
>> +				      struct mlxsw_sp_mr_vif *mr_vif)
>> +{
>> +	struct mlxsw_sp_mr_route_vif_entry *rve;
>> +
>> +	/* Update all routes where this VIF is used as an unresolved eRIF */
>> +	list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node)
>> +		mlxsw_sp_mr_route_evif_unresolve(mr_table, rve);
>> +
>> +	/* Update all routes where this VIF is used as an unresolved iRIF */
>> +	list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node)
>> +		mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve);
>> +
>> +	/* Update the VIF */
>> +	mr_vif->dev = dev;
>> +	mr_vif->rif = NULL;
>> +}
>> +
>> +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
>> +			struct net_device *dev, vifi_t vif_index,
>> +			unsigned long vif_flags, const struct mlxsw_sp_rif *rif)
>> +{
>> +	struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
>> +
>> +	if (WARN_ON(vif_index >= MAXVIFS))
>> +		return -EINVAL;
>> +	if (mr_vif->dev)
>> +		return -EEXIST;
> -ENODEV?


No, Look carefully. The error is returned if mr_vif->dev is *not* NULL, which
means that the VIF was already added before, hence -EEXIST.

This error will happen if the mlxsw_sp_mr_vif_add function is called twice with
the same VIF index.


>
>> +	return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif);
>> +}
>> +
>> +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index)
>> +{
>> +	struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
>> +
>> +	if (WARN_ON(vif_index >= MAXVIFS))
>> +		return;
>> +	if (WARN_ON(!mr_vif->dev))
>> +		return;
>> +	mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif);
>> +}
>> +
>> +struct mlxsw_sp_mr_vif *
>> +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table,
>> +			   const struct net_device *dev)
>> +{
>> +	vifi_t vif_index;
>> +
>> +	for (vif_index = 0; vif_index < MAXVIFS; vif_index++)
>> +		if (mr_table->vifs[vif_index].dev == dev)
>> +			return &mr_table->vifs[vif_index];
>> +	return NULL;
>> +}
>> +
>> +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
>> +			const struct mlxsw_sp_rif *rif)
>> +{
>> +	const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
>> +	struct mlxsw_sp_mr_vif *mr_vif;
>> +
>> +	if (!rif_dev)
>> +		return 0;
>> +
>> +	mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
>> +	if (!mr_vif)
>> +		return 0;
>> +	return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif,
>> +				       mr_vif->vif_flags, rif);
>> +}
>> +
>> +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
>> +			 const struct mlxsw_sp_rif *rif)
>> +{
>> +	const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
>> +	struct mlxsw_sp_mr_vif *mr_vif;
>> +
>> +	if (!rif_dev)
>> +		return;
>> +
>> +	mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
>> +	if (!mr_vif)
>> +		return;
>> +	mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif);
>> +}
>> +
>> +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
>> +				const struct mlxsw_sp_rif *rif, int mtu)
>> +{
>> +	const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	struct mlxsw_sp_mr_route_vif_entry *rve;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	struct mlxsw_sp_mr_vif *mr_vif;
>> +
>> +	if (!rif_dev)
>> +		return;
>> +
>> +	/* Search for a VIF that use that RIF */
>> +	mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
>> +	if (!mr_vif)
>> +		return;
>> +
>> +	/* Update all the routes that uses that VIF as eVIF */
>> +	list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) {
>> +		if (mtu < rve->mr_route->min_mtu) {
>> +			rve->mr_route->min_mtu = mtu;
>> +			mr->mr_ops->route_min_mtu_update(mlxsw_sp,
>> +							 rve->mr_route->route_priv,
>> +							 mtu);
>> +		}
>> +	}
>> +}
>> +
>> +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
>> +						   u32 vr_id,
>> +						   enum mlxsw_sp_l3proto proto)
>> +{
>> +	struct mlxsw_sp_mr_route_params catchall_route_params = {
>> +		.prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
>> +		.key = {
>> +			.vrid = vr_id,
>> +		},
>> +		.value = {
>> +			.route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,
>> +		}
>> +	};
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	struct mlxsw_sp_mr_table *mr_table;
>> +	int err;
>> +	int i;
>> +
>> +	mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size,
>> +			   GFP_KERNEL);
>> +	if (!mr_table)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	mr_table->vr_id = vr_id;
>> +	mr_table->mlxsw_sp = mlxsw_sp;
>> +	mr_table->proto = proto;
>> +	INIT_LIST_HEAD(&mr_table->route_list);
>> +
>> +	err = rhashtable_init(&mr_table->route_ht,
>> +			      &mlxsw_sp_mr_route_ht_params);
>> +	if (err)
>> +		goto err_route_rhashtable_init;
>> +
>> +	for (i = 0; i < MAXVIFS; i++) {
>> +		INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
>> +		INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
>> +	}
>> +
>> +	err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
>> +				       mr_table->catchall_route_priv,
>> +				       &catchall_route_params);
>> +	if (err)
>> +		goto err_ops_route_create;
>> +	list_add_tail(&mr_table->node, &mr->table_list);
>> +	return mr_table;
>> +
>> +err_ops_route_create:
>> +	rhashtable_destroy(&mr_table->route_ht);
>> +err_route_rhashtable_init:
>> +	kfree(mr_table);
>> +	return ERR_PTR(err);
>> +}
>> +
>> +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
>> +{
>> +	struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> +	WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
>> +	list_del(&mr_table->node);
>> +	mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
>> +				  &mr_table->catchall_route_priv);
>> +	rhashtable_destroy(&mr_table->route_ht);
>> +	kfree(mr_table);
>> +}
>> +
>> +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
>> +{
>> +	struct mlxsw_sp_mr_route *mr_route, *tmp;
>> +	int i;
>> +
>> +	list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
>> +		__mlxsw_sp_mr_route_del(mr_table, mr_route);
>> +
>> +	for (i = 0; i < MAXVIFS; i++) {
>> +		mr_table->vifs[i].dev = NULL;
>> +		mr_table->vifs[i].rif = NULL;
>> +	}
>> +}
>> +
>> +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < MAXVIFS; i++)
>> +		if (mr_table->vifs[i].dev)
>> +			return false;
>> +	return list_empty(&mr_table->route_list);
>> +}
>> +
>> +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
>> +					   struct mlxsw_sp_mr_route *mr_route)
>> +{
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +	u64 packets, bytes;
>> +
>> +	if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
>> +		return;
>> +
>> +	mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
>> +				&bytes);
>> +
>> +	switch (mr_route->mr_table->proto) {
>> +	case MLXSW_SP_L3_PROTO_IPV4:
>> +		if (mr_route->mfc4->mfc_un.res.pkt != packets)
>> +			mr_route->mfc4->mfc_un.res.lastuse = jiffies;
>> +		mr_route->mfc4->mfc_un.res.pkt = packets;
>> +		mr_route->mfc4->mfc_un.res.bytes = bytes;
>> +		break;
>> +	case MLXSW_SP_L3_PROTO_IPV6:
>> +		/* fall through */
>> +	default:
>> +		WARN_ON_ONCE(1);
>> +	}
>> +}
>> +
>> +static void mlxsw_sp_mr_stats_update(struct work_struct *work)
>> +{
>> +	struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr,
>> +					      stats_update_dw.work);
>> +	struct mlxsw_sp_mr_table *mr_table;
>> +	struct mlxsw_sp_mr_route *mr_route;
>> +	unsigned long interval;
>> +
>> +	rtnl_lock();
>> +	list_for_each_entry(mr_table, &mr->table_list, node)
>> +		list_for_each_entry(mr_route, &mr_table->route_list, node)
>> +			mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
>> +						       mr_route);
>> +	rtnl_unlock();
>> +
>> +	interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
>> +	mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
>> +}
>> +
>> +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
>> +		     const struct mlxsw_sp_mr_ops *mr_ops)
>> +{
>> +	struct mlxsw_sp_mr *mr;
>> +	unsigned long interval;
>> +	int err;
>> +
>> +	mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL);
>> +	if (!mr)
>> +		return -ENOMEM;
>> +	mr->mr_ops = mr_ops;
>> +	mlxsw_sp->mr = mr;
>> +	INIT_LIST_HEAD(&mr->table_list);
>> +
>> +	err = mr_ops->init(mlxsw_sp, mr->priv);
>> +	if (err)
>> +		goto err;
>> +
>> +	/* Create the delayed work for counter updates */
>> +	INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update);
>> +	interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
>> +	mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
>> +	return 0;
>> +err:
>> +	kfree(mr);
>> +	return err;
>> +}
>> +
>> +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
>> +{
>> +	struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
>> +
>> +	cancel_delayed_work_sync(&mr->stats_update_dw);
>> +	mr->mr_ops->fini(mr->priv);
>> +	kfree(mr);
>> +}
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>> new file mode 100644
>> index 0000000..c851b23
>> --- /dev/null
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>> @@ -0,0 +1,133 @@
>> +/*
>> + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
>> + * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
>> + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in the
>> + *    documentation and/or other materials provided with the distribution.
>> + * 3. Neither the names of the copyright holders nor the names of its
>> + *    contributors may be used to endorse or promote products derived from
>> + *    this software without specific prior written permission.
>> + *
>> + * Alternatively, this software may be distributed under the terms of the
>> + * GNU General Public License ("GPL") version 2 as published by the Free
>> + * Software Foundation.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
>> + * POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifndef _MLXSW_SPECTRUM_MCROUTER_H
>> +#define _MLXSW_SPECTRUM_MCROUTER_H
>> +
>> +#include <linux/mroute.h>
>> +#include "spectrum_router.h"
>> +#include "spectrum.h"
>> +
>> +enum mlxsw_sp_mr_route_action {
>> +	MLXSW_SP_MR_ROUTE_ACTION_FORWARD,
>> +	MLXSW_SP_MR_ROUTE_ACTION_TRAP,
>> +};
>> +
>> +enum mlxsw_sp_mr_route_prio {
>> +	MLXSW_SP_MR_ROUTE_PRIO_SG,
>> +	MLXSW_SP_MR_ROUTE_PRIO_STARG,
>> +	MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
>> +	__MLXSW_SP_MR_ROUTE_PRIO_MAX
>> +};
>> +
>> +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
>> +
>> +struct mlxsw_sp_mr_route_key {
>> +	int vrid;
>> +	enum mlxsw_sp_l3proto proto;
>> +	union mlxsw_sp_l3addr group;
>> +	union mlxsw_sp_l3addr group_mask;
>> +	union mlxsw_sp_l3addr source;
>> +	union mlxsw_sp_l3addr source_mask;
>> +};
>> +
>> +struct mlxsw_sp_mr_route_info {
>> +	enum mlxsw_sp_mr_route_action route_action;
>> +	u16 irif_index;
>> +	u16 *erif_indices;
>> +	size_t erif_num;
>> +	u16 min_mtu;
>> +};
>> +
>> +struct mlxsw_sp_mr_route_params {
>> +	struct mlxsw_sp_mr_route_key key;
>> +	struct mlxsw_sp_mr_route_info value;
>> +	enum mlxsw_sp_mr_route_prio prio;
>> +};
>> +
>> +struct mlxsw_sp_mr_ops {
>> +	int priv_size;
>> +	int route_priv_size;
>> +	int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
>> +	int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
>> +			    void *route_priv,
>> +			    struct mlxsw_sp_mr_route_params *route_params);
>> +	int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +			    struct mlxsw_sp_mr_route_info *route_info);
>> +	int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +			   u64 *packets, u64 *bytes);
>> +	int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +				   enum mlxsw_sp_mr_route_action route_action);
>> +	int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +				    u16 min_mtu);
>> +	int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +				 u16 irif_index);
>> +	int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +			      u16 erif_index);
>> +	int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
>> +			      u16 erif_index);
>> +	void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
>> +			      void *route_priv);
>> +	void (*fini)(void *priv);
>> +};
>> +
>> +struct mlxsw_sp_mr;
>> +struct mlxsw_sp_mr_table;
>> +
>> +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
>> +		     const struct mlxsw_sp_mr_ops *mr_ops);
>> +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
>> +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
>> +			   struct mfc_cache *mfc, bool replace);
>> +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
>> +			    struct mfc_cache *mfc);
>> +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
>> +			struct net_device *dev, vifi_t vif_index,
>> +			unsigned long vif_flags,
>> +			const struct mlxsw_sp_rif *rif);
>> +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index);
>> +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
>> +			const struct mlxsw_sp_rif *rif);
>> +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
>> +			 const struct mlxsw_sp_rif *rif);
>> +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
>> +				const struct mlxsw_sp_rif *rif, int mtu);
>> +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
>> +						   u32 tb_id,
>> +						   enum mlxsw_sp_l3proto proto);
>> +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table);
>> +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table);
>> +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table);
>> +
>> +#endif
>>

^ permalink raw reply

* Re: [patch net-next v2 06/12] net: mroute: Check if rule is a default rule
From: Yotam Gigi @ 2017-09-25  5:39 UTC (permalink / raw)
  To: Yunsheng Lin, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <93dcea9d-94eb-39cf-4102-e5c59fb2dfa5@huawei.com>

On 09/25/2017 04:28 AM, Yunsheng Lin wrote:
> Hi, Jiri
>
> On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> When the ipmr starts, it adds one default FIB rule that matches all packets
>> and sends them to the DEFAULT (multicast) FIB table. A more complex rule
>> can be added by user to specify that for a specific interface, a packet
>> should be look up at either an arbitrary table or according to the l3mdev
>> of the interface.
>>
>> For drivers willing to offload the ipmr logic into a hardware but don't
>> want to offload all the FIB rules functionality, provide a function that
>> can indicate whether the FIB rule is the default multicast rule, thus only
>> one routing table is needed.
>>
>> This way, a driver can register to the FIB notification chain, get
>> notifications about FIB rules added and trigger some kind of an internal
>> abort mechanism when a non default rule is added by the user.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>  include/linux/mroute.h |  7 +++++++
>>  net/ipv4/ipmr.c        | 10 ++++++++++
>>  2 files changed, 17 insertions(+)
>>
>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>> index 5566580..b072a84 100644
>> --- a/include/linux/mroute.h
>> +++ b/include/linux/mroute.h
>> @@ -5,6 +5,7 @@
>>  #include <linux/pim.h>
>>  #include <linux/rhashtable.h>
>>  #include <net/sock.h>
>> +#include <net/fib_rules.h>
>>  #include <net/fib_notifier.h>
>>  #include <uapi/linux/mroute.h>
>>  
>> @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
>>  int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
>>  int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
>>  int ip_mr_init(void);
>> +bool ipmr_rule_default(const struct fib_rule *rule);
>>  #else
>>  static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
>>  				       char __user *optval, unsigned int optlen)
>> @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt)
>>  {
>>  	return 0;
>>  }
>> +
>> +static inline bool ipmr_rule_default(const struct fib_rule *rule)
>> +{
>> +	return true;
>> +}
>>  #endif
>>  
>>  struct vif_device {
>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>> index 2a795d2..a714f55 100644
>> --- a/net/ipv4/ipmr.c
>> +++ b/net/ipv4/ipmr.c
>> @@ -320,6 +320,16 @@ static unsigned int ipmr_rules_seq_read(struct net *net)
>>  }
>>  #endif
>>  
>> +bool ipmr_rule_default(const struct fib_rule *rule)
>> +{
>> +#if IS_ENABLED(CONFIG_FIB_RULES)
>> +	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
>> +#else
>> +	return true;
>> +#endif
> In patch 02, You have the following, can you do the same for the above?
> +#ifdef CONFIG_IP_MROUTE
> +void ipmr_cache_free(struct mfc_cache *mfc_cache);
> +#else
> +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
> +{
> +}
> +#endif

OK.

>> +}
>> +EXPORT_SYMBOL(ipmr_rule_default);
>> +
>>  static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>>  				const void *ptr)
>>  {
>>

^ permalink raw reply

* Re: [patch net-next v2 03/12] ipmr: Add FIB notification access functions
From: Yotam Gigi @ 2017-09-25  5:38 UTC (permalink / raw)
  To: Yunsheng Lin, Jiri Pirko, netdev; +Cc: davem, idosch, mlxsw, nikolay, andrew
In-Reply-To: <fbf5db3b-a324-eed2-e6b8-b4b89e11f3c3@huawei.com>

On 09/25/2017 04:19 AM, Yunsheng Lin wrote:
> Hi, Jiri
>
> On 2017/9/25 1:22, Jiri Pirko wrote:
>> From: Yotam Gigi <yotamg@mellanox.com>
>>
>> Make the ipmr module register as a FIB notifier. To do that, implement both
>> the ipmr_seq_read and ipmr_dump ops.
>>
>> The ipmr_seq_read op returns a sequence counter that is incremented on
>> every notification related operation done by the ipmr. To implement that,
>> add a sequence counter in the netns_ipv4 struct and increment it whenever a
>> new MFC route or VIF are added or deleted. The sequence operations are
>> protected by the RTNL lock.
>>
>> The ipmr_dump iterates the list of MFC routes and the list of VIF entries
>> and sends notifications about them. The entries dump is done under RCU
>> where the VIF dump uses the mrt_lock too, as the vif->dev field can change
>> under RCU.
>>
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>> v1->v2:
>>  - Take the mrt_lock when dumping VIF entries.
>> ---
>>  include/linux/mroute.h   |  15 ++++++
>>  include/net/netns/ipv4.h |   3 ++
>>  net/ipv4/ipmr.c          | 137 ++++++++++++++++++++++++++++++++++++++++++++++-
>>  3 files changed, 153 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/mroute.h b/include/linux/mroute.h
>> index 10028f2..54c5cb8 100644
>> --- a/include/linux/mroute.h
>> +++ b/include/linux/mroute.h
>> @@ -5,6 +5,7 @@
>>  #include <linux/pim.h>
>>  #include <linux/rhashtable.h>
>>  #include <net/sock.h>
>> +#include <net/fib_notifier.h>
>>  #include <uapi/linux/mroute.h>
>>  
>>  #ifdef CONFIG_IP_MROUTE
>> @@ -58,6 +59,14 @@ struct vif_device {
>>  	int		link;			/* Physical interface index	*/
>>  };
>>  
>> +struct vif_entry_notifier_info {
>> +	struct fib_notifier_info info;
>> +	struct net_device *dev;
>> +	vifi_t vif_index;
>> +	unsigned short vif_flags;
>> +	u32 tb_id;
>> +};
>> +
>>  #define VIFF_STATIC 0x8000
>>  
>>  #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
>> @@ -146,6 +155,12 @@ struct mfc_cache {
>>  	struct rcu_head	rcu;
>>  };
>>  
>> +struct mfc_entry_notifier_info {
>> +	struct fib_notifier_info info;
>> +	struct mfc_cache *mfc;
>> +	u32 tb_id;
>> +};
>> +
>>  struct rtmsg;
>>  int ipmr_get_route(struct net *net, struct sk_buff *skb,
>>  		   __be32 saddr, __be32 daddr,
>> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
>> index 8387f09..abc84d9 100644
>> --- a/include/net/netns/ipv4.h
>> +++ b/include/net/netns/ipv4.h
>> @@ -163,6 +163,9 @@ struct netns_ipv4 {
>>  	struct fib_notifier_ops	*notifier_ops;
>>  	unsigned int	fib_seq;	/* protected by rtnl_mutex */
>>  
>> +	struct fib_notifier_ops	*ipmr_notifier_ops;
> Can we add a const here?

It cannot be const as it get initialized it in ipmr_notifier_init.

>
>> +	unsigned int	ipmr_seq;	/* protected by rtnl_mutex */
>> +
>>  	atomic_t	rt_genid;
>>  };
>>  #endif
>> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
>> index 86dc5f9..49879c3 100644
>> --- a/net/ipv4/ipmr.c
>> +++ b/net/ipv4/ipmr.c
>> @@ -264,6 +264,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>>  	fib_rules_unregister(net->ipv4.mr_rules_ops);
>>  	rtnl_unlock();
>>  }
>> +
>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>> +{
>> +	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
>> +}
>> +
>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>> +{
>> +	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
>> +}
>>  #else
>>  #define ipmr_for_each_table(mrt, net) \
>>  	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
>> @@ -298,6 +308,16 @@ static void __net_exit ipmr_rules_exit(struct net *net)
>>  	net->ipv4.mrt = NULL;
>>  	rtnl_unlock();
>>  }
>> +
>> +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
>> +{
>> +	return 0;
>> +}
>> +
>> +static unsigned int ipmr_rules_seq_read(struct net *net)
>> +{
>> +	return 0;
>> +}
>>  #endif
>>  
>>  static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
>> @@ -587,6 +607,43 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
>>  }
>>  #endif
>>  
>> +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
>> +					struct net *net,
>> +					enum fib_event_type event_type,
>> +					struct vif_device *vif,
>> +					vifi_t vif_index, u32 tb_id)
>> +{
>> +	struct vif_entry_notifier_info info = {
>> +		.info = {
>> +			.family = RTNL_FAMILY_IPMR,
>> +			.net = net,
>> +		},
>> +		.dev = vif->dev,
>> +		.vif_index = vif_index,
>> +		.vif_flags = vif->flags,
>> +		.tb_id = tb_id,
>> +	};
> We only use info.info which is fib_notifier_info, the
> vif_entry_notifier_info seems to be not needed, why not just
> use fib_notifier_info?

No, that's not true.

The driver gets the notification with a pointer to a fib_notifier_info struct,
and according to the type field uses container_of to get to the parent struct,
which in this case is vif_entry_notifier_info. All the fields here are needed.
You can see this code in patch 10.

By the way, this function is completely symmetric to fib4 (which is in
fib_trie.c +88) and fib6 (which is in ip6_fib +336) notify functions, who uses
the exact same process.

>
>> +
>> +	return call_fib_notifier(nb, net, event_type, &info.info);
>> +}
>> +
>> +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
>> +					struct net *net,
>> +					enum fib_event_type event_type,
>> +					struct mfc_cache *mfc, u32 tb_id)
>> +{
>> +	struct mfc_entry_notifier_info info = {
>> +		.info = {
>> +			.family = RTNL_FAMILY_IPMR,
>> +			.net = net,
>> +		},
>> +		.mfc = mfc,
>> +		.tb_id = tb_id
>> +	};
>> +
> As above.


As above.


>
>> +	return call_fib_notifier(nb, net, event_type, &info.info);
>> +}
>> +
>>  /**
>>   *	vif_delete - Delete a VIF entry
>>   *	@notify: Set to 1, if the caller is a notifier_call
>> @@ -3050,14 +3107,87 @@ static const struct net_protocol pim_protocol = {
>>  };
>>  #endif
>>  
>> +static unsigned int ipmr_seq_read(struct net *net)
>> +{
>> +	ASSERT_RTNL();
>> +
>> +	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
>> +}
>> +
>> +static int ipmr_dump(struct net *net, struct notifier_block *nb)
>> +{
>> +	struct mr_table *mrt;
>> +	int err;
>> +
>> +	err = ipmr_rules_dump(net, nb);
>> +	if (err)
>> +		return err;
>> +
>> +	ipmr_for_each_table(mrt, net) {
>> +		struct vif_device *v = &mrt->vif_table[0];
>> +		struct mfc_cache *mfc;
>> +		int vifi;
>> +
>> +		/* Notifiy on table VIF entries */
>> +		read_lock(&mrt_lock);
>> +		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
>> +			if (!v->dev)
>> +				continue;
>> +
>> +			call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
>> +						     v, vifi, mrt->id);
>> +		}
>> +		read_unlock(&mrt_lock);
>> +
>> +		/* Notify on table MFC entries */
>> +		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
>> +			call_ipmr_mfc_entry_notifier(nb, net,
>> +						     FIB_EVENT_ENTRY_ADD, mfc,
>> +						     mrt->id);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static const struct fib_notifier_ops ipmr_notifier_ops_template = {
>> +	.family		= RTNL_FAMILY_IPMR,
>> +	.fib_seq_read	= ipmr_seq_read,
>> +	.fib_dump	= ipmr_dump,
>> +	.owner		= THIS_MODULE,
>> +};
>> +
>> +int __net_init ipmr_notifier_init(struct net *net)
>> +{
>> +	struct fib_notifier_ops *ops;
>> +
>> +	net->ipv4.ipmr_seq = 0;
>> +
>> +	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
>> +	if (IS_ERR(ops))
>> +		return PTR_ERR(ops);
>> +	net->ipv4.ipmr_notifier_ops = ops;
>> +
>> +	return 0;
>> +}
>> +
>> +static void __net_exit ipmr_notifier_exit(struct net *net)
>> +{
>> +	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
>> +	net->ipv4.ipmr_notifier_ops = NULL;
>> +}
>> +
>>  /* Setup for IP multicast routing */
>>  static int __net_init ipmr_net_init(struct net *net)
>>  {
>>  	int err;
>>  
>> +	err = ipmr_notifier_init(net);
>> +	if (err)
>> +		goto ipmr_notifier_fail;
>> +
>>  	err = ipmr_rules_init(net);
>>  	if (err < 0)
>> -		goto fail;
>> +		goto ipmr_rules_fail;
>>  
>>  #ifdef CONFIG_PROC_FS
>>  	err = -ENOMEM;
>> @@ -3074,7 +3204,9 @@ static int __net_init ipmr_net_init(struct net *net)
>>  proc_vif_fail:
>>  	ipmr_rules_exit(net);
>>  #endif
>> -fail:
>> +ipmr_rules_fail:
>> +	ipmr_notifier_exit(net);
>> +ipmr_notifier_fail:
>>  	return err;
>>  }
>>  
>> @@ -3084,6 +3216,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
>>  	remove_proc_entry("ip_mr_cache", net->proc_net);
>>  	remove_proc_entry("ip_mr_vif", net->proc_net);
>>  #endif
>> +	ipmr_notifier_exit(net);
>>  	ipmr_rules_exit(net);
>>  }
>>  
>>

^ permalink raw reply

* Re: [PATCH net-next] sch_netem: faster rb tree removal
From: Eric Dumazet @ 2017-09-25  5:27 UTC (permalink / raw)
  To: David Ahern; +Cc: David Miller, netdev, Stephen Hemminger
In-Reply-To: <aa146e80-4cfd-888f-f6d2-bf4ec0eb7373@gmail.com>

On Sun, 2017-09-24 at 20:05 -0600, David Ahern wrote:
> On 9/24/17 7:57 PM, David Ahern wrote:

> > Hi Eric:
> > 
> > I'm guessing the cost is in the rb_first and rb_next computations. Did
> > you consider something like this:
> > 
> >         struct rb_root *root
> >         struct rb_node **p = &root->rb_node;
> > 
> >         while (*p != NULL) {
> >                 struct foobar *fb;
> > 
> >                 fb = container_of(*p, struct foobar, rb_node);
> >                 // fb processing
> 		  rb_erase(&nh->rb_node, root);
> 
> >                 p = &root->rb_node;
> >         }
> > 
> 
> Oops, dropped the rb_erase in my consolidating the code to this snippet.

Hi David

This gives about same numbers than method_1

I tried with 10^7 skbs in the tree :

Your suggestion takes 66ns per skb, while the one I chose takes 37ns per
skb.

Thanks.

^ permalink raw reply

* Re: [PATCH] mac80211: aead api to reduce redundancy
From: Johannes Berg @ 2017-09-25  5:22 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Xiang Gao, David S. Miller, linux-crypto, linux-kernel,
	linux-wireless, netdev
In-Reply-To: <20170925045641.GB22947@gondor.apana.org.au>

On Mon, 2017-09-25 at 12:56 +0800, Herbert Xu wrote:
> On Sun, Sep 24, 2017 at 07:42:46PM +0200, Johannes Berg wrote:
> > 
> > Unrelated to this, I'm not sure whose tree this should go through -
> > probably Herbert's (or DaveM's with his ACK? not sure if there's a
> > crypto tree?) or so?
> 
> Since you're just rearranging code invoking the crypto API, rather
> than touching actual crypto API code, I think you should handle it
> as you do with any other wireless patch.

The code moves to crypto/ though, and I'm not even sure I can vouch for
the Makefile choice there.

johannes

^ permalink raw reply

* Re: [PATCH] mac80211: aead api to reduce redundancy
From: Herbert Xu @ 2017-09-25  4:56 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Xiang Gao, David S. Miller, linux-crypto, linux-kernel,
	linux-wireless, netdev
In-Reply-To: <1506274966.2909.7.camel@sipsolutions.net>

On Sun, Sep 24, 2017 at 07:42:46PM +0200, Johannes Berg wrote:
>
> Unrelated to this, I'm not sure whose tree this should go through -
> probably Herbert's (or DaveM's with his ACK? not sure if there's a
> crypto tree?) or so?

Since you're just rearranging code invoking the crypto API, rather
than touching actual crypto API code, I think you should handle it
as you do with any other wireless patch.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [PATCH v4 1/9] brcmsmac: make some local variables 'static const' to reduce stack size
From: Kalle Valo @ 2017-09-25  4:33 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Arend van Spriel, Franky Lin, Hante Meuleman, Chi-Hsien Lin,
	Wright Feng, Mauro Carvalho Chehab, Jiri Pirko, David S. Miller,
	Andrey Ryabinin, Alexander Potapenko, Dmitry Vyukov,
	Masahiro Yamada, Michal Marek, Andrew Morton, Kees Cook,
	Geert Uytterhoeven, Greg Kroah-Hartman, linux-media, linux-kernel
In-Reply-To: <20170922212930.620249-2-arnd@arndb.de>

Arnd Bergmann <arnd@arndb.de> writes:

> With KASAN and a couple of other patches applied, this driver is one
> of the few remaining ones that actually use more than 2048 bytes of
> kernel stack:
>
> broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy_gainctrl':
> broadcom/brcm80211/brcmsmac/phy/phy_n.c:16065:1: warning: the frame size of 3264 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> broadcom/brcm80211/brcmsmac/phy/phy_n.c: In function 'wlc_phy_workarounds_nphy':
> broadcom/brcm80211/brcmsmac/phy/phy_n.c:17138:1: warning: the frame size of 2864 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>
> Here, I'm reducing the stack size by marking as many local variables as
> 'static const' as I can without changing the actual code.
>
> This is the first of three patches to improve the stack usage in this
> driver. It would be good to have this backported to stabl kernels
> to get all drivers in 'allmodconfig' below the 2048 byte limit so
> we can turn on the frame warning again globally, but I realize that
> the patch is larger than the normal limit for stable backports.
>
> The other two patches do not need to be backported.
>
> Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

I'll queue this and the two following brcmsmac patches for 4.14.

Also I'll add (only for this patch):

Cc: <stable@vger.kernel.org>

-- 
Kalle Valo

^ permalink raw reply

* Re: usb/wireless/rsi_91x: use-after-free write in __run_timers
From: Kalle Valo @ 2017-09-25  4:26 UTC (permalink / raw)
  To: Andrey Konovalov
  Cc: Amitkumar Karwar, Prameela Rani Garnepudi, Karun Eagalapati,
	linux-wireless, netdev, LKML, Dmitry Vyukov, Kostya Serebryany,
	syzkaller
In-Reply-To: <CAAeHK+wXCaLZ8=enfsmjg-+FJsW1UhSCz8GUmSWhFxB4orLQ6w@mail.gmail.com>

Andrey Konovalov <andreyknvl@google.com> writes:

> I've got the following report while fuzzing the kernel with syzkaller.
>
> On commit 6e80ecdddf4ea6f3cd84e83720f3d852e6624a68 (Sep 21).
>
> ==================================================================
> BUG: KASAN: use-after-free in __run_timers+0xc0e/0xd40
> Write of size 8 at addr ffff880069f701b8 by task swapper/0/0
>
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.14.0-rc1-42311-g6e80ecdddf4e #234
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011

[...]

> Allocated by task 1845:
>  save_stack_trace+0x1b/0x20 arch/x86/kernel/stacktrace.c:59
>  save_stack+0x43/0xd0 mm/kasan/kasan.c:447
>  set_track mm/kasan/kasan.c:459
>  kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
>  kmem_cache_alloc_trace+0x11e/0x2d0 mm/slub.c:2772
>  kmalloc ./include/linux/slab.h:493
>  kzalloc ./include/linux/slab.h:666
>  rsi_91x_init+0x98/0x510 drivers/net/wireless/rsi/rsi_91x_main.c:203
>  rsi_probe+0xb6/0x13b0 drivers/net/wireless/rsi/rsi_91x_usb.c:665
>  usb_probe_interface+0x35d/0x8e0 drivers/usb/core/driver.c:361

I'm curious about your setup. Apparently you are running syzkaller on
QEMU but what I don't understand is how the rsi device comes into the
picture. Did you have a rsi usb device connected to the virtual machine
or what? Or does syzkaller do some kind of magic here?

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH] brcm80211: make const array ucode_ofdm_rates static, reduces object code size
From: Kalle Valo @ 2017-09-25  4:20 UTC (permalink / raw)
  To: Arend van Spriel
  Cc: Colin King, Franky Lin, Hante Meuleman, Chi-Hsien Lin,
	Wright Feng, linux-wireless, brcm80211-dev-list.pdl,
	brcm80211-dev-list, netdev, kernel-janitors, linux-kernel
In-Reply-To: <96caf646-a43c-9b78-a4a9-2becf7e74125@broadcom.com>

Arend van Spriel <arend.vanspriel@broadcom.com> writes:

> Please use 'brcmsmac:' as prefix instead of 'brcm80211:'.

I can fix that.

-- 
Kalle Valo

^ permalink raw reply

* [PATCH v3 net-next 12/12] gtp: Allow configuring GTP interface as standalone
From: Tom Herbert @ 2017-09-25  3:29 UTC (permalink / raw)
  To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170925032941.14586-1-tom@quantonium.net>

Add new configuration of GTP interfaces that allow specifying a port to
listen on (as opposed to having to get sockets from a userspace control
plane). This allows GTP interfaces to be configured and the data path
tested without requiring a GTP-C daemon.

Signed-off-by: Tom Herbert <tom@quantonium.net>
---
 drivers/net/gtp.c        | 215 ++++++++++++++++++++++++++++++++++++-----------
 include/uapi/linux/gtp.h |   5 ++
 2 files changed, 169 insertions(+), 51 deletions(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 1c580df4cfc5..dc1fcd3034af 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -93,6 +93,9 @@ struct gtp_dev {
 	struct sock		*sk0;
 	struct sock		*sk1u;
 
+	struct socket		*sock0;
+	struct socket		*sock1u;
+
 	struct net_device	*dev;
 
 	unsigned int		role;
@@ -451,26 +454,33 @@ static void gtp_encap_destroy(struct sock *sk)
 	}
 }
 
-static void gtp_encap_disable_sock(struct sock *sk)
+static void gtp_encap_release(struct gtp_dev *gtp)
 {
-	if (!sk)
-		return;
+	if (gtp->sk0) {
+		if (gtp->sock0) {
+			udp_tunnel_sock_release(gtp->sock0);
+			gtp->sock0 = NULL;
+		} else {
+			gtp_encap_destroy(gtp->sk0);
+		}
 
-	gtp_encap_destroy(sk);
-}
+		gtp->sk0 = NULL;
+	}
 
-static void gtp_encap_disable(struct gtp_dev *gtp)
-{
-	gtp_encap_disable_sock(gtp->sk0);
-	gtp_encap_disable_sock(gtp->sk1u);
+	if (gtp->sk1u) {
+		if (gtp->sock1u) {
+			udp_tunnel_sock_release(gtp->sock1u);
+			gtp->sock1u = NULL;
+		} else {
+			gtp_encap_destroy(gtp->sk1u);
+		}
+
+		gtp->sk1u = NULL;
+	}
 }
 
 static int gtp_dev_init(struct net_device *dev)
 {
-	struct gtp_dev *gtp = netdev_priv(dev);
-
-	gtp->dev = dev;
-
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!dev->tstats)
 		return -ENOMEM;
@@ -482,7 +492,8 @@ static void gtp_dev_uninit(struct net_device *dev)
 {
 	struct gtp_dev *gtp = netdev_priv(dev);
 
-	gtp_encap_disable(gtp);
+	gtp_encap_release(gtp);
+
 	free_percpu(dev->tstats);
 }
 
@@ -751,6 +762,8 @@ static void gtp_link_setup(struct net_device *dev)
 				  sizeof(struct udphdr) +
 				  sizeof(struct gtp0_header);
 
+	gtp->dev = dev;
+
 	gro_cells_init(&gtp->gro_cells, dev);
 }
 
@@ -764,13 +777,19 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 		       struct netlink_ext_ack *extack)
 {
 	unsigned int role = GTP_ROLE_GGSN;
+	bool have_fd, have_ports;
 	bool is_ipv6 = false;
 	struct gtp_dev *gtp;
 	struct gtp_net *gn;
 	int hashsize, err;
 
-	if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
+	have_fd = !!data[IFLA_GTP_FD0] || !!data[IFLA_GTP_FD1];
+	have_ports = !!data[IFLA_GTP_PORT0] || !!data[IFLA_GTP_PORT1];
+
+	if (!(have_fd ^ have_ports)) {
+		/* Either got fd(s) or port(s) */
 		return -EINVAL;
+	}
 
 	if (data[IFLA_GTP_ROLE]) {
 		role = nla_get_u32(data[IFLA_GTP_ROLE]);
@@ -831,7 +850,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 out_hashtable:
 	gtp_hashtable_free(gtp);
 out_encap:
-	gtp_encap_disable(gtp);
+	gtp_encap_release(gtp);
 	return err;
 }
 
@@ -840,7 +859,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head)
 	struct gtp_dev *gtp = netdev_priv(dev);
 
 	gro_cells_destroy(&gtp->gro_cells);
-	gtp_encap_disable(gtp);
+	gtp_encap_release(gtp);
 	gtp_hashtable_free(gtp);
 	list_del_rcu(&gtp->list);
 	unregister_netdevice_queue(dev, head);
@@ -851,6 +870,8 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
 	[IFLA_GTP_FD1]			= { .type = NLA_U32 },
 	[IFLA_GTP_PDP_HASHSIZE]		= { .type = NLA_U32 },
 	[IFLA_GTP_ROLE]			= { .type = NLA_U32 },
+	[IFLA_GTP_PORT0]		= { .type = NLA_U16 },
+	[IFLA_GTP_PORT1]		= { .type = NLA_U16 },
 };
 
 static int gtp_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -949,11 +970,35 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
 	kfree(gtp->tid_hash);
 }
 
-static struct sock *gtp_encap_enable_socket(int fd, int type,
-					    struct gtp_dev *gtp,
-					    bool is_ipv6)
+static int gtp_encap_enable_sock(struct socket *sock, int type,
+				 struct gtp_dev *gtp)
 {
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
+
+	switch (type) {
+	case UDP_ENCAP_GTP0:
+		tuncfg.encap_rcv = gtp0_udp_encap_recv;
+		break;
+	case UDP_ENCAP_GTP1U:
+		tuncfg.encap_rcv = gtp1u_udp_encap_recv;
+		break;
+	default:
+		pr_debug("Unknown encap type %u\n", type);
+		return -EINVAL;
+	}
+
+	tuncfg.sk_user_data = gtp;
+	tuncfg.encap_type = type;
+	tuncfg.encap_destroy = gtp_encap_destroy;
+
+	setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
+
+	return 0;
+}
+
+static struct sock *gtp_encap_enable_fd(int fd, int type, struct gtp_dev *gtp,
+					bool is_ipv6)
+{
 	struct socket *sock;
 	struct sock *sk;
 	int err;
@@ -986,60 +1031,128 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
 	sk = sock->sk;
 	sock_hold(sk);
 
-	switch (type) {
-	case UDP_ENCAP_GTP0:
-		tuncfg.encap_rcv = gtp0_udp_encap_recv;
-		break;
-	case UDP_ENCAP_GTP1U:
-		tuncfg.encap_rcv = gtp1u_udp_encap_recv;
-		break;
-	default:
-		pr_debug("Unknown encap type %u\n", type);
-		sk = ERR_PTR(-EINVAL);
-		goto out_sock;
-	}
-
-	tuncfg.sk_user_data = gtp;
-	tuncfg.encap_type = type;
-	tuncfg.encap_destroy = gtp_encap_destroy;
-
-	setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
+	err = gtp_encap_enable_sock(sock, type, gtp);
+	if (err < 0)
+		sk = ERR_PTR(err);
 
 out_sock:
 	sockfd_put(sock);
 	return sk;
 }
 
+static struct socket *gtp_create_sock(struct net *net, bool ipv6,
+				      __be16 port, u32 flags)
+{
+	struct socket *sock;
+	struct udp_port_cfg udp_conf;
+	int err;
+
+	memset(&udp_conf, 0, sizeof(udp_conf));
+
+#if GTP_IPV6
+	if (ipv6) {
+		udp_conf.family = AF_INET6;
+		udp_conf.ipv6_v6only = 1;
+	} else {
+		udp_conf.family = AF_INET;
+	}
+#else
+	udp_conf.family = AF_INET;
+#endif
+
+	udp_conf.local_udp_port = port;
+
+	/* Open UDP socket */
+	err = udp_sock_create(net, &udp_conf, &sock);
+	if (err)
+		return ERR_PTR(err);
+
+	return sock;
+}
+
 static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
 			    bool is_ipv6)
 {
+	int err;
+
+	struct socket *sock0 = NULL, *sock1u = NULL;
 	struct sock *sk0 = NULL, *sk1u = NULL;
 
 	if (data[IFLA_GTP_FD0]) {
 		u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
 
-		sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp,
-					      is_ipv6);
-		if (IS_ERR(sk0))
-			return PTR_ERR(sk0);
+		sk0 = gtp_encap_enable_fd(fd0, UDP_ENCAP_GTP0, gtp, is_ipv6);
+		if (IS_ERR(sk0)) {
+			err = PTR_ERR(sk0);
+			sk0 = NULL;
+			goto out_err;
+		}
+	} else if (data[IFLA_GTP_PORT0]) {
+		__be16 port = nla_get_u16(data[IFLA_GTP_PORT0]);
+
+		sock0 = gtp_create_sock(dev_net(gtp->dev), is_ipv6, port, 0);
+		if (IS_ERR(sock0)) {
+			err = PTR_ERR(sock0);
+			sock0 = NULL;
+			goto out_err;
+		}
+
+		err = gtp_encap_enable_sock(sock0, UDP_ENCAP_GTP0, gtp);
+		if (err)
+			goto out_err;
 	}
 
 	if (data[IFLA_GTP_FD1]) {
 		u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
 
-		sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp,
-					       is_ipv6);
+		sk1u = gtp_encap_enable_fd(fd1, UDP_ENCAP_GTP1U, gtp, is_ipv6);
 		if (IS_ERR(sk1u)) {
-			if (sk0)
-				gtp_encap_disable_sock(sk0);
-			return PTR_ERR(sk1u);
+			err = PTR_ERR(sk1u);
+			sk1u = NULL;
+			goto out_err;
+		}
+	} else if (data[IFLA_GTP_PORT1]) {
+		__be16 port = nla_get_u16(data[IFLA_GTP_PORT1]);
+
+		sock1u = gtp_create_sock(dev_net(gtp->dev), is_ipv6, port, 0);
+		if (IS_ERR(sock1u)) {
+			err = PTR_ERR(sock1u);
+			sock1u = NULL;
+			goto out_err;
 		}
+
+		err = gtp_encap_enable_sock(sock1u, UDP_ENCAP_GTP1U, gtp);
+		if (err)
+			goto out_err;
+	}
+
+	if (sock0) {
+		gtp->sock0 = sock0;
+		gtp->sk0 = sock0->sk;
+	} else {
+		gtp->sk0 = sk0;
 	}
 
-	gtp->sk0 = sk0;
-	gtp->sk1u = sk1u;
+	if (sock1u) {
+		gtp->sock1u = sock1u;
+		gtp->sk1u = sock1u->sk;
+	} else {
+		gtp->sk1u = sk1u;
+	}
 
 	return 0;
+
+out_err:
+	if (sk0)
+		gtp_encap_destroy(sk0);
+	if (sk1u)
+		gtp_encap_destroy(sk1u);
+	if (sock0)
+		udp_tunnel_sock_release(sock0);
+	if (sock1u)
+		udp_tunnel_sock_release(sock1u);
+
+	return err;
 }
 
 static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
@@ -1624,8 +1737,8 @@ static const struct genl_ops gtp_genl_ops[] = {
 };
 
 static struct genl_family gtp_genl_family __ro_after_init = {
-	.name		= "gtp",
-	.version	= 0,
+	.name		= GTP_GENL_NAME,
+	.version	= GTP_GENL_VERSION,
 	.hdrsize	= 0,
 	.maxattr	= GTPA_MAX,
 	.netnsok	= true,
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index 8eec519fa754..0da18aa88be8 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -9,6 +9,11 @@ enum gtp_genl_cmds {
 	GTP_CMD_MAX,
 };
 
+/* NETLINK_GENERIC related info
+ */
+#define GTP_GENL_NAME		"gtp"
+#define GTP_GENL_VERSION	0
+
 enum gtp_version {
 	GTP_V0 = 0,
 	GTP_V1,
-- 
2.11.0

^ permalink raw reply related

* [PATCH v3 net-next 11/12] gtp: Experimental support encpasulating over IPv6
From: Tom Herbert @ 2017-09-25  3:29 UTC (permalink / raw)
  To: davem; +Cc: pablo, laforge, aschultz, netdev, rohit, Tom Herbert
In-Reply-To: <20170925032941.14586-1-tom@quantonium.net>

Allows using GTP datapath over IPv6. Remote peers are indicated by IPv6.

Note this is experimental, more work is needed to make this
compliant with 3GPP standard.

Signed-off-by: Tom Herbert <tom@quantonium.net>
---
 drivers/net/gtp.c            | 248 ++++++++++++++++++++++++++++++++++---------
 include/uapi/linux/gtp.h     |   1 +
 include/uapi/linux/if_link.h |   3 +
 3 files changed, 200 insertions(+), 52 deletions(-)

diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index 919ec6e14973..1c580df4cfc5 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -28,6 +28,7 @@
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
+#include <net/ip6_tunnel.h>
 #include <net/udp.h>
 #include <net/udp_tunnel.h>
 #include <net/icmp.h>
@@ -59,16 +60,22 @@ struct pdp_ctx {
 	__be16			gtp_port;
 
 	u16			ms_af;
+	u16			peer_af;
 #if GTP_IPV6
 	union {
 		struct in_addr	ms_addr_ip4;
 		struct in6_addr	ms_addr_ip6;
 	};
+
+	union {
+		struct in_addr	peer_addr_ip4;
+		struct in6_addr	peer_addr_ip6;
+	};
 #else
 	struct in_addr	ms_addr_ip4;
+	struct in_addr	peer_addr_ip4;
 #endif
 
-	struct in_addr		peer_addr_ip4;
 
 	struct sock		*sk;
 	struct net_device       *dev;
@@ -93,8 +100,11 @@ struct gtp_dev {
 	struct hlist_head	*tid_hash;
 
 	struct hlist_head	*addr4_hash;
+
 #if GTP_IPV6
 	struct hlist_head	*addr6_hash;
+
+	unsigned int		is_ipv6:1;
 #endif
 
 	struct gro_cells	gro_cells;
@@ -534,8 +544,6 @@ static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
 {
 	struct iphdr *inner_iph = NULL;
 	struct sock *sk = pctx->sk;
-	__be32 saddr = inet_sk(sk)->inet_saddr;
-	struct rtable *rt;
 	int err = 0;
 
 	if (skb->protocol == ETH_P_IP)
@@ -548,38 +556,84 @@ static int gtp_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	skb_reset_inner_headers(skb);
 
-	/* Source address returned by route lookup is ignored since
-	 * we get the address from a socket.
-	 */
-	rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
-				 sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
-				 pctx->peer_addr_ip4.s_addr, &saddr,
-				 pctx->gtp_port, pctx->gtp_port,
-				 &pctx->dst_cache, NULL);
-
-	if (IS_ERR(rt)) {
-		err = PTR_ERR(rt);
-		goto out_err;
-	}
+	if (pctx->peer_af == AF_INET) {
+		__be32 saddr = inet_sk(sk)->inet_saddr;
+		struct rtable *rt;
+
+		/* Source address returned by route lookup is ignored since
+		 * we get the address from a socket.
+		 */
+		rt = ip_tunnel_get_route(dev, skb, sk->sk_protocol,
+					 sk->sk_bound_dev_if, RT_CONN_FLAGS(sk),
+					 pctx->peer_addr_ip4.s_addr, &saddr,
+					 pctx->gtp_port, pctx->gtp_port,
+					 &pctx->dst_cache, NULL);
+
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto out_err;
+		}
+
+		skb_dst_drop(skb);
 
-	skb_dst_drop(skb);
+		gtp_push_header(skb, pctx);
 
-	gtp_push_header(skb, pctx);
+		if (inner_iph)
+			__iptunnel_update_pmtu(dev, skb, &rt->dst,
+					       !!inner_iph->frag_off,
+					       inner_iph, pctx->hlen,
+					       pctx->peer_addr_ip4.s_addr);
 
-	if (inner_iph)
-		__iptunnel_update_pmtu(dev, skb, &rt->dst,
-				       !!inner_iph->frag_off,
-				       inner_iph, pctx->hlen,
-				       pctx->peer_addr_ip4.s_addr);
+		udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+				    pctx->peer_addr_ip4.s_addr,
+				    0, ip4_dst_hoplimit(&rt->dst), 0,
+				    pctx->gtp_port, pctx->gtp_port,
+				    false, false);
 
-	udp_tunnel_xmit_skb(rt, sk, skb, saddr,
-			    pctx->peer_addr_ip4.s_addr,
-			    0, ip4_dst_hoplimit(&rt->dst), 0,
-			    pctx->gtp_port, pctx->gtp_port,
-			    false, false);
+		netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
+			   &saddr, &pctx->peer_addr_ip4.s_addr);
 
-	netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n",
-		   &saddr, &pctx->peer_addr_ip4.s_addr);
+#if GTP_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (pctx->peer_af == AF_INET6) {
+		struct in6_addr saddr = inet6_sk(sk)->saddr;
+		struct dst_entry *dst;
+
+		/* Source address returned by route lookup is ignored since
+		 * we get the address from a socket.
+		 */
+		dst = ip6_tnl_get_route(dev, skb, sk, sk->sk_protocol,
+					sk->sk_bound_dev_if, 0,
+					0, &pctx->peer_addr_ip6, &saddr,
+					pctx->gtp_port, pctx->gtp_port,
+					&pctx->dst_cache, NULL);
+
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto out_err;
+		}
+
+		skb_dst_drop(skb);
+
+		gtp_push_header(skb, pctx);
+
+		if (inner_iph)
+			__iptunnel_update_pmtu(dev, skb, dst,
+					       !!inner_iph->frag_off,
+					       inner_iph, pctx->hlen, 0);
+
+		udp_tunnel6_xmit_skb(dst, sk, skb, dev,
+				     &saddr, &pctx->peer_addr_ip6,
+				     0, ip6_dst_hoplimit(dst), 0,
+				     pctx->gtp_port, pctx->gtp_port,
+				     false);
+
+		netdev_dbg(dev, "gtp -> IP src: %pI6 dst: %pI6\n",
+			   &saddr, &pctx->peer_addr_ip6);
+
+#endif
+#endif
+	}
 
 	return 0;
 
@@ -688,7 +742,12 @@ static void gtp_link_setup(struct net_device *dev)
 
 	/* Assume largest header, ie. GTPv0. */
 	dev->needed_headroom	= LL_MAX_HEADER +
+#if GTP_IPV6
+				  max_t(int, sizeof(struct iphdr),
+					sizeof(struct ipv6hdr)) +
+#else
 				  sizeof(struct iphdr) +
+#endif
 				  sizeof(struct udphdr) +
 				  sizeof(struct gtp0_header);
 
@@ -697,12 +756,15 @@ static void gtp_link_setup(struct net_device *dev)
 
 static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
 static void gtp_hashtable_free(struct gtp_dev *gtp);
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
+			    bool is_ipv6);
 
 static int gtp_newlink(struct net *src_net, struct net_device *dev,
 		       struct nlattr *tb[], struct nlattr *data[],
 		       struct netlink_ext_ack *extack)
 {
+	unsigned int role = GTP_ROLE_GGSN;
+	bool is_ipv6 = false;
 	struct gtp_dev *gtp;
 	struct gtp_net *gn;
 	int hashsize, err;
@@ -710,9 +772,32 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 	if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
 		return -EINVAL;
 
+	if (data[IFLA_GTP_ROLE]) {
+		role = nla_get_u32(data[IFLA_GTP_ROLE]);
+		if (role > GTP_ROLE_SGSN)
+			return -EINVAL;
+	}
+
+	if (data[IFLA_GTP_AF]) {
+		u16 af = nla_get_u16(data[IFLA_GTP_AF]);
+
+		switch (af) {
+		case AF_INET:
+			is_ipv6 = false;
+			break;
+#if GTP_IPV6
+		case AF_INET6:
+			is_ipv6 = true;
+			break;
+#endif
+		default:
+			return -EINVAL;
+		}
+	}
+
 	gtp = netdev_priv(dev);
 
-	err = gtp_encap_enable(gtp, data);
+	err = gtp_encap_enable(gtp, data, is_ipv6);
 	if (err < 0)
 		return err;
 
@@ -731,6 +816,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
 		goto out_hashtable;
 	}
 
+	gtp->role = role;
+#if GTP_IPV6
+	gtp->is_ipv6 = is_ipv6;
+#endif
+
 	gn = net_generic(dev_net(dev), gtp_net_id);
 	list_add_rcu(&gtp->list, &gn->gtp_dev_list);
 
@@ -860,7 +950,8 @@ static void gtp_hashtable_free(struct gtp_dev *gtp)
 }
 
 static struct sock *gtp_encap_enable_socket(int fd, int type,
-					    struct gtp_dev *gtp)
+					    struct gtp_dev *gtp,
+					    bool is_ipv6)
 {
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
 	struct socket *sock;
@@ -881,6 +972,12 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
 		goto out_sock;
 	}
 
+	if (sock->sk->sk_family != (is_ipv6 ? AF_INET6 : AF_INET)) {
+		pr_debug("socket fd=%d not right family\n", fd);
+		sk = ERR_PTR(-EINVAL);
+		goto out_sock;
+	}
+
 	if (rcu_dereference_sk_user_data(sock->sk)) {
 		sk = ERR_PTR(-EBUSY);
 		goto out_sock;
@@ -913,16 +1010,16 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
 	return sk;
 }
 
-static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
+static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[],
+			    bool is_ipv6)
 {
-	struct sock *sk1u = NULL;
-	struct sock *sk0 = NULL;
-	unsigned int role = GTP_ROLE_GGSN;
+	struct sock *sk0 = NULL, *sk1u = NULL;
 
 	if (data[IFLA_GTP_FD0]) {
 		u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
 
-		sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp);
+		sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp,
+					      is_ipv6);
 		if (IS_ERR(sk0))
 			return PTR_ERR(sk0);
 	}
@@ -930,7 +1027,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
 	if (data[IFLA_GTP_FD1]) {
 		u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
 
-		sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp);
+		sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp,
+					       is_ipv6);
 		if (IS_ERR(sk1u)) {
 			if (sk0)
 				gtp_encap_disable_sock(sk0);
@@ -938,15 +1036,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
 		}
 	}
 
-	if (data[IFLA_GTP_ROLE]) {
-		role = nla_get_u32(data[IFLA_GTP_ROLE]);
-		if (role > GTP_ROLE_SGSN)
-			return -EINVAL;
-	}
-
 	gtp->sk0 = sk0;
 	gtp->sk1u = sk1u;
-	gtp->role = role;
 
 	return 0;
 }
@@ -982,8 +1073,18 @@ static void pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
 	__be16 default_port = 0;
 
 	pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
-	pctx->peer_addr_ip4.s_addr =
-		nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]);
+
+	if (info->attrs[GTPA_PEER_ADDRESS]) {
+		pctx->peer_af = AF_INET;
+		pctx->peer_addr_ip4.s_addr =
+			nla_get_in_addr(info->attrs[GTPA_PEER_ADDRESS]);
+#if GTP_IPV6
+	} else if (info->attrs[GTPA_PEER6_ADDRESS]) {
+		pctx->peer_af = AF_INET6;
+		pctx->peer_addr_ip6 = nla_get_in6_addr(
+					info->attrs[GTPA_PEER6_ADDRESS]);
+#endif
+	}
 
 	switch (pctx->gtp_version) {
 	case GTP_V0:
@@ -1162,11 +1263,17 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
 	int err;
 
 	if (!info->attrs[GTPA_VERSION] ||
-	   !info->attrs[GTPA_LINK] ||
-	   !info->attrs[GTPA_PEER_ADDRESS])
+	    !info->attrs[GTPA_LINK])
 		return -EINVAL;
 
 #if GTP_IPV6
+	if (!(!!info->attrs[GTPA_PEER_ADDRESS] ^
+	      !!info->attrs[GTPA_PEER6_ADDRESS])) {
+		/* Either v4 or v6 peer address must be set */
+
+		return -EINVAL;
+	}
+
 	if (!(!!info->attrs[GTPA_MS_ADDRESS] ^
 	      !!info->attrs[GTPA_MS6_ADDRESS])) {
 		/* Either v4 or v6 mobile subscriber address must be set */
@@ -1174,6 +1281,12 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 #else
+	if (!info->attrs[GTPA_PEER_ADDRESS]) {
+		/* v4 peer address must be set */
+
+		return -EINVAL;
+	}
+
 	if (!info->attrs[GTPA_MS_ADDRESS]) {
 		/* v4 mobile subscriber address must be set */
 
@@ -1207,6 +1320,14 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
 		goto out_unlock;
 	}
 
+#if GTP_IPV6
+	if ((info->attrs[GTPA_PEER_ADDRESS] && gtp->is_ipv6) ||
+	    (info->attrs[GTPA_PEER6_ADDRESS] && !gtp->is_ipv6)) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+#endif
+
 	if (version == GTP_V0)
 		sk = gtp->sk0;
 	else if (version == GTP_V1)
@@ -1315,10 +1436,31 @@ static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
 	if (genlh == NULL)
 		goto nlmsg_failure;
 
-	if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) ||
-	    nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr))
+	if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, GTPA_LINK, pctx->dev->ifindex))
+		goto nla_put_failure;
+
+	switch (pctx->peer_af) {
+	case AF_INET:
+		if (nla_put_be32(skb, GTPA_PEER_ADDRESS,
+				 pctx->peer_addr_ip4.s_addr))
+			goto nla_put_failure;
+
+		break;
+#if GTP_IPV6
+	case AF_INET6:
+		if (nla_put_in6_addr(skb, GTPA_PEER6_ADDRESS,
+				     &pctx->peer_addr_ip6))
+			goto nla_put_failure;
+
+		break;
+#endif
+	default:
+		goto nla_put_failure;
+	}
+
 	switch (pctx->ms_af) {
 	case AF_INET:
 		if (nla_put_be32(skb, GTPA_MS_ADDRESS,
@@ -1448,6 +1590,8 @@ static struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
 	[GTPA_PEER_ADDRESS]	= { .type = NLA_U32, },
 	[GTPA_MS_ADDRESS]	= { .type = NLA_U32, },
 #if GTP_IPV6
+	[GTPA_PEER6_ADDRESS]	= { .len = FIELD_SIZEOF(struct ipv6hdr,
+							daddr) },
 	[GTPA_MS6_ADDRESS]	= { .len = FIELD_SIZEOF(struct ipv6hdr,
 							daddr) },
 #endif
diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h
index ae4e632c0360..8eec519fa754 100644
--- a/include/uapi/linux/gtp.h
+++ b/include/uapi/linux/gtp.h
@@ -29,6 +29,7 @@ enum gtp_attrs {
 	GTPA_PAD,
 	GTPA_PORT,
 	GTPA_MS6_ADDRESS,
+	GTPA_PEER6_ADDRESS,
 	__GTPA_MAX,
 };
 #define GTPA_MAX (__GTPA_MAX + 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8d062c58d5cb..81c26864abeb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -552,6 +552,9 @@ enum {
 	IFLA_GTP_FD1,
 	IFLA_GTP_PDP_HASHSIZE,
 	IFLA_GTP_ROLE,
+	IFLA_GTP_AF,
+	IFLA_GTP_PORT0,
+	IFLA_GTP_PORT1,
 	__IFLA_GTP_MAX,
 };
 #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
-- 
2.11.0

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox