Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v1 net-next] lan78xx: Lan7801 Support for Fixed PHY
From: Raghuram Chary J @ 2018-04-25  6:43 UTC (permalink / raw)
  To: davem; +Cc: netdev, unglinuxdriver, woojung.huh, raghuramchary.jallipalli

Adding Fixed PHY support to the lan78xx driver.

Signed-off-by: Raghuram Chary J <raghuramchary.jallipalli@microchip.com>
---
v0->v1:
   * Remove driver version #define
   * Modify netdev_info to netdev_dbg
   * Move lan7801 specific to new routine and add switch case
   * Minor cleanup
---
 drivers/net/usb/Kconfig   |   1 +
 drivers/net/usb/lan78xx.c | 113 ++++++++++++++++++++++++++++++++--------------
 2 files changed, 79 insertions(+), 35 deletions(-)

diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index f28bd74ac275..418b0904cecb 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -111,6 +111,7 @@ config USB_LAN78XX
 	select MII
 	select PHYLIB
 	select MICROCHIP_PHY
+	select FIXED_PHY
 	help
 	  This option adds support for Microchip LAN78XX based USB 2
 	  & USB 3 10/100/1000 Ethernet adapters.
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 0867f7275852..47fa34a2d09f 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -36,13 +36,12 @@
 #include <linux/irq.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/microchipphy.h>
-#include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include "lan78xx.h"
 
 #define DRIVER_AUTHOR	"WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC	"LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME	"lan78xx"
-#define DRIVER_VERSION	"1.0.6"
 
 #define TX_TIMEOUT_JIFFIES		(5 * HZ)
 #define THROTTLE_JIFFIES		(HZ / 8)
@@ -402,6 +401,7 @@ struct lan78xx_net {
 	struct statstage	stats;
 
 	struct irq_domain_data	domain_data;
+	struct phy_device	*fixedphy;
 };
 
 /* define external phy id */
@@ -1477,7 +1477,6 @@ static void lan78xx_get_drvinfo(struct net_device *net,
 	struct lan78xx_net *dev = netdev_priv(net);
 
 	strncpy(info->driver, DRIVER_NAME, sizeof(info->driver));
-	strncpy(info->version, DRIVER_VERSION, sizeof(info->version));
 	usb_make_path(dev->udev, info->bus_info, sizeof(info->bus_info));
 }
 
@@ -2003,52 +2002,91 @@ static int ksz9031rnx_fixup(struct phy_device *phydev)
 	return 1;
 }
 
-static int lan78xx_phy_init(struct lan78xx_net *dev)
+static int lan7801_phy_init(struct phy_device **phydev,
+			    struct lan78xx_net *dev)
 {
+	u32 buf;
 	int ret;
-	u32 mii_adv;
-	struct phy_device *phydev;
-
-	phydev = phy_find_first(dev->mdiobus);
-	if (!phydev) {
-		netdev_err(dev->net, "no PHY found\n");
-		return -EIO;
-	}
-
-	if ((dev->chipid == ID_REV_CHIP_ID_7800_) ||
-	    (dev->chipid == ID_REV_CHIP_ID_7850_)) {
-		phydev->is_internal = true;
-		dev->interface = PHY_INTERFACE_MODE_GMII;
-
-	} else if (dev->chipid == ID_REV_CHIP_ID_7801_) {
-		if (!phydev->drv) {
+	struct fixed_phy_status fphy_status = {
+		.link = 1,
+		.speed = SPEED_1000,
+		.duplex = DUPLEX_FULL,
+	};
+
+	if (!*phydev) {
+		netdev_dbg(dev->net, "PHY Not Found!! Registering Fixed PHY\n");
+		*phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1,
+					     NULL);
+		if (IS_ERR(*phydev)) {
+			netdev_err(dev->net, "No PHY/fixed_PHY found\n");
+			return -ENODEV;
+		}
+		netdev_dbg(dev->net, "Registered FIXED PHY\n");
+		dev->interface = PHY_INTERFACE_MODE_RGMII;
+		dev->fixedphy = *phydev;
+		ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
+					MAC_RGMII_ID_TXC_DELAY_EN_);
+		ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+		ret = lan78xx_read_reg(dev, HW_CFG, &buf);
+		buf |= HW_CFG_CLK125_EN_;
+		buf |= HW_CFG_REFCLK25_EN_;
+		ret = lan78xx_write_reg(dev, HW_CFG, buf);
+	} else {
+		if (!(*phydev)->drv) {
 			netdev_err(dev->net, "no PHY driver found\n");
 			return -EIO;
 		}
-
 		dev->interface = PHY_INTERFACE_MODE_RGMII;
-
 		/* external PHY fixup for KSZ9031RNX */
 		ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0,
 						 ksz9031rnx_fixup);
 		if (ret < 0) {
-			netdev_err(dev->net, "fail to register fixup\n");
+			netdev_err(dev->net, "fail to register fixup PHY_KSZ9031RNX\n");
 			return ret;
 		}
 		/* external PHY fixup for LAN8835 */
 		ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0,
 						 lan8835_fixup);
 		if (ret < 0) {
-			netdev_err(dev->net, "fail to register fixup\n");
+			netdev_err(dev->net, "fail to register fixup for PHY_LAN8835\n");
 			return ret;
 		}
 		/* add more external PHY fixup here if needed */
 
-		phydev->is_internal = false;
-	} else {
-		netdev_err(dev->net, "unknown ID found\n");
-		ret = -EIO;
-		goto error;
+		(*phydev)->is_internal = false;
+	}
+	return 0;
+}
+
+static int lan78xx_phy_init(struct lan78xx_net *dev)
+{
+	int ret;
+	u32 mii_adv;
+	struct phy_device *phydev;
+
+	phydev = phy_find_first(dev->mdiobus);
+	switch (dev->chipid) {
+	case ID_REV_CHIP_ID_7801_:
+		ret = lan7801_phy_init(&phydev, dev);
+		if (ret < 0) {
+			netdev_err(dev->net, "lan7801: PHY Init Failed");
+			return ret;
+		}
+		break;
+
+	case ID_REV_CHIP_ID_7800_:
+	case ID_REV_CHIP_ID_7850_:
+		if (!phydev) {
+			netdev_err(dev->net, "no PHY found\n");
+			return -EIO;
+		}
+		phydev->is_internal = true;
+		dev->interface = PHY_INTERFACE_MODE_GMII;
+		break;
+
+	default:
+		netdev_err(dev->net, "Unknown CHIP ID found\n");
+		return -EIO;
 	}
 
 	/* if phyirq is not set, use polling mode in phylib */
@@ -2067,6 +2105,12 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 	if (ret) {
 		netdev_err(dev->net, "can't attach PHY to %s\n",
 			   dev->mdiobus->id);
+		if (dev->chipid == ID_REV_CHIP_ID_7801_) {
+			phy_unregister_fixup_for_uid(PHY_KSZ9031RNX,
+						     0xfffffff0);
+			phy_unregister_fixup_for_uid(PHY_LAN8835,
+						     0xfffffff0);
+		}
 		return -EIO;
 	}
 
@@ -2084,12 +2128,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 	dev->fc_autoneg = phydev->autoneg;
 
 	return 0;
-
-error:
-	phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
-	phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
-
-	return ret;
 }
 
 static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
@@ -3501,6 +3539,11 @@ static void lan78xx_disconnect(struct usb_interface *intf)
 
 	phy_disconnect(net->phydev);
 
+	if (dev->fixedphy) {
+		fixed_phy_unregister(dev->fixedphy);
+		dev->fixedphy = NULL;
+	}
+
 	unregister_netdev(net);
 
 	cancel_delayed_work_sync(&dev->wq);
-- 
2.16.2

^ permalink raw reply related

* Re: [PATCH] can: janz-ican3: fix ican3_xmit()'s return type
From: Marc Kleine-Budde @ 2018-04-25  6:47 UTC (permalink / raw)
  To: Luc Van Oostenryck, linux-kernel; +Cc: Wolfgang Grandegger, linux-can, netdev
In-Reply-To: <20180424131609.3258-1-luc.vanoostenryck@gmail.com>


[-- Attachment #1.1: Type: text/plain, Size: 717 bytes --]

On 04/24/2018 03:16 PM, Luc Van Oostenryck wrote:
> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, but the implementation in this
> driver returns an 'int'.
> 
> Fix this by returning 'netdev_tx_t' in this driver too.
> 
> Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
> ---

Can you send a series fixing the return type in all CAN drivers?

Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [PATCH bpf-next] bpf: clear the ip_tunnel_info.
From: William Tu @ 2018-04-25  6:46 UTC (permalink / raw)
  To: netdev

The percpu metadata_dst might carry the stale ip_tunnel_info
and cause incorrect behavior.  When mixing tests using ipv4/ipv6
bpf vxlan and geneve tunnel, the ipv6 tunnel info incorrectly uses
ipv4's src ip addr as its ipv6 src address, because the previous
tunnel info does not clean up.  The patch zeros the fields in
ip_tunnel_info.

Signed-off-by: William Tu <u9012063@gmail.com>
Reported-by: Yifeng Sun <pkusunyifeng@gmail.com>
---
 net/core/filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 8e45c6c7ab08..d3781daa26ab 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3281,6 +3281,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
 	skb_dst_set(skb, (struct dst_entry *) md);

 	info = &md->u.tun_info;
+	memset(info, 0, sizeof(*info));
 	info->mode = IP_TUNNEL_INFO_TX;

 	info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
-- 
2.7.4

^ permalink raw reply related

* e1000e I219 timestamping oops related to TSYNCRXCTL read
From: Benjamin Poirier @ 2018-04-25  6:52 UTC (permalink / raw)
  To: Bruce Allan, Yanir Lubetkin, Jacob Keller, Neftin, Sasha
  Cc: Alexander Duyck, Jeff Kirsher, Achim Mildenberger, olouvignes,
	jayanth, ehabkost, postmodern.mod3, Bart.VanAssche,
	intel-wired-lan, netdev

In the following openSUSE bug report
https://bugzilla.suse.com/show_bug.cgi?id=1075876
Achim reported an oops related to e1000e timestamping:
kernel: RIP: 0010:[<ffffffff8110303f>] timecounter_read+0xf/0x50
[...]
kernel: Call Trace:
kernel:  [<ffffffffa0806b0f>] e1000e_phc_gettime+0x2f/0x60 [e1000e]
kernel:  [<ffffffffa0806c5d>] e1000e_systim_overflow_work+0x1d/0x80 [e1000e]
kernel:  [<ffffffff810992c5>] process_one_work+0x155/0x440
kernel:  [<ffffffff81099e16>] worker_thread+0x116/0x4b0
kernel:  [<ffffffff8109f422>] kthread+0xd2/0xf0
kernel:  [<ffffffff8163184f>] ret_from_fork+0x3f/0x70

It always occurs 4 hours after boot but not on every boot. It is most
likely the same problem reported here:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1668356
http://lkml.iu.edu/hypermail/linux/kernel/1506.2/index.html#02530
https://bugzilla.redhat.com/show_bug.cgi?id=1463882
https://bugzilla.redhat.com/show_bug.cgi?id=1431863

This occurs with MAC: 12, e1000_pch_spt/I219. The reporter has
reproduced it on a v4.16 derivative.

We've traced it to the fact that e1000e_systim_reset() skips the
timecounter_init() call if e1000e_get_base_timinca() returns -EINVAL,
which leads to a null deref in timecounter_read() (see comment 8 of the
suse bugzilla for more details.)

In commit 83129b37ef35 ("e1000e: fix systim issues", v4.2-rc1) Yanir
reworked e1000e_get_base_timinca() in such a way that it can return
-EINVAL for e1000_pch_spt if the SYSCFI bit is not set in TSYNCRXCTL.
This is also the commit that was identified by bisection in the second
link above (lkml).

What we've observed (in comment 14) is that TSYNCRXCTL reads sometimes
don't have the SYSCFI bit set. Retrying the read shortly after finds the
bit to be set. This was observed at boot (probe) but also link up and
link down.

I have a few questions:

What's the purpose of the SYSCFI bit in TSYNCRXCTL ("Reserved" in the
datasheet)?

Why does it look like subsequent reads of TSYNCRXCTL sometimes have the
SYSCFI bit set/not set on I219?

Is it right to check the SYSCFI bit in e1000e_get_base_timinca() for
_spt and return -EINVAL if it's not set? Could we just remove that
check?

The patch in comment 13 of the suse bugzilla works around the problem by
retrying TSYNCRXCTL reads, maybe we could instead remove that read
altogether or move the timecounter_init() call to at least avoid the
oops. The best approach to take seems to depend on the behavior expected
of TSYNCRXCTL reads on I219 so I'm hoping that you could provide more
info on that.

Thanks,
-Benjamin

^ permalink raw reply

* Re: [PATCH 1/1] IB/rxe: avoid double kfree_skb
From: Yanjun Zhu @ 2018-04-25  6:56 UTC (permalink / raw)
  To: Doug Ledford, monis, jgg, linux-rdma; +Cc: netdev
In-Reply-To: <0b87f416-eee1-fd6c-c386-27469d6db143@oracle.com>

Hi, all

rxe_send [rdma_rxe]
     ip_local_out
         __ip_local_out
         ip_output
             ip_finish_output
                 ip_finish_output2
                     dev_queue_xmit
                         __dev_queue_xmit
                             dev_hard_start_xmit
                                 e1000_xmit_frame [e1000]

When skb is sent, it will pass the above functions. I checked all the 
above functions. If error occurs in the above functions after 
ip_local_out, kfree_skb will be called.
So when ip_local_out returns an error, skb should be freed. It is not 
necessary to call kfree_skb in soft roce module again.

If I am wrong, please correct me.

Zhu Yanjun
On 2018/4/24 16:34, Yanjun Zhu wrote:
> Hi, all
>
> rxe_send
>     ip_local_out
>         __ip_local_out
>             nf_hook_slow
>
> In the above call process, nf_hook_slow drops and frees skb, then 
> -EPERM is returned when iptables rules(iptables -I OUTPUT -p udp 
> --dport 4791 -j DROP) is set.
>
> If skb->users is not changed in softroce, kfree_skb should not be 
> called in this module.
>
> I will make further investigations about other error handler after 
> ip_local_out.
> If I am wrong, please correct me.
>
> Any reply is appreciated.
>
> Zhu Yanjun
> On 2018/4/20 13:46, Yanjun Zhu wrote:
>>
>>
>> On 2018/4/20 10:19, Doug Ledford wrote:
>>> On Thu, 2018-04-19 at 10:01 -0400, Zhu Yanjun wrote:
>>>> When skb is dropped by iptables rules, the skb is freed at the same 
>>>> time
>>>> -EPERM is returned. So in softroce, it is not necessary to free skb 
>>>> again.
>>>> Or else, crash will occur.
>>>>
>>>> The steps to reproduce:
>>>>
>>>>       server                       client
>>>>      ---------                    ---------
>>>>      |1.1.1.1|<----rxe-channel--->|1.1.1.2|
>>>>      ---------                    ---------
>>>>
>>>> On server: rping -s -a 1.1.1.1 -v -C 10000 -S 512
>>>> On client: rping -c -a 1.1.1.1 -v -C 10000 -S 512
>>>>
>>>> The kernel configs CONFIG_DEBUG_KMEMLEAK and
>>>> CONFIG_DEBUG_OBJECTS are enabled on both server and client.
>>>>
>>>> When rping runs, run the following command in server:
>>>>
>>>> iptables -I OUTPUT -p udp  --dport 4791 -j DROP
>>>>
>>>> Without this patch, crash will occur.
>>>>
>>>> CC: Srinivas Eeda <srinivas.eeda@oracle.com>
>>>> CC: Junxiao Bi <junxiao.bi@oracle.com>
>>>> Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
>>>> Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
>>> I have no reason to doubt your analysis, but if there are a bunch of
>>> error paths for net_xmit and they all return with your skb still being
>>> valid and holding a reference, and then one oddball that returns with
>>> your skb already gone, that just sounds like a mistake waiting to 
>>> happen
>>> (not to mention a bajillion special cases sprinkled everywhere to deal
>>> with this apparent inconsistency).
>>>
>>> Can we get a netdev@ confirmation on this being the right solution?
>> Yes. I agree with you.
>> After iptables rule "iptables -I OUTPUT -p udp  --dport 4791 -j 
>> DROP", the skb is freed in this function
>>
>> /* Returns 1 if okfn() needs to be executed by the caller,
>>  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
>> int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
>>                  const struct nf_hook_entries *e, unsigned int s)
>> {
>>         unsigned int verdict;
>>         int ret;
>>
>>         for (; s < e->num_hook_entries; s++) {
>>                 verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, 
>> state);
>>                 switch (verdict & NF_VERDICT_MASK) {
>>                 case NF_ACCEPT:
>>                         break;
>>                 case NF_DROP:
>> kfree_skb(skb);                               <----here, skb is freed
>>                         ret = NF_DROP_GETERR(verdict);
>>                         if (ret == 0)
>>                                 ret = -EPERM;
>>                         return ret;
>>                 case NF_QUEUE:
>>                         ret = nf_queue(skb, state, e, s, verdict);
>>                         if (ret == 1)
>>                                 continue;
>>                         return ret;
>>                 default:
>>                         /* Implicit handling for NF_STOLEN, as well 
>> as any other
>>                          * non conventional verdicts.
>>                          */
>>                         return 0;
>>                 }
>>         }
>>
>>         return 1;
>> }
>> EXPORT_SYMBOL(nf_hook_slow);
>>
>> If I am wrong, please correct me.
>>
>> And my test environment is still there, any solution can be verified 
>> in it.
>>
>> Zhu Yanjun
>>>
>>>> ---
>>>>   drivers/infiniband/sw/rxe/rxe_net.c  | 3 +++
>>>>   drivers/infiniband/sw/rxe/rxe_req.c  | 5 +++--
>>>>   drivers/infiniband/sw/rxe/rxe_resp.c | 9 ++++++---
>>>>   3 files changed, 12 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c 
>>>> b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> index 9da6e37..2094434 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> @@ -511,6 +511,9 @@ int rxe_send(struct rxe_pkt_info *pkt, struct 
>>>> sk_buff *skb)
>>>>            if (unlikely(net_xmit_eval(err))) {
>>>>                  pr_debug("error sending packet: %d\n", err);
>>>> +               /* -EPERM means the skb is dropped and freed. */
>>>> +               if (err == -EPERM)
>>>> +                       return -EPERM;
>>>>                  return -EAGAIN;
>>>>          }
>>>>   diff --git a/drivers/infiniband/sw/rxe/rxe_req.c 
>>>> b/drivers/infiniband/sw/rxe/rxe_req.c
>>>> index 7bdaf71..9d2efec 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_req.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_req.c
>>>> @@ -727,8 +727,9 @@ int rxe_requester(void *arg)
>>>>                    rollback_state(wqe, qp, &rollback_wqe, 
>>>> rollback_psn);
>>>>   -               if (ret == -EAGAIN) {
>>>> -                       kfree_skb(skb);
>>>> +               if ((ret == -EAGAIN) || (ret == -EPERM)) {
>>>> +                       if (ret == -EAGAIN)
>>>> +                               kfree_skb(skb);
>>>>                          rxe_run_task(&qp->req.task, 1);
>>>>                          goto exit;
>>>>                  }
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c 
>>>> b/drivers/infiniband/sw/rxe/rxe_resp.c
>>>> index a65c996..6bdf9b2 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_resp.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_resp.c
>>>> @@ -742,7 +742,8 @@ static enum resp_states read_reply(struct 
>>>> rxe_qp *qp,
>>>>          err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
>>>>          if (err) {
>>>>                  pr_err("Failed sending RDMA reply.\n");
>>>> -               kfree_skb(skb);
>>>> +               if (err != -EPERM)
>>>> +                       kfree_skb(skb);
>>>>                  return RESPST_ERR_RNR;
>>>>          }
>>>>   @@ -956,7 +957,8 @@ static int send_ack(struct rxe_qp *qp, struct 
>>>> rxe_pkt_info *pkt,
>>>>          err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
>>>>          if (err) {
>>>>                  pr_err_ratelimited("Failed sending ack\n");
>>>> -               kfree_skb(skb);
>>>> +               if (err != -EPERM)
>>>> +                       kfree_skb(skb);
>>>>          }
>>>>     err1:
>>>> @@ -1141,7 +1143,8 @@ static enum resp_states 
>>>> duplicate_request(struct rxe_qp *qp,
>>>>                          if (rc) {
>>>>                                  pr_err("Failed resending result. 
>>>> This flow is not handled - skb ignored\n");
>>>>                                  rxe_drop_ref(qp);
>>>> -                               kfree_skb(skb_copy);
>>>> +                               if (rc != -EPERM)
>>>> +                                       kfree_skb(skb_copy);
>>>>                                  rc = RESPST_CLEANUP;
>>>>                                  goto out;
>>>>                          }
>>>> -- 
>>>> 2.7.4
>>>>
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [PATCH] can: xilinx: fix xcan_start_xmit()'s return type
From: Michal Simek @ 2018-04-25  7:10 UTC (permalink / raw)
  To: Luc Van Oostenryck, linux-kernel
  Cc: Wolfgang Grandegger, Marc Kleine-Budde, Michal Simek, linux-can,
	netdev, linux-arm-kernel
In-Reply-To: <20180424131614.3357-1-luc.vanoostenryck@gmail.com>

On 24.4.2018 15:16, Luc Van Oostenryck wrote:
> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, but the implementation in this
> driver returns an 'int'.
> 
> Fix this by returning 'netdev_tx_t' in this driver too.
> 
> Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
> ---
>  drivers/net/can/xilinx_can.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
> index 89aec07c2..a19648606 100644
> --- a/drivers/net/can/xilinx_can.c
> +++ b/drivers/net/can/xilinx_can.c
> @@ -386,7 +386,7 @@ static int xcan_do_set_mode(struct net_device *ndev, enum can_mode mode)
>   *
>   * Return: 0 on success and failure value on error
>   */
> -static int xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> +static netdev_tx_t xcan_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  {
>  	struct xcan_priv *priv = netdev_priv(ndev);
>  	struct net_device_stats *stats = &ndev->stats;
> 

Can you please also align kernel-doc format above?
I see that the whole function is already returning proper enum values.

Thanks,
Michal

^ permalink raw reply

* RE: [PATCH  1/8] net: ethernet: stmmac: add adaptation for stm32mp157c.
From: Christophe ROULLIER @ 2018-04-25  7:12 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: mark.rutland@arm.com, devicetree@vger.kernel.org,
	Alexandre TORGUE, netdev@vger.kernel.org,
	mcoquelin.stm32@gmail.com, Peppe CAVALLARO,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <20180424153953.GB2360@lunn.ch>

Hi Andrew,

For moment, I've only tested with PHY RGMII, RMII, MII, GMII, I do not have other kind of PHY interface.
Normally there is no impact in my glue, the value of syscfg register will be the same for RGMII/ID/TXID/RXID.
Do you think that I should add these interfaces in my case ?

	case PHY_INTERFACE_MODE_RGMII:
> +	case PHY_INTERFACE_MODE_RGMII_ID:
> +	case PHY_INTERFACE_MODE_RGMII_RXID:
> +	case PHY_INTERFACE_MODE_RGMII_TXID:
		val = SYSCFG_PMCR_ETH_SEL_RGMII;
		if (dwmac->int_phyclk)
			val |= SYSCFG_PMCR_ETH_CLK_SEL;
		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
		break;

Christophe.

-----Original Message-----
From: Andrew Lunn [mailto:andrew@lunn.ch] 
Sent: mardi 24 avril 2018 17:40
To: Christophe ROULLIER <christophe.roullier@st.com>
Cc: mark.rutland@arm.com; mcoquelin.stm32@gmail.com; Alexandre TORGUE <alexandre.torgue@st.com>; Peppe CAVALLARO <peppe.cavallaro@st.com>; devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org; netdev@vger.kernel.org
Subject: Re: [PATCH 1/8] net: ethernet: stmmac: add adaptation for stm32mp157c.

On Tue, Apr 24, 2018 at 05:01:53PM +0200, Christophe Roullier wrote:

> +	case PHY_INTERFACE_MODE_RGMII:
> +		val = SYSCFG_PMCR_ETH_SEL_RGMII;
> +		if (dwmac->int_phyclk)
> +			val |= SYSCFG_PMCR_ETH_CLK_SEL;
> +		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
> +		break;

Hi Christophe

What about PHY_INTERFACE_MODE_RGMII_ID, PHY_INTERFACE_MODE_RGMII_RXID and PHY_INTERFACE_MODE_RGMII_TXID.

    Andrew

^ permalink raw reply

* Re: Boot failures with net-next after rebase to v4.17.0-rc1
From: Jesper Dangaard Brouer @ 2018-04-25  7:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: netdev@vger.kernel.org, LKML, David Miller,
	Toke Høiland-Jørgensen, Paul E. McKenney, David Ahern,
	brouer, Ingo Molnar
In-Reply-To: <CA+55aFx-V8L602X-EHgrJPNA49v_ZiBZW=+v=tcSQK5QZOXYJw@mail.gmail.com>

On Tue, 24 Apr 2018 13:04:23 -0700
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Tue, Apr 24, 2018 at 12:54 PM, Jesper Dangaard Brouer
> <brouer@redhat.com> wrote:
> > Hi all,
> >
> > I'm experiencing boot failures with net-next git-tree after it got
> > rebased/merged with Linus'es tree at v4.17.0-rc1.  
> 
> I suspect it's the global bit stuff that came in very late in the
> merge window, and had been developed and tested for a while before,
> but showed some problems under some configs.
> 
> The fix is currently in the x86/pti tree in -tip, see:
> 
>    x86/pti: Fix boot problems from Global-bit setting
> 
> and I expect it will percolate upstream soon.
> 
> In the meantime, it would be good to verify that merging that x86/pti
> branch fixes it for you?

Thanks for spotting this so quickly!
I have verified that this DOES solve the issue for me :-)))

If others are hit by this, and cannot wait for Linus to pull the tip
tree, this is the pull command:

 git pull git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/pti


> There is another candidate for boot problems - do you happen to have
> CONFIG_DEFERRED_STRUCT_PAGE_INIT enabled? That can under certain
> circumstances get a percpu setup page fault because memory hadn't been
> initialized sufficiently.

CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set

> The fix there is to move the mm_init() call one step earlier in
> init_main(): start_kernel() (to before trap_init()).
> 
> And if it's neither of the above, I think you'll need to help bisect it.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [PATCH v3] ath9k: dfs: Remove VLA usage
From: Kalle Valo @ 2018-04-25  7:26 UTC (permalink / raw)
  To: Kees Cook
  Cc: Andreas Christoforou, Rosen Penev, Eric Dumazet, Joe Perches,
	linux-wireless, netdev, QCA ath9k Development, kernel-hardening,
	linux-kernel
In-Reply-To: <20180424235752.GA37317@beast>

Kees Cook <keescook@chromium.org> writes:

> In the quest to remove all stack VLA usage from the kernel[1], this
> redefines FFT_NUM_SAMPLES as a #define instead of const int, which still
> triggers gcc's VLA checking pass.
>
> [1] https://lkml.org/lkml/2018/3/7/621
>
> Co-developed-by: Andreas Christoforou <andreaschristofo@gmail.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> v3: replace FFT_NUM_SAMPLES as a #define (Joe)
> ---
>  drivers/net/wireless/ath/ath9k/dfs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath9k/dfs.c
> b/drivers/net/wireless/ath/ath9k/dfs.c
> index 6fee9a464cce..e6e56a925121 100644
> --- a/drivers/net/wireless/ath/ath9k/dfs.c
> +++ b/drivers/net/wireless/ath/ath9k/dfs.c
> @@ -40,8 +40,8 @@ static const int BIN_DELTA_MIN		= 1;
>  static const int BIN_DELTA_MAX		= 10;
>  
>  /* we need at least 3 deltas / 4 samples for a reliable chirp detection */
> -#define NUM_DIFFS 3
> -static const int FFT_NUM_SAMPLES	= (NUM_DIFFS + 1);
> +#define NUM_DIFFS	3
> +#define FFT_NUM_SAMPLES	(NUM_DIFFS + 1)

I have already applied an almost identical patch:

ath9k: dfs: remove accidental use of stack VLA

https://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git/commit/?h=ath-next&id=9c27489a34548913baaaf3b2776e05d4a9389e3e

-- 
Kalle Valo

^ permalink raw reply

* Re: [RFC bpf] bpf, x64: fix JIT emission for dead code
From: Daniel Borkmann @ 2018-04-25  7:48 UTC (permalink / raw)
  To: Gianluca Borello, netdev; +Cc: ast
In-Reply-To: <20180425054216.48961-1-g.borello@gmail.com>

On 04/25/2018 07:42 AM, Gianluca Borello wrote:
> Commit 2a5418a13fcf ("bpf: improve dead code sanitizing") replaced dead
> code with a series of ja-1 instructions, for safety. That made JIT
> compilation much more complex for some BPF programs. One instance of such
> programs is, for example:
[...]
> A possible approach to mitigate this behavior consists into noticing that
> for ja-1 instructions we don't really need to rely on the estimated size
> of the previous and current instructions, we know that a -1 BPF jump
> offset can be safely translated into a 0xEB instruction with a jump offset
> of -2.
> 
> Such fix brings the BPF program in the previous example to complete again
> in ~9 passes.
> 
> Fixes: 2a5418a13fcf ("bpf: improve dead code sanitizing")
> Signed-off-by: Gianluca Borello <g.borello@gmail.com>

Thanks for reporting, Gianluca. The approach your fix takes looks good to me!

^ permalink raw reply

* Re: [PATCH] qtnfmac: fix qtnf_netdev_hard_start_xmit()'s return type
From: Sergey Matyukevich @ 2018-04-25  7:49 UTC (permalink / raw)
  To: Luc Van Oostenryck
  Cc: linux-kernel, Igor Mitsyanko, Avinash Patil, Sergey Matyukevich,
	Kalle Valo, Kees Cook, linux-wireless, netdev
In-Reply-To: <20180424131810.4963-1-luc.vanoostenryck@gmail.com>

Hi Luc and all,

> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, but the implementation in this
> driver returns an 'int'.
> 
> Fix this by returning 'netdev_tx_t' in this driver too.
> 
> Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
> ---
>  drivers/net/wireless/quantenna/qtnfmac/core.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c
> index cf26c15a8..b3bfb4faa 100644
> --- a/drivers/net/wireless/quantenna/qtnfmac/core.c
> +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c
> @@ -76,7 +76,7 @@ static int qtnf_netdev_close(struct net_device *ndev)
> 
>  /* Netdev handler for data transmission.
>   */
> -static int
> +static netdev_tx_t
>  qtnf_netdev_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>  {
>         struct qtnf_vif *vif;

Previous ACK from Igor slipped through the cracks due to
outlook/exchange issues. So here is another one.

Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>

Thanks for the fix !

Regards,
Sergey

^ permalink raw reply

* Re: [PATCH bpf-next] bpf: clear the ip_tunnel_info.
From: Daniel Borkmann @ 2018-04-25  7:54 UTC (permalink / raw)
  To: William Tu, netdev
In-Reply-To: <1524638819-31626-1-git-send-email-u9012063@gmail.com>

On 04/25/2018 08:46 AM, William Tu wrote:
> The percpu metadata_dst might carry the stale ip_tunnel_info
> and cause incorrect behavior.  When mixing tests using ipv4/ipv6
> bpf vxlan and geneve tunnel, the ipv6 tunnel info incorrectly uses
> ipv4's src ip addr as its ipv6 src address, because the previous
> tunnel info does not clean up.  The patch zeros the fields in
> ip_tunnel_info.
> 
> Signed-off-by: William Tu <u9012063@gmail.com>
> Reported-by: Yifeng Sun <pkusunyifeng@gmail.com>

Since this is a fix, I've applied this to bpf, thanks William!

^ permalink raw reply

* Re: [PATCH bpf-next 0/4] nfp: bpf: optimize negative sums
From: Daniel Borkmann @ 2018-04-25  7:58 UTC (permalink / raw)
  To: Jakub Kicinski, alexei.starovoitov; +Cc: oss-drivers, netdev
In-Reply-To: <20180425042239.27869-1-jakub.kicinski@netronome.com>

On 04/25/2018 06:22 AM, Jakub Kicinski wrote:
> Hi!
> 
> This set adds an optimization run to the NFP jit to turn ADD and SUB
> instructions with negative immediate into the opposite operation with
> a positive immediate.  NFP can fit small immediates into the instructions
> but it can't ever fit negative immediates.  Addition of small negative
> immediates is quite common in BPF programs for stack address calculations,
> therefore this optimization gives us non-negligible savings in instruction
> count (up to 4%).

Applied to bpf-next, thanks Jakub!

^ permalink raw reply

* Re: [1/2] net: wireless: zydas: Replace mdelay with msleep in zd1201_probe
From: Kalle Valo @ 2018-04-25  8:15 UTC (permalink / raw)
  To: Jia-Ju Bai
  Cc: davem, stephen, arvind.yadav.cs, johannes.berg, linux-wireless,
	netdev, linux-kernel, Jia-Ju Bai
In-Reply-To: <1523367004-31935-1-git-send-email-baijiaju1990@gmail.com>

Jia-Ju Bai <baijiaju1990@gmail.com> wrote:

> zd1201_probe() is never called in atomic context.
> 
> zd1201_probe() is only set as ".probe" in struct usb_driver.
> 
> Despite never getting called from atomic context, zd1201_probe()
> calls mdelay() to busily wait.
> This is not necessary and can be replaced with msleep() to
> avoid busy waiting.
> 
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
> 
> Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>

I need a review from someone else before I'm willing to take this.

-- 
https://patchwork.kernel.org/patch/10333189/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* [PATCH bpf-next] bpf: Allow bpf_jit_enable = 2 with BPF_JIT_ALWAYS_ON config
From: Leo Yan @ 2018-04-25  8:18 UTC (permalink / raw)
  To: David S. Miller, Alexei Starovoitov, Daniel Borkmann,
	Kirill Tkhai, netdev, linux-kernel
  Cc: Leo Yan

After enabled BPF_JIT_ALWAYS_ON config, bpf_jit_enable always equals to
1; it is impossible to set 'bpf_jit_enable = 2' and the kernel has no
chance to call bpf_jit_dump().

This patch relaxes bpf_jit_enable range to [1..2] when kernel config
BPF_JIT_ALWAYS_ON is enabled so can invoke jit dump.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
---
 net/core/sysctl_net_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e..6a39b22 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -371,7 +371,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
 		.proc_handler	= proc_dointvec_minmax_bpf_enable,
 # ifdef CONFIG_BPF_JIT_ALWAYS_ON
 		.extra1		= &one,
-		.extra2		= &one,
+		.extra2		= &two,
 # else
 		.extra1		= &zero,
 		.extra2		= &two,
-- 
1.9.1

^ permalink raw reply related

* Re: brcmsmac: phy_lcn: remove duplicate code
From: Kalle Valo @ 2018-04-25  8:24 UTC (permalink / raw)
  To: Gustavo A. R. Silva
  Cc: Arend van Spriel, Franky Lin, Hante Meuleman, Chi-Hsien Lin,
	Wright Feng, linux-wireless, brcm80211-dev-list.pdl,
	brcm80211-dev-list, netdev, linux-kernel, Gustavo A. R. Silva
In-Reply-To: <20180405000944.GA22743@embeddedor.com>

"Gustavo A. R. Silva" <gustavo@embeddedor.com> wrote:

> Remove and refactor some code in order to avoid having identical code
> for different branches.
> 
> Notice that this piece of code hasn't been modified since 2011.
> 
> Addresses-Coverity-ID: 1226756 ("Identical code for different branches")
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>

Patch applied to wireless-drivers-next.git, thanks.

863683cfbbfc brcmsmac: phy_lcn: remove duplicate code

-- 
https://patchwork.kernel.org/patch/10323665/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: qtnfmac: pearl: pcie: fix memory leak in qtnf_fw_work_handler
From: Kalle Valo @ 2018-04-25  8:26 UTC (permalink / raw)
  To: Gustavo A. R. Silva
  Cc: Igor Mitsyanko, Avinash Patil, Sergey Matyukevich, linux-wireless,
	netdev, linux-kernel, Gustavo A. R. Silva
In-Reply-To: <20180405154949.GA32223@embeddedor.com>

"Gustavo A. R. Silva" <gustavo@embeddedor.com> wrote:

> In case memory resources for fw were succesfully allocated, release
> them before jumping to fw_load_fail.
> 
> Addresses-Coverity-ID: 1466092 ("Resource leak")
> Fixes: c3b2f7ca4186 ("qtnfmac: implement asynchronous firmware loading")
> Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
> Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>

Patch applied to wireless-drivers-next.git, thanks.

376377004464 qtnfmac: pearl: pcie: fix memory leak in qtnf_fw_work_handler

-- 
https://patchwork.kernel.org/patch/10324855/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

^ permalink raw reply

* Re: [PATCH] net: net_cls: remove a NULL check for css_cls_state
From: Li RongQing @ 2018-04-25  8:35 UTC (permalink / raw)
  To: David Miller; +Cc: lirongqing, netdev
In-Reply-To: <20180420.103725.37816458687606953.davem@davemloft.net>

On 4/20/18, David Miller <davem@davemloft.net> wrote:
> From: Li RongQing <lirongqing@baidu.com>
> Date: Thu, 19 Apr 2018 12:59:21 +0800
>
>> The input of css_cls_state() is impossible to NULL except
>> cgrp_css_online, so simplify it
>>
>> Signed-off-by: Li RongQing <lirongqing@baidu.com>
>
> I don't view this as an improvement.  Just let the helper always check
> NULL and that way there are less situations to audit.
>
css_cls_state maybe return NULL, but nearly no places check the return
value with NULL, this seems unreadable.

net/core/netclassid_cgroup.c:27:        return
css_cls_state(task_css_check(p, net_cls_cgrp_id,
net/core/netclassid_cgroup.c:46:        struct cgroup_cls_state *cs =
css_cls_state(css);
net/core/netclassid_cgroup.c:47:        struct cgroup_cls_state
*parent = css_cls_state(css->parent);
net/core/netclassid_cgroup.c:57:        kfree(css_cls_state(css));
net/core/netclassid_cgroup.c:82:                           (void
*)(unsigned long)css_cls_state(css)->classid);
net/core/netclassid_cgroup.c:89:        return css_cls_state(css)->classid;
net/core/netclassid_cgroup.c:95:        struct cgroup_cls_state *cs =
css_cls_state(css);

> And it's not like this is a critical fast path either.
>

I see css_cls_state will be called  when send packet if
CONFIG_NET_CLS_ACT and CONFIG_NET_EGRESS enabled, the calling stack is
like below:

css_cls_state
  task_cls_state
    task_get_classid
       cls_cgroup_classify
          tcf_classify
            sch_handle_egress
               __dev_queue_xmit
                        CONFIG_NET_CLS_ACT
                         CONFIG_NET_EGRESS

-RongQing






> I'm not applying this, sorry.
>

^ permalink raw reply

* Re: [PATCH net-next 3/4] nfp: flower: support offloading multiple rules with same cookie
From: John Hurley @ 2018-04-25  8:51 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Jakub Kicinski, David Miller, Linux Netdev List, oss-drivers,
	ASAP_Direct_Dev
In-Reply-To: <CAJ3xEMg=E+mq=QGaAtxWBY1NvWG3vcwhNFJ=p7xPLBy7X9nE0Q@mail.gmail.com>

On Wed, Apr 25, 2018 at 7:31 AM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
> On Wed, Apr 25, 2018 at 7:17 AM, Jakub Kicinski
> <jakub.kicinski@netronome.com> wrote:
>> From: John Hurley <john.hurley@netronome.com>
>>
>> When multiple netdevs are attached to a tc offload block and register for
>> callbacks, a rule added to the block will be propogated to all netdevs.
>> Previously these were detected as duplicates (based on cookie) and
>> rejected. Modify the rule nfp lookup function to optionally include an
>> ingress netdev and a host context along with the cookie value when
>> searching for a rule. When a new rule is passed to the driver, the netdev
>> the rule is to be attached to is considered when searching for dublicates.
>
> so if the same rule (cookie) is provided to the driver through multiple ingress
> devices you will not reject it -- what is the use case for that, is it
> block sharing?

Hi Or,
Yes, block sharing is the current use-case.
Simple example for clarity....
Here we want to offload the filter to both ingress devs nfp_0 and nfp_1:

tc qdisc add dev nfp_p0 ingress_block 22 ingress
tc qdisc add dev nfp_p1 ingress_block 22 ingress
tc filter add block 22 protocol ip parent ffff: flower skip_sw
ip_proto tcp action drop

^ permalink raw reply

* Re: [PATCH net-next 3/4] nfp: flower: support offloading multiple rules with same cookie
From: Or Gerlitz @ 2018-04-25  8:56 UTC (permalink / raw)
  To: John Hurley
  Cc: Jakub Kicinski, David Miller, Linux Netdev List, oss-drivers,
	ASAP_Direct_Dev
In-Reply-To: <CAK+XE=mu8qEPt-J9_6KmpJeyrSCKcytRT20ZGh+cHFY=j1vudg@mail.gmail.com>

On Wed, Apr 25, 2018 at 11:51 AM, John Hurley <john.hurley@netronome.com> wrote:
> On Wed, Apr 25, 2018 at 7:31 AM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
>> On Wed, Apr 25, 2018 at 7:17 AM, Jakub Kicinski
>> <jakub.kicinski@netronome.com> wrote:
>>> From: John Hurley <john.hurley@netronome.com>
>>>
>>> When multiple netdevs are attached to a tc offload block and register for
>>> callbacks, a rule added to the block will be propogated to all netdevs.
>>> Previously these were detected as duplicates (based on cookie) and
>>> rejected. Modify the rule nfp lookup function to optionally include an
>>> ingress netdev and a host context along with the cookie value when
>>> searching for a rule. When a new rule is passed to the driver, the netdev
>>> the rule is to be attached to is considered when searching for dublicates.
>>
>> so if the same rule (cookie) is provided to the driver through multiple ingress
>> devices you will not reject it -- what is the use case for that, is it
>> block sharing?
>
> Hi Or,
> Yes, block sharing is the current use-case.
> Simple example for clarity....
> Here we want to offload the filter to both ingress devs nfp_0 and nfp_1:
>
> tc qdisc add dev nfp_p0 ingress_block 22 ingress
> tc qdisc add dev nfp_p1 ingress_block 22 ingress
> tc filter add block 22 protocol ip parent ffff: flower skip_sw
> ip_proto tcp action drop

cool!

Just out of curiosity, do you actually share this HW rule or you duplicate it?

^ permalink raw reply

* Re: [PATCH bpf-next v6 02/10] bpf: add bpf_get_stack helper
From: Daniel Borkmann @ 2018-04-25  9:00 UTC (permalink / raw)
  To: Yonghong Song, ast, netdev, ecree; +Cc: kernel-team
In-Reply-To: <20180423212752.986580-3-yhs@fb.com>

On 04/23/2018 11:27 PM, Yonghong Song wrote:
> Currently, stackmap and bpf_get_stackid helper are provided
> for bpf program to get the stack trace. This approach has
> a limitation though. If two stack traces have the same hash,
> only one will get stored in the stackmap table,
> so some stack traces are missing from user perspective.
> 
> This patch implements a new helper, bpf_get_stack, will
> send stack traces directly to bpf program. The bpf program
> is able to see all stack traces, and then can do in-kernel
> processing or send stack traces to user space through
> shared map or bpf_perf_event_output.
> 
> Acked-by: Alexei Starovoitov <ast@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
[...]
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index d315b39..bf22eca 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -31,6 +31,7 @@
>  #include <linux/rbtree_latch.h>
>  #include <linux/kallsyms.h>
>  #include <linux/rcupdate.h>
> +#include <linux/perf_event.h>
>  
>  #include <asm/unaligned.h>
>  
> @@ -1709,6 +1710,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
>  	aux = container_of(work, struct bpf_prog_aux, work);
>  	if (bpf_prog_is_dev_bound(aux))
>  		bpf_prog_offload_destroy(aux->prog);
> +#ifdef CONFIG_PERF_EVENTS
> +	if (aux->prog->need_callchain_buf)
> +		put_callchain_buffers();
> +#endif
>  	for (i = 0; i < aux->func_cnt; i++)
>  		bpf_jit_free(aux->func[i]);
>  	if (aux->func_cnt) {
[...]
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index fe23dc5a..1ee71f6 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1360,6 +1360,16 @@ static int bpf_prog_load(union bpf_attr *attr)
>  	if (err)
>  		goto free_used_maps;
>  
> +	if (prog->need_callchain_buf) {
> +#ifdef CONFIG_PERF_EVENTS
> +		err = get_callchain_buffers(sysctl_perf_event_max_stack);
> +#else
> +		err = -ENOTSUPP;
> +#endif
> +		if (err)
> +			goto free_used_maps;
> +	}
> +
>  	err = bpf_prog_new_fd(prog);
>  	if (err < 0) {
>  		/* failed to allocate fd.
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 5dd1dcb..aba9425 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2460,6 +2460,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>  	if (err)
>  		return err;
>  
> +	if (func_id == BPF_FUNC_get_stack)
> +		env->prog->need_callchain_buf = true;
> +
>  	if (changes_data)
>  		clear_all_pkt_pointers(env);
>  	return 0;

The above three hunks will cause a use-after-free on the perf callchain buffers.

In check_helper_call() you mark the prog with need_callchain_buf, where the
program hasn't fully completed verification phase yet, meaning some buggy prog
will still bail out.

However, you do the get_callchain_buffers() at a much later phase, so when you
bail out with error from bpf_check(), you take the free_used_maps error path
which calls bpf_prog_free().

The latter calls into bpf_prog_free_deferred() where you do the put_callchain_buffers()
since the need_callchain_buf is marked, but without prior get_callchain_buffers().

^ permalink raw reply

* Re: [PATCH net-next 3/4] nfp: flower: support offloading multiple rules with same cookie
From: John Hurley @ 2018-04-25  9:02 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Jakub Kicinski, David Miller, Linux Netdev List, oss-drivers,
	ASAP_Direct_Dev
In-Reply-To: <CAJ3xEMhv5e9gOx-qELt=YMcpe9Svi=6uKYXs4c9PXDXOR35kow@mail.gmail.com>

On Wed, Apr 25, 2018 at 9:56 AM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
> On Wed, Apr 25, 2018 at 11:51 AM, John Hurley <john.hurley@netronome.com> wrote:
>> On Wed, Apr 25, 2018 at 7:31 AM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
>>> On Wed, Apr 25, 2018 at 7:17 AM, Jakub Kicinski
>>> <jakub.kicinski@netronome.com> wrote:
>>>> From: John Hurley <john.hurley@netronome.com>
>>>>
>>>> When multiple netdevs are attached to a tc offload block and register for
>>>> callbacks, a rule added to the block will be propogated to all netdevs.
>>>> Previously these were detected as duplicates (based on cookie) and
>>>> rejected. Modify the rule nfp lookup function to optionally include an
>>>> ingress netdev and a host context along with the cookie value when
>>>> searching for a rule. When a new rule is passed to the driver, the netdev
>>>> the rule is to be attached to is considered when searching for dublicates.
>>>
>>> so if the same rule (cookie) is provided to the driver through multiple ingress
>>> devices you will not reject it -- what is the use case for that, is it
>>> block sharing?
>>
>> Hi Or,
>> Yes, block sharing is the current use-case.
>> Simple example for clarity....
>> Here we want to offload the filter to both ingress devs nfp_0 and nfp_1:
>>
>> tc qdisc add dev nfp_p0 ingress_block 22 ingress
>> tc qdisc add dev nfp_p1 ingress_block 22 ingress
>> tc filter add block 22 protocol ip parent ffff: flower skip_sw
>> ip_proto tcp action drop
>
> cool!
>
> Just out of curiosity, do you actually share this HW rule or you duplicate it?

It's duplicated.
At HW level the ingress port is part of the match so technically it's
a different rule.

^ permalink raw reply

* Re: [PATCH bpf-next 13/15] xsk: support for Tx
From: Magnus Karlsson @ 2018-04-25  9:11 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Björn Töpel, Karlsson, Magnus, Alexander Duyck,
	Alexander Duyck, John Fastabend, Alexei Starovoitov,
	Jesper Dangaard Brouer, Daniel Borkmann, Michael S. Tsirkin,
	Network Development, michael.lundkvist, Brandeburg, Jesse,
	Singhai, Anjali, Zhang, Qi Z
In-Reply-To: <CAF=yD-JxQsJuJMh4=3An=oE0+R6FJ7f7CnUmQP41EOjEMc7VmQ@mail.gmail.com>

On Tue, Apr 24, 2018 at 6:57 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> On Mon, Apr 23, 2018 at 9:56 AM, Björn Töpel <bjorn.topel@gmail.com> wrote:
>> From: Magnus Karlsson <magnus.karlsson@intel.com>
>>
>> Here, Tx support is added. The user fills the Tx queue with frames to
>> be sent by the kernel, and let's the kernel know using the sendmsg
>> syscall.
>>
>> Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
>
>> +static int xsk_xmit_skb(struct sk_buff *skb)
>
> This is basically packet_direct_xmit. Might be better to just move that
> to net/core/dev.c and use in both AF_PACKET and AF_XDP.

It is packet_direct_xmit with some code removed that is not used :-),
so your suggestion makes a lot of sense. Will implement in this patch
set.

> Also, (eventually) AF_XDP may also want to support the regular path
> through dev_queue_xmit to go through traffic shaping.

Agreed. Will put this on the todo list for a later patch.

>> +{
>> +       struct net_device *dev = skb->dev;
>> +       struct sk_buff *orig_skb = skb;
>> +       struct netdev_queue *txq;
>> +       int ret = NETDEV_TX_BUSY;
>> +       bool again = false;
>> +
>> +       if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
>> +               goto drop;
>> +
>> +       skb = validate_xmit_skb_list(skb, dev, &again);
>> +       if (skb != orig_skb)
>> +               return NET_XMIT_DROP;
>
> Need to free generated segment list on error, see packet_direct_xmit.

I do not use segments in the TX code for reasons of simplicity and the
free is in the calling function. But as I will create a common
packet_direct_xmit according to your suggestion, it will have a
kfree_skb_list() there as in af_packet.c.

>> +
>> +       txq = skb_get_tx_queue(dev, skb);
>> +
>> +       local_bh_disable();
>> +
>> +       HARD_TX_LOCK(dev, txq, smp_processor_id());
>> +       if (!netif_xmit_frozen_or_drv_stopped(txq))
>> +               ret = netdev_start_xmit(skb, dev, txq, false);
>> +       HARD_TX_UNLOCK(dev, txq);
>> +
>> +       local_bh_enable();
>> +
>> +       if (!dev_xmit_complete(ret))
>> +               goto out_err;
>> +
>> +       return ret;
>> +drop:
>> +       atomic_long_inc(&dev->tx_dropped);
>> +out_err:
>> +       return NET_XMIT_DROP;
>> +}
>
>> +static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
>> +                           size_t total_len)
>> +{
>> +       bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
>> +       u32 max_batch = TX_BATCH_SIZE;
>> +       struct xdp_sock *xs = xdp_sk(sk);
>> +       bool sent_frame = false;
>> +       struct xdp_desc desc;
>> +       struct sk_buff *skb;
>> +       int err = 0;
>> +
>> +       if (unlikely(!xs->tx))
>> +               return -ENOBUFS;
>> +       if (need_wait)
>> +               return -EOPNOTSUPP;
>> +
>> +       mutex_lock(&xs->mutex);
>> +
>> +       while (xskq_peek_desc(xs->tx, &desc)) {
>
> It is possible to pass a chain of skbs to validate_xmit_skb_list and
> eventually pass this chain to xsk_xmit_skb, amortizing the cost of
> taking the txq lock. Fine to ignore for this patch set.

Good suggestion. Will put it down on the todo list for a later patch set.

>> +               char *buffer;
>> +               u32 id, len;
>> +
>> +               if (max_batch-- == 0) {
>> +                       err = -EAGAIN;
>> +                       goto out;
>> +               }
>> +
>> +               if (xskq_reserve_id(xs->umem->cq)) {
>> +                       err = -EAGAIN;
>> +                       goto out;
>> +               }
>> +
>> +               len = desc.len;
>> +               if (unlikely(len > xs->dev->mtu)) {
>> +                       err = -EMSGSIZE;
>> +                       goto out;
>> +               }
>> +
>> +               skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
>> +               if (unlikely(!skb)) {
>> +                       err = -EAGAIN;
>> +                       goto out;
>> +               }
>> +
>> +               skb_put(skb, len);
>> +               id = desc.idx;
>> +               buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
>> +               err = skb_store_bits(skb, 0, buffer, len);
>> +               if (unlikely(err))
>> +                       goto out_store;
>
> As xsk_destruct_skb delays notification until consume_skb is called, this
> copy can be avoided by linking the xdp buffer into the skb frags array,
> analogous to tpacket_snd.
>
> You probably don't care much about the copy slow path, and this can be
> implemented later, so also no need to do in this patchset.

Agreed. I will also put this in the todo list for a later patch set.

> static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
> +                                             struct xdp_desc *desc)
> +{
> +       struct xdp_rxtx_ring *ring;
> +
> +       if (q->cons_tail == q->cons_head) {
> +               WRITE_ONCE(q->ring->consumer, q->cons_tail);
> +               q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
> +
> +               /* Order consumer and data */
> +               smp_rmb();
> +
> +               return xskq_validate_desc(q, desc);
> +       }
> +
> +       ring = (struct xdp_rxtx_ring *)q->ring;
> +       *desc = ring->desc[q->cons_tail & q->ring_mask];
> +       return desc;
>
> This only validates descriptors if taking the branch.

Yes, that is because we only want to validate the descriptors once
even if we call this function multiple times for the same entry.

Thanks. Highly appreciated comments Will.

/Magnus

^ permalink raw reply

* Re: [PATCH v3 ipsec-next] xfrm: remove VLA usage in __xfrm6_sort()
From: Stefano Brivio @ 2018-04-25  9:11 UTC (permalink / raw)
  To: Kees Cook
  Cc: Andreas Christoforou, kernel-hardening, Steffen Klassert,
	Herbert Xu, David S. Miller, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	netdev, linux-kernel
In-Reply-To: <20180424234651.GA30225@beast>

Hi Kees,

On Tue, 24 Apr 2018 16:46:51 -0700
Kees Cook <keescook@chromium.org> wrote:

> In the quest to remove all stack VLA usage removed from the kernel[1],
> just use XFRM_MAX_DEPTH as already done for the "class" array. In one
> case, it'll do this loop up to 5, the other caller up to 6.
> 
> [1] https://lkml.org/lkml/2018/3/7/621
> 
> Co-developed-by: Andreas Christoforou <andreaschristofo@gmail.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> v3:
> - adjust Subject and commit log (Steffen)
> - use "= { }" instead of memset() (Stefano)
> - reorder variables (Stefano)
> v2:
> - use XFRM_MAX_DEPTH for "count" array (Steffen and Mathias).
> ---
>  net/ipv6/xfrm6_state.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
> index 16f434791763..eeb44b64ae7f 100644
> --- a/net/ipv6/xfrm6_state.c
> +++ b/net/ipv6/xfrm6_state.c
> @@ -60,9 +60,9 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
>  static int
>  __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
>  {
> -	int i;
> +	int count[XFRM_MAX_DEPTH] = { };
>  	int class[XFRM_MAX_DEPTH];
> -	int count[maxclass];
> +	int i;
>  
>  	memset(count, 0, sizeof(count));

I guess you forgot to remove the memset() here. Just to be clear, I
think this is how it should look like:

--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -60,11 +60,9 @@ xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl,
 static int
 __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
 {
-       int i;
+       int count[XFRM_MAX_DEPTH] = { };
        int class[XFRM_MAX_DEPTH];
-       int count[maxclass];
-
-       memset(count, 0, sizeof(count));
+       int i;
 
        for (i = 0; i < n; i++) {
                int c;

-- 
Stefano

^ permalink raw reply

* Re: [PATCH bpf-next] bpf: Allow bpf_jit_enable = 2 with BPF_JIT_ALWAYS_ON config
From: Daniel Borkmann @ 2018-04-25  9:12 UTC (permalink / raw)
  To: Leo Yan, David S. Miller, Alexei Starovoitov, Kirill Tkhai,
	netdev, linux-kernel
In-Reply-To: <1524644322-9263-1-git-send-email-leo.yan@linaro.org>

On 04/25/2018 10:18 AM, Leo Yan wrote:
> After enabled BPF_JIT_ALWAYS_ON config, bpf_jit_enable always equals to
> 1; it is impossible to set 'bpf_jit_enable = 2' and the kernel has no
> chance to call bpf_jit_dump().
> 
> This patch relaxes bpf_jit_enable range to [1..2] when kernel config
> BPF_JIT_ALWAYS_ON is enabled so can invoke jit dump.
> 
> Signed-off-by: Leo Yan <leo.yan@linaro.org>

Is there a specific reason why you need this here instead of retrieving the
dump from the newer interface available from bpftool (tools/bpf/bpftool/)?
The bpf_jit_enable = 2 is not recommended these days since it dumps into the
kernel log which is often readable from unpriv as well. bpftool makes use
of the BPF_OBJ_GET_INFO_BY_FD interface via bpf syscall to get the JIT dump
instead when bpf_jit_enable is set.

> ---
>  net/core/sysctl_net_core.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
> index b1a2c5e..6a39b22 100644
> --- a/net/core/sysctl_net_core.c
> +++ b/net/core/sysctl_net_core.c
> @@ -371,7 +371,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
>  		.proc_handler	= proc_dointvec_minmax_bpf_enable,
>  # ifdef CONFIG_BPF_JIT_ALWAYS_ON
>  		.extra1		= &one,
> -		.extra2		= &one,
> +		.extra2		= &two,
>  # else
>  		.extra1		= &zero,
>  		.extra2		= &two,
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox