Netdev List
 help / color / mirror / Atom feed
* Re: [RFC PATCH 4/5] PTP: Add flag for non-periodic output
From: Felipe Balbi @ 2019-08-13  7:53 UTC (permalink / raw)
  To: Richard Cochran
  Cc: netdev, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	H . Peter Anvin, x86, linux-kernel, Christopher S . Hall
In-Reply-To: <20190718164121.GB1533@localhost>


Hi,

Richard Cochran <richardcochran@gmail.com> writes:

> On Thu, Jul 18, 2019 at 11:59:10AM +0300, Felipe Balbi wrote:
>> no problem, anything in particular in mind? Just create new versions of
>> all the IOCTLs so we can actually use the reserved fields in the future?
>
> Yes, please!

before I send a new series built on top of this change, I thought I'd
check with you if I'm on the right path. Below you can find my current
take at the new IOCTLs. I maintained the same exact structures so that
there's no maintenance burden. Also introduce a new IOCTL for every
single one of the previously existing ones even though not all of them
needed changes. The reason for that was just to make it easier for
libary authors to update their library by a simple sed script adding '2'
to the end of the IOCTL macro.

Let me know if you want anything to be changed or had a different idea
about any of this. Also, if you prefer that I finish the entire series
before you review, no worries either ;-)

Cheers, patch follows:

From bc2aa511d4c2e2228590fb29604c6c33b56527ad Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Tue, 13 Aug 2019 10:32:35 +0300
Subject: [PATCH] PTP: introduce new versions of IOCTLs

The current version of the IOCTL have a small problem which prevents
us from extending the API by making use of reserved fields. In these
new IOCTLs, we are now making sure that flags and rsv fields are zero
which will allow us to extend the API in the future.

Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/ptp/ptp_chardev.c      | 105 +++++++++++++++++++++++++++++++++
 include/uapi/linux/ptp_clock.h |  12 ++++
 2 files changed, 117 insertions(+)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 18ffe449efdf..94775073527b 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -126,6 +126,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 
 	case PTP_CLOCK_GETCAPS:
+	case PTP_CLOCK_GETCAPS2:
 		memset(&caps, 0, sizeof(caps));
 		caps.max_adj = ptp->info->max_adj;
 		caps.n_alarm = ptp->info->n_alarm;
@@ -153,6 +154,28 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		err = ops->enable(ops, &req, enable);
 		break;
 
+	case PTP_EXTTS_REQUEST2:
+		memset(&req, 0, sizeof(req));
+		if (copy_from_user(&req.extts, (void __user *)arg,
+				   sizeof(req.extts))) {
+			err = -EFAULT;
+			break;
+		}
+		if (req.extts.flags || req.extts.rsv[0]
+				|| req.extts.rsv[1]) {
+			err = -EINVAL;
+			break;
+		}
+			
+		if (req.extts.index >= ops->n_ext_ts) {
+			err = -EINVAL;
+			break;
+		}
+		req.type = PTP_CLK_REQ_EXTTS;
+		enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0;
+		err = ops->enable(ops, &req, enable);
+		break;
+
 	case PTP_PEROUT_REQUEST:
 		if (copy_from_user(&req.perout, (void __user *)arg,
 				   sizeof(req.perout))) {
@@ -168,6 +191,28 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		err = ops->enable(ops, &req, enable);
 		break;
 
+	case PTP_PEROUT_REQUEST2:
+		memset(&req, 0, sizeof(req));
+		if (copy_from_user(&req.perout, (void __user *)arg,
+				   sizeof(req.perout))) {
+			err = -EFAULT;
+			break;
+		}
+		if (req.perout.flags || req.perout.rsv[0]
+				|| req.perout.rsv[1] || req.perout.rsv[2]
+				|| req.perout.rsv[3]) {
+			err = -EINVAL;
+			break;
+		}
+		if (req.perout.index >= ops->n_per_out) {
+			err = -EINVAL;
+			break;
+		}
+		req.type = PTP_CLK_REQ_PEROUT;
+		enable = req.perout.period.sec || req.perout.period.nsec;
+		err = ops->enable(ops, &req, enable);
+		break;
+
 	case PTP_ENABLE_PPS:
 		if (!capable(CAP_SYS_TIME))
 			return -EPERM;
@@ -176,7 +221,17 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		err = ops->enable(ops, &req, enable);
 		break;
 
+	case PTP_ENABLE_PPS2:
+		if (!capable(CAP_SYS_TIME))
+			return -EPERM;
+		memset(&req, 0, sizeof(req));
+		req.type = PTP_CLK_REQ_PPS;
+		enable = arg ? 1 : 0;
+		err = ops->enable(ops, &req, enable);
+		break;
+
 	case PTP_SYS_OFFSET_PRECISE:
+	case PTP_SYS_OFFSET_PRECISE2:
 		if (!ptp->info->getcrosststamp) {
 			err = -EOPNOTSUPP;
 			break;
@@ -201,6 +256,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_SYS_OFFSET_EXTENDED:
+	case PTP_SYS_OFFSET_EXTENDED2:
 		if (!ptp->info->gettimex64) {
 			err = -EOPNOTSUPP;
 			break;
@@ -232,6 +288,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_SYS_OFFSET:
+	case PTP_SYS_OFFSET2:
 		sysoff = memdup_user((void __user *)arg, sizeof(*sysoff));
 		if (IS_ERR(sysoff)) {
 			err = PTR_ERR(sysoff);
@@ -284,6 +341,31 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			err = -EFAULT;
 		break;
 
+	case PTP_PIN_GETFUNC2:
+		memset(&pd, 0, sizeof(pd));
+		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
+			err = -EFAULT;
+			break;
+		}
+		if (pd.rsv[0] || pd.rsv[1] || pd.rsv[2]
+				|| pd.rsv[3] || pd.rsv[4]) {
+			err = -EINVAL;
+			break;
+		}
+		pin_index = pd.index;
+		if (pin_index >= ops->n_pins) {
+			err = -EINVAL;
+			break;
+		}
+		pin_index = array_index_nospec(pin_index, ops->n_pins);
+		if (mutex_lock_interruptible(&ptp->pincfg_mux))
+			return -ERESTARTSYS;
+		pd = ops->pin_config[pin_index];
+		mutex_unlock(&ptp->pincfg_mux);
+		if (!err && copy_to_user((void __user *)arg, &pd, sizeof(pd)))
+			err = -EFAULT;
+		break;
+
 	case PTP_PIN_SETFUNC:
 		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
 			err = -EFAULT;
@@ -301,6 +383,29 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		mutex_unlock(&ptp->pincfg_mux);
 		break;
 
+	case PTP_PIN_SETFUNC2:
+		memset(&pd, 0, sizeof(pd));
+		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
+			err = -EFAULT;
+			break;
+		}
+		if (pd.rsv[0] || pd.rsv[1] || pd.rsv[2]
+				|| pd.rsv[3] || pd.rsv[4]) {
+			err = -EINVAL;
+			break;
+		}
+		pin_index = pd.index;
+		if (pin_index >= ops->n_pins) {
+			err = -EINVAL;
+			break;
+		}
+		pin_index = array_index_nospec(pin_index, ops->n_pins);
+		if (mutex_lock_interruptible(&ptp->pincfg_mux))
+			return -ERESTARTSYS;
+		err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan);
+		mutex_unlock(&ptp->pincfg_mux);
+		break;
+
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index 1bc794ad957a..039cd62ec706 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -149,6 +149,18 @@ struct ptp_pin_desc {
 #define PTP_SYS_OFFSET_EXTENDED \
 	_IOWR(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended)
 
+#define PTP_CLOCK_GETCAPS2  _IOR(PTP_CLK_MAGIC, 10, struct ptp_clock_caps)
+#define PTP_EXTTS_REQUEST2  _IOW(PTP_CLK_MAGIC, 11, struct ptp_extts_request)
+#define PTP_PEROUT_REQUEST2 _IOW(PTP_CLK_MAGIC, 12, struct ptp_perout_request)
+#define PTP_ENABLE_PPS2     _IOW(PTP_CLK_MAGIC, 13, int)
+#define PTP_SYS_OFFSET2     _IOW(PTP_CLK_MAGIC, 14, struct ptp_sys_offset)
+#define PTP_PIN_GETFUNC2    _IOWR(PTP_CLK_MAGIC, 15, struct ptp_pin_desc)
+#define PTP_PIN_SETFUNC2    _IOW(PTP_CLK_MAGIC, 16, struct ptp_pin_desc)
+#define PTP_SYS_OFFSET_PRECISE2 \
+	_IOWR(PTP_CLK_MAGIC, 17, struct ptp_sys_offset_precise)
+#define PTP_SYS_OFFSET_EXTENDED2 \
+	_IOWR(PTP_CLK_MAGIC, 18, struct ptp_sys_offset_extended)
+
 struct ptp_extts_event {
 	struct ptp_clock_time t; /* Time event occured. */
 	unsigned int index;      /* Which channel produced the event. */
-- 
2.22.0



-- 
balbi

^ permalink raw reply related

* Re: [PATCH] dpaa2-ethsw: move the DPAA2 Ethernet Switch driver out of staging
From: Ioana Ciornei @ 2019-08-13  7:21 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: davem@davemloft.net, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	f.fainelli@gmail.com, Ioana Ciocoi Radulescu
In-Reply-To: <20190812135746.GL14290@lunn.ch>

On 8/12/19 4:57 PM, Andrew Lunn wrote:
>> In the DPAA2 architecture MACs are not the only entities that can be
>> connected to a switch port.
>> Below is an exemple of a 4 port DPAA2 switch which is configured to
>> interconnect 2 DPNIs (network interfaces) and 2 DPMACs.
>>
>>
>>    [ethA]     [ethB]     [ethC]     [ethD]     [ethE]     [ethF]
>>       :          :          :          :          :          :
>>       :          :          :          :          :          :
>> [eth drv]  [eth drv]  [                ethsw drv              ]
>>       :          :          :          :          :          :        kernel
>> ========================================================================
>>       :          :          :          :          :          :
>> hardware
>>    [DPNI]      [DPNI]     [============= DPSW =================]
>>       |          |          |          |          |          |
>>       |           ----------           |       [DPMAC]    [DPMAC]
>>        -------------------------------            |          |
>>                                                   |          |
>>                                                 [PHY]      [PHY]
>>
>> You can see it as a hardware-accelerated software bridge where
>> forwarding rules are managed from the host software partition.
> 
> Hi Ioana

Hi Andrew,

> 
> What are the use cases for this?
> 
> Configuration is rather unintuitive. To bridge etha and ethb you need
> to
> 
> ip link add name br0 type bridge
> ip link set ethc master br0
> ip link set ethd master br0
> 
> And once you make ethc and ethd actually send/receive frames, etha and
> ethc become equivalent.
> 
> If this was a PCI device, i could imagine passing etha into a VM as a
> PCI VF. But i don't think it is PCI?

Indeed it's not PCI but we can pass etha to a VM. That's the main use 
case of having DPNIs connected to a switch object.

Our direct assignment solution for DPAA2 is not upstream yet (the case 
with many of our drivers :) ) but the main idea is exposing to the VM a 
fsl-mc bus on which it can find any DPAA2 objects needed to configure a 
network interface (using firmware calls).


> 
> I'm not sure moving etha into a different name space makes much sense
> either. My guess would be, a veth pair with one end connected to the
> software bridge would be more efficient than DMAing the packet out and
> then back in again. >
>       Thanks
> 	Andrew
>


That's really an interesting test to be made since we can make a veth 
pair from DPAA2 objects just connecting two DPNIs back to back. I'll 
make some performance tests and compare against the software veth.


Thanks,
Ioana



^ permalink raw reply

* Re: [patch net-next] netdevsim: implement support for devlink region and snapshots
From: Jiri Pirko @ 2019-08-13  7:16 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, davem, mlxsw
In-Reply-To: <20190812175859.3e0275e3@cakuba.netronome.com>

Tue, Aug 13, 2019 at 02:58:59AM CEST, jakub.kicinski@netronome.com wrote:
>On Mon, 12 Aug 2019 12:16:20 +0200, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Implement dummy region of size 32K and allow user to create snapshots
>> or random data using debugfs file trigger.
>> 
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>
>I'm nacking all the netdevsim patches unless the selftest 
>is posted at the same time :/
>
>You're leaking those features one by one what if you get distracted 
>and the tests never materialize :/
>
>This is all dead code.

Okay, fair enough. Will add it now.


>
>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>> index 08ca59fc189b..e76ea6a3cb60 100644
>> --- a/drivers/net/netdevsim/dev.c
>> +++ b/drivers/net/netdevsim/dev.c
>> @@ -27,6 +27,41 @@
>>  
>>  static struct dentry *nsim_dev_ddir;
>>  
>> +#define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32)
>> +
>> +static ssize_t nsim_dev_take_snapshot_write(struct file *file,
>> +					    const char __user *data,
>> +					    size_t count, loff_t *ppos)
>> +{
>> +	struct nsim_dev *nsim_dev = file->private_data;
>> +	void *dummy_data;
>> +	u32 id;
>> +	int err;
>> +
>> +	dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL);
>> +	if (!dummy_data) {
>> +		pr_err("Failed to allocate memory for region snapshot\n");
>> +		goto out;
>> +	}
>> +
>> +	get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE);
>> +
>> +	id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev));
>> +	err = devlink_region_snapshot_create(nsim_dev->dummy_region,
>> +					     dummy_data, id, kfree);
>> +	if (err)
>> +		pr_err("Failed to create region snapshot\n");
>> +
>> +out:
>> +	return count;
>
>why not return an error?

Okay.


>
>> +}
>> +
>> +static const struct file_operations nsim_dev_take_snapshot_fops = {
>> +	.open = simple_open,
>> +	.write = nsim_dev_take_snapshot_write,
>> +	.llseek = generic_file_llseek,
>> +};
>> +
>>  static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
>>  {
>>  	char dev_ddir_name[16];
>> @@ -44,6 +79,8 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
>>  			   &nsim_dev->max_macs);
>>  	debugfs_create_bool("test1", 0600, nsim_dev->ddir,
>>  			    &nsim_dev->test1);
>> +	debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev,
>> +			    &nsim_dev_take_snapshot_fops);
>>  	return 0;
>>  }
>>  
>> @@ -248,6 +285,26 @@ static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink)
>>  		nsim_dev->test1 = saved_value.vbool;
>>  }
>>  
>> +#define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16
>> +
>> +static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev,
>> +				      struct devlink *devlink)
>> +{
>> +	nsim_dev->dummy_region =
>> +		devlink_region_create(devlink, "dummy",
>> +				      NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX,
>> +				      NSIM_DEV_DUMMY_REGION_SIZE);
>> +	if (IS_ERR(nsim_dev->dummy_region))
>> +		return PTR_ERR(nsim_dev->dummy_region);
>> +
>> +	return 0;
>
>PTR_ERR_OR_ZERO()

Okay.


>
>> +}
>> +
>> +static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev)
>> +{
>> +	devlink_region_destroy(nsim_dev->dummy_region);
>> +}
>> +
>>  static int nsim_dev_reload(struct devlink *devlink,
>>  			   struct netlink_ext_ack *extack)
>>  {
>

^ permalink raw reply

* Re: [RFC PATCH v7] rtl8xxxu: Improve TX performance of RTL8723BU on rtl8xxxu driver
From: Daniel Drake @ 2019-08-13  7:15 UTC (permalink / raw)
  To: Jes Sorensen
  Cc: Kalle Valo, Chris Chiu, David Miller, linux-wireless, netdev,
	Linux Kernel, Linux Upstreaming Team
In-Reply-To: <a3ac212d-b976-fb16-227f-3246a317c4a2@gmail.com>

On Mon, Aug 12, 2019 at 11:21 PM Jes Sorensen <jes.sorensen@gmail.com> wrote:
> On 8/12/19 10:32 AM, Kalle Valo wrote:
> > This is marked as RFC so I'm not sure what's the plan. Should I apply
> > this?
>
> I think it's at a point where it's worth applying - I kinda wish I had
> had time to test it, but I won't be near my stash of USB dongles for a
> little while.

The last remaining reason it was RFC was pending feedback from Jes, to
check that his earlier comments had been adequately addressed. So yes
I think it's good to apply now.

Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH net] netdevsim: Restore per-network namespace accounting for fib entries
From: Jiri Pirko @ 2019-08-13  7:14 UTC (permalink / raw)
  To: David Miller; +Cc: dsahern, netdev, dsahern
In-Reply-To: <20190812.082802.570039169834175740.davem@davemloft.net>

Mon, Aug 12, 2019 at 05:28:02PM CEST, davem@davemloft.net wrote:
>From: Jiri Pirko <jiri@resnulli.us>
>Date: Mon, 12 Aug 2019 10:36:35 +0200
>
>> I understand it with real devices, but dummy testing device, who's
>> purpose is just to test API. Why?
>
>Because you'll break all of the wonderful testing infrastructure
>people like David have created.
 
Are you referring to selftests? There is no such test there :(
But if it would be, could implement the limitations
properly (like using cgroups), change the tests and remove this
code from netdevsim?

^ permalink raw reply

* Re: [patch net-next rfc 3/7] net: rtnetlink: add commands to add and delete alternative ifnames
From: Jiri Pirko @ 2019-08-13  6:56 UTC (permalink / raw)
  To: David Ahern
  Cc: Roopa Prabhu, netdev, David Miller, Jakub Kicinski,
	Stephen Hemminger, dcbw, Michal Kubecek, Andrew Lunn, parav,
	Saeed Mahameed, mlxsw
In-Reply-To: <b43ad33c-ea0c-f441-a550-be0b1d8ca4ef@gmail.com>

Mon, Aug 12, 2019 at 06:01:59PM CEST, dsahern@gmail.com wrote:
>On 8/12/19 2:31 AM, Jiri Pirko wrote:
>> Mon, Aug 12, 2019 at 03:37:26AM CEST, dsahern@gmail.com wrote:
>>> On 8/11/19 7:34 PM, David Ahern wrote:
>>>> On 8/10/19 12:30 AM, Jiri Pirko wrote:
>>>>> Could you please write me an example message of add/remove?
>>>>
>>>> altnames are for existing netdevs, yes? existing netdevs have an id and
>>>> a name - 2 existing references for identifying the existing netdev for
>>>> which an altname will be added. Even using the altname as the main
>>>> 'handle' for a setlink change, I see no reason why the GETLINK api can
>>>> not take an the IFLA_ALT_IFNAME and return the full details of the
>>>> device if the altname is unique.
>>>>
>>>> So, what do the new RTM commands give you that you can not do with
>>>> RTM_*LINK?
>>>>
>>>
>>>
>>> To put this another way, the ALT_NAME is an attribute of an object - a
>>> LINK. It is *not* a separate object which requires its own set of
>>> commands for manipulating.
>> 
>> Okay, again, could you provide example of a message to add/remove
>> altname using existing setlink message? Thanks!
>> 
>
>Examples from your cover letter with updates
>
>$ ip link set dummy0 altname someothername
>$ ip link set dummy0 altname someotherveryveryveryverylongname
>
>$ ip link set dummy0 del altname someothername
>$ ip link set dummy0 del altname someotherveryveryveryverylongname
>
>This syntactic sugar to what is really happening:
>
>RTM_NEWLINK, dummy0, IFLA_ALT_IFNAME
>
>if you are allowing many alt names, then yes, you need a flag to say
>delete this specific one which is covered by Roopa's nested suggestion.

Yeah, so you need and op inside the message. We are on the same page,
thanks.

^ permalink raw reply

* Re: [patch net-next rfc 3/7] net: rtnetlink: add commands to add and delete alternative ifnames
From: Jiri Pirko @ 2019-08-13  6:55 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Ahern, Roopa Prabhu, netdev, David Miller, Jakub Kicinski,
	Stephen Hemminger, dcbw, Michal Kubecek, Andrew Lunn, parav,
	Saeed Mahameed, mlxsw
In-Reply-To: <20190812084039.2fbd1f01@hermes.lan>

Mon, Aug 12, 2019 at 05:40:39PM CEST, stephen@networkplumber.org wrote:
>On Mon, 12 Aug 2019 10:31:39 +0200
>Jiri Pirko <jiri@resnulli.us> wrote:
>
>> Mon, Aug 12, 2019 at 03:37:26AM CEST, dsahern@gmail.com wrote:
>> >On 8/11/19 7:34 PM, David Ahern wrote:  
>> >> On 8/10/19 12:30 AM, Jiri Pirko wrote:  
>> >>> Could you please write me an example message of add/remove?  
>> >> 
>> >> altnames are for existing netdevs, yes? existing netdevs have an id and
>> >> a name - 2 existing references for identifying the existing netdev for
>> >> which an altname will be added. Even using the altname as the main
>> >> 'handle' for a setlink change, I see no reason why the GETLINK api can
>> >> not take an the IFLA_ALT_IFNAME and return the full details of the
>> >> device if the altname is unique.
>> >> 
>> >> So, what do the new RTM commands give you that you can not do with
>> >> RTM_*LINK?
>> >>   
>> >
>> >
>> >To put this another way, the ALT_NAME is an attribute of an object - a
>> >LINK. It is *not* a separate object which requires its own set of
>> >commands for manipulating.  
>> 
>> Okay, again, could you provide example of a message to add/remove
>> altname using existing setlink message? Thanks!
>
>The existing IFALIAS takes an empty name to do deletion.

Ifalias is one and one only. Woudn't work for multiple altnames...

^ permalink raw reply

* Re: [patch net-next rfc 3/7] net: rtnetlink: add commands to add and delete alternative ifnames
From: Jiri Pirko @ 2019-08-13  6:53 UTC (permalink / raw)
  To: David Ahern
  Cc: Jakub Kicinski, Roopa Prabhu, netdev, David Miller,
	Stephen Hemminger, dcbw, Michal Kubecek, Andrew Lunn, parav,
	Saeed Mahameed, mlxsw
In-Reply-To: <52dd953a-d0c7-0086-5faa-18134f033c0b@gmail.com>

Tue, Aug 13, 2019 at 02:29:13AM CEST, dsahern@gmail.com wrote:
>On 8/12/19 3:43 PM, Jakub Kicinski wrote:
>> Is not adding commands better because it's easier to deal with the
>> RTM_NEWLINK notification? I must say it's unclear from the thread why
>> muxing the op through RTM_SETLINK is preferable. IMHO new op is
>> cleaner, do we have precedent for such IFLA_.*_OP-style attributes?
>
>An alternative name for a link is not a primary object; it is only an
>attribute of a link and links are manipulated through RTM_*LINK commands.

So? Still, doing the OP thing inside the message feels wrong, "primary
object" or now. Why can't the "secondary object" (whatever it is) have
separate command? What is the limitation? I'm trying to understand the
reason.

^ permalink raw reply

* Re: [patch net-next rfc 3/7] net: rtnetlink: add commands to add and delete alternative ifnames
From: Jiri Pirko @ 2019-08-13  6:51 UTC (permalink / raw)
  To: Roopa Prabhu
  Cc: David Ahern, netdev, David Miller, Jakub Kicinski,
	Stephen Hemminger, dcbw, Michal Kubecek, Andrew Lunn, parav,
	Saeed Mahameed, mlxsw
In-Reply-To: <CAJieiUhqAvqvxDZk517hWQP4Tx3Hk2PT7Yjq6NSGk+pB_87q8A@mail.gmail.com>

Mon, Aug 12, 2019 at 05:13:39PM CEST, roopa@cumulusnetworks.com wrote:
>On Mon, Aug 12, 2019 at 1:31 AM Jiri Pirko <jiri@resnulli.us> wrote:
>>
>> Mon, Aug 12, 2019 at 03:37:26AM CEST, dsahern@gmail.com wrote:
>> >On 8/11/19 7:34 PM, David Ahern wrote:
>> >> On 8/10/19 12:30 AM, Jiri Pirko wrote:
>> >>> Could you please write me an example message of add/remove?
>> >>
>> >> altnames are for existing netdevs, yes? existing netdevs have an id and
>> >> a name - 2 existing references for identifying the existing netdev for
>> >> which an altname will be added. Even using the altname as the main
>> >> 'handle' for a setlink change, I see no reason why the GETLINK api can
>> >> not take an the IFLA_ALT_IFNAME and return the full details of the
>> >> device if the altname is unique.
>> >>
>> >> So, what do the new RTM commands give you that you can not do with
>> >> RTM_*LINK?
>> >>
>> >
>> >
>> >To put this another way, the ALT_NAME is an attribute of an object - a
>> >LINK. It is *not* a separate object which requires its own set of
>> >commands for manipulating.
>>
>> Okay, again, could you provide example of a message to add/remove
>> altname using existing setlink message? Thanks!
>
>Will the below work ?... just throwing an example for discussion:
>
>make the name list a nested list
>IFLA_ALT_NAMES
>        IFLA_ALT_NAME_OP /* ADD or DEL used with setlink */

This is exacly what I tried to avoid. Providing an OP within a message
is weird. So I wanted to do it rather in the way similar to NEIGH for
example, where you have new/del commands.


>        IFLA_ALT_NAME
>        IFLA_ALT_NAME_LIST
>
>With RTM_NEWLINK  you can specify a list to set and unset
>With RTM_SETLINK  you can specify an individual name with a add or del op
>
>notifications will always be RTM_NEWLINK with the full list.
>
>The nested attribute can be structured differently.
>
>Only thing is i am worried about increasing the size of link dump and
>notification msgs.
>
>What is the limit on the number of names again ?

No limit.

^ permalink raw reply

* Re: [v2, 3/4] ocelot_ace: fix action of trap
From: Allan W . Nielsen @ 2019-08-13  6:30 UTC (permalink / raw)
  To: Yangbo Lu
  Cc: netdev, David S . Miller, Alexandre Belloni,
	Microchip Linux Driver Support
In-Reply-To: <20190813061651.7gtbum4wsaw5dahg@lx-anielsen.microsemi.net>

The 08/13/2019 08:16, Allan W . Nielsen wrote:
> The 08/13/2019 10:52, Yangbo Lu wrote:
> > The trap action should be copying the frame to CPU and
> > dropping it for forwarding, but current setting was just
> > copying frame to CPU.
> > 
> > Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
> > ---
> > Changes for v2:
> > 	- None.
> > ---
> >  drivers/net/ethernet/mscc/ocelot_ace.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c
> > index 91250f3..59ad590 100644
> > --- a/drivers/net/ethernet/mscc/ocelot_ace.c
> > +++ b/drivers/net/ethernet/mscc/ocelot_ace.c
> > @@ -317,9 +317,9 @@ static void is2_action_set(struct vcap_data *data,
> >  		break;
> >  	case OCELOT_ACL_ACTION_TRAP:
> >  		VCAP_ACT_SET(PORT_MASK, 0x0);
> > -		VCAP_ACT_SET(MASK_MODE, 0x0);
> > -		VCAP_ACT_SET(POLICE_ENA, 0x0);
> > -		VCAP_ACT_SET(POLICE_IDX, 0x0);
> > +		VCAP_ACT_SET(MASK_MODE, 0x1);
> > +		VCAP_ACT_SET(POLICE_ENA, 0x1);
> > +		VCAP_ACT_SET(POLICE_IDX, OCELOT_POLICER_DISCARD);
> >  		VCAP_ACT_SET(CPU_QU_NUM, 0x0);
> >  		VCAP_ACT_SET(CPU_COPY_ENA, 0x1);
> >  		break;
> 
> This is still wrong, please see the comments provided the first time you
> submitted this.
> 
> /Allan

I believe this will make it work - but I have not tested it:

 	case OCELOT_ACL_ACTION_TRAP:
 		VCAP_ACT_SET(PORT_MASK, 0x0);
-		VCAP_ACT_SET(MASK_MODE, 0x0);
+		VCAP_ACT_SET(MASK_MODE, 0x1);
 		VCAP_ACT_SET(CPU_QU_NUM, 0x0);
 		VCAP_ACT_SET(CPU_COPY_ENA, 0x1);
 		break;

-- 
/Allan

^ permalink raw reply

* Re: [v2, 4/4] ocelot: add VCAP IS2 rule to trap PTP Ethernet frames
From: Allan W . Nielsen @ 2019-08-13  6:25 UTC (permalink / raw)
  To: Yangbo Lu
  Cc: netdev, David S . Miller, Alexandre Belloni,
	Microchip Linux Driver Support
In-Reply-To: <20190813025214.18601-5-yangbo.lu@nxp.com>

The 08/13/2019 10:52, Yangbo Lu wrote:
> All the PTP messages over Ethernet have etype 0x88f7 on them.
> Use etype as the key to trap PTP messages.
> 
> Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
> ---
> Changes for v2:
> 	- Added this patch.
> ---
>  drivers/net/ethernet/mscc/ocelot.c | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
> index 6932e61..40f4e0d 100644
> --- a/drivers/net/ethernet/mscc/ocelot.c
> +++ b/drivers/net/ethernet/mscc/ocelot.c
> @@ -1681,6 +1681,33 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
>  }
>  EXPORT_SYMBOL(ocelot_probe_port);
>  
> +static int ocelot_ace_add_ptp_rule(struct ocelot *ocelot)
> +{
> +	struct ocelot_ace_rule *rule;
> +
> +	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
> +	if (!rule)
> +		return -ENOMEM;
> +
> +	/* Entry for PTP over Ethernet (etype 0x88f7)
> +	 * Action: trap to CPU port
> +	 */
> +	rule->ocelot = ocelot;
> +	rule->prio = 1;
> +	rule->type = OCELOT_ACE_TYPE_ETYPE;
> +	/* Available on all ingress port except CPU port */
> +	rule->ingress_port = ~BIT(ocelot->num_phys_ports);
> +	rule->dmac_mc = OCELOT_VCAP_BIT_1;
> +	rule->frame.etype.etype.value[0] = 0x88;
> +	rule->frame.etype.etype.value[1] = 0xf7;
> +	rule->frame.etype.etype.mask[0] = 0xff;
> +	rule->frame.etype.etype.mask[1] = 0xff;
> +	rule->action = OCELOT_ACL_ACTION_TRAP;
> +
> +	ocelot_ace_rule_offload_add(rule);
> +	return 0;
> +}
> +
>  int ocelot_init(struct ocelot *ocelot)
>  {
>  	u32 port;
> @@ -1708,6 +1735,7 @@ int ocelot_init(struct ocelot *ocelot)
>  	ocelot_mact_init(ocelot);
>  	ocelot_vlan_init(ocelot);
>  	ocelot_ace_init(ocelot);
> +	ocelot_ace_add_ptp_rule(ocelot);
>  
>  	for (port = 0; port < ocelot->num_phys_ports; port++) {
>  		/* Clear all counters (5 groups) */
This seems really wrong to me, and much too hard-coded...

What if I want to forward the PTP frames to be forwarded like a normal non-aware
PTP switch?

What if do not want this on all ports?

If you do not have an application behind this implementing a boundary or
transparent clock, then you are breaking PTP on the network.

/Allan

^ permalink raw reply

* Re: [v2, 3/4] ocelot_ace: fix action of trap
From: Allan W . Nielsen @ 2019-08-13  6:16 UTC (permalink / raw)
  To: Yangbo Lu
  Cc: netdev, David S . Miller, Alexandre Belloni,
	Microchip Linux Driver Support
In-Reply-To: <20190813025214.18601-4-yangbo.lu@nxp.com>

The 08/13/2019 10:52, Yangbo Lu wrote:
> The trap action should be copying the frame to CPU and
> dropping it for forwarding, but current setting was just
> copying frame to CPU.
> 
> Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
> ---
> Changes for v2:
> 	- None.
> ---
>  drivers/net/ethernet/mscc/ocelot_ace.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c
> index 91250f3..59ad590 100644
> --- a/drivers/net/ethernet/mscc/ocelot_ace.c
> +++ b/drivers/net/ethernet/mscc/ocelot_ace.c
> @@ -317,9 +317,9 @@ static void is2_action_set(struct vcap_data *data,
>  		break;
>  	case OCELOT_ACL_ACTION_TRAP:
>  		VCAP_ACT_SET(PORT_MASK, 0x0);
> -		VCAP_ACT_SET(MASK_MODE, 0x0);
> -		VCAP_ACT_SET(POLICE_ENA, 0x0);
> -		VCAP_ACT_SET(POLICE_IDX, 0x0);
> +		VCAP_ACT_SET(MASK_MODE, 0x1);
> +		VCAP_ACT_SET(POLICE_ENA, 0x1);
> +		VCAP_ACT_SET(POLICE_IDX, OCELOT_POLICER_DISCARD);
>  		VCAP_ACT_SET(CPU_QU_NUM, 0x0);
>  		VCAP_ACT_SET(CPU_COPY_ENA, 0x1);
>  		break;

This is still wrong, please see the comments provided the first time you
submitted this.

/Allan

^ permalink raw reply

* Re: [PATCH 3/3] ocelot_ace: fix action of trap
From: Allan W. Nielsen @ 2019-08-13  6:16 UTC (permalink / raw)
  To: Y.b. Lu
  Cc: netdev@vger.kernel.org, David S . Miller, Alexandre Belloni,
	Microchip Linux Driver Support
In-Reply-To: <VI1PR0401MB223773EB5884D65890BD68C0F8D20@VI1PR0401MB2237.eurprd04.prod.outlook.com>

The 08/13/2019 02:12, Y.b. Lu wrote:
> > -----Original Message-----
> > From: Allan W. Nielsen <allan.nielsen@microchip.com>
> > Sent: Monday, August 12, 2019 8:32 PM
> > To: Y.b. Lu <yangbo.lu@nxp.com>
> > Cc: netdev@vger.kernel.org; David S . Miller <davem@davemloft.net>;
> > Alexandre Belloni <alexandre.belloni@bootlin.com>; Microchip Linux Driver
> > Support <UNGLinuxDriver@microchip.com>
> > Subject: Re: [PATCH 3/3] ocelot_ace: fix action of trap
> > 
> > The 08/12/2019 18:48, Yangbo Lu wrote:
> > > The trap action should be copying the frame to CPU and dropping it for
> > > forwarding, but current setting was just copying frame to CPU.
> > 
> > Are there any actions which do a "copy-to-cpu" and still forward the frame in
> > HW?
> 
> [Y.b. Lu] We're using Felix switch whose code hadn't been accepted by upstream.
> https://patchwork.ozlabs.org/project/netdev/list/?series=115399&state=*
> 
> I'd like to trap all IEEE 1588 PTP Ethernet frames to CPU through etype 0x88f7.
> When I used current TRAP option, I found the frames were not only copied to CPU, but also forwarded to other ports.
> So I just made the TRAP option same with DROP option except enabling CPU_COPY_ENA in the patch.
This is still wrong to do - and it will not work for Ocelot (and I doubt it will
work for your Felix target).

The policer setting in the drop action ensure that the frame is dropped even if
other pipe-line steps in the switch has set the copy-to-cpu flag.

I think you can fix this patch my just clearing the port mask, and not set the
policer.

/Allan


^ permalink raw reply

* [PATCH] MAINTAINERS: PHY LIBRARY: Remove sysfs-bus-mdio record
From: Denis Efremov @ 2019-08-13  6:14 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denis Efremov, joe, Florian Fainelli, David S . Miller,
	Andrew Lunn, Heiner Kallweit, netdev
In-Reply-To: <7cd8d12f59bcacd18a78f599b46dac555f7f16c0.camel@perches.com>

Update MAINTAINERS to reflect that sysfs-bus-mdio documentation
was removed.

Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: netdev@vger.kernel.org
Fixes: a6cd0d2d493a ("Documentation: net-sysfs: Remove duplicate PHY device documentation")
Signed-off-by: Denis Efremov <efremov@linux.com>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2776e0797ae3..ab870920ea82 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6065,7 +6065,6 @@ M:	Florian Fainelli <f.fainelli@gmail.com>
 M:	Heiner Kallweit <hkallweit1@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	Documentation/ABI/testing/sysfs-bus-mdio
 F:	Documentation/devicetree/bindings/net/ethernet-phy.yaml
 F:	Documentation/devicetree/bindings/net/mdio*
 F:	Documentation/networking/phy.rst
-- 
2.21.0


^ permalink raw reply related

* Re: [patch net-next v3 1/3] net: devlink: allow to change namespaces
From: Jiri Pirko @ 2019-08-13  6:13 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: netdev, davem, stephen, dsahern, mlxsw
In-Reply-To: <20190812182122.5bc71d30@cakuba.netronome.com>

Tue, Aug 13, 2019 at 03:21:22AM CEST, jakub.kicinski@netronome.com wrote:
>On Mon, 12 Aug 2019 15:47:49 +0200, Jiri Pirko wrote:
>> @@ -6953,9 +7089,33 @@ int devlink_compat_switch_id_get(struct net_device *dev,
>>  	return 0;
>>  }
>>  
>> +static void __net_exit devlink_pernet_exit(struct net *net)
>> +{
>> +	struct devlink *devlink;
>> +
>> +	mutex_lock(&devlink_mutex);
>> +	list_for_each_entry(devlink, &devlink_list, list)
>> +		if (net_eq(devlink_net(devlink), net))
>> +			devlink_netns_change(devlink, &init_net);
>> +	mutex_unlock(&devlink_mutex);
>> +}
>
>Just to be sure - this will not cause any locking issues?
>Usually the locking order goes devlink -> rtnl

rtnl is not taken. Do I miss something?

^ permalink raw reply

* Re: [patch net-next v3 0/3] net: devlink: Finish network namespace support
From: Jiri Pirko @ 2019-08-13  6:09 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, davem, jakub.kicinski, stephen, mlxsw
In-Reply-To: <bfb879be-a232-0ef1-1c40-3a9c8bcba8f8@gmail.com>

Tue, Aug 13, 2019 at 02:24:41AM CEST, dsahern@gmail.com wrote:
>On 8/12/19 7:47 AM, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Devlink from the beginning counts with network namespaces, but the
>> instances has been fixed to init_net. The first patch allows user
>> to move existing devlink instances into namespaces:
>> 
>> $ devlink dev
>> netdevsim/netdevsim1
>> $ ip netns add ns1
>> $ devlink dev set netdevsim/netdevsim1 netns ns1
>> $ devlink -N ns1 dev
>> netdevsim/netdevsim1
>> 
>> The last patch allows user to create new netdevsim instance directly
>> inside network namespace of a caller.
>
>The namespace behavior seems odd to me. If devlink instance is created
>in a namespace and never moved, it should die with the namespace. With
>this patch set, devlink instance and its ports are moved to init_net on
>namespace delete.
>
>The fib controller needs an update to return the namespace of the
>devlink instance (on top of the patch applied to net):

I have really no clue what your fib abomination should behave. The
devlink controls device config, that's it. No relation to netns it is
in.


>
>diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
>index 89795071f085..fa7e876f2d3b 100644
>--- a/drivers/net/netdevsim/dev.c
>+++ b/drivers/net/netdevsim/dev.c
>@@ -114,11 +114,6 @@ static void nsim_dev_port_debugfs_exit(struct
>nsim_dev_port *nsim_dev_port)
>        debugfs_remove_recursive(nsim_dev_port->ddir);
> }
>
>-static struct net *nsim_devlink_net(struct devlink *devlink)
>-{
>-       return &init_net;
>-}
>-
> static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv)
> {
>        struct net *net = priv;
>@@ -154,7 +149,7 @@ static int nsim_dev_resources_register(struct
>devlink *devlink)
>                .size_granularity = 1,
>                .unit = DEVLINK_RESOURCE_UNIT_ENTRY
>        };
>-       struct net *net = nsim_devlink_net(devlink);
>+       struct net *net = devlink_net(devlink);
>        int err;
>        u64 n;
>
>@@ -309,7 +304,7 @@ static int nsim_dev_reload(struct devlink *devlink,
>                NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
>                NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES
>        };
>-       struct net *net = nsim_devlink_net(devlink);
>+       struct net *net = devlink_net(devlink);
>        int i;
>
>        for (i = 0; i < ARRAY_SIZE(res_ids); ++i) {
>

^ permalink raw reply

* [PATCH net-next] net: phy: realtek: add NBase-T PHY auto-detection
From: Heiner Kallweit @ 2019-08-13  6:09 UTC (permalink / raw)
  To: Andrew Lunn, Florian Fainelli, David Miller; +Cc: netdev@vger.kernel.org

Realtek provided information on how the new NIC-integrated PHY's
expose whether they support 2.5G/5G/10G. This allows to automatically
differentiate 1Gbps and 2.5Gbps PHY's, and therefore allows to
remove the fake PHY ID mechanism for RTL8125.
So far RTL8125 supports 2.5Gbps only, but register layout for faster
modes has been defined already, so let's use this information to be
future-proof.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
---
 drivers/net/phy/realtek.c | 48 +++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 5b466e80d..c49a1fb13 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -39,11 +39,16 @@
 #define RTL8366RB_POWER_SAVE			0x15
 #define RTL8366RB_POWER_SAVE_ON			BIT(12)
 
+#define RTL_SUPPORTS_5000FULL			BIT(14)
+#define RTL_SUPPORTS_2500FULL			BIT(13)
+#define RTL_SUPPORTS_10000FULL			BIT(0)
 #define RTL_ADV_2500FULL			BIT(7)
 #define RTL_LPADV_10000FULL			BIT(11)
 #define RTL_LPADV_5000FULL			BIT(6)
 #define RTL_LPADV_2500FULL			BIT(5)
 
+#define RTL_GENERIC_PHYID			0x001cc800
+
 MODULE_DESCRIPTION("Realtek PHY driver");
 MODULE_AUTHOR("Johnson Leung");
 MODULE_LICENSE("GPL");
@@ -263,8 +268,18 @@ static int rtl8366rb_config_init(struct phy_device *phydev)
 
 static int rtl8125_get_features(struct phy_device *phydev)
 {
-	linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
-			 phydev->supported);
+	int val;
+
+	val = phy_read_paged(phydev, 0xa61, 0x13);
+	if (val < 0)
+		return val;
+
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			 phydev->supported, val & RTL_SUPPORTS_2500FULL);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+			 phydev->supported, val & RTL_SUPPORTS_5000FULL);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			 phydev->supported, val & RTL_SUPPORTS_10000FULL);
 
 	return genphy_read_abilities(phydev);
 }
@@ -308,6 +323,29 @@ static int rtl8125_read_status(struct phy_device *phydev)
 	return genphy_read_status(phydev);
 }
 
+static bool rtlgen_supports_2_5gbps(struct phy_device *phydev)
+{
+	int val;
+
+	phy_write(phydev, RTL821x_PAGE_SELECT, 0xa61);
+	val = phy_read(phydev, 0x13);
+	phy_write(phydev, RTL821x_PAGE_SELECT, 0);
+
+	return val >= 0 && val & RTL_SUPPORTS_2500FULL;
+}
+
+static int rtlgen_match_phy_device(struct phy_device *phydev)
+{
+	return phydev->phy_id == RTL_GENERIC_PHYID &&
+	       !rtlgen_supports_2_5gbps(phydev);
+}
+
+static int rtl8125_match_phy_device(struct phy_device *phydev)
+{
+	return phydev->phy_id == RTL_GENERIC_PHYID &&
+	       rtlgen_supports_2_5gbps(phydev);
+}
+
 static struct phy_driver realtek_drvs[] = {
 	{
 		PHY_ID_MATCH_EXACT(0x00008201),
@@ -378,15 +416,15 @@ static struct phy_driver realtek_drvs[] = {
 		.read_page	= rtl821x_read_page,
 		.write_page	= rtl821x_write_page,
 	}, {
-		PHY_ID_MATCH_EXACT(0x001cc800),
-		.name		= "Generic Realtek PHY",
+		.name		= "Generic FE-GE Realtek PHY",
+		.match_phy_device = rtlgen_match_phy_device,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.read_page	= rtl821x_read_page,
 		.write_page	= rtl821x_write_page,
 	}, {
-		PHY_ID_MATCH_EXACT(0x001cca50),
 		.name		= "RTL8125 2.5Gbps internal",
+		.match_phy_device = rtl8125_match_phy_device,
 		.get_features	= rtl8125_get_features,
 		.config_aneg	= rtl8125_config_aneg,
 		.read_status	= rtl8125_read_status,
-- 
2.22.0


^ permalink raw reply related

* [PATCH] MAINTAINERS: r8169: Update path to the driver
From: Denis Efremov @ 2019-08-13  6:07 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denis Efremov, joe, Heiner Kallweit, nic_swsd, David S . Miller,
	netdev
In-Reply-To: <7cd8d12f59bcacd18a78f599b46dac555f7f16c0.camel@perches.com>

Update MAINTAINERS record to reflect the filename change
from r8169.c to r8169_main.c

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: nic_swsd@realtek.com
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Fixes: 25e992a4603c ("r8169: rename r8169.c to r8169_main.c")
Signed-off-by: Denis Efremov <efremov@linux.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 99a7392ad6bc..25eb86f3261e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -183,7 +183,7 @@ M:	Realtek linux nic maintainers <nic_swsd@realtek.com>
 M:	Heiner Kallweit <hkallweit1@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	drivers/net/ethernet/realtek/r8169.c
+F:	drivers/net/ethernet/realtek/r8169_main.c
 
 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-- 
2.21.0


^ permalink raw reply related

* Re: [patch net-next v3 0/3] net: devlink: Finish network namespace support
From: Jiri Pirko @ 2019-08-13  6:07 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: David Ahern, netdev, davem, stephen, mlxsw
In-Reply-To: <20190812181100.1cfd8b9d@cakuba.netronome.com>

Tue, Aug 13, 2019 at 03:11:00AM CEST, jakub.kicinski@netronome.com wrote:
>On Mon, 12 Aug 2019 18:24:41 -0600, David Ahern wrote:
>> On 8/12/19 7:47 AM, Jiri Pirko wrote:
>> > From: Jiri Pirko <jiri@mellanox.com>
>> > 
>> > Devlink from the beginning counts with network namespaces, but the
>> > instances has been fixed to init_net. The first patch allows user
>> > to move existing devlink instances into namespaces:
>> > 
>> > $ devlink dev
>> > netdevsim/netdevsim1
>> > $ ip netns add ns1
>> > $ devlink dev set netdevsim/netdevsim1 netns ns1
>> > $ devlink -N ns1 dev
>> > netdevsim/netdevsim1
>> > 
>> > The last patch allows user to create new netdevsim instance directly
>> > inside network namespace of a caller.  
>> 
>> The namespace behavior seems odd to me. If devlink instance is created
>> in a namespace and never moved, it should die with the namespace. With
>> this patch set, devlink instance and its ports are moved to init_net on
>> namespace delete.
>
>If the devlink instance just disappeared - that'd be a very very strange
>thing. Only software objects disappear with the namespace. 
>Netdevices without ->rtnl_link_ops go back to init_net.

Agreed. It makes sense to be moved to init_net.

^ permalink raw reply

* Re: [PATCH v4 13/14] net: phy: adin: add ethtool get_stats support
From: Ardelean, Alexandru @ 2019-08-13  6:07 UTC (permalink / raw)
  To: andrew@lunn.ch
  Cc: davem@davemloft.net, hkallweit1@gmail.com,
	devicetree@vger.kernel.org, mark.rutland@arm.com,
	linux-kernel@vger.kernel.org, f.fainelli@gmail.com,
	netdev@vger.kernel.org, robh+dt@kernel.org
In-Reply-To: <20190812142637.GR14290@lunn.ch>

On Mon, 2019-08-12 at 16:26 +0200, Andrew Lunn wrote:
> [External]
> 
> > +/* Named just like in the datasheet */
> > +static struct adin_hw_stat adin_hw_stats[] = {
> > +	{ "RxErrCnt",		0x0014,	},
> > +	{ "MseA",		0x8402,	0,	true },
> > +	{ "MseB",		0x8403,	0,	true },
> > +	{ "MseC",		0x8404,	0,	true },
> > +	{ "MseD",		0x8405,	0,	true },
> > +	{ "FcFrmCnt",		0x940A, 0x940B }, /* FcFrmCntH + FcFrmCntL */
> > +	{ "FcLenErrCnt",	0x940C },
> > +	{ "FcAlgnErrCnt",	0x940D },
> > +	{ "FcSymbErrCnt",	0x940E },
> > +	{ "FcOszCnt",		0x940F },
> > +	{ "FcUszCnt",		0x9410 },
> > +	{ "FcOddCnt",		0x9411 },
> > +	{ "FcOddPreCnt",	0x9412 },
> > +	{ "FcDribbleBitsCnt",	0x9413 },
> > +	{ "FcFalseCarrierCnt",	0x9414 },
> 
> I see some value in using the names from the datasheet. However, i
> found it quite hard to now what these counters represent given there
> current name. What is Mse? How does MseA differ from MseB? You have up
> to ETH_GSTRING_LEN characters, so maybe longer names would be better?

I'll expand the names.

Regarding MseA/B/C/D, I'll admit I am also a bit fuzzy about them.
They describe link-quality settings, and the values have some meaning to the chip guys [when I talked witht them about
it], but I did not insist on getting a deep explanation about them [and what their values represent].
I guess for this PHY driver, we could drop them, and if they're needed they can be accessed via phytool, and if they're
really needed, I can try to add them later with more complete detail [about them and their use/value].

I included them here, because they are listed in the error-counter register "group" [in the datasheet], and I inertially
added them.

> 
>    Andrew

^ permalink raw reply

* [PATCH] MAINTAINERS: net_failover: Fix typo in a filepath
From: Denis Efremov @ 2019-08-13  6:05 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denis Efremov, joe, Sridhar Samudrala, David S . Miller, netdev
In-Reply-To: <20190325212732.27253-1-joe@perches.com>

Replace "driver" with "drivers" in the filepath to net_failover.c

Cc: Sridhar Samudrala <sridhar.samudrala@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Fixes: cfc80d9a1163 ("net: Introduce net_failover driver")
Signed-off-by: Denis Efremov <efremov@linux.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 51ab502485ac..c2117e5f4ff8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11071,7 +11071,7 @@ NET_FAILOVER MODULE
 M:	Sridhar Samudrala <sridhar.samudrala@intel.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	driver/net/net_failover.c
+F:	drivers/net/net_failover.c
 F:	include/net/net_failover.h
 F:	Documentation/networking/net_failover.rst
 
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH v4 13/14] net: phy: adin: add ethtool get_stats support
From: Ardelean, Alexandru @ 2019-08-13  5:48 UTC (permalink / raw)
  To: andrew@lunn.ch
  Cc: davem@davemloft.net, hkallweit1@gmail.com,
	devicetree@vger.kernel.org, mark.rutland@arm.com,
	linux-kernel@vger.kernel.org, f.fainelli@gmail.com,
	netdev@vger.kernel.org, robh+dt@kernel.org
In-Reply-To: <20190812143315.GS14290@lunn.ch>

On Mon, 2019-08-12 at 16:33 +0200, Andrew Lunn wrote:
> [External]
> 
> > +static int adin_read_mmd_stat_regs(struct phy_device *phydev,
> > +				   struct adin_hw_stat *stat,
> > +				   u32 *val)
> > +{
> > +	int ret;
> > +
> > +	ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, stat->reg1);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	*val = (ret & 0xffff);
> > +
> > +	if (stat->reg2 == 0)
> > +		return 0;
> > +
> > +	ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, stat->reg2);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	*val <<= 16;
> > +	*val |= (ret & 0xffff);
> > +
> > +	return 0;
> > +}
> 
> It still looks like you have not dealt with overflow from the LSB into
> the MSB between the two reads.

Apologies for forgetting to address this.
I did not intentionally leave it out; this item got lost after V1 [which had the most remarks].
Changelog V1 -> V2 was quite bulky, and I did not look at V1 remarks after I finished V2.

Thanks for snippet.

> 
> 	do {
> 		hi1 = phy_read_mmd(phydev, MDIO_MMD_VEND1, stat->reg2);
> 		if (hi1 < 0)
> 			return hi1;
> 		
> 		low = phy_read_mmd(phydev, MDIO_MMD_VEND1, stat->reg1);
> 		if (low < 0)
> 			return low;
> 
> 		hi2 = phy_read_mmd(phydev, MDIO_MMD_VEND1, stat->reg2);
> 		if (hi2 < 0)
> 			return hi1;
> 	} while (hi1 != hi2)
> 
> 	return low | (hi << 16);
> 
> 	Andrew

^ permalink raw reply

* Re: [PATCH v4 10/14] net: phy: adin: implement PHY subsystem software reset
From: Ardelean, Alexandru @ 2019-08-13  5:42 UTC (permalink / raw)
  To: andrew@lunn.ch
  Cc: davem@davemloft.net, hkallweit1@gmail.com,
	devicetree@vger.kernel.org, mark.rutland@arm.com,
	linux-kernel@vger.kernel.org, f.fainelli@gmail.com,
	netdev@vger.kernel.org, robh+dt@kernel.org
In-Reply-To: <20190812141954.GP14290@lunn.ch>

On Mon, 2019-08-12 at 16:19 +0200, Andrew Lunn wrote:
> [External]
> 
> > +static int adin_reset(struct phy_device *phydev)
> > +{
> > +	/* If there is a reset GPIO just exit */
> > +	if (!IS_ERR_OR_NULL(phydev->mdio.reset_gpio))
> > +		return 0;
> 
> I'm not so happy with this.
> 
> First off, there are two possible GPIO configurations. The GPIO can be
> applied to all PHYs on the MDIO bus. That GPIO is used when the bus is
> probed. There can also be a per PHY GPIO, which is what you are
> looking at here.
> 
> The idea of putting the GPIO handling in the core is that PHYs don't
> need to worry about it. How much of a difference does it make if the
> PHY is both reset via GPIO and then again in software? How slow is the
> software reset? Maybe just unconditionally do the reset, if it is not
> too slow.
> 

Ack.
Will do it unconditionally.
The reset is not too slow.


> > +
> > +	/* Reset PHY core regs & subsystem regs */
> > +	return adin_subsytem_soft_reset(phydev);
> > +}
> > +
> > +static int adin_probe(struct phy_device *phydev)
> > +{
> > +	return adin_reset(phydev);
> > +}
> 
> Why did you decide to do this as part of probe, and not use the
> .soft_reset member of phy_driver?

Hmmm.
This is a left-over from when I had the GPIO handling in this PHY driver.
It's also a habit picked up from writing IIO drivers, where there is no {soft_}reset hook.
Typically, the reset is done in the probe.

> 
> > +
> >  static struct phy_driver adin_driver[] = {
> >  	{
> >  		PHY_ID_MATCH_MODEL(PHY_ID_ADIN1200),
> >  		.name		= "ADIN1200",
> >  		.config_init	= adin_config_init,
> > +		.probe		= adin_probe,
> >  		.config_aneg	= adin_config_aneg,
> >  		.read_status	= adin_read_status,
> >  		.ack_interrupt	= adin_phy_ack_intr,
> > @@ -461,6 +503,7 @@ static struct phy_driver adin_driver[] = {
> >  		PHY_ID_MATCH_MODEL(PHY_ID_ADIN1300),
> >  		.name		= "ADIN1300",
> >  		.config_init	= adin_config_init,
> > +		.probe		= adin_probe,
> >  		.config_aneg	= adin_config_aneg,
> >  		.read_status	= adin_read_status,
> >  		.ack_interrupt	= adin_phy_ack_intr,
> 
> Thanks
> 	Andrew

^ permalink raw reply

* Re: [PATCH v2 15/34] staging/vc04_services: convert put_page() to put_user_page*()
From: Stefan Wahren @ 2019-08-13  5:23 UTC (permalink / raw)
  To: john.hubbard, Andrew Morton
  Cc: linux-fbdev, Jan Kara, kvm, Dave Hansen, Dave Chinner, dri-devel,
	linux-mm, sparclinux, Ira Weiny, ceph-devel, devel, rds-devel,
	linux-rdma, Suniel Mahesh, x86, amd-gfx, Christoph Hellwig,
	Jason Gunthorpe, Mihaela Muraru, xen-devel, devel, linux-media,
	John Hubbard, intel-gfx, Kishore KP, linux-block,
	Jérôme Glisse, linux-rpi-kernel, Dan Williams,
	Sidong Yang, linux-arm-kernel, linux-nfs, Eric Anholt, netdev,
	LKML, linux-xfs, linux-crypto, Greg Kroah-Hartman, linux-fsdevel,
	Al Viro
In-Reply-To: <20190804224915.28669-16-jhubbard@nvidia.com>

On 05.08.19 00:48, john.hubbard@gmail.com wrote:
> From: John Hubbard <jhubbard@nvidia.com>
>
> For pages that were retained via get_user_pages*(), release those pages
> via the new put_user_page*() routines, instead of via put_page() or
> release_pages().
>
> This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
> ("mm: introduce put_user_page*(), placeholder versions").
>
> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> Cc: Eric Anholt <eric@anholt.net>
> Cc: Stefan Wahren <stefan.wahren@i2se.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Mihaela Muraru <mihaela.muraru21@gmail.com>
> Cc: Suniel Mahesh <sunil.m@techveda.org>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: Sidong Yang <realwakka@gmail.com>
> Cc: Kishore KP <kishore.p@techveda.org>
> Cc: linux-rpi-kernel@lists.infradead.org
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: devel@driverdev.osuosl.org
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>

^ permalink raw reply

* Re: [PATCH bpf-next v2 2/4] bpf: support cloning sk storage on accept()
From: Stanislav Fomichev @ 2019-08-13  5:05 UTC (permalink / raw)
  To: Martin Lau
  Cc: Stanislav Fomichev, netdev@vger.kernel.org, bpf@vger.kernel.org,
	davem@davemloft.net, ast@kernel.org, daniel@iogearbox.net,
	Yonghong Song
In-Reply-To: <20190813014753.vftgwwzqxzx2pawg@kafai-mbp>

On 08/13, Martin Lau wrote:
> On Fri, Aug 09, 2019 at 09:10:36AM -0700, Stanislav Fomichev wrote:
> > Add new helper bpf_sk_storage_clone which optionally clones sk storage
> > and call it from sk_clone_lock.
> Thanks for v2.  Sorry for the delay.  I am traveling.
No worries, take your time, if you're OOO feel free to do another
round when you get back, not urgent.

> > 
> > Cc: Martin KaFai Lau <kafai@fb.com>
> > Cc: Yonghong Song <yhs@fb.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  include/net/bpf_sk_storage.h |  10 ++++
> >  include/uapi/linux/bpf.h     |   3 ++
> >  net/core/bpf_sk_storage.c    | 100 +++++++++++++++++++++++++++++++++--
> >  net/core/sock.c              |   9 ++--
> >  4 files changed, 116 insertions(+), 6 deletions(-)
> > 
> > diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
> > index b9dcb02e756b..8e4f831d2e52 100644
> > --- a/include/net/bpf_sk_storage.h
> > +++ b/include/net/bpf_sk_storage.h
> > @@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
> >  extern const struct bpf_func_proto bpf_sk_storage_get_proto;
> >  extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
> >  
> > +#ifdef CONFIG_BPF_SYSCALL
> > +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
> > +#else
> > +static inline int bpf_sk_storage_clone(const struct sock *sk,
> > +				       struct sock *newsk)
> > +{
> > +	return 0;
> > +}
> > +#endif
> > +
> >  #endif /* _BPF_SK_STORAGE_H */
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4393bd4b2419..0ef594ac3899 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -337,6 +337,9 @@ enum bpf_attach_type {
> >  #define BPF_F_RDONLY_PROG	(1U << 7)
> >  #define BPF_F_WRONLY_PROG	(1U << 8)
> >  
> > +/* Clone map from listener for newly accepted socket */
> > +#define BPF_F_CLONE		(1U << 9)
> > +
> >  /* flags for BPF_PROG_QUERY */
> >  #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
> >  
> > diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
> > index 94c7f77ecb6b..584e08ee0ca3 100644
> > --- a/net/core/bpf_sk_storage.c
> > +++ b/net/core/bpf_sk_storage.c
> > @@ -12,6 +12,9 @@
> >  
> >  static atomic_t cache_idx;
> >  
> > +#define SK_STORAGE_CREATE_FLAG_MASK					\
> > +	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
> > +
> >  struct bucket {
> >  	struct hlist_head list;
> >  	raw_spinlock_t lock;
> > @@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
> >  		kfree_rcu(sk_storage, rcu);
> >  }
> >  
> > -/* sk_storage->lock must be held and sk_storage->list cannot be empty */
> >  static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
> >  			    struct bpf_sk_storage_elem *selem)
> >  {
> > @@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
> >  	return 0;
> >  }
> >  
> > -/* Called by __sk_destruct() */
> > +/* Called by __sk_destruct() & bpf_sk_storage_clone() */
> >  void bpf_sk_storage_free(struct sock *sk)
> >  {
> >  	struct bpf_sk_storage_elem *selem;
> > @@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
> >  
> >  	smap = (struct bpf_sk_storage_map *)map;
> >  
> > +	/* Note that this map might be concurrently cloned from
> > +	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
> > +	 * RCU read section to finish before proceeding. New RCU
> > +	 * read sections should be prevented via bpf_map_inc_not_zero.
> > +	 */
> Thanks!
> 
> >  	synchronize_rcu();
> >  
> >  	/* bpf prog and the userspace can no longer access this map
> > @@ -601,7 +608,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
> >  
> >  static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
> >  {
> > -	if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
> > +	if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
> > +	    attr->max_entries ||
> I think "!(attr->map_flags & BPF_F_NO_PREALLOC)" should also be needed.
Makes sense, we always want to have BPF_F_NO_PREALLOC set. Will add,
thanks!

> >  	    attr->key_size != sizeof(int) || !attr->value_size ||
> >  	    /* Enforce BTF for userspace sk dumping */
> >  	    !attr->btf_key_type_id || !attr->btf_value_type_id)
> > @@ -739,6 +747,92 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
> >  	return err;
> >  }
> >  
> > +static struct bpf_sk_storage_elem *
> > +bpf_sk_storage_clone_elem(struct sock *newsk,
> > +			  struct bpf_sk_storage_map *smap,
> > +			  struct bpf_sk_storage_elem *selem)
> > +{
> > +	struct bpf_sk_storage_elem *copy_selem;
> > +
> > +	copy_selem = selem_alloc(smap, newsk, NULL, true);
> > +	if (!copy_selem)
> > +		return NULL;
> > +
> > +	if (map_value_has_spin_lock(&smap->map))
> > +		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
> > +				      SDATA(selem)->data, true);
> > +	else
> > +		copy_map_value(&smap->map, SDATA(copy_selem)->data,
> > +			       SDATA(selem)->data);
> > +
> > +	return copy_selem;
> > +}
> > +
> > +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
> > +{
> > +	struct bpf_sk_storage *new_sk_storage = NULL;
> > +	struct bpf_sk_storage *sk_storage;
> > +	struct bpf_sk_storage_elem *selem;
> > +	int ret;
> > +
> > +	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
> > +
> > +	rcu_read_lock();
> > +	sk_storage = rcu_dereference(sk->sk_bpf_storage);
> > +
> > +	if (!sk_storage || hlist_empty(&sk_storage->list))
> > +		goto out;
> > +
> > +	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
> > +		struct bpf_sk_storage_elem *copy_selem;
> > +		struct bpf_sk_storage_map *smap;
> > +		struct bpf_map *map;
> > +		int refold;
> > +
> > +		smap = rcu_dereference(SDATA(selem)->smap);
> > +		if (!(smap->map.map_flags & BPF_F_CLONE))
> > +			continue;
> > +
> > +		map = bpf_map_inc_not_zero(&smap->map, false);
> > +		if (IS_ERR(map))
> > +			continue;
> > +
> > +		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
> > +		if (!copy_selem) {
> > +			ret = -ENOMEM;
> > +			bpf_map_put(map);
> > +			goto err;
> > +		}
> > +
> > +		if (new_sk_storage) {
> > +			selem_link_map(smap, copy_selem);
> > +			__selem_link_sk(new_sk_storage, copy_selem);
> > +		} else {
> > +			ret = sk_storage_alloc(newsk, smap, copy_selem);
> > +			if (ret) {
> > +				kfree(copy_selem);
> > +				atomic_sub(smap->elem_size,
> > +					   &newsk->sk_omem_alloc);
> > +				bpf_map_put(map);
> > +				goto err;
> > +			}
> > +
> > +			new_sk_storage = rcu_dereference(copy_selem->sk_storage);
> > +		}
> > +		bpf_map_put(map);
> > +	}
> > +
> > +out:
> > +	rcu_read_unlock();
> > +	return 0;
> > +
> > +err:
> > +	rcu_read_unlock();
> > +
> > +	bpf_sk_storage_free(newsk);
> The later sk_free_unlock_clone(newsk) should eventually call
> bpf_sk_storage_free(newsk) also?
Hm, good point, I can drop it from here and rely on
sk_free_unlock_clone to call __sk_destruct/bpf_sk_storage_free.
That probably deserves a comment though :-)

> Others LGTM.
Thank you for a review!

> > +	return ret;
> > +}
> > +
> >  BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
> >  	   void *, value, u64, flags)
> >  {
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index d57b0cc995a0..f5e801a9cea4 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
> >  			goto out;
> >  		}
> >  		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
> > -#ifdef CONFIG_BPF_SYSCALL
> > -		RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
> > -#endif
> > +
> > +		if (bpf_sk_storage_clone(sk, newsk)) {
> > +			sk_free_unlock_clone(newsk);
> > +			newsk = NULL;
> > +			goto out;
> > +		}
> >  
> >  		newsk->sk_err	   = 0;
> >  		newsk->sk_err_soft = 0;
> > -- 
> > 2.23.0.rc1.153.gdeed80330f-goog
> > 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox