Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 2/3] atl1e: update statistics code
From: Sabrina Dubroca @ 2014-01-10 16:08 UTC (permalink / raw)
  To: davem; +Cc: netdev, bhutchings, jcliburn, chris.snook, Sabrina Dubroca
In-Reply-To: <1389370103-3009-1-git-send-email-sd@queasysnail.net>

As Ben Hutchings pointed out for the stats in alx, some
hardware-specific stats aren't matched to the right net_device_stats
field. Also fix the collision field and include errors in the total
number of RX/TX packets.

Minor whitespace fixes to match the style in alx.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 30 ++++++++++++++++---------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 7a73f3a..d5c2d3e 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1177,32 +1177,40 @@ static struct net_device_stats *atl1e_get_stats(struct net_device *netdev)
 	struct atl1e_hw_stats  *hw_stats = &adapter->hw_stats;
 	struct net_device_stats *net_stats = &netdev->stats;
 
-	net_stats->rx_packets = hw_stats->rx_ok;
-	net_stats->tx_packets = hw_stats->tx_ok;
 	net_stats->rx_bytes   = hw_stats->rx_byte_cnt;
 	net_stats->tx_bytes   = hw_stats->tx_byte_cnt;
 	net_stats->multicast  = hw_stats->rx_mcast;
 	net_stats->collisions = hw_stats->tx_1_col +
-				hw_stats->tx_2_col * 2 +
-				hw_stats->tx_late_col + hw_stats->tx_abort_col;
+				hw_stats->tx_2_col +
+				hw_stats->tx_late_col +
+				hw_stats->tx_abort_col;
+
+	net_stats->rx_errors  = hw_stats->rx_frag +
+				hw_stats->rx_fcs_err +
+				hw_stats->rx_len_err +
+				hw_stats->rx_sz_ov +
+				hw_stats->rx_rrd_ov +
+				hw_stats->rx_align_err +
+				hw_stats->rx_rxf_ov;
 
-	net_stats->rx_errors  = hw_stats->rx_frag + hw_stats->rx_fcs_err +
-				hw_stats->rx_len_err + hw_stats->rx_sz_ov +
-				hw_stats->rx_rrd_ov + hw_stats->rx_align_err;
 	net_stats->rx_fifo_errors   = hw_stats->rx_rxf_ov;
 	net_stats->rx_length_errors = hw_stats->rx_len_err;
 	net_stats->rx_crc_errors    = hw_stats->rx_fcs_err;
 	net_stats->rx_frame_errors  = hw_stats->rx_align_err;
-	net_stats->rx_over_errors   = hw_stats->rx_rrd_ov + hw_stats->rx_rxf_ov;
+	net_stats->rx_dropped       = hw_stats->rx_rrd_ov;
 
-	net_stats->rx_missed_errors = hw_stats->rx_rrd_ov + hw_stats->rx_rxf_ov;
+	net_stats->tx_errors = hw_stats->tx_late_col +
+			       hw_stats->tx_abort_col +
+			       hw_stats->tx_underrun +
+			       hw_stats->tx_trunc;
 
-	net_stats->tx_errors = hw_stats->tx_late_col + hw_stats->tx_abort_col +
-			       hw_stats->tx_underrun + hw_stats->tx_trunc;
 	net_stats->tx_fifo_errors    = hw_stats->tx_underrun;
 	net_stats->tx_aborted_errors = hw_stats->tx_abort_col;
 	net_stats->tx_window_errors  = hw_stats->tx_late_col;
 
+	net_stats->rx_packets = hw_stats->rx_ok + net_stats->rx_errors;
+	net_stats->tx_packets = hw_stats->tx_ok + net_stats->tx_errors;
+
 	return net_stats;
 }
 
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 1/3] atl1c: update statistics code
From: Sabrina Dubroca @ 2014-01-10 16:08 UTC (permalink / raw)
  To: davem; +Cc: netdev, bhutchings, jcliburn, chris.snook, Sabrina Dubroca
In-Reply-To: <1389370103-3009-1-git-send-email-sd@queasysnail.net>

As Ben Hutchings pointed out for the stats in alx, some
hardware-specific stats aren't matched to the right net_device_stats
field. Also fix the collision field and include errors in the total
number of RX/TX packets.

Minor whitespace fixes to match the style in alx.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 31 ++++++++++++++++---------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 2980175..4d3258d 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1500,31 +1500,40 @@ static struct net_device_stats *atl1c_get_stats(struct net_device *netdev)
 	struct net_device_stats *net_stats = &netdev->stats;
 
 	atl1c_update_hw_stats(adapter);
-	net_stats->rx_packets = hw_stats->rx_ok;
-	net_stats->tx_packets = hw_stats->tx_ok;
 	net_stats->rx_bytes   = hw_stats->rx_byte_cnt;
 	net_stats->tx_bytes   = hw_stats->tx_byte_cnt;
 	net_stats->multicast  = hw_stats->rx_mcast;
 	net_stats->collisions = hw_stats->tx_1_col +
-				hw_stats->tx_2_col * 2 +
-				hw_stats->tx_late_col + hw_stats->tx_abort_col;
-	net_stats->rx_errors  = hw_stats->rx_frag + hw_stats->rx_fcs_err +
-				hw_stats->rx_len_err + hw_stats->rx_sz_ov +
-				hw_stats->rx_rrd_ov + hw_stats->rx_align_err;
+				hw_stats->tx_2_col +
+				hw_stats->tx_late_col +
+				hw_stats->tx_abort_col;
+
+	net_stats->rx_errors  = hw_stats->rx_frag +
+				hw_stats->rx_fcs_err +
+				hw_stats->rx_len_err +
+				hw_stats->rx_sz_ov +
+				hw_stats->rx_rrd_ov +
+				hw_stats->rx_align_err +
+				hw_stats->rx_rxf_ov;
+
 	net_stats->rx_fifo_errors   = hw_stats->rx_rxf_ov;
 	net_stats->rx_length_errors = hw_stats->rx_len_err;
 	net_stats->rx_crc_errors    = hw_stats->rx_fcs_err;
 	net_stats->rx_frame_errors  = hw_stats->rx_align_err;
-	net_stats->rx_over_errors   = hw_stats->rx_rrd_ov + hw_stats->rx_rxf_ov;
+	net_stats->rx_dropped       = hw_stats->rx_rrd_ov;
 
-	net_stats->rx_missed_errors = hw_stats->rx_rrd_ov + hw_stats->rx_rxf_ov;
+	net_stats->tx_errors = hw_stats->tx_late_col +
+			       hw_stats->tx_abort_col +
+			       hw_stats->tx_underrun +
+			       hw_stats->tx_trunc;
 
-	net_stats->tx_errors = hw_stats->tx_late_col + hw_stats->tx_abort_col +
-				hw_stats->tx_underrun + hw_stats->tx_trunc;
 	net_stats->tx_fifo_errors    = hw_stats->tx_underrun;
 	net_stats->tx_aborted_errors = hw_stats->tx_abort_col;
 	net_stats->tx_window_errors  = hw_stats->tx_late_col;
 
+	net_stats->rx_packets = hw_stats->rx_ok + net_stats->rx_errors;
+	net_stats->tx_packets = hw_stats->tx_ok + net_stats->tx_errors;
+
 	return net_stats;
 }
 
-- 
1.8.5.2

^ permalink raw reply related

* [PATCH 0/3] atheros: modify statistics code
From: Sabrina Dubroca @ 2014-01-10 16:08 UTC (permalink / raw)
  To: davem; +Cc: netdev, bhutchings, jcliburn, chris.snook, Sabrina Dubroca

Following Ben Hutchings's advice on how to fill net_stats in alx [1],
this patch modifies the other atheros ethernet drivers
similarly. Minor whitespace/empty line changes in atl1c and atl1e to
make the code completely consistent between atl1c, atl1e, and alx
(after this [2]).

I don't have this hardware, so these patches have only been
compile-tested.

Detail of the changes:
* atl1/atl1c/atl1e
  - fix collisions computation
  - rx_dropped = rx_rrd_ov
  - rx_over_errors = 0
  - rx_missed_errors = 0
  - X_packets = X_ok + X_errors

* only atl1c/atl1e
  - add rx_rxf_ov to rx_errors

Note: These patches don't depend on the alx patchset [2], but the
commit-log messages reference it.

[1] http://www.spinics.net/lists/netdev/msg264930.html
[2] http://www.spinics.net/lists/netdev/msg265564.html

Sabrina Dubroca (3):
  atl1c: update statistics code
  atl1e: update statistics code
  atl1: update statistics code

 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 31 ++++++++++++++++---------
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 30 +++++++++++++++---------
 drivers/net/ethernet/atheros/atlx/atl1.c        | 16 ++++++-------
 3 files changed, 47 insertions(+), 30 deletions(-)

-- 
1.8.5.2

^ permalink raw reply

* Re: [Xen-devel] [PATCH net-next v3 2/9] xen-netback: Change TX path from grant copy to mapping
From: Wei Liu @ 2014-01-10 16:08 UTC (permalink / raw)
  To: David Vrabel
  Cc: Zoltan Kiss, Wei Liu, xen-devel, jonathan.davies, ian.campbell,
	linux-kernel, netdev
In-Reply-To: <52D0198C.6000600@citrix.com>

On Fri, Jan 10, 2014 at 04:02:20PM +0000, David Vrabel wrote:
> On 10/01/14 15:24, Zoltan Kiss wrote:
> > On 10/01/14 11:45, Wei Liu wrote:
> >> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
> >> [...]
> >>>
> >>>>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif
> >>>>> *vif,
> >>>>>       err = gop->status;
> >>>>>       if (unlikely(err))
> >>>>>           xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
> >>>>> +    else {
> >>>>> +        if (vif->grant_tx_handle[pending_idx] !=
> >>>>> +            NETBACK_INVALID_HANDLE) {
> >>>>> +            netdev_err(vif->dev,
> >>>>> +                "Stale mapped handle! pending_idx %x handle %x\n",
> >>>>> +                pending_idx, vif->grant_tx_handle[pending_idx]);
> >>>>> +            BUG();
> >>>>> +        }
> >>>>> +        set_phys_to_machine(idx_to_pfn(vif, pending_idx),
> >>>>> +            FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
> >>>>
> >>>> What happens when you don't have this?
> >>> Your frags will be filled with garbage. I don't understand exactly
> >>> what this function does, someone might want to enlighten us? I've
> >>> took it's usage from classic kernel.
> >>> Also, it might be worthwhile to check the return value and BUG if
> >>> it's false, but I don't know what exactly that return value means.
> >>>
> >>
> >> This is actually part of gnttab_map_refs. As you're using hypercall
> >> directly this becomes very fragile.
> >>
> >> So the right thing to do is to fix gnttab_map_refs.
> > I agree, as I mentioned in other email in this thread, I think that
> > should be the topic of an another patchseries. In the meantime, I will
> > use gnttab_batch_map instead of the direct hypercall, it handles the
> > GNTST_eagain scenario, and I will use set_phys_to_machine the same way
> > as m2p_override does:
> 
> If the grant table code doesn't provide the API calls you need you can
> either:
> 
> a) add the new API as a prerequisite patch.
> b) use the existing API calls and live with the performance problem,
> until you can refactor the API later on.
> 
> Adding a netback-specific hack isn't a valid option.
> 

Agreed.

Wei.

> David

^ permalink raw reply

* Re: [Xen-devel] [PATCH net-next v3 2/9] xen-netback: Change TX path from grant copy to mapping
From: David Vrabel @ 2014-01-10 16:02 UTC (permalink / raw)
  To: Zoltan Kiss
  Cc: Wei Liu, xen-devel, jonathan.davies, ian.campbell, linux-kernel,
	netdev
In-Reply-To: <52D01094.5060102@citrix.com>

On 10/01/14 15:24, Zoltan Kiss wrote:
> On 10/01/14 11:45, Wei Liu wrote:
>> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
>> [...]
>>>
>>>>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif
>>>>> *vif,
>>>>>       err = gop->status;
>>>>>       if (unlikely(err))
>>>>>           xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
>>>>> +    else {
>>>>> +        if (vif->grant_tx_handle[pending_idx] !=
>>>>> +            NETBACK_INVALID_HANDLE) {
>>>>> +            netdev_err(vif->dev,
>>>>> +                "Stale mapped handle! pending_idx %x handle %x\n",
>>>>> +                pending_idx, vif->grant_tx_handle[pending_idx]);
>>>>> +            BUG();
>>>>> +        }
>>>>> +        set_phys_to_machine(idx_to_pfn(vif, pending_idx),
>>>>> +            FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
>>>>
>>>> What happens when you don't have this?
>>> Your frags will be filled with garbage. I don't understand exactly
>>> what this function does, someone might want to enlighten us? I've
>>> took it's usage from classic kernel.
>>> Also, it might be worthwhile to check the return value and BUG if
>>> it's false, but I don't know what exactly that return value means.
>>>
>>
>> This is actually part of gnttab_map_refs. As you're using hypercall
>> directly this becomes very fragile.
>>
>> So the right thing to do is to fix gnttab_map_refs.
> I agree, as I mentioned in other email in this thread, I think that
> should be the topic of an another patchseries. In the meantime, I will
> use gnttab_batch_map instead of the direct hypercall, it handles the
> GNTST_eagain scenario, and I will use set_phys_to_machine the same way
> as m2p_override does:

If the grant table code doesn't provide the API calls you need you can
either:

a) add the new API as a prerequisite patch.
b) use the existing API calls and live with the performance problem,
until you can refactor the API later on.

Adding a netback-specific hack isn't a valid option.

David

^ permalink raw reply

* [PATCH v2 net-next] net: make dev_set_mtu() honor notification return code
From: Veaceslav Falico @ 2014-01-10 15:56 UTC (permalink / raw)
  To: netdev
  Cc: Veaceslav Falico, Jiri Pirko, David S. Miller, Eric Dumazet,
	Alexander Duyck, Nicolas Dichtel

Currently, after changing the MTU for a device, dev_set_mtu() calls
NETDEV_CHANGEMTU notification, however doesn't verify it's return code -
which can be NOTIFY_BAD - i.e. some of the net notifier blocks refused this
change, and continues nevertheless.

To fix this, verify the return code, and if it's an error - then revert the
MTU to the original one, notify again and pass the error code.

CC: Jiri Pirko <jiri@resnulli.us>
CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---

Notes:
    v1 -> v2:
    Notify again after we've reverted the MTU back to the original, so that
    everyone has a chance to change its MTU back.

 net/core/dev.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index ce01847..0991d94 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5287,6 +5287,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 }
 EXPORT_SYMBOL(dev_change_flags);
 
+static int __dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (ops->ndo_change_mtu)
+		return ops->ndo_change_mtu(dev, new_mtu);
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
 /**
  *	dev_set_mtu - Change maximum transfer unit
  *	@dev: device
@@ -5296,8 +5307,7 @@ EXPORT_SYMBOL(dev_change_flags);
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	int err;
+	int err, orig_mtu;
 
 	if (new_mtu == dev->mtu)
 		return 0;
@@ -5309,14 +5319,20 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
-	err = 0;
-	if (ops->ndo_change_mtu)
-		err = ops->ndo_change_mtu(dev, new_mtu);
-	else
-		dev->mtu = new_mtu;
+	orig_mtu = dev->mtu;
+	err = __dev_set_mtu(dev, new_mtu);
 
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+	if (!err) {
+		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		err = notifier_to_errno(err);
+		if (err) {
+			/* setting mtu back and notifying everyone again,
+			 * so that they have a chance to revert changes.
+			 */
+			__dev_set_mtu(dev, orig_mtu);
+			call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		}
+	}
 	return err;
 }
 EXPORT_SYMBOL(dev_set_mtu);
-- 
1.8.4

^ permalink raw reply related

* Re: [net-next 14/16] i40e: enable PTP
From: Ben Hutchings @ 2014-01-10 15:55 UTC (permalink / raw)
  To: Jeff Kirsher
  Cc: davem, Jacob Keller, netdev, gospo, sassmann, Richard Cochran,
	Jesse Brandeburg
In-Reply-To: <1389344336-1558-15-git-send-email-jeffrey.t.kirsher@intel.com>

On Fri, 2014-01-10 at 00:58 -0800, Jeff Kirsher wrote:
> From: Jacob Keller <jacob.e.keller@intel.com>
> 
> New feature: Enable PTP support in the i40e driver.
> 
> Change-ID: I6a8e799f582705191f9583afb1b9231a8db96cc8
> Cc: Richard Cochran <richardcochran@gmail.com>
> Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
> Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> ---
>  drivers/net/ethernet/intel/i40e/Makefile       |   2 +
>  drivers/net/ethernet/intel/i40e/i40e.h         |  24 +
>  drivers/net/ethernet/intel/i40e/i40e_ethtool.c |  33 +-
>  drivers/net/ethernet/intel/i40e/i40e_main.c    |  45 +-
>  drivers/net/ethernet/intel/i40e/i40e_ptp.c     | 640 +++++++++++++++++++++++++
>  drivers/net/ethernet/intel/i40e/i40e_txrx.c    |  53 ++
>  drivers/net/ethernet/intel/i40e/i40e_txrx.h    |   3 +
>  7 files changed, 798 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/net/ethernet/intel/i40e/i40e_ptp.c

This is missing a Kconfig update to select PTP_1588_CLOCK.

[...]
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -1698,6 +1698,25 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
>  }
>  
>  /**
> + * i40e_ioctl - Access the hwtstamp interface
> + * @netdev: network interface device structure
> + * @ifr: interface request data
> + * @cmd: ioctl command
> + **/
> +int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
> +{
> +	struct i40e_netdev_priv *np = netdev_priv(netdev);
> +	struct i40e_pf *pf = np->vsi->back;
> +
> +	switch (cmd) {
> +	case SIOCSHWTSTAMP:
> +		return i40e_ptp_hwtstamp_ioctl(pf, ifr, cmd);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}

Please implement SIOCGHWTSTAMP as well.

[...]
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
[...]
> +void i40e_ptp_init(struct i40e_pf *pf)
> +{
> +	struct i40e_hw *hw = &pf->hw;
> +	struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev;
> +
> +	snprintf(pf->ptp_caps.name, 16, "%pm", netdev->dev_addr);
[...]

Use sizeof(), not a magic number.  Also the clock name should perhaps be
the driver name, not the MAC address.

Is the clock shared between multiple PFs or are there independent clocks
for each PF?

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH net-next] net: make dev_set_mtu() honor notification return code
From: Veaceslav Falico @ 2014-01-10 15:47 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: netdev, Jiri Pirko, David S. Miller, Eric Dumazet,
	Nicolas Dichtel
In-Reply-To: <52D0138D.6050302@intel.com>

On Fri, Jan 10, 2014 at 07:36:45AM -0800, Alexander Duyck wrote:
>On 01/10/2014 04:48 AM, Veaceslav Falico wrote:
...snip...
>> -	err = 0;
>> -	if (ops->ndo_change_mtu)
>> -		err = ops->ndo_change_mtu(dev, new_mtu);
>> -	else
>> -		dev->mtu = new_mtu;
>> +	orig_mtu = dev->mtu;
>> +	err = __dev_set_mtu(dev, new_mtu);
>>
>> -	if (!err)
>> -		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
>> +	if (!err) {
>> +		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
>> +		err = notifier_to_errno(err);
>> +		if (err)
>> +			__dev_set_mtu(dev, orig_mtu);
>> +	}
>>  	return err;
>>  }
>>  EXPORT_SYMBOL(dev_set_mtu);
>
>So what about the netdevices that succeeded in changing the MTU based on
>the notifiers?  It seems like you still  have an inconsistent state
>after the failure unless you issue a second call with a notification
>that you reverted to the old MTU.

Good point, thank you. I'll add a second call_netdevice_notifiers() after
setting the original MTU and resend.

Thank you!

>
>Thanks,
>
>Alex

^ permalink raw reply

* Re: [PATCH net-next] net: make dev_set_mtu() honor notification return code
From: Alexander Duyck @ 2014-01-10 15:36 UTC (permalink / raw)
  To: Veaceslav Falico, netdev
  Cc: Jiri Pirko, David S. Miller, Eric Dumazet, Nicolas Dichtel
In-Reply-To: <1389358097-5396-1-git-send-email-vfalico@redhat.com>

On 01/10/2014 04:48 AM, Veaceslav Falico wrote:
> Currently, after changing the MTU for a device, dev_set_mtu() calls
> NETDEV_CHANGEMTU notification, however doesn't verify it's return code -
> which can be NOTIFY_BAD - i.e. some of the net notifier blocks refused this
> change, and continues nevertheless.
>
> To fix this, verify the return code, and if it's an error - then revert the
> MTU to the original one, and pass the error code.
>
> CC: Jiri Pirko <jiri@resnulli.us>
> CC: "David S. Miller" <davem@davemloft.net>
> CC: Eric Dumazet <edumazet@google.com>
> CC: Alexander Duyck <alexander.h.duyck@intel.com>
> CC: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
> ---
>  net/core/dev.c | 29 ++++++++++++++++++++---------
>  1 file changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index ce01847..1c570ff 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5287,6 +5287,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
>  }
>  EXPORT_SYMBOL(dev_change_flags);
>  
> +static int __dev_set_mtu(struct net_device *dev, int new_mtu)
> +{
> +	const struct net_device_ops *ops = dev->netdev_ops;
> +
> +	if (ops->ndo_change_mtu)
> +		return ops->ndo_change_mtu(dev, new_mtu);
> +
> +	dev->mtu = new_mtu;
> +	return 0;
> +}
> +
>  /**
>   *	dev_set_mtu - Change maximum transfer unit
>   *	@dev: device
> @@ -5296,8 +5307,7 @@ EXPORT_SYMBOL(dev_change_flags);
>   */
>  int dev_set_mtu(struct net_device *dev, int new_mtu)
>  {
> -	const struct net_device_ops *ops = dev->netdev_ops;
> -	int err;
> +	int err, orig_mtu;
>  
>  	if (new_mtu == dev->mtu)
>  		return 0;
> @@ -5309,14 +5319,15 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
>  	if (!netif_device_present(dev))
>  		return -ENODEV;
>  
> -	err = 0;
> -	if (ops->ndo_change_mtu)
> -		err = ops->ndo_change_mtu(dev, new_mtu);
> -	else
> -		dev->mtu = new_mtu;
> +	orig_mtu = dev->mtu;
> +	err = __dev_set_mtu(dev, new_mtu);
>  
> -	if (!err)
> -		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
> +	if (!err) {
> +		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
> +		err = notifier_to_errno(err);
> +		if (err)
> +			__dev_set_mtu(dev, orig_mtu);
> +	}
>  	return err;
>  }
>  EXPORT_SYMBOL(dev_set_mtu);

So what about the netdevices that succeeded in changing the MTU based on
the notifiers?  It seems like you still  have an inconsistent state
after the failure unless you issue a second call with a notification
that you reverted to the old MTU.

Thanks,

Alex

^ permalink raw reply

* Re: [PATCH net-next v3 2/9] xen-netback: Change TX path from grant copy to mapping
From: Zoltan Kiss @ 2014-01-10 15:24 UTC (permalink / raw)
  To: Wei Liu; +Cc: ian.campbell, xen-devel, netdev, linux-kernel, jonathan.davies
In-Reply-To: <20140110114534.GE29180@zion.uk.xensource.com>

On 10/01/14 11:45, Wei Liu wrote:
> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
> [...]
>>
>>>> @@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
>>>>   	err = gop->status;
>>>>   	if (unlikely(err))
>>>>   		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
>>>> +	else {
>>>> +		if (vif->grant_tx_handle[pending_idx] !=
>>>> +			NETBACK_INVALID_HANDLE) {
>>>> +			netdev_err(vif->dev,
>>>> +				"Stale mapped handle! pending_idx %x handle %x\n",
>>>> +				pending_idx, vif->grant_tx_handle[pending_idx]);
>>>> +			BUG();
>>>> +		}
>>>> +		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
>>>> +			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
>>>
>>> What happens when you don't have this?
>> Your frags will be filled with garbage. I don't understand exactly
>> what this function does, someone might want to enlighten us? I've
>> took it's usage from classic kernel.
>> Also, it might be worthwhile to check the return value and BUG if
>> it's false, but I don't know what exactly that return value means.
>>
>
> This is actually part of gnttab_map_refs. As you're using hypercall
> directly this becomes very fragile.
>
> So the right thing to do is to fix gnttab_map_refs.
I agree, as I mentioned in other email in this thread, I think that 
should be the topic of an another patchseries. In the meantime, I will 
use gnttab_batch_map instead of the direct hypercall, it handles the 
GNTST_eagain scenario, and I will use set_phys_to_machine the same way 
as m2p_override does:

if (unlikely(!set_phys_to_machine(idx_to_pfn(vif, pending_idx), 
FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT))
			BUG();

Zoli

^ permalink raw reply

* Re: [PATCH net-next] IPv6: add option to use anycast addresses as source addresses for datagrams
From: François-Xavier Le Bail @ 2014-01-10 15:20 UTC (permalink / raw)
  To: netdev
  Cc: Hannes Frederic Sowa, David S. Miller, Alexey Kuznetsov,
	James Morris, Hideaki Yoshifuji, Patrick McHardy,
	Francois-Xavier Le Bail
In-Reply-To: <1389275123-3305-1-git-send-email-fx.lebail@yahoo.com>

On Thu, 1/9/14, Francois-Xavier Le Bail <fx.lebail@yahoo.com> wrote:

> This change allows to follow a recommandation of RFC4942.
> 
> - Add IPV6_ANYCAST_SRC_DGRAM socket option to control the use of anycast
>   addresses as source addresses for datagrams, with Advanced Sockets API
>   (RFC3542).
> - Add anycast_src_dgram sockopt flag.
> - Add ipv6_chk_acast_addr_src() to check if address is link-local on
>   given interface or is global on any interface.
> - Use them in ip6_datagram_send_ctl(), do_ipv6_setsockopt() and
>   do_ipv6_getsockopt()
> 
> Signed-off-by: Francois-Xavier Le Bail <fx.lebail@yahoo.com>
---

A v2 will be send soon.

^ permalink raw reply

* Re: [PATCH net V3 2/2] net: core: explicitly select a txq before doing l2 forwarding
From: Neil Horman @ 2014-01-10 14:30 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, e1000-devel, netdev, linux-kernel, John Fastabend, davem
In-Reply-To: <1389341906-2367-2-git-send-email-jasowang@redhat.com>

On Fri, Jan 10, 2014 at 04:18:26PM +0800, Jason Wang wrote:
> Currently, the tx queue were selected implicitly in ndo_dfwd_start_xmit(). The
> will cause several issues:
> 
> - NETIF_F_LLTX were removed for macvlan, so txq lock were done for macvlan
>   instead of lower device which misses the necessary txq synchronization for
>   lower device such as txq stopping or frozen required by dev watchdog or
>   control path.
> - dev_hard_start_xmit() was called with NULL txq which bypasses the net device
>   watchdog.
> - dev_hard_start_xmit() does not check txq everywhere which will lead a crash
>   when tso is disabled for lower device.
> 
> Fix this by explicitly introducing a new param for .ndo_select_queue() for just
> selecting queues in the case of l2 forwarding offload. netdev_pick_tx() was also
> extended to accept this parameter and dev_queue_xmit_accel() was used to do l2
> forwarding transmission.
> 
> With this fixes, NETIF_F_LLTX could be preserved for macvlan and there's no need
> to check txq against NULL in dev_hard_start_xmit(). Also there's no need to keep
> a dedicated ndo_dfwd_start_xmit() and we can just reuse the code of
> dev_queue_xmit() to do the transmission.
> 
> In the future, it was also required for macvtap l2 forwarding support since it
> provides a necessary synchronization method.
> 
> Cc: John Fastabend <john.r.fastabend@intel.com>
> Cc: Neil Horman <nhorman@tuxdriver.com>
> Cc: e1000-devel@lists.sourceforge.net
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> 
Acked-by: Neil Horman <nhorman@tuxdriver.com>


------------------------------------------------------------------------------
CenturyLink Cloud: The Leader in Enterprise Cloud Services.
Learn Why More Businesses Are Choosing CenturyLink Cloud For
Critical Workloads, Development Environments & Everything In Between.
Get a Quote or Start a Free Trial Today. 
http://pubads.g.doubleclick.net/gampad/clk?id=119420431&iu=/4140/ostg.clktrk
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* Re: [PATCH net-next] sctp: make sctp_addto_chunk_fixed local
From: Neil Horman @ 2014-01-10 14:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Vlad Yasevich, David Miller, netdev
In-Reply-To: <20140109223111.71e55286@nehalam.linuxnetplumber.net>

On Thu, Jan 09, 2014 at 10:31:11PM -0800, Stephen Hemminger wrote:
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> 
> ---
>  include/net/sctp/structs.h |    1 -
>  net/sctp/sm_make_chunk.c   |    6 ++++--
>  2 files changed, 4 insertions(+), 3 deletions(-)
> 
> --- a/include/net/sctp/structs.h	2014-01-09 22:29:51.871663405 -0800
> +++ b/include/net/sctp/structs.h	2014-01-09 22:29:58.323581984 -0800
> @@ -649,7 +649,6 @@ int sctp_user_addto_chunk(struct sctp_ch
>  			  struct iovec *data);
>  void sctp_chunk_free(struct sctp_chunk *);
>  void  *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
> -void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len, const void *data);
>  struct sctp_chunk *sctp_chunkify(struct sk_buff *,
>  				 const struct sctp_association *,
>  				 struct sock *);
> --- a/net/sctp/sm_make_chunk.c	2014-01-09 22:29:51.871663405 -0800
> +++ b/net/sctp/sm_make_chunk.c	2014-01-09 22:29:58.323581984 -0800
> @@ -78,6 +78,8 @@ static int sctp_process_param(struct sct
>  			      gfp_t gfp);
>  static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
>  			      const void *data);
> +static void  *sctp_addto_chunk_fixed(struct sctp_chunk *, int len,
> +				     const void *data);
>  
>  /* Control chunk destructor */
>  static void sctp_control_release_owner(struct sk_buff *skb)
> @@ -1475,8 +1477,8 @@ void *sctp_addto_chunk(struct sctp_chunk
>  /* Append bytes to the end of a chunk. Returns NULL if there isn't sufficient
>   * space in the chunk
>   */
> -void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
> -			     int len, const void *data)
> +static void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
> +				    int len, const void *data)
>  {
>  	if (skb_tailroom(chunk->skb) >= len)
>  		return sctp_addto_chunk(chunk, len, data);
> 
Acked-by: Neil Horman <nhorman@tuxdriver.com>

^ permalink raw reply

* Re: [PATCH net V2 2/2] net: core: explicitly select a txq before doing l2 forwarding
From: Neil Horman @ 2014-01-10 14:27 UTC (permalink / raw)
  To: Jason Wang; +Cc: davem, netdev, linux-kernel, mst, John Fastabend, e1000-devel
In-Reply-To: <52CF9B25.7020909@redhat.com>

On Fri, Jan 10, 2014 at 03:03:01PM +0800, Jason Wang wrote:
> On 01/09/2014 08:31 PM, Neil Horman wrote:
> > On Thu, Jan 09, 2014 at 05:37:32PM +0800, Jason Wang wrote:
> >> Currently, the tx queue were selected implicitly in ndo_dfwd_start_xmit(). The
> >> will cause several issues:
> >>
> >> - NETIF_F_LLTX were removed for macvlan, so txq lock were done for macvlan
> >>   instead of lower device which misses the necessary txq synchronization for
> >>   lower device such as txq stopping or frozen required by dev watchdog or
> >>   control path.
> >> - dev_hard_start_xmit() was called with NULL txq which bypasses the net device
> >>   watchdog.
> >> - dev_hard_start_xmit() does not check txq everywhere which will lead a crash
> >>   when tso is disabled for lower device.
> >>
> >> Fix this by explicitly introducing a new param for .ndo_select_queue() for just
> >> selecting queues in the case of l2 forwarding offload. And also introducing
> >> dfwd_direct_xmit() to do the queue selecting, txq holding and transmitting for
> >> l2 forwarding.
> >>
> >> With this fixes, NETIF_F_LLTX could be preserved for macvlan and there's no need
> >> to check txq against NULL in dev_hard_start_xmit(). Also there's no need to keep
> >> a dedicated ndo_dfwd_start_xmit().
> >>
> >> In the future, it was also required for macvtap l2 forwarding support since it
> >> provides a necessary synchronization method.
> >>
> >> Cc: John Fastabend <john.r.fastabend@intel.com>
> >> Cc: Neil Horman <nhorman@tuxdriver.com>
> >> Cc: e1000-devel@lists.sourceforge.net
> >> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>
> >> ---
> >> Changes from V1:
> >> - Adding a new parameter to ndo_select_queue instead of a new method to select
> >>   queue for l2 forwarding.
> >> - Remove the unnecessary ndo_dfwd_start_xmit() since txq was selected
> >>   explicitly.
> >> - Keep NETIF_F_LLTX when netdev feature is changed.
> >> - Shape the commit log
> > A few minor nits inline.
> >> <snip>
> >>  }
> >> diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
> >> index 5360f73..7eb4c82 100644
> >> --- a/drivers/net/macvlan.c
> >> +++ b/drivers/net/macvlan.c
> >> @@ -299,7 +299,7 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
> >>  
> >>  	if (vlan->fwd_priv) {
> >>  		skb->dev = vlan->lowerdev;
> >> -		ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
> >> +		ret = dfwd_direct_xmit(skb, skb->dev, vlan->fwd_priv);
> >>  	} else {
> >>  		ret = macvlan_queue_xmit(skb, dev);
> >>  	}
> >> @@ -366,7 +366,6 @@ static int macvlan_open(struct net_device *dev)
> >>  		if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
> >>  			vlan->fwd_priv = NULL;
> >>  		} else {
> >> -			dev->features &= ~NETIF_F_LLTX;
> >>  			return 0;
> >>  		}
> > After removing the features flag operation here, you don't need the braces
> > around the else statement either.
> 
> Ok.
> >> <snip>
> >> +int dfwd_direct_xmit(struct sk_buff *skb, struct net_device *dev,
> >> +		     void *accel_priv)
> >> +{
> >> +	struct netdev_queue *txq;
> >> +	int ret = NETDEV_TX_BUSY;
> >> +	int index;
> >> +
> >> +	BUG_ON(!dev->netdev_ops->ndo_select_queue);
> >> +	index =	dev->netdev_ops->ndo_select_queue(dev, skb, accel_priv);
> >> +
> >> +	local_bh_disable();
> >> +
> >> +	skb_set_queue_mapping(skb, index);
> >> +	txq = netdev_get_tx_queue(dev, index);
> >> +
> >> +	HARD_TX_LOCK(dev, txq, smp_processor_id());
> >> +	if (!netif_xmit_frozen_or_stopped(txq))
> >> +		ret = dev_hard_start_xmit(skb, dev, txq);
> >> +	HARD_TX_UNLOCK(dev, txq);
> >> +
> >> +	local_bh_enable();
> >> +	return ret;
> >> +}
> >> +EXPORT_SYMBOL_GPL(dfwd_direct_xmit);
> >> +
> > Now that we're using the common path to select a queue, can we just use
> > dev_queue_xmit here instead of creating our own transmit function?  The txq we
> > select from the ixgbe card will just have a pfifo_fast queue on it (if not a
> > noop queue), so dev_queue_xmit should just fall into the dev_hard_start_xmit
> > path, and save us this extra coding.
> >
> > Neil
> 
> Ture, and this will make no difference with the case without l2
> forwarding. To not trouble other parts too much, I will keep the current
> dev_queue_xmit() API and rename the current dev_queue_xmit() to
> __dev_queue_xmit() can make it can accept a accel_priv parameter. So
> dev_queue_xmit() will call this will NULL accel_priv and introduce a
> dev_queue_xmit_accel() that can accept a accel_priv parameter.
> 

Sounds perfect, thanks!
Neil

> Thanks
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> 
> 

^ permalink raw reply

* Re: [PATCH v2 01/16] reset: add non CONFIG_RESET_CONTROLLER routines
From: Philipp Zabel @ 2014-01-10 13:30 UTC (permalink / raw)
  To: Chen-Yu Tsai
  Cc: Srinivas Kandagatla, Giuseppe Cavallaro, Maxime Ripard, netdev,
	linux-arm-kernel, linux-sunxi, linux-kernel, Rob Herring,
	Emilio Lopez, Mike Turquette, Ivan T. Ivanov, Barry Song,
	Stephen Warren
In-Reply-To: <1389337217-29032-2-git-send-email-wens@csie.org>

Hi,

[Added Ivan, Stephen and Barry to Cc:]

Am Freitag, den 10.01.2014, 15:00 +0800 schrieb Chen-Yu Tsai:
> Some drivers are shared between platforms that may or may not
> have RESET_CONTROLLER selected for them.

I expected that drivers compiled for platforms without reset controllers
but use the reset API should select or depend on RESET_CONTROLLER.
Stubbing out device_reset and reset_control_get will turn a compile time
error into a runtime error for everyone forgetting to do this when
writing a new driver that needs the reset.

>  Add dummy functions
> when RESET_CONTROLLER is not selected, thereby eliminating the
> need for drivers to enclose reset_control_*() calls within
> #ifdef CONFIG_RESET_CONTROLLER, #endif
> 
> Signed-off-by: Chen-Yu Tsai <wens@csie.org>

This was already proposed by Ivan and Barry earlier, and so far we
didn't get to a proper conclusion:

https://lkml.org/lkml/2013/10/10/179
http://lists.infradead.org/pipermail/linux-arm-kernel/2013-June/174758.html

If included, the stubs should at least return an error to indicate a
reset couldn't be performed, but then I lose the compile time error for
drivers which should select RESET_CONTROLLER but don't.

Also this alone won't help you if you build multi-arch kernels where one
platform enables RESET_CONTROLLER and the other expects it to be
disabled.

regards
Philipp

> ---
>  include/linux/reset.h | 39 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 39 insertions(+)
> 
> diff --git a/include/linux/reset.h b/include/linux/reset.h
> index 6082247..38aa616 100644
> --- a/include/linux/reset.h
> +++ b/include/linux/reset.h
> @@ -4,6 +4,8 @@
>  struct device;
>  struct reset_control;
>  
> +#ifdef CONFIG_RESET_CONTROLLER
> +
>  int reset_control_reset(struct reset_control *rstc);
>  int reset_control_assert(struct reset_control *rstc);
>  int reset_control_deassert(struct reset_control *rstc);
> @@ -14,4 +16,41 @@ struct reset_control *devm_reset_control_get(struct device *dev, const char *id)
>  
>  int device_reset(struct device *dev);
>  
> +#else /* !CONFIG_RESET_CONTROLLER */
> +
> +static inline int reset_control_reset(struct reset_control *rstc)
> +{
> +	return 0;
> +}
> +
> +static inline int reset_control_assert(struct reset_control *rstc)
> +{
> +	return 0;
> +}
> +
> +static inline int reset_control_deassert(struct reset_control *rstc)
> +{
> +	return 0;
> +}

Those should probably have a WARN_ON(1) like the GPIO API stubs.

> +
> +static inline struct reset_control *reset_control_get(struct device *dev,
> +		const char *id)
> +{
> +	return NULL;
> +}
[...]
> +static inline struct reset_control *devm_reset_control_get(struct device *dev,
> +		const char *id)
> +{
> +	return NULL;
> +}

These should return ERR_PTR(-ENOSYS).

> +
> +static inline int device_reset(struct device *dev)
> +{
> +	return 0;
> +}

And this should return -ENOSYS.

Drivers that also need to run on platforms with CONFIG_RESET_CONTROLLER
disabled (and that don't need the reset) should ignore -ENOSYS and
-ENOENT return values from device_reset/(devm_)reset_control_get.

I wonder if it might be a good idea to add a RESET_CONTROLLER_OPTIONAL
that drivers need to select to enable the API stubs? That way we could
keep the compile time error for new drivers that need resets but forget
to select RESET_CONTROLLER.
Or add a
#warning If this driver can work without reset, please select CONFIG_RESET_CONTROLLER_OPTIONAL

Another possibility would be to add device_reset_optional and
(devm_)reset_control_get_optional variants and only provide stubs for
those, but not for device_reset/(devm_)reset_control_get. Then drivers
that need to work on platforms without the reset controller API enabled
could explicitly use the _optional variants, and all other drivers would
still be checked at compile time.

regards
Philipp

^ permalink raw reply

* [RFC net-next 3/3] bonding: convert packets_per_slave to use the new option API
From: Nikolay Aleksandrov @ 2014-01-10 13:11 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov
In-Reply-To: <1389359495-9700-1-git-send-email-nikolay@redhat.com>

This patch adds the necessary changes so packets_per_slave would use the
new bonding option API.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
---
 drivers/net/bonding/bond_netlink.c |  4 ++--
 drivers/net/bonding/bond_options.c | 49 +++++++++++++++++++++-----------------
 drivers/net/bonding/bond_options.h |  2 ++
 drivers/net/bonding/bond_sysfs.c   | 17 ++++---------
 drivers/net/bonding/bonding.h      |  2 --
 5 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 51a9be3..ee01d8f 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -250,8 +250,8 @@ static int bond_changelink(struct net_device *bond_dev,
 		int packets_per_slave =
 			nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]);
 
-		err = bond_option_packets_per_slave_set(bond,
-							packets_per_slave);
+		err = __bond_opt_intset(bond, BOND_OPT_PACKETS_PER_SLAVE,
+					packets_per_slave);
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 4aa752f..88b5338 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -31,6 +31,12 @@ static struct bond_value_tbl bond_mode_tbl[] = {
 	{ NULL,            -1,                     0},
 };
 
+static struct bond_value_tbl bond_pps_tbl[] = {
+	{ "default", 1,         BOND_VALFLAG_DEFAULT},
+	{ "maxval",  USHRT_MAX, BOND_VALFLAG_MAX},
+	{ NULL,      -1,        0},
+};
+
 static struct bond_option bond_opts[] = {
 	[BOND_OPT_MODE] = {
 		.id = BOND_OPT_MODE,
@@ -41,6 +47,15 @@ static struct bond_option bond_opts[] = {
 		.values = bond_mode_tbl,
 		.set = bond_option_mode_set
 	},
+	[BOND_OPT_PACKETS_PER_SLAVE] = {
+		.id = BOND_OPT_PACKETS_PER_SLAVE,
+		.name = "packets_per_slave",
+		.desc = "Packets to send per slave in RR mode",
+		.valtype = BOND_OPTVAL_INTEGER,
+		.unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_ROUNDROBIN)),
+		.values = bond_pps_tbl,
+		.set = bond_option_pps_set
+	},
 	{ }
 };
 
@@ -982,28 +997,6 @@ int bond_option_lp_interval_set(struct bonding *bond, int lp_interval)
 	return 0;
 }
 
-int bond_option_packets_per_slave_set(struct bonding *bond,
-				      int packets_per_slave)
-{
-	if (packets_per_slave < 0 || packets_per_slave > USHRT_MAX) {
-		pr_err("%s: packets_per_slave must be between 0 and %u\n",
-		       bond->dev->name, USHRT_MAX);
-		return -EINVAL;
-	}
-
-	if (bond->params.mode != BOND_MODE_ROUNDROBIN)
-		pr_warn("%s: Warning: packets_per_slave has effect only in balance-rr mode\n",
-			bond->dev->name);
-
-	if (packets_per_slave > 1)
-		bond->params.packets_per_slave =
-			reciprocal_value(packets_per_slave);
-	else
-		bond->params.packets_per_slave = packets_per_slave;
-
-	return 0;
-}
-
 int bond_option_lacp_rate_set(struct bonding *bond, int lacp_rate)
 {
 	if (bond_parm_tbl_lookup(lacp_rate, bond_lacp_tbl) < 0) {
@@ -1054,3 +1047,15 @@ int bond_option_ad_select_set(struct bonding *bond, int ad_select)
 
 	return 0;
 }
+
+int bond_option_pps_set(struct bonding *bond, void *newval)
+{
+	int *new_value = newval;
+
+	if (*new_value > 1)
+		bond->params.packets_per_slave = reciprocal_value(*new_value);
+	else
+		bond->params.packets_per_slave = *new_value;
+
+	return 0;
+}
diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h
index 12c4d4f..9a3a647 100644
--- a/drivers/net/bonding/bond_options.h
+++ b/drivers/net/bonding/bond_options.h
@@ -42,6 +42,7 @@ enum {
 /* Option IDs, their bit positions correspond to their IDs */
 enum {
 	BOND_OPT_MODE,
+	BOND_OPT_PACKETS_PER_SLAVE,
 	BOND_OPT_LAST
 };
 
@@ -82,4 +83,5 @@ struct bond_option *bond_opt_get(unsigned int option);
 struct bond_value_tbl *bond_opt_get_val(unsigned int option, int val);
 
 int bond_option_mode_set(struct bonding *bond, void *newval);
+int bond_option_pps_set(struct bonding *bond, void *newval);
 #endif /* _BOND_OPTIONS_H */
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 8ce5f4f..2de1bab 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -1375,22 +1375,15 @@ static ssize_t bonding_store_packets_per_slave(struct device *d,
 					       const char *buf, size_t count)
 {
 	struct bonding *bond = to_bond(d);
-	int new_value, ret;
-
-	if (sscanf(buf, "%d", &new_value) != 1) {
-		pr_err("%s: no packets_per_slave value specified.\n",
-		       bond->dev->name);
-		return -EINVAL;
-	}
+	int ret;
 
-	if (!rtnl_trylock())
+	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_PACKETS_PER_SLAVE,
+				   (char *)buf);
+	if (ret == -EAGAIN)
 		return restart_syscall();
-
-	ret = bond_option_packets_per_slave_set(bond, new_value);
-	if (!ret)
+	else if (!ret)
 		ret = count;
 
-	rtnl_unlock();
 	return ret;
 }
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 50bbd28..88cd7d7 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -463,8 +463,6 @@ int bond_option_all_slaves_active_set(struct bonding *bond,
 				      int all_slaves_active);
 int bond_option_min_links_set(struct bonding *bond, int min_links);
 int bond_option_lp_interval_set(struct bonding *bond, int min_links);
-int bond_option_packets_per_slave_set(struct bonding *bond,
-				      int packets_per_slave);
 int bond_option_lacp_rate_set(struct bonding *bond, int lacp_rate);
 int bond_option_ad_select_set(struct bonding *bond, int ad_select);
 struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond);
-- 
1.8.1.4

^ permalink raw reply related

* [RFC net-next 2/3] bonding: convert mode setting to use the new option API
From: Nikolay Aleksandrov @ 2014-01-10 13:11 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov
In-Reply-To: <1389359495-9700-1-git-send-email-nikolay@redhat.com>

This patch makes the bond's mode setting use the new option API and
adds support for dependency printing which relies on having an entry for
the mode option in the bond_opts[] array.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
---
 drivers/net/bonding/bond_main.c    | 17 ++++----------
 drivers/net/bonding/bond_netlink.c |  2 +-
 drivers/net/bonding/bond_options.c | 45 ++++++++++++++++++++++----------------
 drivers/net/bonding/bond_options.h |  3 +++
 drivers/net/bonding/bond_sysfs.c   | 27 +++++++----------------
 drivers/net/bonding/bonding.h      |  2 --
 6 files changed, 42 insertions(+), 54 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e06c445..e102096 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -215,17 +215,6 @@ const struct bond_parm_tbl bond_lacp_tbl[] = {
 {	NULL,		-1},
 };
 
-const struct bond_parm_tbl bond_mode_tbl[] = {
-{	"balance-rr",		BOND_MODE_ROUNDROBIN},
-{	"active-backup",	BOND_MODE_ACTIVEBACKUP},
-{	"balance-xor",		BOND_MODE_XOR},
-{	"broadcast",		BOND_MODE_BROADCAST},
-{	"802.3ad",		BOND_MODE_8023AD},
-{	"balance-tlb",		BOND_MODE_TLB},
-{	"balance-alb",		BOND_MODE_ALB},
-{	NULL,			-1},
-};
-
 const struct bond_parm_tbl xmit_hashtype_tbl[] = {
 {	"layer2",		BOND_XMIT_POLICY_LAYER2},
 {	"layer3+4",		BOND_XMIT_POLICY_LAYER34},
@@ -3982,14 +3971,16 @@ int bond_parse_parm(const char *buf, const struct bond_parm_tbl *tbl)
 static int bond_check_params(struct bond_params *params)
 {
 	int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
+	struct bond_value_tbl *valptr;
 	int arp_all_targets_value;
 
 	/*
 	 * Convert string parameters.
 	 */
 	if (mode) {
-		bond_mode = bond_parse_parm(mode, bond_mode_tbl);
-		if (bond_mode == -1) {
+		valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), mode,
+					&bond_mode, true);
+		if (!valptr) {
 			pr_err("Error: Invalid bonding mode \"%s\"\n",
 			       mode == NULL ? "NULL" : mode);
 			return -EINVAL;
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 555c783..51a9be3 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -72,7 +72,7 @@ static int bond_changelink(struct net_device *bond_dev,
 	if (data[IFLA_BOND_MODE]) {
 		int mode = nla_get_u8(data[IFLA_BOND_MODE]);
 
-		err = bond_option_mode_set(bond, mode);
+		err = __bond_opt_intset(bond, BOND_OPT_MODE, mode);
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 7765efb..4aa752f 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -20,7 +20,27 @@
 #include <linux/ctype.h>
 #include "bonding.h"
 
+static struct bond_value_tbl bond_mode_tbl[] = {
+	{ "balance-rr",    BOND_MODE_ROUNDROBIN,   BOND_VALFLAG_DEFAULT},
+	{ "active-backup", BOND_MODE_ACTIVEBACKUP, 0},
+	{ "balance-xor",   BOND_MODE_XOR,          0},
+	{ "broadcast",     BOND_MODE_BROADCAST,    0},
+	{ "802.3ad",       BOND_MODE_8023AD,       0},
+	{ "balance-tlb",   BOND_MODE_TLB,          0},
+	{ "balance-alb",   BOND_MODE_ALB,          0},
+	{ NULL,            -1,                     0},
+};
+
 static struct bond_option bond_opts[] = {
+	[BOND_OPT_MODE] = {
+		.id = BOND_OPT_MODE,
+		.name = "mode",
+		.desc = "bond device mode",
+		.flags = BOND_OPTFLAG_NOSLAVES | BOND_OPTFLAG_IFDOWN,
+		.valtype = BOND_OPTVAL_INTEGER,
+		.values = bond_mode_tbl,
+		.set = bond_option_mode_set
+	},
 	{ }
 };
 
@@ -336,29 +356,15 @@ struct bond_option *bond_opt_get(unsigned int option)
 	return &bond_opts[option];
 }
 
-int bond_option_mode_set(struct bonding *bond, int mode)
+int bond_option_mode_set(struct bonding *bond, void *newval)
 {
-	if (bond_parm_tbl_lookup(mode, bond_mode_tbl) < 0) {
-		pr_err("%s: Ignoring invalid mode value %d.\n",
-		       bond->dev->name, mode);
-		return -EINVAL;
-	}
-
-	if (bond->dev->flags & IFF_UP) {
-		pr_err("%s: unable to update mode because interface is up.\n",
-		       bond->dev->name);
-		return -EPERM;
-	}
-
-	if (bond_has_slaves(bond)) {
-		pr_err("%s: unable to update mode because bond has slaves.\n",
-			bond->dev->name);
-		return -EPERM;
-	}
+	int mode = *(int *)newval;
+	struct bond_option *opt;
 
 	if (BOND_NO_USES_ARP(mode) && bond->params.arp_interval) {
+		opt = bond_opt_get(BOND_OPT_MODE);
 		pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n",
-			bond->dev->name, bond_mode_tbl[mode].modename);
+			bond->dev->name, opt->values[mode].name);
 		/* disable arp monitoring */
 		bond->params.arp_interval = 0;
 		/* set miimon to default value */
@@ -370,6 +376,7 @@ int bond_option_mode_set(struct bonding *bond, int mode)
 	/* don't cache arp_validate between modes */
 	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
 	bond->params.mode = mode;
+
 	return 0;
 }
 
diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h
index c72ecec..12c4d4f 100644
--- a/drivers/net/bonding/bond_options.h
+++ b/drivers/net/bonding/bond_options.h
@@ -41,6 +41,7 @@ enum {
 
 /* Option IDs, their bit positions correspond to their IDs */
 enum {
+	BOND_OPT_MODE,
 	BOND_OPT_LAST
 };
 
@@ -79,4 +80,6 @@ struct bond_value_tbl *bond_opt_parse(const struct bond_option *opt,
 				      void *bufarg, int *retval, bool string);
 struct bond_option *bond_opt_get(unsigned int option);
 struct bond_value_tbl *bond_opt_get_val(unsigned int option, int val);
+
+int bond_option_mode_set(struct bonding *bond, void *newval);
 #endif /* _BOND_OPTIONS_H */
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 011f163..8ce5f4f 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -264,37 +264,26 @@ static ssize_t bonding_show_mode(struct device *d,
 				 struct device_attribute *attr, char *buf)
 {
 	struct bonding *bond = to_bond(d);
+	struct bond_value_tbl *val;
 
-	return sprintf(buf, "%s %d\n",
-			bond_mode_tbl[bond->params.mode].modename,
-			bond->params.mode);
+	val = bond_opt_get_val(BOND_OPT_MODE, bond->params.mode);
+
+	return sprintf(buf, "%s %d\n", val->name, bond->params.mode);
 }
 
 static ssize_t bonding_store_mode(struct device *d,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
 {
-	int new_value, ret;
 	struct bonding *bond = to_bond(d);
+	int ret;
 
-	new_value = bond_parse_parm(buf, bond_mode_tbl);
-	if (new_value < 0)  {
-		pr_err("%s: Ignoring invalid mode value %.*s.\n",
-		       bond->dev->name, (int)strlen(buf) - 1, buf);
-		return -EINVAL;
-	}
-	if (!rtnl_trylock())
+	ret = bond_opt_tryset_rtnl(bond, BOND_OPT_MODE, (char *)buf);
+	if (ret == -EAGAIN)
 		return restart_syscall();
-
-	ret = bond_option_mode_set(bond, new_value);
-	if (!ret) {
-		pr_info("%s: setting mode to %s (%d).\n",
-			bond->dev->name, bond_mode_tbl[new_value].modename,
-			new_value);
+	else if (!ret)
 		ret = count;
-	}
 
-	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(mode, S_IRUGO | S_IWUSR,
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 71e751a..50bbd28 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -439,7 +439,6 @@ void bond_setup(struct net_device *bond_dev);
 unsigned int bond_get_num_tx_queues(void);
 int bond_netlink_init(void);
 void bond_netlink_fini(void);
-int bond_option_mode_set(struct bonding *bond, int mode);
 int bond_option_active_slave_set(struct bonding *bond, struct net_device *slave_dev);
 int bond_option_miimon_set(struct bonding *bond, int miimon);
 int bond_option_updelay_set(struct bonding *bond, int updelay);
@@ -549,7 +548,6 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
 /* exported from bond_main.c */
 extern int bond_net_id;
 extern const struct bond_parm_tbl bond_lacp_tbl[];
-extern const struct bond_parm_tbl bond_mode_tbl[];
 extern const struct bond_parm_tbl xmit_hashtype_tbl[];
 extern const struct bond_parm_tbl arp_validate_tbl[];
 extern const struct bond_parm_tbl arp_all_targets_tbl[];
-- 
1.8.1.4

^ permalink raw reply related

* [RFC net-next 0/3] bonding: new option API
From: Nikolay Aleksandrov @ 2014-01-10 13:11 UTC (permalink / raw)
  To: netdev
  Cc: Nikolay Aleksandrov, Andy Gospodarek, Jay Vosburgh,
	Veaceslav Falico, David S. Miller

Hi,
This patchset aims to introduce a new option API that can be easily
extended if necessary and which attempts to remove some common problems
and code. In the beginning there was support for inter-option dependencies,
but that turned out to be unnecessary as the only 2 options that _enforce_
another option to be set prior to setting are up/down delay and they can be
easily re-worked to not require miimon to be set, so we can spare ourselves
100+ lines of checks, dealing with complex dependency errors and such.
In case this becomes necessary I've kept the old version of this patch-set
which has it, and can easily re-work it at any time.
There're still a lot of things to fix/clean but I've done some limited testing
with the options that are converted and it seems to work.
The main exported functions (as can be seen) are:
__bond_opt_set() - to be used when a string is passed which needs to be
                   converted in the case of BOND_OPTVAL_INTEGER. (sysfs)
__bond_opt_intset() - to be used when a value is passed to
                      BOND_OPTVAL_INTEGER (netlink), this function can't
                      be used for BOND_OPTVAL_STRING options
These two can be used from inside other options to stop them (e.g., arp_interval
stopping miimon and vice versa).
I've also added bond_opt_tryset_rtnl() mostly for sysfs use.
See the description of patch 01 and the comments inside for more information.

Value tables of converted options are no longer exported, and can be accessed
through the API (bond_opt_get_val() & bond_opt_get_flags).
Another good side-effect is that the error codes are standard for all options
for the common errors at least.

When/if this patchset is posted for inclusion, I'll have all options converted.
I actually had them before but while on vacation during December a lot of code
went in changing the bonding options and have to re-work most of the patches.

Some of the future plans for this are:
 Verbose outputting of dependencies (done, just have to polish it)
 Automatic sysfs generation from the bond_opts[].
 Use of the API in bond_check_params() and thus cleaning it up
 Structure for accessor fn parameter passing so we can implement get/set
 in a more general manner

Sending it with 2 options converted which illustrate the use of different
features of the API. I've tested them via sysfs.

Any thoughts, comments and suggestions are very welcome.

Thanks,
 Nik

CC: Andy Gospodarek <andy@greyhouse.net>
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Veaceslav Falico <vfalico@redhat.com>
CC: David S. Miller <davem@davemloft.net>

Nikolay Aleksandrov (3):
  bonding: add infrastructure for an option API
  bonding: convert mode setting to use the new option API
  bonding: convert packets_per_slave to use the new option API

 drivers/net/bonding/bond_main.c    |  17 +-
 drivers/net/bonding/bond_netlink.c |   6 +-
 drivers/net/bonding/bond_options.c | 401 +++++++++++++++++++++++++++++++++----
 drivers/net/bonding/bond_options.h |  87 ++++++++
 drivers/net/bonding/bond_sysfs.c   |  44 ++--
 drivers/net/bonding/bonding.h      |   5 +-
 6 files changed, 473 insertions(+), 87 deletions(-)
 create mode 100644 drivers/net/bonding/bond_options.h

-- 
1.8.1.4

^ permalink raw reply

* [RFC net-next 1/3] bonding: add infrastructure for an option API
From: Nikolay Aleksandrov @ 2014-01-10 13:11 UTC (permalink / raw)
  To: netdev; +Cc: Nikolay Aleksandrov
In-Reply-To: <1389359495-9700-1-git-send-email-nikolay@redhat.com>

This patch adds the necessary basic infrastructure to support
centralized and unified option manipulation API for the bonding. The new
structure bond_option will be used to describe each option with its
dependencies on modes which will be checked automatically thus removing a
lot of duplicated code. Also automatic range checking is added for
non-string options. Currently the option setting function requires RTNL to
be acquired prior to calling it, since many options already rely on RTNL
it seemed like the best choice to protect all against common race
conditions.
In order to add an option the following steps need to be done:
1. Add an entry BOND_OPT_<option> to bond_options.h so it gets a unique id
   and a bit corresponding to the id
2. Add a bond_option entry to the bond_opts[] array in bond_options.c which
   describes the option, its dependencies and its manipulation function
3. Add code to export the option through sysfs and/or as a module parameter
   (the sysfs export will be made automatically in the future)

The current available option types are:
BOND_OPTVAL_INTEGER - range option, the flags should be used to define
                      it properly
BOND_OPTVAL_STRING - string option or an option which wants to do its
                     own validation and conversion since the buffer is
                     passed unmodified to the option manipulation function

The options can have different flags set, currently the following are
supported:
BOND_OPTFLAG_NOSLAVES - require that the bond device has no slaves prior
                        to setting the option
BOND_OPTFLAG_IFDOWN - require that the bond device is down prior to
                      setting the option

There's a new value structure to describe different types of values
which can have the following flags:
BOND_VALFLAG_DEFAULT - marks the default option (permanent string alias
                       to this option is "default")
BOND_VALFLAG_MIN - the minimum value that this option can have
BOND_VALFLAG_MAX - the maximum value that this option can have

An example would be nice here, so if we have an option which can have
the values "off"(2), "special"(4, default) and supports a range, say
16 - 32, it should be defined as follows:
"off", 2,
"special", 4, BOND_VALFLAG_DEFAULT,
"rangemin", 16, BOND_VALFLAG_MIN,
"rangemax", 32, BOND_VALFLAG_MAX
So we have the valid intervals: [2, 2], [4, 4], [16, 32]

BOND_VALFLAG_(MIN|MAX) can be used to specify a valid range for an
option, if MIN is omitted then 0 is considered as a minimum. If an
exact match is found in the values[] table it will be returned,
otherwise the range is tried (if available).
The values[] table which describes the allowed values for an option is
not mandatory for BOND_OPTVAL_STRING, but if it's present it will be
used.

For BOND_OPTVAL_INTEGER the values[] table is mandatory.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
---
 drivers/net/bonding/bond_options.c | 317 +++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_options.h |  82 ++++++++++
 drivers/net/bonding/bonding.h      |   1 +
 3 files changed, 400 insertions(+)
 create mode 100644 drivers/net/bonding/bond_options.h

diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 945a666..7765efb 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -17,8 +17,325 @@
 #include <linux/rwlock.h>
 #include <linux/rcupdate.h>
 #include <linux/reciprocal_div.h>
+#include <linux/ctype.h>
 #include "bonding.h"
 
+static struct bond_option bond_opts[] = {
+	{ }
+};
+
+/* Searches for a value in opt's values[] table */
+struct bond_value_tbl *bond_opt_get_val(unsigned int option, int val)
+{
+	struct bond_option *opt;
+	int i;
+
+	opt = bond_opt_get(option);
+	if (WARN_ON(!opt))
+		return NULL;
+	for (i = 0; opt->values && opt->values[i].name; i++)
+		if (opt->values[i].value == val)
+			return &opt->values[i];
+
+	return NULL;
+}
+
+/* Searches for a value in opt's values[] table which matches the flagmask */
+static struct bond_value_tbl *bond_opt_get_flags(const struct bond_option *opt,
+						 u32 flagmask)
+{
+	int i;
+
+	for (i = 0; opt->values && opt->values[i].name; i++)
+		if (opt->values[i].flags & flagmask)
+			return &opt->values[i];
+
+	return NULL;
+}
+
+static bool bond_opt_check_range(const struct bond_option *opt, int val)
+{
+	struct bond_value_tbl *minval, *maxval;
+
+	minval = bond_opt_get_flags(opt, BOND_VALFLAG_MIN);
+	maxval = bond_opt_get_flags(opt, BOND_VALFLAG_MAX);
+	if (val < 0 || (minval && val < minval->value) ||
+	    (maxval && val > maxval->value))
+		return false;
+
+	return true;
+}
+
+/**
+ * bond_opt_parse - parse option value
+ * @tbl: an option's values[] table to parse against
+ * @bufarg: value to parse
+ * @retval: pointer where to store the parsed value
+ * @string: how to treat bufarg (as string or integer)
+ *
+ * This functions tries to extract the value from bufarg and check if it's
+ * a possible match for the option. It shouldn't be possible to have a non-NULL
+ * return with @retval being < 0.
+ */
+struct bond_value_tbl *bond_opt_parse(const struct bond_option *opt,
+				      void *bufarg, int *retval, bool string)
+{
+	char *buf, *p, valstr[BOND_MAX_MODENAME_LEN + 1] = { 0, };
+	struct bond_value_tbl *tbl, *maxval = NULL, *ret = NULL;
+	int valint = -1, i, rv;
+
+	tbl = opt->values;
+	if (!tbl)
+		goto out;
+
+	if (string) {
+		buf = bufarg;
+		for (p = (char *)buf; *p; p++)
+			if (!(isdigit(*p) || isspace(*p)))
+				break;
+
+		if (*p)
+			rv = sscanf(buf, "%20s", valstr);
+		else
+			rv = sscanf(buf, "%d", &valint);
+
+		if (!rv)
+			goto out;
+	} else {
+		valint = *(int *)bufarg;
+	}
+
+	for (i = 0; tbl[i].name; i++) {
+		/* Obtain maxval in case we don't match anything */
+		if (tbl[i].flags & BOND_VALFLAG_MAX)
+			maxval = &tbl[i];
+
+		/* Check for exact match */
+		if (!strcmp(valstr, "default") &&
+		    (tbl[i].flags & BOND_VALFLAG_DEFAULT))
+			ret = &tbl[i];
+		else if (valint == tbl[i].value)
+			ret = &tbl[i];
+		else if (!strcmp(valstr, tbl[i].name))
+			ret = &tbl[i];
+
+		/* Exact match */
+		if (ret) {
+			valint = ret->value;
+			goto out;
+		}
+	}
+	/* Possible range match */
+	if (bond_opt_check_range(opt, valint))
+		ret = maxval;
+out:
+	if (ret)
+		*retval = valint;
+	else
+		*retval = -ENOENT;
+
+	return ret;
+}
+
+/* Check opt's dependencies against bond mode and currently set options */
+static int bond_opt_check_deps(struct bonding *bond,
+			       const struct bond_option *opt)
+{
+	struct bond_params *params = &bond->params;
+
+	if (test_bit(params->mode, &opt->unsuppmodes))
+		return -EACCES;
+	if ((opt->flags & BOND_OPTFLAG_NOSLAVES) && bond_has_slaves(bond))
+		return -ENOTEMPTY;
+	if ((opt->flags & BOND_OPTFLAG_IFDOWN) && (bond->dev->flags & IFF_UP))
+		return -EBUSY;
+
+	return 0;
+}
+
+static void bond_opt_dep_print(struct bonding *bond,
+			       const struct bond_option *opt)
+{
+	struct bond_params *params;
+
+	params = &bond->params;
+	if (test_bit(params->mode, &opt->unsuppmodes))
+		pr_err("%s: option %s: mode dependency failed\n",
+		       bond->dev->name, opt->name);
+}
+
+static void bond_opt_error_interpret(struct bonding *bond,
+				     const struct bond_option *opt,
+				     int error, char *buf)
+{
+	struct bond_value_tbl *minval, *maxval;
+	char *p;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+	switch (error) {
+	case -EINVAL:
+		pr_err("%s: option %s: invalid value (%s).\n",
+		       bond->dev->name, opt->name, buf);
+		if (opt->valtype == BOND_OPTVAL_STRING)
+			break;
+		minval = bond_opt_get_flags(opt, BOND_VALFLAG_MIN);
+		maxval = bond_opt_get_flags(opt, BOND_VALFLAG_MAX);
+		if (!maxval)
+			break;
+		pr_err("%s: option %s: allowed values %d - %d.\n",
+		       bond->dev->name, opt->name, minval ? minval->value : 0,
+		       maxval->value);
+		break;
+	case -EACCES:
+		bond_opt_dep_print(bond, opt);
+		break;
+	case -ENOTEMPTY:
+		pr_err("%s: option %s: unable to set because the bond has slaves.\n",
+		       bond->dev->name, opt->name);
+		break;
+	case -EBUSY:
+		pr_err("%s: option %s: unable to set because the bond device is up.\n",
+		       bond->dev->name, opt->name);
+		break;
+	default:
+		break;
+	}
+}
+
+static int bond_opt_call_intset(struct bonding *bond,
+				const struct bond_option *opt,
+				int newval)
+{
+	struct bond_value_tbl *valptr;
+	int value = newval;
+
+	if (opt->valtype != BOND_OPTVAL_INTEGER)
+		return -EINVAL;
+	/* Check value and store the result in it */
+	valptr = bond_opt_parse(opt, &value, &value, false);
+	if (!valptr)
+		return -EINVAL;
+
+	return opt->set(bond, &value);
+}
+
+/**
+ * bond_opt_call_set - convert buf and check the value based on opt->valtype
+ * @bond: bond device to set on
+ * @opt: option to set
+ * @buf: value to set the option to
+ *
+ * This function converts buf to the appropriate value based on the opt's
+ * valtype.
+ */
+static int bond_opt_call_set(struct bonding *bond,
+			     const struct bond_option *opt,
+			     char *buf)
+{
+	struct bond_value_tbl *valptr;
+	int value = -1;
+
+	if (opt->valtype == BOND_OPTVAL_INTEGER) {
+		valptr = bond_opt_parse(opt, buf, &value, true);
+		if (!valptr)
+			return -EINVAL;
+		return opt->set(bond, &value);
+	} else {
+		return opt->set(bond, buf);
+	}
+}
+
+/**
+ * __bond_opt_set - set a bonding option
+ * @bond: target bond device
+ * @option: option to set
+ * @buf: value to set it to
+ *
+ * This function is used to change the bond's option value to newval, it can be
+ * used for both enabling/changing an option and for disabling it. RTNL lock
+ * must be obtained before calling this function.
+ */
+int __bond_opt_set(struct bonding *bond, unsigned int option, char *buf)
+{
+	const struct bond_option *opt;
+	int ret = -ENOENT;
+
+	ASSERT_RTNL();
+
+	opt = bond_opt_get(option);
+	if (WARN_ON(!buf) || WARN_ON(!opt))
+		goto out;
+	ret = bond_opt_check_deps(bond, opt);
+	if (ret)
+		goto out;
+	ret = bond_opt_call_set(bond, opt, buf);
+out:
+	if (ret)
+		bond_opt_error_interpret(bond, opt, ret, buf);
+
+	return ret;
+}
+
+/* See comment for __bond_opt_set. This function is used to set a
+ * BOND_OPTVAL_INTEGER option to a value directly without string parsing. The
+ * value is still checked against the option's values[] table.
+ */
+int __bond_opt_intset(struct bonding *bond, unsigned int option, int value)
+{
+	const struct bond_option *opt;
+	int ret = -ENOENT;
+
+	ASSERT_RTNL();
+
+	opt = bond_opt_get(option);
+	if (WARN_ON(!opt))
+		return ret;
+	ret = bond_opt_check_deps(bond, opt);
+	if (ret)
+		return ret;
+	ret = bond_opt_call_intset(bond, opt, value);
+
+	return ret;
+}
+
+/**
+ * bond_opt_tryset_rtnl - try to acquire rtnl and call __bond_opt_set
+ * @bond: target bond device
+ * @option: option to set
+ * @buf: value to set it to
+ *
+ * This function tries to acquire RTNL without blocking and if successful
+ * calls __bond_opt_set, otherwise returns -EAGAIN to notify the caller.
+ */
+int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf)
+{
+	int ret;
+
+	if (!rtnl_trylock())
+		return -EAGAIN;
+	ret = __bond_opt_set(bond, option, buf);
+	rtnl_unlock();
+
+	return ret;
+}
+
+/**
+ * bond_opt_get - get a pointer to an option
+ * @option: option for which to return a pointer
+ *
+ * This function checks if option is valid and if so returns a pointer
+ * to its entry in the bond_opts[] option array.
+ */
+struct bond_option *bond_opt_get(unsigned int option)
+{
+	if (!BOND_OPT_VALID(option))
+		return NULL;
+
+	return &bond_opts[option];
+}
+
 int bond_option_mode_set(struct bonding *bond, int mode)
 {
 	if (bond_parm_tbl_lookup(mode, bond_mode_tbl) < 0) {
diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h
new file mode 100644
index 0000000..c72ecec
--- /dev/null
+++ b/drivers/net/bonding/bond_options.h
@@ -0,0 +1,82 @@
+/*
+ * drivers/net/bond/bond_options.h - bonding options
+ * Copyright (c) 2013 Nikolay Aleksandrov <nikolay@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _BOND_OPTIONS_H
+#define _BOND_OPTIONS_H
+
+#define BOND_OPT_VALID(opt) ((opt) < BOND_OPT_LAST)
+#define BOND_MODE_ALL_EX(x) (~(x))
+
+/* Option value types */
+enum {
+	BOND_OPTVAL_INTEGER,
+	BOND_OPTVAL_STRING
+};
+
+/* Option flags:
+ * BOND_OPTFLAG_NOSLAVES - check if the bond device is empty before setting
+ * BOND_OPTFLAG_IFDOWN - check if the bond device is down before setting
+ */
+enum {
+	BOND_OPTFLAG_NOSLAVES	= BIT(0),
+	BOND_OPTFLAG_IFDOWN	= BIT(1)
+};
+
+/* Value type flags:
+ * BOND_VALFLAG_DEFAULT - mark the value as default
+ * BOND_VALFLAG_(MIN|MAX) - mark the value as min/max
+ */
+enum {
+	BOND_VALFLAG_DEFAULT	= BIT(0),
+	BOND_VALFLAG_MIN	= BIT(1),
+	BOND_VALFLAG_MAX	= BIT(2)
+};
+
+/* Option IDs, their bit positions correspond to their IDs */
+enum {
+	BOND_OPT_LAST
+};
+
+struct bond_value_tbl {
+	char *name;
+	int value;
+	u32 flags;
+};
+
+struct bonding;
+
+struct bond_option {
+	int id;
+	char *name;
+	char *desc;
+	u32 flags;
+	u8 valtype;
+
+	/* unsuppmodes is used to denote modes in which the option isn't
+	 * supported.
+	 */
+	unsigned long unsuppmodes;
+	/* supported values which this option can have, can be a subset of
+	 * BOND_OPTVAL_RANGE's value range
+	 */
+	struct bond_value_tbl *values;
+
+	int (*set)(struct bonding *bond, void *newval);
+};
+
+void bond_opt_init(void);
+int __bond_opt_set(struct bonding *bond, unsigned int option, char *buf);
+int __bond_opt_intset(struct bonding *bond, unsigned int option, int value);
+int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf);
+struct bond_value_tbl *bond_opt_parse(const struct bond_option *opt,
+				      void *bufarg, int *retval, bool string);
+struct bond_option *bond_opt_get(unsigned int option);
+struct bond_value_tbl *bond_opt_get_val(unsigned int option, int val);
+#endif /* _BOND_OPTIONS_H */
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 955dc48..71e751a 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -25,6 +25,7 @@
 #include <linux/etherdevice.h>
 #include "bond_3ad.h"
 #include "bond_alb.h"
+#include "bond_options.h"
 
 #define DRV_VERSION	"3.7.1"
 #define DRV_RELDATE	"April 27, 2011"
-- 
1.8.1.4

^ permalink raw reply related

* Re: [PATCH net-next v3 2/9] xen-netback: Change TX path from grant copy to mapping
From: Wei Liu @ 2014-01-10 13:16 UTC (permalink / raw)
  To: Zoltan Kiss
  Cc: Wei Liu, ian.campbell, xen-devel, netdev, linux-kernel,
	jonathan.davies
In-Reply-To: <20140110114534.GE29180@zion.uk.xensource.com>

On Fri, Jan 10, 2014 at 11:45:34AM +0000, Wei Liu wrote:
> On Fri, Jan 10, 2014 at 11:35:08AM +0000, Zoltan Kiss wrote:
> [...]
> > 
> > >>@@ -920,6 +852,18 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
> > >>  	err = gop->status;
> > >>  	if (unlikely(err))
> > >>  		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
> > >>+	else {
> > >>+		if (vif->grant_tx_handle[pending_idx] !=
> > >>+			NETBACK_INVALID_HANDLE) {
> > >>+			netdev_err(vif->dev,
> > >>+				"Stale mapped handle! pending_idx %x handle %x\n",
> > >>+				pending_idx, vif->grant_tx_handle[pending_idx]);
> > >>+			BUG();
> > >>+		}
> > >>+		set_phys_to_machine(idx_to_pfn(vif, pending_idx),
> > >>+			FOREIGN_FRAME(gop->dev_bus_addr >> PAGE_SHIFT));
> > >
> > >What happens when you don't have this?
> > Your frags will be filled with garbage. I don't understand exactly
> > what this function does, someone might want to enlighten us? I've
> > took it's usage from classic kernel.
> > Also, it might be worthwhile to check the return value and BUG if
> > it's false, but I don't know what exactly that return value means.
> > 
> 
> This is actually part of gnttab_map_refs. As you're using hypercall
> directly this becomes very fragile.
> 

To make it clear, set_phys_to_machine is done within m2p_add_override.

Wei.

^ permalink raw reply

* [PATCH net-next] net: vxlan: when lower dev unregisters remove vxlan dev as well
From: Daniel Borkmann @ 2014-01-10 13:01 UTC (permalink / raw)
  To: davem; +Cc: netdev

We can create a vxlan device with an explicit underlying carrier.
In that case, when the carrier link is being deleted from the
system (e.g. due to module unload) we should also clean up all
created vxlan devices on top of it since otherwise we're in an
inconsistent state in vxlan device. In that case, the user needs
to remove all such devices, while in case of other virtual devs
that sit on top of physical ones, it is usually the case that
these devices do unregister automatically as well and do not
leave the burden on the user.

This work is not necessary when vxlan device was not created with
a real underlying device, as connections can resume in that case
when driver is plugged again. But at least for the other cases,
we should go ahead and do the cleanup on removal.

`ip -d link show vxlan13` after carrier removal before this patch:

5: vxlan13: <BROADCAST,MULTICAST> mtu 1450 qdisc noop state DOWN mode DEFAULT group default
    link/ether 1e:47:da:6d:4d:99 brd ff:ff:ff:ff:ff:ff promiscuity 0
    vxlan id 13 group 239.0.0.10 dev 2 port 32768 61000 ageing 300
                                 ^^^^^
While at it, we should also use vxlan_dellink() handler in
vxlan_exit_net() as otherwise we seem to leak memory on module
unload since only half of the cleanup is being done.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
---
 drivers/net/vxlan.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 7 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 481f85d..39035ba 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2656,6 +2656,54 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.fill_info	= vxlan_fill_info,
 };
 
+static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
+					     struct net_device *dev)
+{
+	struct vxlan_dev *vxlan, *next;
+	LIST_HEAD(list_kill);
+
+	BUG_ON(!rtnl_is_locked());
+
+	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
+		struct vxlan_rdst *dst = &vxlan->default_dst;
+
+		/* In case we created vxlan device with carrier
+		 * and we loose the carrier due to module unload
+		 * we also need to remove vxlan device. In other
+		 * cases, it's not necessary and remote_ifindex
+		 * is 0 here, so no matches.
+		 */
+		if (dst->remote_ifindex == dev->ifindex)
+			vxlan_dellink(vxlan->dev, &list_kill);
+	}
+
+	unregister_netdevice_many(&list_kill);
+	list_del(&list_kill);
+}
+
+static int vxlan_lowerdev_event(struct notifier_block *unused,
+				unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		/* Twiddle thumbs on netns device moves. */
+		if (dev->reg_state != NETREG_UNREGISTERING)
+			break;
+
+		vxlan_handle_lowerdev_unregister(vn, dev);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vxlan_notifier_block __read_mostly = {
+	.notifier_call = vxlan_lowerdev_event,
+};
+
 static __net_init int vxlan_init_net(struct net *net)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -2673,14 +2721,16 @@ static __net_init int vxlan_init_net(struct net *net)
 static __net_exit void vxlan_exit_net(struct net *net)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-	struct vxlan_dev *vxlan;
-	LIST_HEAD(list);
+	struct vxlan_dev *vxlan, *next;
+	LIST_HEAD(list_kill);
 
 	rtnl_lock();
-	list_for_each_entry(vxlan, &vn->vxlan_list, next)
-		unregister_netdevice_queue(vxlan->dev, &list);
-	unregister_netdevice_many(&list);
+	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next)
+		vxlan_dellink(vxlan->dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
 	rtnl_unlock();
+
+	list_del(&list_kill);
 }
 
 static struct pernet_operations vxlan_net_ops = {
@@ -2704,12 +2754,17 @@ static int __init vxlan_init_module(void)
 	if (rc)
 		goto out1;
 
-	rc = rtnl_link_register(&vxlan_link_ops);
+	rc = register_netdevice_notifier(&vxlan_notifier_block);
 	if (rc)
 		goto out2;
 
-	return 0;
+	rc = rtnl_link_register(&vxlan_link_ops);
+	if (rc)
+		goto out3;
 
+	return 0;
+out3:
+	unregister_netdevice_notifier(&vxlan_notifier_block);
 out2:
 	unregister_pernet_device(&vxlan_net_ops);
 out1:
@@ -2721,6 +2776,7 @@ late_initcall(vxlan_init_module);
 static void __exit vxlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&vxlan_link_ops);
+	unregister_netdevice_notifier(&vxlan_notifier_block);
 	destroy_workqueue(vxlan_wq);
 	unregister_pernet_device(&vxlan_net_ops);
 	rcu_barrier();
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next] net: make dev_set_mtu() honor notification return code
From: Veaceslav Falico @ 2014-01-10 12:48 UTC (permalink / raw)
  To: netdev
  Cc: Veaceslav Falico, Jiri Pirko, David S. Miller, Eric Dumazet,
	Alexander Duyck, Nicolas Dichtel

Currently, after changing the MTU for a device, dev_set_mtu() calls
NETDEV_CHANGEMTU notification, however doesn't verify it's return code -
which can be NOTIFY_BAD - i.e. some of the net notifier blocks refused this
change, and continues nevertheless.

To fix this, verify the return code, and if it's an error - then revert the
MTU to the original one, and pass the error code.

CC: Jiri Pirko <jiri@resnulli.us>
CC: "David S. Miller" <davem@davemloft.net>
CC: Eric Dumazet <edumazet@google.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 net/core/dev.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index ce01847..1c570ff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5287,6 +5287,17 @@ int dev_change_flags(struct net_device *dev, unsigned int flags)
 }
 EXPORT_SYMBOL(dev_change_flags);
 
+static int __dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (ops->ndo_change_mtu)
+		return ops->ndo_change_mtu(dev, new_mtu);
+
+	dev->mtu = new_mtu;
+	return 0;
+}
+
 /**
  *	dev_set_mtu - Change maximum transfer unit
  *	@dev: device
@@ -5296,8 +5307,7 @@ EXPORT_SYMBOL(dev_change_flags);
  */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
-	int err;
+	int err, orig_mtu;
 
 	if (new_mtu == dev->mtu)
 		return 0;
@@ -5309,14 +5319,15 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
-	err = 0;
-	if (ops->ndo_change_mtu)
-		err = ops->ndo_change_mtu(dev, new_mtu);
-	else
-		dev->mtu = new_mtu;
+	orig_mtu = dev->mtu;
+	err = __dev_set_mtu(dev, new_mtu);
 
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+	if (!err) {
+		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		err = notifier_to_errno(err);
+		if (err)
+			__dev_set_mtu(dev, orig_mtu);
+	}
 	return err;
 }
 EXPORT_SYMBOL(dev_set_mtu);
-- 
1.8.4

^ permalink raw reply related

* DiePresse.com Mein Freund, der Fahrradhändler
From: franklinbuthelezi1 @ 2014-01-10 12:33 UTC (permalink / raw)
  To: netdev

Folgender Artikel auf http://diepresse.com wird Ihnen von Franklin Buthelezi empfohlen:

Mein Freund, der Fahrradhändler
Manche Dinge verändern sich über die langen Wintermonate hinweg. 

http://diepresse.com/home/leben/mode/kolumnezumtag/548717/index.do

Notiz:
Dear friend,

I am the international operations manager of ABSA bank South Africa. My name is Franklin Buthelezi. I have a sensitive and private offer from the top executive to seek your partnership in re-profiling some offshore investment funds worth 11.5M U.S.D (Eleven Million five hundred thousand United States Dollars) I am constrained however to withhold most of the details for now. This is a legitimate transaction. 

If we agree on the terms you will be given 15% of the total funds, if you are interested please write back via email providing me your personal details and phone number for further directives.

If you are interested and willing to render your assistance please respond via my private email address stated below.

I look forward to your response.

Best regards
Franklin Buthelezi
Email: franklinbuthelezi1@yahoo.com

------------------------------------
DiePresse.com übernimmt keine Haftung für den Inhalt der persönlichen Nachricht.

© DiePresse.com - http://diepresse.com

^ permalink raw reply

* Re: [PATCH net] bonding: reset the slave's mtu when its be changed
From: Veaceslav Falico @ 2014-01-10 12:19 UTC (permalink / raw)
  To: Ding Tianhong; +Cc: Jay Vosburgh, Netdev, David S. Miller
In-Reply-To: <52CFDA63.8070601@huawei.com>

On Fri, Jan 10, 2014 at 07:32:51PM +0800, Ding Tianhong wrote:
>All slave should have the same mtu with mastet's, and the bond do it when
>enslave the slave, but the user could change the slave's mtu, it will cause
>the master and slave have different mtu, althrough in AB mode, it does not
>matter if the slave is not the current slave, but in other mode, it is incorrect,
>so reset the slave's mtu like the master set.

Why "net"? It's not a bugfix, it's a feature, and really discussable.

Also, wrt the actual change - why do you think it's incorrect for slaves in
bonding mode other than AB to have different MTU values? I don't see any
reason for it, from the top of the head.

>
>Cc: Jay Vosburgh <fubar@us.ibm.com>
>Cc: Veaceslav Falico <vfalico@redhat.com>
>Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
>---
> drivers/net/bonding/bond_main.c | 21 ++++++++++-----------
> 1 file changed, 10 insertions(+), 11 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 398e299..e7b5bcf 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2882,18 +2882,17 @@ static int bond_slave_netdev_event(unsigned long event,
> 		 */
> 		break;
> 	case NETDEV_CHANGEMTU:
>-		/*
>-		 * TODO: Should slaves be allowed to
>-		 * independently alter their MTU?  For
>-		 * an active-backup bond, slaves need
>-		 * not be the same type of device, so
>-		 * MTUs may vary.  For other modes,
>-		 * slaves arguably should have the
>-		 * same MTUs. To do this, we'd need to
>-		 * take over the slave's change_mtu
>-		 * function for the duration of their
>-		 * servitude.
>+		/* All slave should have the same mtu
>+		 * as master.
> 		 */
>+		if (slave->dev->mtu != bond->dev->mtu) {

If we've got the event then it means it was changed to something different.
No need to verify.

>+			int res;
>+			slave->original_mtu = slave->dev->mtu;

If we're refusing to apply the *new* mtu, then why should we save it as the
original? The original_mtu is the mtu that the slave had before it was
enslaved.

>+			res = dev_set_mtu(slave->dev, bond->dev->mtu);
>+			if (res)
>+				pr_debug("Error %d calling dev_set_mtu for slave %s\n",
>+					 res, slave->dev->name);
>+		}

Also, bonding should be vocal about changing forcibly the mtu - otherwise
we'd end up with silently dropping the changes:

ifconfig eth0 mtu 9000
echo $?
-> 0
ifconfig eth0
MTU: 1500

or something like that, it will pass it up, refusing changes:

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e06c445..0b36045 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2846,19 +2846,8 @@ static int bond_slave_netdev_event(unsigned long event,
  		 */
  		break;
  	case NETDEV_CHANGEMTU:
-		/*
-		 * TODO: Should slaves be allowed to
-		 * independently alter their MTU?  For
-		 * an active-backup bond, slaves need
-		 * not be the same type of device, so
-		 * MTUs may vary.  For other modes,
-		 * slaves arguably should have the
-		 * same MTUs. To do this, we'd need to
-		 * take over the slave's change_mtu
-		 * function for the duration of their
-		 * servitude.
-		 */
-		break;
+		/* don't permit slaves to change their MTU */
+		return NOTIFY_BAD;
  	case NETDEV_CHANGENAME:
  		/*
  		 * TODO: handle changing the primary's name

> 		break;
> 	case NETDEV_CHANGENAME:
> 		/*
>--
>1.8.0
>
>

^ permalink raw reply related

* Re: [PATCH net-next 1/4] bonding: update the primary when slave name changed
From: Ding Tianhong @ 2014-01-10 11:55 UTC (permalink / raw)
  To: Veaceslav Falico; +Cc: Jay Vosburgh, David S. Miller, Netdev
In-Reply-To: <20140110111143.GB4132@redhat.com>

On 2014/1/10 19:11, Veaceslav Falico wrote:
> On Fri, Jan 10, 2014 at 07:05:34PM +0800, Ding Tianhong wrote:
>> On 2014/1/10 15:44, Veaceslav Falico wrote:
>>> If it's not the primray
>>> slave, and we don't have one - select it as a new primary and, again, see
>>> if we need to select a new active slave.
>>
>> I don't think so , I think if it is not the primary slave and we don't have one,
>> no need to do anything, just a normal slave change its name.
> 
> If primary == "my_eth0", you have 2 slaves - "eth0" and "eth1", thus null
> primary_slave, and rename eth0 to my_eth0 - then you need to set
> primary_slave to my_eth0.
> 
> If primary == "my_eth0", you have 2 slaves - "my_eth0" and "eth1", thus
> primary_slave == dev with name "my_eth0", and rename "my_eth0" to "eth0" -
> then you must set primary_slave to NULL.
> 
> And after either of these you must see if you need to re-select the active
> slave, as it might have been forced by the primary_slave, which has been
> modified.
> 
> You might also want to add some pr_info() about adding/removing
> primary_slave, as the user to be aware.
> 

Ok thanks.

Regards
Ding

> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox