* [net-next 09/11] ixgbevf: Adds function to set PSRTYPE register
From: Jeff Kirsher @ 2013-10-24 15:27 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, netdev, gospo, sassmann, Alexander Duyck,
Jeff Kirsher
In-Reply-To: <1382628458-26601-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
This patch creates a new function to set PSRTYPE. This function helps lay
the ground work for eventual multi queue support.
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index ce27d62..c0f9aad 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1082,6 +1082,21 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index)
IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
}
+static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ /* PSRTYPE must be initialized in 82599 */
+ u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR |
+ IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR |
+ IXGBE_PSRTYPE_L2HDR;
+
+ if (adapter->num_rx_queues > 1)
+ psrtype |= 1 << 29;
+
+ IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+}
+
static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
@@ -1129,8 +1144,7 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
int i, j;
u32 rdlen;
- /* PSRTYPE must be initialized in 82599 */
- IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, 0);
+ ixgbevf_setup_psrtype(adapter);
/* set_rx_buffer_len must be called before ring initialization */
ixgbevf_set_rx_buffer_len(adapter);
--
1.8.3.1
^ permalink raw reply related
* [net-next 07/11] ixgbe: use pcie_capability_read_word() to simplify code
From: Jeff Kirsher @ 2013-10-24 15:27 UTC (permalink / raw)
To: davem; +Cc: Yijing Wang, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1382628458-26601-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Yijing Wang <wangyijing@huawei.com>
use pcie_capability_read_word() to simplify code.
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6828d0e7..ce3eb60 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -153,7 +153,6 @@ MODULE_VERSION(DRV_VERSION);
static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
u32 reg, u16 *value)
{
- int pos = 0;
struct pci_dev *parent_dev;
struct pci_bus *parent_bus;
@@ -165,11 +164,10 @@ static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
if (!parent_dev)
return -1;
- pos = pci_find_capability(parent_dev, PCI_CAP_ID_EXP);
- if (!pos)
+ if (!pci_is_pcie(parent_dev))
return -1;
- pci_read_config_word(parent_dev, pos + reg, value);
+ pcie_capability_read_word(parent_dev, reg, value);
return 0;
}
--
1.8.3.1
^ permalink raw reply related
* [net-next 08/11] ixgbe: fix rx-usecs range checks for BQL
From: Jeff Kirsher @ 2013-10-24 15:27 UTC (permalink / raw)
To: davem; +Cc: Emil Tantilov, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1382628458-26601-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Emil Tantilov <emil.s.tantilov@intel.com>
This patch resolves an issue where the logic used to detect changes in rx-usecs
was incorrect and was masked by the call to ixgbe_update_rsc().
Setting rx-usecs between 0,2-9 and 1,10 and up requires a reset to allow
ixgbe_configure_tx_ring() to set the correct value for TXDCTL.WTHRESH in
order to avoid Tx hangs with BQL enabled.
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 90aac31..4e7c9b0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2257,13 +2257,13 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
#if IS_ENABLED(CONFIG_BQL)
/* detect ITR changes that require update of TXDCTL.WTHRESH */
- if ((adapter->tx_itr_setting > 1) &&
+ if ((adapter->tx_itr_setting != 1) &&
(adapter->tx_itr_setting < IXGBE_100K_ITR)) {
if ((tx_itr_prev == 1) ||
- (tx_itr_prev > IXGBE_100K_ITR))
+ (tx_itr_prev >= IXGBE_100K_ITR))
need_reset = true;
} else {
- if ((tx_itr_prev > 1) &&
+ if ((tx_itr_prev != 1) &&
(tx_itr_prev < IXGBE_100K_ITR))
need_reset = true;
}
--
1.8.3.1
^ permalink raw reply related
* [net-next 06/11] ixgbe: cleanup ixgbe_enumerate_functions
From: Jeff Kirsher @ 2013-10-24 15:27 UTC (permalink / raw)
To: davem; +Cc: Jacob Keller, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1382628458-26601-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Jacob Keller <jacob.e.keller@intel.com>
This function previously had the same check as used by the
ixgbe_pcie_from_parent. As the hardcode is due to the device having an internal
switch, this function should simply use the call from ixgbe_pcie_from_parent.
This reduces code complexity and makes it less likely a developer will forget
to update the list in the future.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 43b777a..6828d0e7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7362,19 +7362,16 @@ static const struct net_device_ops ixgbe_netdev_ops = {
**/
static inline int ixgbe_enumerate_functions(struct ixgbe_adapter *adapter)
{
- struct ixgbe_hw *hw = &adapter->hw;
struct list_head *entry;
int physfns = 0;
- /* Some cards can not use the generic count PCIe functions method, and
- * so must be hardcoded to the correct value.
+ /* Some cards can not use the generic count PCIe functions method,
+ * because they are behind a parent switch, so we hardcode these with
+ * the correct number of functions.
*/
- switch (hw->device_id) {
- case IXGBE_DEV_ID_82599_SFP_SF_QP:
- case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+ if (ixgbe_pcie_from_parent(&adapter->hw)) {
physfns = 4;
- break;
- default:
+ } else {
list_for_each(entry, &adapter->pdev->bus_list) {
struct pci_dev *pdev =
list_entry(entry, struct pci_dev, bus_list);
--
1.8.3.1
^ permalink raw reply related
* [net-next 10/11] ixgbevf: implement ethtool get/set coalesce
From: Jeff Kirsher @ 2013-10-24 15:27 UTC (permalink / raw)
To: davem; +Cc: Jacob Keller, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1382628458-26601-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Jacob Keller <jacob.e.keller@intel.com>
This patch adds support for ethtool's get_coalesce and set_coalesce command for
the ixgbevf driver. This enables dynamically updating the minimum time between
interrupts.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ethtool.c | 81 +++++++++++++++++++++++
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 +
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +-
3 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 84329b0..21adb1b 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -634,6 +634,85 @@ static int ixgbevf_nway_reset(struct net_device *netdev)
return 0;
}
+static int ixgbevf_get_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+ /* only valid if in constant ITR mode */
+ if (adapter->rx_itr_setting <= 1)
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+ else
+ ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+ /* if in mixed tx/rx queues per vector mode, report only rx settings */
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+ return 0;
+
+ /* only valid if in constant ITR mode */
+ if (adapter->tx_itr_setting <= 1)
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+ else
+ ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+
+ return 0;
+}
+
+static int ixgbevf_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+ struct ixgbevf_q_vector *q_vector;
+ int num_vectors, i;
+ u16 tx_itr_param, rx_itr_param;
+
+ /* don't accept tx specific changes if we've got mixed RxTx vectors */
+ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count
+ && ec->tx_coalesce_usecs)
+ return -EINVAL;
+
+
+ if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) ||
+ (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)))
+ return -EINVAL;
+
+ if (ec->rx_coalesce_usecs > 1)
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+ else
+ adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+
+ if (adapter->rx_itr_setting == 1)
+ rx_itr_param = IXGBE_20K_ITR;
+ else
+ rx_itr_param = adapter->rx_itr_setting;
+
+
+ if (ec->tx_coalesce_usecs > 1)
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+ else
+ adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+
+ if (adapter->tx_itr_setting == 1)
+ tx_itr_param = IXGBE_10K_ITR;
+ else
+ tx_itr_param = adapter->tx_itr_setting;
+
+ num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+ for (i = 0; i < num_vectors; i++) {
+ q_vector = adapter->q_vector[i];
+ if (q_vector->tx.count && !q_vector->rx.count)
+ /* tx only */
+ q_vector->itr = tx_itr_param;
+ else
+ /* rx only or mixed */
+ q_vector->itr = rx_itr_param;
+ ixgbevf_write_eitr(q_vector);
+ }
+
+ return 0;
+}
+
static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_settings = ixgbevf_get_settings,
.get_drvinfo = ixgbevf_get_drvinfo,
@@ -649,6 +728,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
.get_sset_count = ixgbevf_get_sset_count,
.get_strings = ixgbevf_get_strings,
.get_ethtool_stats = ixgbevf_get_ethtool_stats,
+ .get_coalesce = ixgbevf_get_coalesce,
+ .set_coalesce = ixgbevf_set_coalesce,
};
void ixgbevf_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 64a2b91..d7837dcc 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -293,6 +293,8 @@ void ixgbevf_free_tx_resources(struct ixgbevf_adapter *, struct ixgbevf_ring *);
void ixgbevf_update_stats(struct ixgbevf_adapter *adapter);
int ethtool_ioctl(struct ifreq *ifr);
+extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
+
void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index c0f9aad..8407fd2 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -580,7 +580,7 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget)
* ixgbevf_write_eitr - write VTEITR register in hardware specific way
* @q_vector: structure containing interrupt and ring information
*/
-static void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector)
+void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector)
{
struct ixgbevf_adapter *adapter = q_vector->adapter;
struct ixgbe_hw *hw = &adapter->hw;
--
1.8.3.1
^ permalink raw reply related
* [net-next 11/11] ixgbevf: bump driver version
From: Jeff Kirsher @ 2013-10-24 15:27 UTC (permalink / raw)
To: davem; +Cc: Don Skidmore, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1382628458-26601-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Don Skidmore <donald.c.skidmore@intel.com>
Bump patch to reflect what version of the out of tree driver it has
equivalent functionality with (2.11.3).
Signed-off-by: Don Skidmore <donald.c.skidmore@intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 8407fd2..87279c8a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -58,7 +58,7 @@ const char ixgbevf_driver_name[] = "ixgbevf";
static const char ixgbevf_driver_string[] =
"Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver";
-#define DRV_VERSION "2.7.12-k"
+#define DRV_VERSION "2.11.3-k"
const char ixgbevf_driver_version[] = DRV_VERSION;
static char ixgbevf_copyright[] =
"Copyright (c) 2009 - 2012 Intel Corporation.";
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH net] net: sched: Don't free f before it is allocated in route4_change
From: Christoph Paasch @ 2013-10-24 15:41 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, David Miller, Jamal Hadi Salim, Jing Wang
In-Reply-To: <1382627157.7572.59.camel@edumazet-glaptop.roam.corp.google.com>
On 24/10/13 - 08:05:57, Eric Dumazet wrote:
> On Thu, 2013-10-24 at 16:59 +0200, Christoph Paasch wrote:
> > On 24/10/13 - 07:54:33, Eric Dumazet wrote:
>
> > > I see no bug here, you missed the "goto reinsert;"
> >
> > Ups - sorry...
> >
>
> Yeah, trying to find bugs in Alexey code is really tricky ;)
Yeah :)
Instead of the goto, a simple if {} else {} would have made it
more readable IMO.
^ permalink raw reply
* Re: [RFC PATCH net-next] ppp: Allow ppp device connected to an l2tp session to change of namespace
From: James Chapman @ 2013-10-24 15:43 UTC (permalink / raw)
To: François Cachereul; +Cc: Paul Mackerras, netdev, linux-ppp
In-Reply-To: <526923A7.8090108@alphalink.fr>
On 24/10/13 14:41, François Cachereul wrote:
> On 10/24/2013 12:55 PM, James Chapman wrote:
>> On 24/10/13 11:30, François Cachereul wrote:
>>> Remove NETIF_F_NETNS_LOCAL flag from ppp device in ppp_connect_channel()
>>> if the device is connected to a l2tp session socket.
>>> Restore the flag in ppp_disconnect_channel().
>>
>> What about pppd's network namespace? Also, L2TP's tunnel socket (UDP or
>> L2TP/IP) will be in a different namespace if the ppp interface is moved.
>
> That's what I'm trying to achieve. I'm not using pppd and my problem is
> as follow: I need to isolate ppp devices from each other, even when
> they are connected to sessions carried by the same L2TP tunnel.
I'm thinking about the implications of a skb in the net namespace of the
ppp interface passing through a tunnel socket which is in another
namespace. I think net namespaces are completely isolated.
To keep your ppp interfaces isolated from each other, have you
considered using netfilter to prevent data being passed between ppp
interfaces?
> Also, I
> need the authentication to be terminated to know the namespace in which
> the ppp will be moved. For that, the process runs in a namespace with
> its l2tp sockets (tunnel and session) in that same namespace and each
> ppp device is moved in a specific namespace after authentication.
>
> Regards
> François
>
--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development
^ permalink raw reply
* Re: [PATCH net] tcp: delete unused req in tcp_synack_rtt_meas()
From: Yuchung Cheng @ 2013-10-24 15:46 UTC (permalink / raw)
To: Weiping Pan; +Cc: netdev, linux-kernel@vger.kernel.org
In-Reply-To: <4fcb645d8f8f5d6dda0ebd1ce2bd0cc3bfb14311.1382607044.git.panweiping3@gmail.com>
On Thu, Oct 24, 2013 at 2:31 AM, Weiping Pan <panweiping3@gmail.com> wrote:
> The parameter req is not used since
> 375fe02c9179(tcp: consolidate SYNACK RTT sampling)
Hi Weiping:
I just sent a bug-fix "tcp: fix SYNACK RTT estimation in Fast Open"
that will also take care of this redundant parameter. Thanks.
.
>
> Signed-off-by: Weiping Pan <panweiping3@gmail.com>
> ---
> net/ipv4/tcp_input.c | 4 ++--
> 1 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a16b01b..ac8781a 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2871,7 +2871,7 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
> }
>
> /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
> -static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
> +static void tcp_synack_rtt_meas(struct sock *sk)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> s32 seq_rtt = -1;
> @@ -5694,7 +5694,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
> tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
> tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
> tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
> - tcp_synack_rtt_meas(sk, req);
> + tcp_synack_rtt_meas(sk);
>
> if (tp->rx_opt.tstamp_ok)
> tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
> --
> 1.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH net] tcp: fix SYNACK RTT estimation in Fast Open
From: Yuchung Cheng @ 2013-10-24 15:44 UTC (permalink / raw)
To: davem, ncardwell, edumazet; +Cc: panweiping3, netdev, Yuchung Cheng
tp->lsndtime may not always be the SYNACK timestamp if a passive
Fast Open socket sends data before handshake completes. And if the
remote acknowledges both the data and the SYNACK, the RTT sample
is already taken in tcp_ack(), so no need to call
tcp_update_ack_rtt() in tcp_synack_rtt_meas() aagain.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
net/ipv4/tcp_input.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a16b01b..305cd05 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2871,14 +2871,19 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
}
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
+static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
{
struct tcp_sock *tp = tcp_sk(sk);
s32 seq_rtt = -1;
- if (tp->lsndtime && !tp->total_retrans)
- seq_rtt = tcp_time_stamp - tp->lsndtime;
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+ if (synack_stamp && !tp->total_retrans)
+ seq_rtt = tcp_time_stamp - synack_stamp;
+
+ /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
+ * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
+ */
+ if (!tp->srtt)
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
}
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -5587,6 +5592,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct request_sock *req;
int queued = 0;
bool acceptable;
+ u32 synack_stamp;
tp->rx_opt.saw_tstamp = 0;
@@ -5669,9 +5675,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* so release it.
*/
if (req) {
+ synack_stamp = tcp_rsk(req)->snt_synack;
tp->total_retrans = req->num_retrans;
reqsk_fastopen_remove(sk, req, false);
} else {
+ synack_stamp = tp->lsndtime;
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_congestion_control(sk);
@@ -5694,7 +5702,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- tcp_synack_rtt_meas(sk, req);
+ tcp_synack_rtt_meas(sk, synack_stamp);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
--
1.8.4.1
^ permalink raw reply related
* Re: [RFC PATCH net-next] ppp: Allow ppp device connected to an l2tp session to change of namespace
From: Benjamin LaHaise @ 2013-10-24 15:53 UTC (permalink / raw)
To: James Chapman; +Cc: François Cachereul, Paul Mackerras, netdev, linux-ppp
In-Reply-To: <5269402E.2070203@katalix.com>
On Thu, Oct 24, 2013 at 04:43:42PM +0100, James Chapman wrote:
> I'm thinking about the implications of a skb in the net namespace of the
> ppp interface passing through a tunnel socket which is in another
> namespace. I think net namespaces are completely isolated.
>
> To keep your ppp interfaces isolated from each other, have you
> considered using netfilter to prevent data being passed between ppp
> interfaces?
Using network namespaces for this is far more efficient. We've already
added support for doing this to other tunneling interfaces. This approach
also makes creating VPNs where there is re-use of the private address space
between different customers far easier to implement.
-ben
--
"Thought is the essence of where you are now."
^ permalink raw reply
* [PATCH net] tcp: only take RTT from timestamps if new data is acked
From: Yuchung Cheng @ 2013-10-24 15:55 UTC (permalink / raw)
To: davem, ncardwell, edumazet; +Cc: netdev, Yuchung Cheng
Patch ed08495c3 "tcp: use RTT from SACK for RTO" has a bug that
it does not check if the ACK acknowledge new data before taking
the RTT sample from TCP timestamps. This patch adds the check
back as required by the RFC.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
net/ipv4/tcp_input.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 305cd05..6ffe41a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2856,7 +2856,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
* left edge of the send window.
* See draft-ietf-tcplw-high-performance-00, section 3.3.
*/
- if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+ if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+ flag & FLAG_ACKED)
seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
if (seq_rtt < 0)
--
1.8.4.1
^ permalink raw reply related
* [PATCH net] tcp: do not rearm RTO when future data are sacked
From: Yuchung Cheng @ 2013-10-24 15:59 UTC (permalink / raw)
To: davem, ncardwell, edumazet, brakmo; +Cc: netdev, Yuchung Cheng
Patch ed08495c3 "tcp: use RTT from SACK for RTO" always re-arms RTO upon
obtaining a RTT sample from newly sacked data.
But technically RTO should only be re-armed when the data sent before
the last (re)transmission of write queue head are (s)acked. Otherwise
the RTO may continue to extend during loss recovery on data sent
in the future.
Note that RTTs from ACK or timestamps do not have this problem, as the RTT
source must be from data sent before.
The new RTO re-arm policy is
1) Always re-arm RTO if SND.UNA is advanced
2) Re-arm RTO if sack RTT is available, provided the sacked data was
sent before the last time write_queue_head was sent.
Signed-off-by: Larry Brakmo <brakmo@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
---
net/ipv4/tcp_input.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6ffe41a..068c8fb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2987,6 +2987,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
s32 seq_rtt = -1;
s32 ca_seq_rtt = -1;
ktime_t last_ackt = net_invalid_timestamp();
+ bool rtt_update;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
@@ -3063,14 +3064,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
- if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
- (flag & FLAG_ACKED))
- tcp_rearm_rto(sk);
+ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
if (flag & FLAG_ACKED) {
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)->icsk_ca_ops;
+ tcp_rearm_rto(sk);
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
tcp_mtup_probe_success(sk);
@@ -3109,6 +3109,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
}
+ } else if (skb && rtt_update && sack_rtt >= 0 &&
+ sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+ /* Do not re-arm RTO if the sack RTT is measured from data sent
+ * after when the head was last (re)transmitted. Otherwise the
+ * timeout may continue to extend in loss recovery.
+ */
+ tcp_rearm_rto(sk);
}
#if FASTRETRANS_DEBUG > 0
--
1.8.4.1
^ permalink raw reply related
* Re: [PATCH v2 net-next] fix rtnl notification in atomic context
From: Nicolas Dichtel @ 2013-10-24 16:28 UTC (permalink / raw)
To: Alexei Starovoitov, David S. Miller; +Cc: Cong Wang, Veaceslav Falico, netdev
In-Reply-To: <1382569362-4820-1-git-send-email-ast@plumgrid.com>
Le 24/10/2013 01:02, Alexei Starovoitov a écrit :
> commit 991fb3f74c "dev: always advertise rx_flags changes via netlink"
> introduced rtnl notification from __dev_set_promiscuity(),
> which can be called in atomic context.
>
> Steps to reproduce:
> ip tuntap add dev tap1 mode tap
> ifconfig tap1 up
> tcpdump -nei tap1 &
> ip tuntap del dev tap1 mode tap
>
> [ 271.627994] device tap1 left promiscuous mode
> [ 271.639897] BUG: sleeping function called from invalid context at mm/slub.c:940
> [ 271.664491] in_atomic(): 1, irqs_disabled(): 0, pid: 3394, name: ip
> [ 271.677525] INFO: lockdep is turned off.
> [ 271.690503] CPU: 0 PID: 3394 Comm: ip Tainted: G W 3.12.0-rc3+ #73
> [ 271.703996] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012
> [ 271.731254] ffffffff81a58506 ffff8807f0d57a58 ffffffff817544e5 ffff88082fa0f428
> [ 271.760261] ffff8808071f5f40 ffff8807f0d57a88 ffffffff8108bad1 ffffffff81110ff8
> [ 271.790683] 0000000000000010 00000000000000d0 00000000000000d0 ffff8807f0d57af8
> [ 271.822332] Call Trace:
> [ 271.838234] [<ffffffff817544e5>] dump_stack+0x55/0x76
> [ 271.854446] [<ffffffff8108bad1>] __might_sleep+0x181/0x240
> [ 271.870836] [<ffffffff81110ff8>] ? rcu_irq_exit+0x68/0xb0
> [ 271.887076] [<ffffffff811a80be>] kmem_cache_alloc_node+0x4e/0x2a0
> [ 271.903368] [<ffffffff810b4ddc>] ? vprintk_emit+0x1dc/0x5a0
> [ 271.919716] [<ffffffff81614d67>] ? __alloc_skb+0x57/0x2a0
> [ 271.936088] [<ffffffff810b4de0>] ? vprintk_emit+0x1e0/0x5a0
> [ 271.952504] [<ffffffff81614d67>] __alloc_skb+0x57/0x2a0
> [ 271.968902] [<ffffffff8163a0b2>] rtmsg_ifinfo+0x52/0x100
> [ 271.985302] [<ffffffff8162ac6d>] __dev_notify_flags+0xad/0xc0
> [ 272.001642] [<ffffffff8162ad0c>] __dev_set_promiscuity+0x8c/0x1c0
> [ 272.017917] [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
> [ 272.033961] [<ffffffff8162b109>] dev_set_promiscuity+0x29/0x50
> [ 272.049855] [<ffffffff8172e937>] packet_dev_mc+0x87/0xc0
> [ 272.065494] [<ffffffff81732052>] packet_notifier+0x1b2/0x380
> [ 272.080915] [<ffffffff81731ea5>] ? packet_notifier+0x5/0x380
> [ 272.096009] [<ffffffff81761c66>] notifier_call_chain+0x66/0x150
> [ 272.110803] [<ffffffff8108503e>] __raw_notifier_call_chain+0xe/0x10
> [ 272.125468] [<ffffffff81085056>] raw_notifier_call_chain+0x16/0x20
> [ 272.139984] [<ffffffff81620190>] call_netdevice_notifiers_info+0x40/0x70
> [ 272.154523] [<ffffffff816201d6>] call_netdevice_notifiers+0x16/0x20
> [ 272.168552] [<ffffffff816224c5>] rollback_registered_many+0x145/0x240
> [ 272.182263] [<ffffffff81622641>] rollback_registered+0x31/0x40
> [ 272.195369] [<ffffffff816229c8>] unregister_netdevice_queue+0x58/0x90
> [ 272.208230] [<ffffffff81547ca0>] __tun_detach+0x140/0x340
> [ 272.220686] [<ffffffff81547ed6>] tun_chr_close+0x36/0x60
>
> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
^ permalink raw reply
* Re: [RFC PATCH net-next] ppp: Allow ppp device connected to an l2tp session to change of namespace
From: James Chapman @ 2013-10-24 16:51 UTC (permalink / raw)
To: Benjamin LaHaise
Cc: François Cachereul, Paul Mackerras, netdev, linux-ppp
In-Reply-To: <20131024155354.GQ2704@kvack.org>
On 24/10/13 16:53, Benjamin LaHaise wrote:
> On Thu, Oct 24, 2013 at 04:43:42PM +0100, James Chapman wrote:
>> I'm thinking about the implications of a skb in the net namespace of the
>> ppp interface passing through a tunnel socket which is in another
>> namespace. I think net namespaces are completely isolated.
>>
>> To keep your ppp interfaces isolated from each other, have you
>> considered using netfilter to prevent data being passed between ppp
>> interfaces?
>
> Using network namespaces for this is far more efficient. We've already
> added support for doing this to other tunneling interfaces. This approach
> also makes creating VPNs where there is re-use of the private address space
> between different customers far easier to implement.
>
> -ben
Yes, it's definitely more efficient and potentially useful, I agree.
But unlike the other tunneling interfaces for which this has already
been done, L2TP uses a socket for its tunnel and a skb will cross net
namespace boundaries while passing through the socket. I remember a
similar discussion came up several months ago with vxlan which also uses
UDP sockets. See http://www.spinics.net/lists/netdev/msg221498.html.
Changing the behaviour of ppp interfaces only when they are created by
l2tp feels wrong to me, unless it is the first step in doing the same
for all ppp interfaces.
--
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development
^ permalink raw reply
* Re: [patch net-next] ipv6: allow userspace to create address with IFLA_F_TEMPORARY flag
From: Jiri Pirko @ 2013-10-24 16:59 UTC (permalink / raw)
To: netdev, davem, kuznet, jmorris, yoshfuji, kaber, thaller, stephen
In-Reply-To: <20131024140253.GF15744@order.stressinduktion.org>
Thu, Oct 24, 2013 at 04:02:53PM CEST, hannes@stressinduktion.org wrote:
>On Thu, Oct 24, 2013 at 03:45:55PM +0200, Jiri Pirko wrote:
>> This is needed in order to implement userspace address configuration,
>> namely ip6-privacy (rfc4941) in NetworkManager.
>>
>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>> ---
>> net/ipv6/addrconf.c | 3 ++-
>> 1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>> index cd3fb30..962c7c9 100644
>> --- a/net/ipv6/addrconf.c
>> +++ b/net/ipv6/addrconf.c
>> @@ -3715,7 +3715,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
>> return -ENODEV;
>>
>> /* We ignore other flags so far. */
>> - ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS);
>> + ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS |
>> + IFA_F_TEMPORARY);
>>
>> ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
>> if (ifa == NULL) {
>
>Hm, the kernel will pick up IFA_F_TEMPORARY marked addresses and do ipv6 address
>regeneration (depending on lifetimes). Is this intended?
I think that that behaviour is valid. It is in compliance with valid lft and
preferred lft behaviour.
>
>Btw. I am very interested in this topic as there is currently work in the IETF
>to move away from eui-48 generation of addresses:
>
>https://datatracker.ietf.org/doc/draft-ietf-6man-stable-privacy-addresses/
>
>This needs to be done in userspace as we depend on a secret generated at
>system install time.
>
>Greetings,
>
> Hannes
>
^ permalink raw reply
* Re: [PATCH net] tcp: fix SYNACK RTT estimation in Fast Open
From: Neal Cardwell @ 2013-10-24 17:02 UTC (permalink / raw)
To: Yuchung Cheng; +Cc: David Miller, Eric Dumazet, panweiping3, Netdev
In-Reply-To: <1382629465-20310-1-git-send-email-ycheng@google.com>
On Thu, Oct 24, 2013 at 11:44 AM, Yuchung Cheng <ycheng@google.com> wrote:
> tp->lsndtime may not always be the SYNACK timestamp if a passive
> Fast Open socket sends data before handshake completes. And if the
> remote acknowledges both the data and the SYNACK, the RTT sample
> is already taken in tcp_ack(), so no need to call
> tcp_update_ack_rtt() in tcp_synack_rtt_meas() aagain.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> ---
> net/ipv4/tcp_input.c | 18 +++++++++++++-----
> 1 file changed, 13 insertions(+), 5 deletions(-)
Acked-by: Neal Cardwell <ncardwell@google.com>
neal
^ permalink raw reply
* Re: [PATCH net] tcp: only take RTT from timestamps if new data is acked
From: Neal Cardwell @ 2013-10-24 17:02 UTC (permalink / raw)
To: Yuchung Cheng; +Cc: David Miller, Eric Dumazet, Netdev
In-Reply-To: <1382630125-21416-1-git-send-email-ycheng@google.com>
On Thu, Oct 24, 2013 at 11:55 AM, Yuchung Cheng <ycheng@google.com> wrote:
> Patch ed08495c3 "tcp: use RTT from SACK for RTO" has a bug that
> it does not check if the ACK acknowledge new data before taking
> the RTT sample from TCP timestamps. This patch adds the check
> back as required by the RFC.
>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> ---
> net/ipv4/tcp_input.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
Acked-by: Neal Cardwell <ncardwell@google.com>
neal
^ permalink raw reply
* Re: [PATCH net] tcp: do not rearm RTO when future data are sacked
From: Neal Cardwell @ 2013-10-24 17:03 UTC (permalink / raw)
To: Yuchung Cheng; +Cc: David Miller, Eric Dumazet, Lawrence Brakmo, Netdev
In-Reply-To: <1382630367-21964-1-git-send-email-ycheng@google.com>
On Thu, Oct 24, 2013 at 11:59 AM, Yuchung Cheng <ycheng@google.com> wrote:
> Patch ed08495c3 "tcp: use RTT from SACK for RTO" always re-arms RTO upon
> obtaining a RTT sample from newly sacked data.
>
> But technically RTO should only be re-armed when the data sent before
> the last (re)transmission of write queue head are (s)acked. Otherwise
> the RTO may continue to extend during loss recovery on data sent
> in the future.
>
> Note that RTTs from ACK or timestamps do not have this problem, as the RTT
> source must be from data sent before.
>
> The new RTO re-arm policy is
> 1) Always re-arm RTO if SND.UNA is advanced
> 2) Re-arm RTO if sack RTT is available, provided the sacked data was
> sent before the last time write_queue_head was sent.
>
> Signed-off-by: Larry Brakmo <brakmo@google.com>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> ---
> net/ipv4/tcp_input.c | 13 ++++++++++---
> 1 file changed, 10 insertions(+), 3 deletions(-)
Acked-by: Neal Cardwell <ncardwell@google.com>
neal
^ permalink raw reply
* Re: [PATCHv2 net] netpoll: fix rx_hook() interface by passing the skb
From: David Miller @ 2013-10-24 17:53 UTC (permalink / raw)
To: David.Laight; +Cc: antonio, netdev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B73A5@saturn3.aculab.com>
From: "David Laight" <David.Laight@ACULAB.COM>
Date: Thu, 24 Oct 2013 09:43:38 +0100
> From a code optimisation point of view you probably don't want to be
> calculating the source, offset and length early.
> It is quite likely that the local variables will have to be written
> to the stack (because of the function calls) - so it is almost
> certainly more efficient to calculate them just before the call.
But this change is being made from a bug prevention point of view,
we already we're already getting this code path wrong as-is.
^ permalink raw reply
* Re: [PATCH net] netconsole: fix NULL pointer dereference
From: David Miller @ 2013-10-24 17:56 UTC (permalink / raw)
To: David.Laight; +Cc: vfalico, nikolay, netdev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B73A9@saturn3.aculab.com>
From: "David Laight" <David.Laight@ACULAB.COM>
Date: Thu, 24 Oct 2013 11:39:02 +0100
>> >Taking the spinlock seems like the cleanest way to insure there's noone
>> >running in parallel, but I'm open to suggestions as I'm not satisfied with
>> >the looks of this. I'll prepare a net-next patchset for netconsole soon to
>> >clean it up properly, all of these can be easily simplified.
>>
>> First when I've seen 'spin_lock(); a = 1; spin_unlock()' I've thought
>> "WTF?", however indeed it will stop us racing with write_msg().
>
> Ditto - might be worth saying:
> /* Acquire lock to wait for any write_msg() to complete. */
Something this subtle definitely requires a comment.
^ permalink raw reply
* [PATCH RFC v2 3/4] net: mvmdio: slight optimisation of orion_mdio_write
From: Leigh Brown @ 2013-10-24 18:09 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Leigh Brown, Thomas Petazzoni, netdev, sebastian.hesselbarth
In-Reply-To: <cover.1382637156.git.leigh@solinno.co.uk>
Make only a single call to mutex_unlock in orion_mdio_write.
Signed-off-by: Leigh Brown <leigh@solinno.co.uk>
---
drivers/net/ethernet/marvell/mvmdio.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index f053475..13f4614 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -151,10 +151,8 @@ static int orion_mdio_write(struct mii_bus *bus, int mii_id,
mutex_lock(&dev->lock);
ret = orion_mdio_wait_ready(bus);
- if (ret < 0) {
- mutex_unlock(&dev->lock);
- return ret;
- }
+ if (ret != 0)
+ goto out;
writel(((mii_id << MVMDIO_SMI_PHY_ADDR_SHIFT) |
(regnum << MVMDIO_SMI_PHY_REG_SHIFT) |
@@ -162,9 +160,9 @@ static int orion_mdio_write(struct mii_bus *bus, int mii_id,
(value << MVMDIO_SMI_DATA_SHIFT)),
dev->regs);
+out:
mutex_unlock(&dev->lock);
-
- return 0;
+ return ret;
}
static int orion_mdio_reset(struct mii_bus *bus)
--
1.7.10.4
^ permalink raw reply related
* [PATCH RFC v2 2/4] net: mvmdio: orion_mdio_ready: remove manual poll
From: Leigh Brown @ 2013-10-24 18:09 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Leigh Brown, Thomas Petazzoni, netdev, sebastian.hesselbarth
In-Reply-To: <cover.1382637156.git.leigh@solinno.co.uk>
Replace manual poll of MVMDIO_SMI_READ_VALID with a call to
orion_mdio_wait_ready. This ensures a consistent timeout,
eliminates a busy loop, and allows for use of interrupts on
systems that support them.
Signed-off-by: Leigh Brown <leigh@solinno.co.uk>
---
drivers/net/ethernet/marvell/mvmdio.c | 36 +++++++++++++--------------------
1 file changed, 14 insertions(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 6ed6f00..f053475 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -111,43 +111,35 @@ static int orion_mdio_read(struct mii_bus *bus, int mii_id,
int regnum)
{
struct orion_mdio_dev *dev = bus->priv;
- int count;
u32 val;
int ret;
mutex_lock(&dev->lock);
ret = orion_mdio_wait_ready(bus);
- if (ret < 0) {
- mutex_unlock(&dev->lock);
- return ret;
- }
+ if (ret < 0)
+ goto out;
writel(((mii_id << MVMDIO_SMI_PHY_ADDR_SHIFT) |
(regnum << MVMDIO_SMI_PHY_REG_SHIFT) |
MVMDIO_SMI_READ_OPERATION),
dev->regs);
- /* Wait for the value to become available */
- count = 0;
- while (1) {
- val = readl(dev->regs);
- if (val & MVMDIO_SMI_READ_VALID)
- break;
-
- if (count > 100) {
- dev_err(bus->parent, "Timeout when reading PHY\n");
- mutex_unlock(&dev->lock);
- return -ETIMEDOUT;
- }
-
- udelay(10);
- count++;
+ ret = orion_mdio_wait_ready(bus);
+ if (ret < 0)
+ goto out;
+
+ val = readl(dev->regs);
+ if (!(val & MVMDIO_SMI_READ_VALID)) {
+ dev_err(bus->parent, "SMI bus read not valid\n");
+ ret = -ENODEV;
+ goto out;
}
+ ret = val & 0xFFFF;
+out:
mutex_unlock(&dev->lock);
-
- return val & 0xFFFF;
+ return ret;
}
static int orion_mdio_write(struct mii_bus *bus, int mii_id,
--
1.7.10.4
^ permalink raw reply related
* [PATCH RFC v2 4/4] net: mvmdio: doc: mvmdio now used by mv643xx_eth
From: Leigh Brown @ 2013-10-24 18:09 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Leigh Brown, Thomas Petazzoni, netdev, sebastian.hesselbarth
In-Reply-To: <cover.1382637156.git.leigh@solinno.co.uk>
Amend the documentation in the mvmdio driver to note the fact
that it is now used by both the mvneta and mv643xx_eth drivers.
Signed-off-by: Leigh Brown <leigh@solinno.co.uk>
---
drivers/net/ethernet/marvell/mvmdio.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 13f4614..79b6d32 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -4,11 +4,9 @@
* Since the MDIO interface of Marvell network interfaces is shared
* between all network interfaces, having a single driver allows to
* handle concurrent accesses properly (you may have four Ethernet
- * ports, but they in fact share the same SMI interface to access the
- * MDIO bus). Moreover, this MDIO interface code is similar between
- * the mv643xx_eth driver and the mvneta driver. For now, it is only
- * used by the mvneta driver, but it could later be used by the
- * mv643xx_eth driver as well.
+ * ports, but they in fact share the same SMI interface to access
+ * the MDIO bus). This driver is currently used by the mvneta and
+ * mv643xx_eth drivers.
*
* Copyright (C) 2012 Marvell
*
--
1.7.10.4
^ permalink raw reply related
* [PATCH RFC v2 1/4] net: mvmdio: make orion_mdio_wait_ready consistent
From: Leigh Brown @ 2013-10-24 18:09 UTC (permalink / raw)
To: linux-arm-kernel
Cc: Leigh Brown, Thomas Petazzoni, netdev, sebastian.hesselbarth
In-Reply-To: <cover.1382637156.git.leigh@solinno.co.uk>
Amend orion_mdio_wait_ready so that the same timeout is used when
polling or using wait_event_timeout. Set the timeout to 1ms.
Replace udelay with usleep_range to avoid a busy loop, and set the
polling interval range as 45us to 55us, so that the first sleep
will be enough in almost all cases.
Generate the same log message at timeout when polling or using
wait_event_timeout.
Signed-off-by: Leigh Brown <leigh@solinno.co.uk>
---
drivers/net/ethernet/marvell/mvmdio.c | 51 +++++++++++++++++++--------------
1 file changed, 30 insertions(+), 21 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index e2f6626..6ed6f00 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -44,6 +44,15 @@
#define MVMDIO_ERR_INT_SMI_DONE 0x00000010
#define MVMDIO_ERR_INT_MASK 0x0080
+/*
+ * SMI Timeout measurements:
+ * - Kirkwood 88F6281 (Globalscale Dreamplug): 45us to 95us (Interrupt)
+ * - Armada 370 (Globalscale Mirabox): 41us to 43us (Polled)
+ */
+#define MVMDIO_SMI_TIMEOUT 1000 /* 1000us = 1ms */
+#define MVMDIO_SMI_POLL_INTERVAL_MIN 45
+#define MVMDIO_SMI_POLL_INTERVAL_MAX 55
+
struct orion_mdio_dev {
struct mutex lock;
void __iomem *regs;
@@ -68,34 +77,34 @@ static int orion_mdio_smi_is_done(struct orion_mdio_dev *dev)
static int orion_mdio_wait_ready(struct mii_bus *bus)
{
struct orion_mdio_dev *dev = bus->priv;
- int count;
+ unsigned long timeout = usecs_to_jiffies(MVMDIO_SMI_TIMEOUT);
+ unsigned long end = jiffies + timeout;
+ int timedout = 0;
- if (dev->err_interrupt <= 0) {
- count = 0;
- while (1) {
- if (orion_mdio_smi_is_done(dev))
+ while (1) {
+ if (orion_mdio_smi_is_done(dev))
+ return 0;
+ else
+ if (timedout)
break;
- if (count > 100) {
- dev_err(bus->parent,
- "Timeout: SMI busy for too long\n");
- return -ETIMEDOUT;
- }
+ if (dev->err_interrupt <= 0) {
+ usleep_range(MVMDIO_SMI_POLL_INTERVAL_MIN,
+ MVMDIO_SMI_POLL_INTERVAL_MAX);
- udelay(10);
- count++;
- }
- } else {
- if (!orion_mdio_smi_is_done(dev)) {
+ if (time_is_before_jiffies(end))
+ ++timedout;
+ } else {
wait_event_timeout(dev->smi_busy_wait,
- orion_mdio_smi_is_done(dev),
- msecs_to_jiffies(100));
- if (!orion_mdio_smi_is_done(dev))
- return -ETIMEDOUT;
- }
+ orion_mdio_smi_is_done(dev),
+ timeout);
+
+ ++timedout;
+ }
}
- return 0;
+ dev_err(bus->parent, "Timeout: SMI busy for too long\n");
+ return -ETIMEDOUT;
}
static int orion_mdio_read(struct mii_bus *bus, int mii_id,
--
1.7.10.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox