* [PATCH 1/1] PHY configuration for compatible issue
From: AriesLee @ 2011-11-17 14:05 UTC (permalink / raw)
To: Guo-Fu Tseng, netdev; +Cc: AriesLee, Aries Lee
From: Aries Lee <AriesLee@jmicron.com>
To perform PHY calibration and set a different EA value by chip ID,
Whenever the NIC chip power on, ie booting or resuming, we need to
force HW to calibrate PHY parameter again, and also set a proper EA
value which gathered from experiment.
That process resolve the compatible issues(NIC is unable to link
up in some special case) in giga speed.
Signed-off-by: Aries Lee <AriesLee@jmicron.com>
---
drivers/net/ethernet/jme.c | 127 ++++++++++++++++++++++++++++++++++++++++++-
drivers/net/ethernet/jme.h | 19 +++++++
2 files changed, 143 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index df3ab83..bd9633d 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1743,6 +1743,126 @@ jme_phy_off(struct jme_adapter *jme)
if (new_phy_power_ctrl(jme->chip_main_rev))
jme_new_phy_off(jme);
}
+static int
+jme_phy_calibration(struct jme_adapter *jme)
+{
+ u32 ctrl1000, bmcr, phy_addr, phy_data;
+
+ /* Turn PHY off */
+ bmcr = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_BMCR);
+ bmcr |= BMCR_PDOWN;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_BMCR, bmcr);
+ /* Turn PHY on */
+ bmcr = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_BMCR);
+ bmcr &= ~BMCR_PDOWN;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_BMCR, bmcr);
+ /* Enabel PHY test mode 1 */
+ ctrl1000 = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_CTRL1000);
+ ctrl1000 &= ~PHY_GAD_TEST_MODE_MSK;
+ ctrl1000 |= PHY_GAD_TEST_MODE_1;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_CTRL1000, ctrl1000);
+
+
+ phy_addr = JM_PHY_SPEC_REG_READ | JM_PHY_EXT_COMM_2_REG;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
+ phy_addr);
+ phy_data = jme_mdio_read(jme->dev, jme->mii_if.phy_id,
+ JM_PHY_SPEC_DATA_REG);
+
+ phy_data &= ~JM_PHY_EXT_COMM_2_CALI_MODE_0;
+ phy_data |= JM_PHY_EXT_COMM_2_CALI_LATCH |
+ JM_PHY_EXT_COMM_2_CALI_ENABLE;
+
+ phy_addr = JM_PHY_SPEC_REG_WRITE | JM_PHY_EXT_COMM_2_REG;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_DATA_REG,
+ phy_data);
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
+ phy_addr);
+
+ msleep(20);
+
+ phy_addr = JM_PHY_SPEC_REG_READ | JM_PHY_EXT_COMM_2_REG;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
+ phy_addr);
+ phy_data = jme_mdio_read(jme->dev, jme->mii_if.phy_id,
+ JM_PHY_SPEC_DATA_REG);
+
+ phy_data &= ~(JM_PHY_EXT_COMM_2_CALI_ENABLE |
+ JM_PHY_EXT_COMM_2_CALI_MODE_0 |
+ JM_PHY_EXT_COMM_2_CALI_LATCH);
+
+ phy_addr = JM_PHY_SPEC_REG_WRITE | JM_PHY_EXT_COMM_2_REG;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_DATA_REG,
+ phy_data);
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
+ phy_addr);
+
+ /* Disable PHY test mode */
+ ctrl1000 = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_CTRL1000);
+ ctrl1000 &= ~PHY_GAD_TEST_MODE_MSK;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_CTRL1000, ctrl1000);
+ return 0;
+}
+
+static int
+jme_phy_setEA(struct jme_adapter *jme)
+{
+ u32 phy_addr, phy_comm0 = 0, phy_comm1 = 0;
+ u8 nic_ctrl;
+
+ pci_read_config_byte(jme->pdev, PCI_PRIV_SHARE_NICCTRL, &nic_ctrl);
+ if ((nic_ctrl & 0x3) == JME_FLAG_PHYEA_ENABLE)
+ return 0;
+
+ switch (jme->pdev->device) {
+ case PCI_DEVICE_ID_JMICRON_JMC250:
+ if (((jme->chip_main_rev == 5) &&
+ ((jme->chip_sub_rev == 0) || (jme->chip_sub_rev == 1) ||
+ (jme->chip_sub_rev == 3))) ||
+ (jme->chip_main_rev >= 6)) {
+ phy_comm0 = 0x008A;
+ phy_comm1 = 0x4109;
+ }
+ if ((jme->chip_main_rev == 3) &&
+ ((jme->chip_sub_rev == 1) || (jme->chip_sub_rev == 2)))
+ phy_comm0 = 0xE088;
+ break;
+ case PCI_DEVICE_ID_JMICRON_JMC260:
+ if (((jme->chip_main_rev == 5) &&
+ ((jme->chip_sub_rev == 0) || (jme->chip_sub_rev == 1) ||
+ (jme->chip_sub_rev == 3))) ||
+ (jme->chip_main_rev >= 6)) {
+ phy_comm0 = 0x008A;
+ phy_comm1 = 0x4109;
+ }
+ if ((jme->chip_main_rev == 3) &&
+ ((jme->chip_sub_rev == 1) || (jme->chip_sub_rev == 2)))
+ phy_comm0 = 0xE088;
+ if ((jme->chip_main_rev == 2) && (jme->chip_sub_rev == 0))
+ phy_comm0 = 0x608A;
+ if ((jme->chip_main_rev == 2) && (jme->chip_sub_rev == 2))
+ phy_comm0 = 0x408A;
+ break;
+ default:
+ return -ENODEV;
+ }
+ if (phy_comm0) {
+ phy_addr = JM_PHY_SPEC_REG_WRITE | JM_PHY_EXT_COMM_0_REG;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id,
+ JM_PHY_SPEC_DATA_REG, phy_comm0);
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id,
+ JM_PHY_SPEC_ADDR_REG, phy_addr);
+ }
+ if (phy_comm1) {
+ phy_addr = JM_PHY_SPEC_REG_WRITE | JM_PHY_EXT_COMM_1_REG;
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id,
+ JM_PHY_SPEC_DATA_REG, phy_comm1);
+ jme_mdio_write(jme->dev, jme->mii_if.phy_id,
+ JM_PHY_SPEC_ADDR_REG, phy_addr);
+ }
+
+ return 0;
+}
static int
jme_open(struct net_device *netdev)
@@ -1769,7 +1889,8 @@ jme_open(struct net_device *netdev)
jme_set_settings(netdev, &jme->old_ecmd);
else
jme_reset_phy_processor(jme);
-
+ jme_phy_calibration(jme);
+ jme_phy_setEA(jme);
jme_reset_link(jme);
return 0;
@@ -3184,7 +3305,8 @@ jme_resume(struct device *dev)
jme_set_settings(netdev, &jme->old_ecmd);
else
jme_reset_phy_processor(jme);
-
+ jme_phy_calibration(jme);
+ jme_phy_setEA(jme);
jme_start_irq(jme);
netif_device_attach(netdev);
@@ -3239,4 +3361,3 @@ MODULE_DESCRIPTION("JMicron JMC2x0 PCI Express Ethernet driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, jme_pci_tbl);
-
diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h
index 02ea27c..47e47a9 100644
--- a/drivers/net/ethernet/jme.h
+++ b/drivers/net/ethernet/jme.h
@@ -760,6 +760,25 @@ enum jme_rxmcs_bits {
RXMCS_CHECKSUM,
};
+/* Extern PHY common register 2 */
+
+#define PHY_GAD_TEST_MODE_1 0x00002000
+#define PHY_GAD_TEST_MODE_MSK 0x0000E000
+#define JM_PHY_SPEC_REG_READ 0x00004000
+#define JM_PHY_SPEC_REG_WRITE 0x00008000
+#define PHY_CALIBRATION_DELAY 20
+#define JM_PHY_SPEC_ADDR_REG 0x1E
+#define JM_PHY_SPEC_DATA_REG 0x1F
+
+#define JM_PHY_EXT_COMM_0_REG 0x30
+#define JM_PHY_EXT_COMM_1_REG 0x31
+#define JM_PHY_EXT_COMM_2_REG 0x32
+#define JM_PHY_EXT_COMM_2_CALI_ENABLE 0x01
+#define JM_PHY_EXT_COMM_2_CALI_MODE_0 0x02
+#define JM_PHY_EXT_COMM_2_CALI_LATCH 0x10
+#define PCI_PRIV_SHARE_NICCTRL 0xF5
+#define JME_FLAG_PHYEA_ENABLE 0x2
+
/*
* Wakeup Frame setup interface registers
*/
--
1.7.4.4
^ permalink raw reply related
* Re: Regarding Routing cache
From: Ajith Adapa @ 2011-11-17 6:04 UTC (permalink / raw)
To: netdev
In-Reply-To: <CADAe=+Lmp=e3NRH5NOYewJp=XZ0CncPzC-4V=Ct-TEuGjfq3nw@mail.gmail.com>
Hi,
Actually I have doubt with IPv6 related packets.
In case of IPv6 packet in ip6_route_output function is called for
destination related information.
where ip6_route_output calls "fib6_rule_lookup" function. Why lookup
is done in fib table instead of routing cache in case of IPv6 packet ?
In case of IPv4 packet ... ip_route_output checks in routing cache and
if there is a cache miss then it checks the fib table.
Regards,
Ajith
On Thu, Nov 17, 2011 at 10:30 AM, Ajith Adapa <adapa.ajith@gmail.com> wrote:
> Hi,
>
> I have a small doubt regarding routing cache in linux kernel.
>
> It seems ip_route_connect is the way we have to access routing cache
> entries. In case of all locally generated packets I see that struct
> dst_entry is filled up with a lookup in routing cache.
>
> What about in case of forwarding packets ? I dont see any usage of
> routing cache mechanism to fill up the struct dst_entry. So it seems
> we directly check the fib_rules or fib table to fill the structure.
> If it is true then it would be very slow right ?
>
> Sorry if I am wrong about above findings. Do correct me if I am wrong about it ?
>
> Regards,
> Ajith
>
^ permalink raw reply
* Re: [PATCH 4/5] tcp: allow undo from reordered DSACKs
From: Ilpo Järvinen @ 2011-11-17 5:18 UTC (permalink / raw)
To: Neal Cardwell
Cc: David Miller, Netdev, Nandita Dukkipati, Yuchung Cheng,
Tom Herbert
In-Reply-To: <1321469885-10885-4-git-send-email-ncardwell@google.com>
On Wed, 16 Nov 2011, Neal Cardwell wrote:
> Previously, SACK-enabled connections hung around in TCP_CA_Disorder
> state while snd_una==high_seq, just waiting to accumulate DSACKs and
> hopefully undo a cwnd reduction. This could and did lead to the
> following unfortunate scenario: if some incoming ACKs advance snd_una
> beyond high_seq then we were setting undo_marker to 0 and moving to
> TCP_CA_Open, so if (due to reordering in the ACK return path) we
> shortly thereafter received a DSACK then we were no longer able to
> undo the cwnd reduction.
>
> The change: Simplify the congestion avoidance state machine by
> removing the behavior where SACK-enabled connections hung around in
> the TCP_CA_Disorder state just waiting for DSACKs. Instead, when
> snd_una advances to high_seq or beyond we typically move to
> TCP_CA_Open immediately and allow an undo in either TCP_CA_Open or
> TCP_CA_Disorder if we later receive enough DSACKs.
>
> Other patches in this series will provide other changes that are
> necessary to fully fix this problem.
>
> Signed-off-by: Neal Cardwell <ncardwell@google.com>
> ---
> net/ipv4/tcp_input.c | 15 ++-------------
> 1 files changed, 2 insertions(+), 13 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 751d390..a4efdd7 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2858,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk)
> struct tcp_sock *tp = tcp_sk(sk);
> int state = TCP_CA_Open;
>
> - if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
> + if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
> state = TCP_CA_Disorder;
>
> if (inet_csk(sk)->icsk_ca_state != state) {
> @@ -3066,17 +3066,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
> }
> break;
>
> - case TCP_CA_Disorder:
> - tcp_try_undo_dsack(sk);
> - if (!tp->undo_marker ||
> - /* For SACK case do not Open to allow to undo
> - * catching for all duplicate ACKs. */
> - tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
> - tp->undo_marker = 0;
> - tcp_set_ca_state(sk, TCP_CA_Open);
> - }
> - break;
> -
> case TCP_CA_Recovery:
> if (tcp_is_reno(tp))
> tcp_reset_reno_sack(tp);
> @@ -3117,7 +3106,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
> tcp_add_reno_sack(sk);
> }
>
> - if (icsk->icsk_ca_state == TCP_CA_Disorder)
> + if (icsk->icsk_ca_state <= TCP_CA_Disorder)
> tcp_try_undo_dsack(sk);
>
> if (!tcp_time_to_recover(sk)) {
How about extending Disorder state until second cumulative ACK that is
acking >= high_seq?
--
i.
^ permalink raw reply
* Re: [PATCH 5/5] tcp: skip cwnd moderation in TCP_CA_Open in tcp_try_to_open
From: Ilpo Järvinen @ 2011-11-17 5:14 UTC (permalink / raw)
To: Neal Cardwell
Cc: David Miller, Netdev, Nandita Dukkipati, Yuchung Cheng,
Tom Herbert
In-Reply-To: <1321469885-10885-5-git-send-email-ncardwell@google.com>
On Wed, 16 Nov 2011, Neal Cardwell wrote:
> The problem: Senders were overriding cwnd values picked during an undo
> by calling tcp_moderate_cwnd() in tcp_try_to_open().
I think it's intentional. Because of receiver lying bandwidth cheats all
unlimited undos are bit dangerous.
> The fix: Don't moderate cwnd in tcp_try_to_open() if we're in
> TCP_CA_Open, since doing so is generally unnecessary and specifically
> would override a DSACK-based undo of a cwnd reduction made in fast
> recovery.
>
> Signed-off-by: Neal Cardwell <ncardwell@google.com>
> ---
> net/ipv4/tcp_input.c | 3 ++-
> 1 files changed, 2 insertions(+), 1 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a4efdd7..78dd38c 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2881,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag)
>
> if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
> tcp_try_keep_open(sk);
> - tcp_moderate_cwnd(tp);
> + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
> + tcp_moderate_cwnd(tp);
> } else {
> tcp_cwnd_down(sk, flag);
> }
Wouldn't it be enough if tcp max burst is increased to match IW (iirc we
had 3 still there as a magic number)?
--
i.
^ permalink raw reply
* Re: [PATCH 2/2 v4] net/smsc911x: Add regulator support
From: Mike Frysinger @ 2011-11-17 5:07 UTC (permalink / raw)
To: Robert MARKLUND
Cc: netdev@vger.kernel.org, Steve Glendinning, Mathieu Poirier,
Paul Mundt, linux-sh@vger.kernel.org, Sascha Hauer, Tony Lindgren,
linux-omap@vger.kernel.org,
uclinux-dist-devel@blackfin.uclinux.org, Linus Walleij
In-Reply-To: <2B1D156D95AE9B4EAD379CB9E465FE7324AECE3650@EXDCVYMBSTM005.EQ1STM.local>
[-- Attachment #1: Type: Text/Plain, Size: 521 bytes --]
On Wednesday 16 November 2011 07:59:41 Robert MARKLUND wrote:
> From: Mike Frysinger [mailto:vapier@gentoo.org]
> > On Monday 31 October 2011 08:38:39 Robert Marklund wrote:
> > > ChangeLog v3->v4:
> > > - Remove dual prints and old comment on Mike's request.
> > > - Split the request_free fucntion on Mike and Sascha request.
> >
> > would be nice if the enable/disable were split as well ...
>
> I interpret this as "nice if", if it's a "must be" then ill change it.
i would prefer it were split
-mike
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply
* Regarding Routing cache
From: Ajith Adapa @ 2011-11-17 5:00 UTC (permalink / raw)
To: netdev
Hi,
I have a small doubt regarding routing cache in linux kernel.
It seems ip_route_connect is the way we have to access routing cache
entries. In case of all locally generated packets I see that struct
dst_entry is filled up with a lookup in routing cache.
What about in case of forwarding packets ? I dont see any usage of
routing cache mechanism to fill up the struct dst_entry. So it seems
we directly check the fib_rules or fib table to fill the structure.
If it is true then it would be very slow right ?
Sorry if I am wrong about above findings. Do correct me if I am wrong about it ?
Regards,
Ajith
^ permalink raw reply
* Re: [PATCH FIX net v2] net-e1000(e): Fix default interrupt throttle rate not set in NIC HW
From: Jeff Kirsher @ 2011-11-17 4:02 UTC (permalink / raw)
To: David Decotigny
Cc: Brandeburg, Jesse, Allan, Bruce W, Wyborny, Carolyn,
Skidmore, Donald C, Rose, Gregory V, Waskiewicz Jr, Peter P,
Duyck, Alexander H, Ronciak, John,
e1000-devel@lists.sourceforge.net, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, Paul Gortmaker, Ying Cai
In-Reply-To: <75e944ced8ad7c58a0b838c0fe2a9e315f9e0c37.1321494268.git.david.decotigny@google.com>
[-- Attachment #1: Type: text/plain, Size: 8357 bytes --]
On Wed, 2011-11-16 at 17:46 -0800, David Decotigny wrote:
> From: Ying Cai <ycai@google.com>
>
> This change ensures that the itr/itr_setting adjustment logic is used,
> even for the default/compiled-in value.
>
> Context:
> When we changed the default InterruptThrottleRate value from default
> (3 = dynamic mode) to 8000 for example, only adapter->itr_setting
> (which controls interrupt coalescing mode) was set to 8000, but
> adapter->itr (which controls the value set in NIC register) was not
> updated accordingly. So from ethtool, it seemed the interrupt
> throttling is enabled at 8000 intr/s, but the NIC actually was
> running in dynamic mode which has lower CPU efficiency especially
> when throughput is not high.
>
>
>
> Signed-off-by: David Decotigny <david.decotigny@google.com>
> ---
> drivers/net/ethernet/intel/e1000/e1000_param.c | 81 +++++++++++--------
> drivers/net/ethernet/intel/e1000e/param.c | 98 +++++++++++++-----------
> 2 files changed, 99 insertions(+), 80 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c
> index 1301eba..595e462 100644
> --- a/drivers/net/ethernet/intel/e1000/e1000_param.c
> +++ b/drivers/net/ethernet/intel/e1000/e1000_param.c
> @@ -173,7 +173,7 @@ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
>
> /* Interrupt Throttle Rate (interrupts/sec)
> *
> - * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
> + * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative
> */
> E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
> #define DEFAULT_ITR 3
> @@ -460,41 +460,54 @@ void __devinit e1000_check_options(struct e1000_adapter *adapter)
> };
>
> if (num_InterruptThrottleRate > bd) {
> - adapter->itr = InterruptThrottleRate[bd];
> - switch (adapter->itr) {
> - case 0:
> - e_dev_info("%s turned off\n", opt.name);
> - break;
> - case 1:
> - e_dev_info("%s set to dynamic mode\n",
> - opt.name);
> - adapter->itr_setting = adapter->itr;
> - adapter->itr = 20000;
> - break;
> - case 3:
> - e_dev_info("%s set to dynamic conservative "
> - "mode\n", opt.name);
> - adapter->itr_setting = adapter->itr;
> - adapter->itr = 20000;
> - break;
> - case 4:
> - e_dev_info("%s set to simplified "
> - "(2000-8000) ints mode\n", opt.name);
> - adapter->itr_setting = adapter->itr;
> - break;
> - default:
> - e1000_validate_option(&adapter->itr, &opt,
> - adapter);
> - /* save the setting, because the dynamic bits
> - * change itr.
> - * clear the lower two bits because they are
> - * used as control */
> - adapter->itr_setting = adapter->itr & ~3;
> - break;
> - }
> + /* Make sure a message is printed for
> + * non-special values. And in case of an
> + * invalid option, display warning, use
> + * default and go through itr/itr_setting
> + * adjustment logic below */
> + if ((adapter->itr < 0 || adapter->itr > 4)
> + && e1000_validate_option(&adapter->itr, &opt,
> + adapter))
> + adapter->itr = opt.def;
> } else {
> - adapter->itr_setting = opt.def;
> + /* if no option specified, use default value
> + and go through the logic below to adjust
> + itr/itr_setting */
> + adapter->itr = opt.def;
> +
> + /* Make sure a message is printed for
> + * non-special default values */
> + if (adapter->itr < 0 || adapter->itr > 4)
> + e_dev_info("%s set to default %d\n",
> + opt.name, adapter->itr);
> + }
> +
> + adapter->itr_setting = adapter->itr;
> + switch (adapter->itr) {
> + case 0:
> + e_dev_info("%s turned off\n", opt.name);
> + break;
> + case 1:
> + e_dev_info("%s set to dynamic mode\n",
> + opt.name);
> + adapter->itr = 20000;
> + break;
> + case 3:
> + e_dev_info("%s set to dynamic conservative "
> + "mode\n", opt.name);
> adapter->itr = 20000;
> + break;
> + case 4:
> + e_dev_info("%s set to simplified "
> + "(2000-8000) ints mode\n", opt.name);
> + break;
> + default:
> + /* save the setting, because the dynamic bits
> + * change itr.
> + * clear the lower two bits because they are
> + * used as control */
> + adapter->itr_setting &= ~3;
> + break;
> }
> }
> { /* Smart Power Down */
> diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
> index 20e93b0..41937e5 100644
> --- a/drivers/net/ethernet/intel/e1000e/param.c
> +++ b/drivers/net/ethernet/intel/e1000e/param.c
> @@ -106,7 +106,7 @@ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
> /*
> * Interrupt Throttle Rate (interrupts/sec)
> *
> - * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
> + * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative
> */
> E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
> #define DEFAULT_ITR 3
> @@ -335,53 +335,59 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter)
>
> if (num_InterruptThrottleRate > bd) {
> adapter->itr = InterruptThrottleRate[bd];
> - switch (adapter->itr) {
> - case 0:
> - e_info("%s turned off\n", opt.name);
> - break;
> - case 1:
> - e_info("%s set to dynamic mode\n", opt.name);
> - adapter->itr_setting = adapter->itr;
> - adapter->itr = 20000;
> - break;
> - case 3:
> - e_info("%s set to dynamic conservative mode\n",
> - opt.name);
> - adapter->itr_setting = adapter->itr;
> - adapter->itr = 20000;
> - break;
> - case 4:
> - e_info("%s set to simplified (2000-8000 ints) "
> - "mode\n", opt.name);
> - adapter->itr_setting = 4;
> - break;
> - default:
> - /*
> - * Save the setting, because the dynamic bits
> - * change itr.
> - */
> - if (e1000_validate_option(&adapter->itr, &opt,
> - adapter) &&
> - (adapter->itr == 3)) {
> - /*
> - * In case of invalid user value,
> - * default to conservative mode.
> - */
> - adapter->itr_setting = adapter->itr;
> - adapter->itr = 20000;
> - } else {
> - /*
> - * Clear the lower two bits because
> - * they are used as control.
> - */
> - adapter->itr_setting =
> - adapter->itr & ~3;
> - }
> - break;
> - }
> +
> + /* Make sure a message is printed for
> + * non-special values. And in case of an
> + * invalid option, display warning, use
> + * default and go through itr/itr_setting
> + * adjustment logic below */
> + if ((adapter->itr < 0 || adapter->itr > 4)
> + && e1000_validate_option(&adapter->itr, &opt,
> + adapter))
> + adapter->itr = opt.def;
> } else {
> - adapter->itr_setting = opt.def;
> + /* if no option specified, use default value
> + and go through the logic below to adjust
> + itr/itr_setting */
> + adapter->itr = opt.def;
> +
> + /* Make sure a message is printed for
> + * non-special default values */
> + if (adapter->itr < 0 || adapter->itr > 4)
> + e_info("%s set to default %d\n",
> + opt.name, adapter->itr);
> + }
> +
> +
> +
> + adapter->itr_setting = adapter->itr;
> + switch (adapter->itr) {
> + case 0:
> + e_info("%s turned off\n", opt.name);
> + break;
> + case 1:
> + e_info("%s set to dynamic mode\n", opt.name);
> + adapter->itr = 20000;
> + break;
> + case 3:
> + e_info("%s set to dynamic conservative mode\n",
> + opt.name);
> adapter->itr = 20000;
> + break;
> + case 4:
> + e_info("%s set to simplified (2000-8000 ints) "
> + "mode\n", opt.name);
> + break;
> + default:
> + /*
> + * Save the setting, because the dynamic bits
> + * change itr.
> + *
> + * Clear the lower two bits because
> + * they are used as control.
> + */
> + adapter->itr_setting &= ~3;
> + break;
> }
> }
> { /* Interrupt Mode */
Thanks David, I have added this to my e1000e queue of patches.
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: Eric Dumazet @ 2011-11-17 3:19 UTC (permalink / raw)
To: David Miller
Cc: bhutchings, david.decotigny, netdev, linux-kernel, ian.campbell,
jeffrey.t.kirsher, jpirko, joe, szymon, rick.jones2, AAbdulla,
therbert
In-Reply-To: <20111116.215901.100218898762260021.davem@davemloft.net>
Le mercredi 16 novembre 2011 à 21:59 -0500, David Miller a écrit :
> They shouldn't be, they are internal dependency descriptions and should
> be invisible to the user.
Unless someone has evidence of a performance cost :)
The rps_lock() adds an atomic operation in netif_rx(), even if RPS is
not really used/configured...
^ permalink raw reply
* Re: [PATCH 5/5] net-next:asix: v2 update VERSION only
From: Valdis.Kletnieks @ 2011-11-17 3:18 UTC (permalink / raw)
To: Grant Grundler; +Cc: davem, netdev, linux-kernel, Allan Chou, Freddy Xin
In-Reply-To: <CANEJEGtnUS5B1ggdoPn8gcCbFm==dfgHCgXVgcgpuooFUAowJA@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 619 bytes --]
On Tue, 15 Nov 2011 17:37:47 PST, Grant Grundler said:
> Upstream drivers are routinely back ported to distro releases. Version
> is the primary means for a vendor (like ASIX) to determine what was
> back ported. Distro maintainers and 1st tier support organizations
> also need to track versions and "ethtool -i" is the canonical way to
> look this up on a running system.
Oh, OK. I figured the distro would know what they backported based on the
kernel version they generated - I forgot that sometimes users will bypass the
distro and go to the upstream vendor, and you may not know what their distro
had done...
[-- Attachment #2: Type: application/pgp-signature, Size: 227 bytes --]
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: David Miller @ 2011-11-17 3:12 UTC (permalink / raw)
To: david.decotigny
Cc: netdev, linux-kernel, ian.campbell, eric.dumazet,
jeffrey.t.kirsher, bhutchings, jpirko, joe, szymon, rick.jones2,
AAbdulla
In-Reply-To: <CAG88wWYgUkStZk988iWzbWf_=1Ze-Rtvs82sMrZoeuFPQphaUA@mail.gmail.com>
From: David Decotigny <david.decotigny@google.com>
Date: Wed, 16 Nov 2011 19:04:58 -0800
> David, should I re-send an updated v7 version of this series with this
> patch removed? Or is it Ok if I simply mark this patch as rejected in
> patchwork and stick to patch series v6?
I'll try to apply v6 as-is sans patch #3.
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: David Decotigny @ 2011-11-17 3:04 UTC (permalink / raw)
To: David Miller
Cc: netdev, linux-kernel, ian.campbell, eric.dumazet,
jeffrey.t.kirsher, bhutchings, jpirko, joe, szymon, rick.jones2,
AAbdulla
In-Reply-To: <20111116.215808.299077226767455988.davem@davemloft.net>
On Wed, Nov 16, 2011 at 6:58 PM, David Miller <davem@davemloft.net> wrote:
> From: David Decotigny <david.decotigny@google.com>
> Date: Wed, 16 Nov 2011 18:39:05 -0800
>
>> This adds a description of RPS/XPS options and allow them to be
>> changed at make menuconfig time.
>
> These aren't meant to be selectable.
Ok. I only needed this for our compilation scripts in order to make
sure I didn't have my CONFIG_* wrong in patch 4/9. I don't need this
patch anymore.
David, should I re-send an updated v7 version of this series with this
patch removed? Or is it Ok if I simply mark this patch as rejected in
patchwork and stick to patch series v6?
^ permalink raw reply
* Re: [PATCH 1/3] NET: MIPS: lantiq: make etop ethernet work on ase/ar9
From: David Miller @ 2011-11-17 3:02 UTC (permalink / raw)
To: blogic; +Cc: netdev
In-Reply-To: <1321454508-4754-1-git-send-email-blogic@openwrt.org>
From: John Crispin <blogic@openwrt.org>
Date: Wed, 16 Nov 2011 15:41:46 +0100
> Extend the driver to handle the different DMA channel layout for AR9 and
> Amazon-SE SoCs. The patch also adds support for the integrated PHY found
> on Amazon-SE and the gigabit switch found inside the AR9.
>
> Signed-off-by: John Crispin <blogic@openwrt.org>
> Cc: netdev@vger.kernel.org
Since these patches (at least partially) modify MIPS files, please submit
them via the MIPS maintainer.
Feel free to add:
Acked-by: David S. Miller <davem@davemloft.net>
^ permalink raw reply
* Re: [PATCH] net: ethtool: fix coding style
From: David Miller @ 2011-11-17 3:00 UTC (permalink / raw)
To: mirq-linux; +Cc: netdev, bhutchings
In-Reply-To: <8c950d750a6b1d491cb321f024e2bca9f238ab9c.1321489851.git.mirq-linux@rere.qmqm.pl>
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Thu, 17 Nov 2011 01:32:03 +0100 (CET)
> Add missing spaces around multiplication operator.
>
> Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: David Miller @ 2011-11-17 2:59 UTC (permalink / raw)
To: bhutchings
Cc: david.decotigny, netdev, linux-kernel, ian.campbell, eric.dumazet,
jeffrey.t.kirsher, jpirko, joe, szymon, rick.jones2, AAbdulla,
therbert
In-Reply-To: <1321498443.2885.80.camel@deadeye>
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 17 Nov 2011 02:54:03 +0000
> On Wed, 2011-11-16 at 17:54 -0800, David Decotigny wrote:
>> On Wed, Nov 16, 2011 at 3:12 PM, Ben Hutchings
>> <bhutchings@solarflare.com> wrote:
>> > On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
>> >> This adds a description of RPS/XPS options and allow them to be
>> >> changed at make menuconfig time.
>> >
>> > I'm not sure why you think this is necessary.
>>
>> On my copy, make menuconfig doesn't let me change these CONFIG_ items,
>> unless I add the string after "boolean", which this patch does. I
>> agree, the help I added after is pure cosmetic.
> [...]
>
> I know, but I'm asking why you think it's necessary to make them
> optional.
They shouldn't be, they are internal dependency descriptions and should
be invisible to the user.
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: David Miller @ 2011-11-17 2:58 UTC (permalink / raw)
To: david.decotigny
Cc: netdev, linux-kernel, ian.campbell, eric.dumazet,
jeffrey.t.kirsher, bhutchings, jpirko, joe, szymon, rick.jones2,
AAbdulla
In-Reply-To: <e0bb736123d687a921f15e4bb0254eeeabe22dcb.1321496595.git.david.decotigny@google.com>
From: David Decotigny <david.decotigny@google.com>
Date: Wed, 16 Nov 2011 18:39:05 -0800
> This adds a description of RPS/XPS options and allow them to be
> changed at make menuconfig time.
These aren't meant to be selectable.
They are expressing internal dependencies so we don't have to crap
up the code with the complicated dependency conditional test.
They are meant to be turned on, unconditionally, when the dependencies
are met. They are not meant to be controllable by the user.
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: Ben Hutchings @ 2011-11-17 2:54 UTC (permalink / raw)
To: David Decotigny
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Eric Dumazet,
Jeff Kirsher, Jiri Pirko, Joe Perches, Szymon Janc, Richard Jones,
Ayaz Abdulla, Tom Herbert
In-Reply-To: <CAG88wWZ853cHkykNbQ+Lqrk_D1p-SCmB0Y29YE6GpSSUORwCxg@mail.gmail.com>
On Wed, 2011-11-16 at 17:54 -0800, David Decotigny wrote:
> On Wed, Nov 16, 2011 at 3:12 PM, Ben Hutchings
> <bhutchings@solarflare.com> wrote:
> > On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
> >> This adds a description of RPS/XPS options and allow them to be
> >> changed at make menuconfig time.
> >
> > I'm not sure why you think this is necessary.
>
> On my copy, make menuconfig doesn't let me change these CONFIG_ items,
> unless I add the string after "boolean", which this patch does. I
> agree, the help I added after is pure cosmetic.
[...]
I know, but I'm asking why you think it's necessary to make them
optional.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: under-performing bonded interfaces
From: Ben Hutchings @ 2011-11-17 2:51 UTC (permalink / raw)
To: Simon Chen; +Cc: Ben Greear, netdev
In-Reply-To: <CANj2Ebdd89=Fk6i4e4Txsb3ANuzz-x4t+tT6CAnWDNZe0zvx9g@mail.gmail.com>
On Wed, 2011-11-16 at 20:38 -0500, Simon Chen wrote:
> Thanks, Ben. That's good discovery...
>
> Are you saying that both 10G NICs are on the same PCIe x4 slot, so
> that they're subject to the 12G throughput bottleneck?
I assumed you were using 2 ports on the same board, i.e. the same slot.
If you were using 1 port each of 2 boards then I would have expected
them both to be usable at full speed. So far as I can remember, PCIe
bridges are usually set up so there isn't contention for bandwidth
between slots.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [PATCH] Add ethtool to mii advertisment conversion helpers
From: Ben Hutchings @ 2011-11-17 2:45 UTC (permalink / raw)
To: Matt Carlson; +Cc: davem@davemloft.net, netdev@vger.kernel.org, Michael Chan
In-Reply-To: <20111117020613.GA18232@mcarlson.broadcom.com>
On Wed, 2011-11-16 at 18:06 -0800, Matt Carlson wrote:
> On Wed, Nov 16, 2011 at 05:29:42PM -0800, Ben Hutchings wrote:
> > On Wed, 2011-11-16 at 17:16 -0800, Matt Carlson wrote:
> > > On Wed, Nov 16, 2011 at 04:34:37PM -0800, Ben Hutchings wrote:
> > [...]
> > > > > +#define mii_lpa_to_ethtool_100bt(lpa) mii_adv_to_ethtool_100bt(lpa)
> > > >
> > > > Shouldn't this additionally translate LPA_LPACK into ADVERTISED_Autoneg?
> > >
> > > You mean, like this?
> > >
> > > static inline u32 mii_lpa_to_ethtool_100bt(u32 lpa)
> > > {
> > > u32 result = 0;
> > >
> > > if (lpa & LPA_LPACK)
> > > result |= ADVERTISED_Autoneg;
> > >
> > > return result | mii_adv_to_ethtool_100bt(lpa);
> > > }
> > >
> > > Yes, that looks like a better implementation.
> >
> > Think so.
> >
> > And I think the mii_adv_to_ethtool_* functions should add
> > ADVERTISED_Autoneg unconditionally. But I'm not entirely sure that's
> > right.
>
> The primary purpose of these functions is to translate the information
> between two representations. It seems wise to be careful not to add
> from it or take anything away from it. It is certainly possible to
> have a valid AN advertisement register configuration, but not have
> autoneg enabled. Keeping the ADVERTISED_Autoneg out could prevent
> misuse. Do you agree?
I'm not sure what you mean about misuse. If autoneg is not enabled then
the register contents are not used and I don't think the
mii_adv_to_ethtool functions should be called at all. But I'm not that
bothered either way.
> > > > Shouldn't there be mii_lpa_to_ethtool_1000X (or
> > > > mii_lpa_to_ethtool_lpa_x)?
> > >
> > > Yes. You're right. Should it just be a preprocessor definition that
> > > points to mii_adv_to_ethtool_1000X()?
> >
> > I think that would need to handle LPA_LPACK as well.
>
> I was wondering if that was present in 1000Base-X mode. I didn't see it
> in my passing glance at a spec.
IEEE 802.3-2005 §37.2.1.6 seems to define it similarly to the TP case.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: David Decotigny @ 2011-11-17 2:39 UTC (permalink / raw)
To: netdev, linux-kernel
Cc: David S. Miller, Ian Campbell, Eric Dumazet, Jeff Kirsher,
Ben Hutchings, Jiri Pirko, Joe Perches, Szymon Janc,
Richard Jones, Ayaz Abdulla, David Decotigny
In-Reply-To: <cover.1321496595.git.david.decotigny@google.com>
This adds a description of RPS/XPS options and allow them to be
changed at make menuconfig time.
It also fixes following checkpatch syntax warnings:
ERROR: trailing whitespace
+^I $
ERROR: trailing whitespace
+^I$
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
net/Kconfig | 21 ++++++++++++++++-----
1 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/net/Kconfig b/net/Kconfig
index a073148..991379e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -10,7 +10,7 @@ menuconfig NET
The reason is that some programs need kernel networking support even
when running on a stand-alone machine that isn't connected to any
other computer.
-
+
If you are upgrading from an older kernel, you
should consider updating your networking tools too because changes
in the kernel and the tools often go hand in hand. The tools are
@@ -217,20 +217,32 @@ source "net/dns_resolver/Kconfig"
source "net/batman-adv/Kconfig"
config RPS
- boolean
+ boolean "Enable Receive Packet Steering"
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+ help
+ RPS distributes the load of received packet processing
+ across multiple CPUs. If unsure, say Y.
config RFS_ACCEL
- boolean
+ boolean "Enable Hardware Acceleration of RFS"
depends on RPS && GENERIC_HARDIRQS
select CPU_RMAP
default y
+ help
+ Allow drivers for multiqueue hardware with flow filter
+ tables to accelerate RFS. If unsure, say Y.
config XPS
- boolean
+ boolean "Enable Transmit Packet Steering"
depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
default y
+ help
+ For multiqueue devices, XPS selects a transmit queue during
+ packet transmission based on configuration. This is done by
+ mapping the CPU transmitting the packet to a queue. XPS can
+ reduce transmit network latency on SMP systems. If unsure,
+ say Y.
config HAVE_BPF_JIT
bool
@@ -274,7 +286,6 @@ config NET_TCPPROBE
Documentation on how to use TCP connection probing can be found
at:
-
http://www.linuxfoundation.org/collaborate/workgroups/networking/tcpprobe
To compile this code as a module, choose M here: the
--
1.7.3.1
^ permalink raw reply related
* Re: [PATCH] Add ethtool to mii advertisment conversion helpers
From: Ben Hutchings @ 2011-11-17 2:29 UTC (permalink / raw)
To: David Miller; +Cc: mchan, mcarlson, netdev
In-Reply-To: <20111116.203850.1548671990526136907.davem@davemloft.net>
On Wed, 2011-11-16 at 20:38 -0500, David Miller wrote:
> From: "Michael Chan" <mchan@broadcom.com>
> Date: Wed, 16 Nov 2011 17:21:32 -0800
>
> >
> > On Wed, 2011-11-16 at 17:16 -0800, Matt Carlson wrote:
> >> > Finally, do these need to be inline?
> >>
> >> I don't have a strong preference here either. Phy code tends to be
> >> slower, so there isn't really a strong performance argument. The
> >> implementations don't seem to be so large to argue against it though.
> >> Would you prefer they not be inlined?
> >>
> >
> > Since we are defining these in .h file, they need to be inline, right?
> > Otherwise multiple source files including the same .h file will have
> > conflict.
>
> Yes, if you keep them in the header you have to keep them inline.
>
> Ben, by suggesting to not inline them, is implicitly saying to put
> them out in a seperate *.c file somewhere, perhaps net/core/ethtool.c
> or similar. With appropriate EXPORT_SYMBOL() added.
I was thinking of putting them in drivers/net/mii.c, like the other
functions declared extern in include/linux/mii.h. That would require
'select MII' in Kconfig for the drivers using them.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [PATCH] Add ethtool to mii advertisment conversion helpers
From: Matt Carlson @ 2011-11-17 2:06 UTC (permalink / raw)
To: Ben Hutchings
Cc: Matthew Carlson, davem@davemloft.net, netdev@vger.kernel.org,
Michael Chan
In-Reply-To: <1321493382.2709.94.camel@bwh-desktop>
On Wed, Nov 16, 2011 at 05:29:42PM -0800, Ben Hutchings wrote:
> On Wed, 2011-11-16 at 17:16 -0800, Matt Carlson wrote:
> > On Wed, Nov 16, 2011 at 04:34:37PM -0800, Ben Hutchings wrote:
> [...]
> > > > +#define mii_lpa_to_ethtool_100bt(lpa) mii_adv_to_ethtool_100bt(lpa)
> > >
> > > Shouldn't this additionally translate LPA_LPACK into ADVERTISED_Autoneg?
> >
> > You mean, like this?
> >
> > static inline u32 mii_lpa_to_ethtool_100bt(u32 lpa)
> > {
> > u32 result = 0;
> >
> > if (lpa & LPA_LPACK)
> > result |= ADVERTISED_Autoneg;
> >
> > return result | mii_adv_to_ethtool_100bt(lpa);
> > }
> >
> > Yes, that looks like a better implementation.
>
> Think so.
>
> And I think the mii_adv_to_ethtool_* functions should add
> ADVERTISED_Autoneg unconditionally. But I'm not entirely sure that's
> right.
The primary purpose of these functions is to translate the information
between two representations. It seems wise to be careful not to add
from it or take anything away from it. It is certainly possible to
have a valid AN advertisement register configuration, but not have
autoneg enabled. Keeping the ADVERTISED_Autoneg out could prevent
misuse. Do you agree?
> > > Shouldn't there be mii_lpa_to_ethtool_1000X (or
> > > mii_lpa_to_ethtool_lpa_x)?
> >
> > Yes. You're right. Should it just be a preprocessor definition that
> > points to mii_adv_to_ethtool_1000X()?
>
> I think that would need to handle LPA_LPACK as well.
I was wondering if that was present in 1000Base-X mode. I didn't see it
in my passing glance at a spec.
^ permalink raw reply
* Re: [PATCH net-next v6 3/9] kbuild: document RPS/XPS network Kconfig options
From: David Decotigny @ 2011-11-17 1:54 UTC (permalink / raw)
To: Ben Hutchings
Cc: netdev, linux-kernel, David S. Miller, Ian Campbell, Eric Dumazet,
Jeff Kirsher, Jiri Pirko, Joe Perches, Szymon Janc, Richard Jones,
Ayaz Abdulla, Tom Herbert
In-Reply-To: <1321485122.2709.55.camel@bwh-desktop>
On Wed, Nov 16, 2011 at 3:12 PM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Wed, 2011-11-16 at 14:15 -0800, David Decotigny wrote:
>> This adds a description of RPS/XPS options and allow them to be
>> changed at make menuconfig time.
>
> I'm not sure why you think this is necessary.
On my copy, make menuconfig doesn't let me change these CONFIG_ items,
unless I add the string after "boolean", which this patch does. I
agree, the help I added after is pure cosmetic.
>> config RFS_ACCEL
[...]
> RFS refers to directing RX packet processsing of specific flows based on
Oops, you are right. I got confused and somehow read "RPS" instead of
"RFS" here. Thank you for the great explanation!
Will update this commit message. I don't know how patchwork handles
the follow-ups (it didn't work yesterday), so I'm afraid I will have
to send yet another batch of updates...
Regards,
^ permalink raw reply
* Re: [PATCH 3/5] tcp: use SACKs and DSACKs that arrive on ACKs below snd_una
From: Ilpo Järvinen @ 2011-11-17 1:52 UTC (permalink / raw)
To: Neal Cardwell
Cc: David Miller, Netdev, Nandita Dukkipati, Yuchung Cheng,
Tom Herbert
In-Reply-To: <1321469885-10885-3-git-send-email-ncardwell@google.com>
[-- Attachment #1: Type: TEXT/PLAIN, Size: 2105 bytes --]
On Wed, 16 Nov 2011, Neal Cardwell wrote:
> The bug: When the ACK field is below snd_una (which can happen when
> ACKs are reordered), senders ignored DSACKs (preventing undo) and did
> not call tcp_fastretrans_alert, so they did not increment
> prr_delivered to reflect newly-SACKed sequence ranges, and did not
> call tcp_xmit_retransmit_queue, thus passing up chances to send out
> more retransmitted and new packets based on any newly-SACKed packets.
>
> The change: When the ACK field is below snd_una (the "old_ack" goto
> label), call tcp_fastretrans_alert to allow undo based on any
> newly-arrived DSACKs and try to send out more packets based on
> newly-SACKed packets.
>
> Other patches in this series will provide other changes that are
> necessary to fully fix this problem.
>
> Signed-off-by: Neal Cardwell <ncardwell@google.com>
> ---
> net/ipv4/tcp_input.c | 10 +++++++---
> 1 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index b49e418..751d390 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -3805,10 +3805,14 @@ invalid_ack:
> return -1;
>
> old_ack:
> + /* If data was SACKed, tag it and see if we should send more data.
> + * If data was DSACKed, see if we can undo a cwnd reduction.
> + */
> if (TCP_SKB_CB(skb)->sacked) {
> - tcp_sacktag_write_queue(sk, skb, prior_snd_una);
> - if (icsk->icsk_ca_state == TCP_CA_Open)
> - tcp_try_keep_open(sk);
> + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
> + newly_acked_sacked = tp->sacked_out - prior_sacked;
> + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
> + is_dupack, flag);
I gave FLAG_* some thought and couldn't find something that would go wrong
here (due to goto not all flag enablers are checked for).
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
...unrelated to the fix, I realized that FRTO is not fully thought through
in this old ACK case, also its RFC seems to lack considerations on what to
do in such case. ...I'll need to think the FRTO stuff a bit more.
--
i.
^ permalink raw reply
* [PATCH FIX net v2] net-e1000(e): Fix default interrupt throttle rate not set in NIC HW
From: David Decotigny @ 2011-11-17 1:46 UTC (permalink / raw)
To: Jeff Kirsher, Jesse Brandeburg, Bruce Allan, Carolyn Wyborny,
Don
Cc: Paul Gortmaker, David Decotigny, Ying Cai
In-Reply-To: <cover.1321494268.git.david.decotigny@google.com>
From: Ying Cai <ycai@google.com>
This change ensures that the itr/itr_setting adjustment logic is used,
even for the default/compiled-in value.
Context:
When we changed the default InterruptThrottleRate value from default
(3 = dynamic mode) to 8000 for example, only adapter->itr_setting
(which controls interrupt coalescing mode) was set to 8000, but
adapter->itr (which controls the value set in NIC register) was not
updated accordingly. So from ethtool, it seemed the interrupt
throttling is enabled at 8000 intr/s, but the NIC actually was
running in dynamic mode which has lower CPU efficiency especially
when throughput is not high.
Signed-off-by: David Decotigny <david.decotigny@google.com>
---
drivers/net/ethernet/intel/e1000/e1000_param.c | 81 +++++++++++--------
drivers/net/ethernet/intel/e1000e/param.c | 98 +++++++++++++-----------
2 files changed, 99 insertions(+), 80 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c
index 1301eba..595e462 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_param.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_param.c
@@ -173,7 +173,7 @@ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
/* Interrupt Throttle Rate (interrupts/sec)
*
- * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
+ * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative
*/
E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
#define DEFAULT_ITR 3
@@ -460,41 +460,54 @@ void __devinit e1000_check_options(struct e1000_adapter *adapter)
};
if (num_InterruptThrottleRate > bd) {
- adapter->itr = InterruptThrottleRate[bd];
- switch (adapter->itr) {
- case 0:
- e_dev_info("%s turned off\n", opt.name);
- break;
- case 1:
- e_dev_info("%s set to dynamic mode\n",
- opt.name);
- adapter->itr_setting = adapter->itr;
- adapter->itr = 20000;
- break;
- case 3:
- e_dev_info("%s set to dynamic conservative "
- "mode\n", opt.name);
- adapter->itr_setting = adapter->itr;
- adapter->itr = 20000;
- break;
- case 4:
- e_dev_info("%s set to simplified "
- "(2000-8000) ints mode\n", opt.name);
- adapter->itr_setting = adapter->itr;
- break;
- default:
- e1000_validate_option(&adapter->itr, &opt,
- adapter);
- /* save the setting, because the dynamic bits
- * change itr.
- * clear the lower two bits because they are
- * used as control */
- adapter->itr_setting = adapter->itr & ~3;
- break;
- }
+ /* Make sure a message is printed for
+ * non-special values. And in case of an
+ * invalid option, display warning, use
+ * default and go through itr/itr_setting
+ * adjustment logic below */
+ if ((adapter->itr < 0 || adapter->itr > 4)
+ && e1000_validate_option(&adapter->itr, &opt,
+ adapter))
+ adapter->itr = opt.def;
} else {
- adapter->itr_setting = opt.def;
+ /* if no option specified, use default value
+ and go through the logic below to adjust
+ itr/itr_setting */
+ adapter->itr = opt.def;
+
+ /* Make sure a message is printed for
+ * non-special default values */
+ if (adapter->itr < 0 || adapter->itr > 4)
+ e_dev_info("%s set to default %d\n",
+ opt.name, adapter->itr);
+ }
+
+ adapter->itr_setting = adapter->itr;
+ switch (adapter->itr) {
+ case 0:
+ e_dev_info("%s turned off\n", opt.name);
+ break;
+ case 1:
+ e_dev_info("%s set to dynamic mode\n",
+ opt.name);
+ adapter->itr = 20000;
+ break;
+ case 3:
+ e_dev_info("%s set to dynamic conservative "
+ "mode\n", opt.name);
adapter->itr = 20000;
+ break;
+ case 4:
+ e_dev_info("%s set to simplified "
+ "(2000-8000) ints mode\n", opt.name);
+ break;
+ default:
+ /* save the setting, because the dynamic bits
+ * change itr.
+ * clear the lower two bits because they are
+ * used as control */
+ adapter->itr_setting &= ~3;
+ break;
}
}
{ /* Smart Power Down */
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index 20e93b0..41937e5 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -106,7 +106,7 @@ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
/*
* Interrupt Throttle Rate (interrupts/sec)
*
- * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
+ * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative
*/
E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
#define DEFAULT_ITR 3
@@ -335,53 +335,59 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter)
if (num_InterruptThrottleRate > bd) {
adapter->itr = InterruptThrottleRate[bd];
- switch (adapter->itr) {
- case 0:
- e_info("%s turned off\n", opt.name);
- break;
- case 1:
- e_info("%s set to dynamic mode\n", opt.name);
- adapter->itr_setting = adapter->itr;
- adapter->itr = 20000;
- break;
- case 3:
- e_info("%s set to dynamic conservative mode\n",
- opt.name);
- adapter->itr_setting = adapter->itr;
- adapter->itr = 20000;
- break;
- case 4:
- e_info("%s set to simplified (2000-8000 ints) "
- "mode\n", opt.name);
- adapter->itr_setting = 4;
- break;
- default:
- /*
- * Save the setting, because the dynamic bits
- * change itr.
- */
- if (e1000_validate_option(&adapter->itr, &opt,
- adapter) &&
- (adapter->itr == 3)) {
- /*
- * In case of invalid user value,
- * default to conservative mode.
- */
- adapter->itr_setting = adapter->itr;
- adapter->itr = 20000;
- } else {
- /*
- * Clear the lower two bits because
- * they are used as control.
- */
- adapter->itr_setting =
- adapter->itr & ~3;
- }
- break;
- }
+
+ /* Make sure a message is printed for
+ * non-special values. And in case of an
+ * invalid option, display warning, use
+ * default and go through itr/itr_setting
+ * adjustment logic below */
+ if ((adapter->itr < 0 || adapter->itr > 4)
+ && e1000_validate_option(&adapter->itr, &opt,
+ adapter))
+ adapter->itr = opt.def;
} else {
- adapter->itr_setting = opt.def;
+ /* if no option specified, use default value
+ and go through the logic below to adjust
+ itr/itr_setting */
+ adapter->itr = opt.def;
+
+ /* Make sure a message is printed for
+ * non-special default values */
+ if (adapter->itr < 0 || adapter->itr > 4)
+ e_info("%s set to default %d\n",
+ opt.name, adapter->itr);
+ }
+
+
+
+ adapter->itr_setting = adapter->itr;
+ switch (adapter->itr) {
+ case 0:
+ e_info("%s turned off\n", opt.name);
+ break;
+ case 1:
+ e_info("%s set to dynamic mode\n", opt.name);
+ adapter->itr = 20000;
+ break;
+ case 3:
+ e_info("%s set to dynamic conservative mode\n",
+ opt.name);
adapter->itr = 20000;
+ break;
+ case 4:
+ e_info("%s set to simplified (2000-8000 ints) "
+ "mode\n", opt.name);
+ break;
+ default:
+ /*
+ * Save the setting, because the dynamic bits
+ * change itr.
+ *
+ * Clear the lower two bits because
+ * they are used as control.
+ */
+ adapter->itr_setting &= ~3;
+ break;
}
}
{ /* Interrupt Mode */
--
1.7.3.1
^ permalink raw reply related
* Re: under-performing bonded interfaces
From: Rick Jones @ 2011-11-17 1:45 UTC (permalink / raw)
To: Simon Chen; +Cc: Ben Hutchings, Ben Greear, netdev
In-Reply-To: <CANj2Ebdd89=Fk6i4e4Txsb3ANuzz-x4t+tT6CAnWDNZe0zvx9g@mail.gmail.com>
On 11/16/2011 05:38 PM, Simon Chen wrote:
> Thanks, Ben. That's good discovery...
>
> Are you saying that both 10G NICs are on the same PCIe x4 slot, so
> that they're subject to the 12G throughput bottleneck?
>
> I'm gonna verify this with the hardware vendor...
Look at the PCI addresses - they are:
0000:03:00.0
0000:03:00.1
The numbers after the "dot" are PCI function numbers, meaning that both
*ports* of the dual-port NIC are in the same PCIe slot, in this case a
x4 slot.
more generally, the format is:
DDDD:BB:SS.F
D == DOMAIN; B == BUS; S == SLOT; F == FUNCTION. If domain, bus and
slot are the same, but functions differ, the "devices" are in the same slot.
rick jones
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox