* [PATCH 2/2] ath9k: ar9003_mac: kill off ACCESS_ONCE()
From: Mark Rutland @ 2016-12-27 18:49 UTC (permalink / raw)
To: linux-kernel
Cc: ath9k-devel, kvalo, linux-wireless, ath9k-devel, netdev,
Mark Rutland
In-Reply-To: <1482864599-19995-1-git-send-email-mark.rutland@arm.com>
For several reasons, it is desirable to use {READ,WRITE}_ONCE() in
preference to ACCESS_ONCE(), and new code is expected to use one of the
former. So far, there's been no reason to change most existing uses of
ACCESS_ONCE(), as these aren't currently harmful.
However, for some new features (e.g. KTSAN / Kernel Thread Sanitizer),
it is necessary to instrument reads and writes separately, which is not
possible with ACCESS_ONCE(). This distinction is critical to correct
operation.
It's possible to transform the bulk of kernel code using the Coccinelle
script below. However, for some files (including the ath9k ar9003 mac
driver), this mangles the formatting. As a preparatory step, this patch
converts the driver to use {READ,WRITE}_ONCE() without said mangling.
----
virtual patch
@ depends on patch @
expression E1, E2;
@@
- ACCESS_ONCE(E1) = E2
+ WRITE_ONCE(E1, E2)
@ depends on patch @
expression E;
@@
- ACCESS_ONCE(E)
+ READ_ONCE(E)
----
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: ath9k-devel@qca.qualcomm.com
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: linux-wireless@vger.kernel.org
Cc: ath9k-devel@lists.ath9k.org
Cc: netdev@vger.kernel.org
---
drivers/net/wireless/ath/ath9k/ar9003_mac.c | 92 ++++++++++++++---------------
1 file changed, 46 insertions(+), 46 deletions(-)
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index da84b70..cc5bb0a 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -39,47 +39,47 @@ ar9003_set_txdesc(struct ath_hw *ah, void *ds, struct ath_tx_info *i)
(i->qcu << AR_TxQcuNum_S) | desc_len;
checksum += val;
- ACCESS_ONCE(ads->info) = val;
+ WRITE_ONCE(ads->info, val);
checksum += i->link;
- ACCESS_ONCE(ads->link) = i->link;
+ WRITE_ONCE(ads->link, i->link);
checksum += i->buf_addr[0];
- ACCESS_ONCE(ads->data0) = i->buf_addr[0];
+ WRITE_ONCE(ads->data0, i->buf_addr[0]);
checksum += i->buf_addr[1];
- ACCESS_ONCE(ads->data1) = i->buf_addr[1];
+ WRITE_ONCE(ads->data1, i->buf_addr[1]);
checksum += i->buf_addr[2];
- ACCESS_ONCE(ads->data2) = i->buf_addr[2];
+ WRITE_ONCE(ads->data2, i->buf_addr[2]);
checksum += i->buf_addr[3];
- ACCESS_ONCE(ads->data3) = i->buf_addr[3];
+ WRITE_ONCE(ads->data3, i->buf_addr[3]);
checksum += (val = (i->buf_len[0] << AR_BufLen_S) & AR_BufLen);
- ACCESS_ONCE(ads->ctl3) = val;
+ WRITE_ONCE(ads->ctl3, val);
checksum += (val = (i->buf_len[1] << AR_BufLen_S) & AR_BufLen);
- ACCESS_ONCE(ads->ctl5) = val;
+ WRITE_ONCE(ads->ctl5, val);
checksum += (val = (i->buf_len[2] << AR_BufLen_S) & AR_BufLen);
- ACCESS_ONCE(ads->ctl7) = val;
+ WRITE_ONCE(ads->ctl7, val);
checksum += (val = (i->buf_len[3] << AR_BufLen_S) & AR_BufLen);
- ACCESS_ONCE(ads->ctl9) = val;
+ WRITE_ONCE(ads->ctl9, val);
checksum = (u16) (((checksum & 0xffff) + (checksum >> 16)) & 0xffff);
- ACCESS_ONCE(ads->ctl10) = checksum;
+ WRITE_ONCE(ads->ctl10, checksum);
if (i->is_first || i->is_last) {
- ACCESS_ONCE(ads->ctl13) = set11nTries(i->rates, 0)
+ WRITE_ONCE(ads->ctl13, set11nTries(i->rates, 0)
| set11nTries(i->rates, 1)
| set11nTries(i->rates, 2)
| set11nTries(i->rates, 3)
| (i->dur_update ? AR_DurUpdateEna : 0)
- | SM(0, AR_BurstDur);
+ | SM(0, AR_BurstDur));
- ACCESS_ONCE(ads->ctl14) = set11nRate(i->rates, 0)
+ WRITE_ONCE(ads->ctl14, set11nRate(i->rates, 0)
| set11nRate(i->rates, 1)
| set11nRate(i->rates, 2)
- | set11nRate(i->rates, 3);
+ | set11nRate(i->rates, 3));
} else {
- ACCESS_ONCE(ads->ctl13) = 0;
- ACCESS_ONCE(ads->ctl14) = 0;
+ WRITE_ONCE(ads->ctl13, 0);
+ WRITE_ONCE(ads->ctl14, 0);
}
ads->ctl20 = 0;
@@ -89,17 +89,17 @@ ar9003_set_txdesc(struct ath_hw *ah, void *ds, struct ath_tx_info *i)
ctl17 = SM(i->keytype, AR_EncrType);
if (!i->is_first) {
- ACCESS_ONCE(ads->ctl11) = 0;
- ACCESS_ONCE(ads->ctl12) = i->is_last ? 0 : AR_TxMore;
- ACCESS_ONCE(ads->ctl15) = 0;
- ACCESS_ONCE(ads->ctl16) = 0;
- ACCESS_ONCE(ads->ctl17) = ctl17;
- ACCESS_ONCE(ads->ctl18) = 0;
- ACCESS_ONCE(ads->ctl19) = 0;
+ WRITE_ONCE(ads->ctl11, 0);
+ WRITE_ONCE(ads->ctl12, i->is_last ? 0 : AR_TxMore);
+ WRITE_ONCE(ads->ctl15, 0);
+ WRITE_ONCE(ads->ctl16, 0);
+ WRITE_ONCE(ads->ctl17, ctl17);
+ WRITE_ONCE(ads->ctl18, 0);
+ WRITE_ONCE(ads->ctl19, 0);
return;
}
- ACCESS_ONCE(ads->ctl11) = (i->pkt_len & AR_FrameLen)
+ WRITE_ONCE(ads->ctl11, (i->pkt_len & AR_FrameLen)
| (i->flags & ATH9K_TXDESC_VMF ? AR_VirtMoreFrag : 0)
| SM(i->txpower[0], AR_XmitPower0)
| (i->flags & ATH9K_TXDESC_VEOL ? AR_VEOL : 0)
@@ -107,7 +107,7 @@ ar9003_set_txdesc(struct ath_hw *ah, void *ds, struct ath_tx_info *i)
| (i->flags & ATH9K_TXDESC_LOWRXCHAIN ? AR_LowRxChain : 0)
| (i->flags & ATH9K_TXDESC_CLRDMASK ? AR_ClrDestMask : 0)
| (i->flags & ATH9K_TXDESC_RTSENA ? AR_RTSEnable :
- (i->flags & ATH9K_TXDESC_CTSENA ? AR_CTSEnable : 0));
+ (i->flags & ATH9K_TXDESC_CTSENA ? AR_CTSEnable : 0)));
ctl12 = (i->keyix != ATH9K_TXKEYIX_INVALID ?
SM(i->keyix, AR_DestIdx) : 0)
@@ -135,26 +135,26 @@ ar9003_set_txdesc(struct ath_hw *ah, void *ds, struct ath_tx_info *i)
val = (i->flags & ATH9K_TXDESC_PAPRD) >> ATH9K_TXDESC_PAPRD_S;
ctl12 |= SM(val, AR_PAPRDChainMask);
- ACCESS_ONCE(ads->ctl12) = ctl12;
- ACCESS_ONCE(ads->ctl17) = ctl17;
+ WRITE_ONCE(ads->ctl12, ctl12);
+ WRITE_ONCE(ads->ctl17, ctl17);
- ACCESS_ONCE(ads->ctl15) = set11nPktDurRTSCTS(i->rates, 0)
- | set11nPktDurRTSCTS(i->rates, 1);
+ WRITE_ONCE(ads->ctl15, set11nPktDurRTSCTS(i->rates, 0)
+ | set11nPktDurRTSCTS(i->rates, 1));
- ACCESS_ONCE(ads->ctl16) = set11nPktDurRTSCTS(i->rates, 2)
- | set11nPktDurRTSCTS(i->rates, 3);
+ WRITE_ONCE(ads->ctl16, set11nPktDurRTSCTS(i->rates, 2)
+ | set11nPktDurRTSCTS(i->rates, 3));
- ACCESS_ONCE(ads->ctl18) = set11nRateFlags(i->rates, 0)
+ WRITE_ONCE(ads->ctl18, set11nRateFlags(i->rates, 0)
| set11nRateFlags(i->rates, 1)
| set11nRateFlags(i->rates, 2)
| set11nRateFlags(i->rates, 3)
- | SM(i->rtscts_rate, AR_RTSCTSRate);
+ | SM(i->rtscts_rate, AR_RTSCTSRate));
- ACCESS_ONCE(ads->ctl19) = AR_Not_Sounding;
+ WRITE_ONCE(ads->ctl19, AR_Not_Sounding);
- ACCESS_ONCE(ads->ctl20) = SM(i->txpower[1], AR_XmitPower1);
- ACCESS_ONCE(ads->ctl21) = SM(i->txpower[2], AR_XmitPower2);
- ACCESS_ONCE(ads->ctl22) = SM(i->txpower[3], AR_XmitPower3);
+ WRITE_ONCE(ads->ctl20, SM(i->txpower[1], AR_XmitPower1));
+ WRITE_ONCE(ads->ctl21, SM(i->txpower[2], AR_XmitPower2));
+ WRITE_ONCE(ads->ctl22, SM(i->txpower[3], AR_XmitPower3));
}
static u16 ar9003_calc_ptr_chksum(struct ar9003_txc *ads)
@@ -359,7 +359,7 @@ static int ar9003_hw_proc_txdesc(struct ath_hw *ah, void *ds,
ads = &ah->ts_ring[ah->ts_tail];
- status = ACCESS_ONCE(ads->status8);
+ status = READ_ONCE(ads->status8);
if ((status & AR_TxDone) == 0)
return -EINPROGRESS;
@@ -385,7 +385,7 @@ static int ar9003_hw_proc_txdesc(struct ath_hw *ah, void *ds,
if (status & AR_TxOpExceeded)
ts->ts_status |= ATH9K_TXERR_XTXOP;
- status = ACCESS_ONCE(ads->status2);
+ status = READ_ONCE(ads->status2);
ts->ts_rssi_ctl0 = MS(status, AR_TxRSSIAnt00);
ts->ts_rssi_ctl1 = MS(status, AR_TxRSSIAnt01);
ts->ts_rssi_ctl2 = MS(status, AR_TxRSSIAnt02);
@@ -395,7 +395,7 @@ static int ar9003_hw_proc_txdesc(struct ath_hw *ah, void *ds,
ts->ba_high = ads->status6;
}
- status = ACCESS_ONCE(ads->status3);
+ status = READ_ONCE(ads->status3);
if (status & AR_ExcessiveRetries)
ts->ts_status |= ATH9K_TXERR_XRETRY;
if (status & AR_Filtered)
@@ -420,7 +420,7 @@ static int ar9003_hw_proc_txdesc(struct ath_hw *ah, void *ds,
ts->ts_longretry = MS(status, AR_DataFailCnt);
ts->ts_virtcol = MS(status, AR_VirtRetryCnt);
- status = ACCESS_ONCE(ads->status7);
+ status = READ_ONCE(ads->status7);
ts->ts_rssi = MS(status, AR_TxRSSICombined);
ts->ts_rssi_ext0 = MS(status, AR_TxRSSIAnt10);
ts->ts_rssi_ext1 = MS(status, AR_TxRSSIAnt11);
@@ -437,13 +437,13 @@ static int ar9003_hw_get_duration(struct ath_hw *ah, const void *ds, int index)
switch (index) {
case 0:
- return MS(ACCESS_ONCE(adc->ctl15), AR_PacketDur0);
+ return MS(READ_ONCE(adc->ctl15), AR_PacketDur0);
case 1:
- return MS(ACCESS_ONCE(adc->ctl15), AR_PacketDur1);
+ return MS(READ_ONCE(adc->ctl15), AR_PacketDur1);
case 2:
- return MS(ACCESS_ONCE(adc->ctl16), AR_PacketDur2);
+ return MS(READ_ONCE(adc->ctl16), AR_PacketDur2);
case 3:
- return MS(ACCESS_ONCE(adc->ctl16), AR_PacketDur3);
+ return MS(READ_ONCE(adc->ctl16), AR_PacketDur3);
default:
return 0;
}
--
2.7.4
^ permalink raw reply related
* Re: [net/mm PATCH v2 0/3] Page fragment updates
From: Alexander Duyck @ 2016-12-27 18:54 UTC (permalink / raw)
To: David Miller
Cc: linux-mm, Andrew Morton, Netdev, linux-kernel@vger.kernel.org,
Jeff Kirsher
In-Reply-To: <20161223.125053.1340469257610308679.davem@davemloft.net>
On Fri, Dec 23, 2016 at 9:50 AM, David Miller <davem@davemloft.net> wrote:
> From: Alexander Duyck <alexander.duyck@gmail.com>
> Date: Fri, 23 Dec 2016 09:16:39 -0800
>
>> I tried to get in touch with Andrew about this fix but I haven't heard any
>> reply to the email I sent out on Tuesday. The last comment I had from
>> Andrew against v1 was "Looks good to me. I have it all queued for post-4.9
>> processing.", but I haven't received any notice they were applied.
>
> Andrew, please follow up with Alex.
I'm assuming Andrew is probably out for the holidays since I didn't
hear anything, and since Linus pushed 4.10-rc1 I'm assuming I have
missed the merge window.
Dave, I was wondering if you would be okay with me trying to push the
three patches through net-next. I'm thinking I might scale back the
first patch so that it is just a rename instead of making any
functional changes. The main reason why I am thinking of trying to
submit through net-next is because I can then start working on
submitting the driver patches for net-next. Otherwise I'm looking at
this set creating a merge mess since I don't see a good way to push
the driver changes without already having these changes present.
I'll wait until Andrew can weigh in on the patches before
resubmitting. My thought was to get an Acked-by from him and then see
if I can get them accepted into net-next. That way there isn't any
funky cross-tree merging that will need to go on, and it shouldn't
really impact the mm tree all that much as the only consumers for the
page frag code are the network stack anyway.
Thanks.
- Alex
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* Re: [net/mm PATCH v2 0/3] Page fragment updates
From: David Miller @ 2016-12-27 18:55 UTC (permalink / raw)
To: alexander.duyck; +Cc: linux-mm, akpm, netdev, linux-kernel, jeffrey.t.kirsher
In-Reply-To: <CAKgT0UeP3QkjPQcPGv4ONhO5D56-+TL=-JYx6R+YJvLcCgK3cw@mail.gmail.com>
From: Alexander Duyck <alexander.duyck@gmail.com>
Date: Tue, 27 Dec 2016 10:54:14 -0800
> Dave, I was wondering if you would be okay with me trying to push the
> three patches though net-next. I'm thinking I might scale back the
> first patch so that it is just a rename instead of making any
> functional changes. The main reason why I am thinking of trying to
> submit through net-next is because then I can then start working on
> submitting the driver patches for net-next. Otherwise I'm looking at
> this set creating a merge mess since I don't see a good way to push
> the driver changes without already having these changes present.
>
> I'll wait until Andrew can weigh in on the patches before
> resubmitting. My thought was to get an Acked-by from him and then see
> if I can get them accepted into net-next. That way there isn't any
> funky cross-tree merging that will need to go on, and it shouldn't
> really impact the mm tree all that much as the only consumers for the
> page frag code are the network stack anyway.
I'm fine with this plan.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* Re: [RFC PATCH 4.10 1/6] crypto/sha256: Refactor the API so it can be used without shash
From: Andy Lutomirski @ 2016-12-27 19:00 UTC (permalink / raw)
To: Daniel Borkmann
Cc: Herbert Xu, Ard Biesheuvel, Andy Lutomirski, Netdev, LKML,
Linux Crypto Mailing List, Jason A. Donenfeld,
Hannes Frederic Sowa, Alexei Starovoitov, Eric Dumazet,
Eric Biggers, Tom Herbert, David S. Miller
In-Reply-To: <586277AE.80401@iogearbox.net>
On Tue, Dec 27, 2016 at 6:16 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> On 12/27/2016 10:58 AM, Herbert Xu wrote:
>>
>> On Mon, Dec 26, 2016 at 10:08:48AM -0800, Andy Lutomirski wrote:
>>>
>>>
>>> According to Daniel, the networking folks want to let embedded systems
>>> include BPF without requiring the crypto core.
>>
>>
>> Last I checked the IPv4 stack depended on the crypto API so this
>> sounds bogus.
>
>
> I think there's a bit of a mixup here with what I said. To clarify,
> requirement back then from tracing folks was that bpf engine and
> therefore bpf syscall can be build w/o networking enabled for small
> devices, so dependencies preferably need to be kept on a absolute
> minimum, same counts for either making it suddenly a depend on
> CRYPTO or a select CRYPTO for just those few lines that can be
> pulled in from lib/ code instead.
Somehow I had that in my head as "networking" not "tracing", probably
because of the TCA stuff. Whoops.
Anyway, I'm rewriting the crypto part of the patch completely based on
Ard's feedback.
^ permalink raw reply
* Re: [PATCH] net: fix incorrect original ingress device index in PKTINFO
From: David Miller @ 2016-12-27 19:03 UTC (permalink / raw)
To: asuka.com; +Cc: kuznet, jmorris, yoshfuji, kaber, netdev, linux-kernel, dsa
In-Reply-To: <1482832344-24760-1-git-send-email-asuka.com@163.com>
From: Wei Zhang <asuka.com@163.com>
Date: Tue, 27 Dec 2016 17:52:24 +0800
> When we send a packet for our own local address on a non-loopback
> interface (e.g. eth0), due to the change had been introduced from
> commit 0b922b7a829c ("net: original ingress device index in PKTINFO"), the
> original ingress device index would be set as the loopback interface.
> However, the packet should be considered as if it is being arrived via the
> sending interface (eth0), otherwise it would break the expectation of the
> userspace application (e.g. the DHCPRELEASE message from dhcp_release
> binary would be ignored by the dnsmasq daemon, since it come from lo which
> is not the interface dnsmasq bind to)
>
> Signed-off-by: Wei Zhang <asuka.com@163.com>
When you are fixing a problem introduced by another change, always CC:
the author of that change as I have done so here.
David, please take a look at this, thanks.
> ---
> net/ipv4/ip_sockglue.c | 8 +++++++-
> 1 file changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index b8a2d63..76d78a7 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -1202,8 +1202,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
> * which has interface index (iif) as the first member of the
> * underlying inet{6}_skb_parm struct. This code then overlays
> * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
> - * element so the iif is picked up from the prior IPCB
> + * element so the iif is picked up from the prior IPCB except
> + * iif is loopback interface which the packet should be
> + * considered as if it is being arrived via the sending
> + * interface
> */
> + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
> + pktinfo->ipi_ifindex = inet_iif(skb);
> +
> pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
> } else {
> pktinfo->ipi_ifindex = 0;
> --
> 1.8.3.1
>
>
^ permalink raw reply
* Re: driver r8169 suddenly failed
From: Robert Grasso @ 2016-12-27 19:41 UTC (permalink / raw)
To: Francois Romieu; +Cc: Realtek linux nic maintainers, netdev
In-Reply-To: <20161227001248.GA20172@electric-eye.fr.zoreil.com>
Hello,
I have some unexpected (and interesting) news. I did not run the test
yet. While I was investigating my issue, I ordered a fast
Ethernet-to-USB3 converter, able to reach 1000Mbit/s, in order to
recover my broadband quickly : here is the chip as reported by dmesg :
[ 8.114327] ax88179_178a 4-1:1.0 eth2: register 'ax88179_178a' at
usb-0000:03:00.0-1, ASIX AX88179 USB 3.0 Gigabit Ethernet, 00:0e:c6:c2:ce:d1
from the chinese brand UGreen : at first sight, you do not seem to be
related to it. On their advertisement, they allege they are compatible
with kernel 2.6 and higher (which I confirmed on various forums before
ordering it). However, guess what : I have the EXACT SAME behaviour !
- connected from the Shuttle (with USB3) on my cable modem, it fails to
acquire the IP address as well (same endless loop) !
- from my laptop with Ubuntu 16.04 now :
  - connected on the LAN (and thus, on the Shuttle which runs my local
    DHCP server across YOUR perfectly functioning interface and driver)
    it works perfectly
  - connected on the cable modem, it fails too !
So, what is your opinion :
- should I broaden my request for help to other teams than yours (kernel
maintainers) ?
- are you still interested in this test you asked for ?
Best regards
--
Robert Grasso
@home
---
UNIX was not designed to stop you from doing stupid things, because
that would also stop you from doing clever things. -- Doug Gwyn
On 27/12/2016 01:12, Francois Romieu wrote:
> Robert Grasso <robert.grasso@modulonet.fr> :
> [dhcp snafu]
>> First of all, can you confirm that I am doing right in posting to you
>> (addresses found in README.Debian) ?
> It isn't completely wrong.
>
>> If I do, can you help ? I am not very proficient with Ethernet, and I am not
>> able to figure out what my provider changed : their hotline is
>> underqualified, they are just able to tell that "the signal on the line is
>> ok".
> You're spoiled. It is more than decent for a company whose core business
> used to sell cable TV.
>
>> But if you want me to run various tests, try new versions, I would be
>> glad to do so : I would appreciate if I could salvage this Shuttle.
> Please try a recent stable vanilla kernel and send a complete dmesg
> from boot. I need to identify the specific 816x chipset.
>
> Then record some traffic:
>
> # touch gonzo.pcap && tshark -w gonzo.pcap -i eth1
>
> It should only exhibit small outgoing packets but, well, one never knows.
>
> Check the leds activity to be sure that the network interfaces have not
> been renumbered.
>
> Use ethtool to check that tso is disabled. If it isn't, disable it.
>
^ permalink raw reply
* [GIT] Networking
From: David Miller @ 2016-12-27 19:43 UTC (permalink / raw)
To: torvalds; +Cc: akpm, netdev, linux-kernel
1) Various ipvlan fixes from Eric Dumazet and Mahesh Bandewar. The most
important is to not assume the packet is RX just because the destination
address matches that of the device. Such an assumption causes problems
when an interface is put into loopback mode.
2) If we retry when creating a new tc entry (because we dropped the
RTNL mutex in order to load a module, for example) we end up with
-EAGAIN and then loop trying to replay the request. Problem is we
didn't reset some state when looping back to the top like this, and
if another thread meanwhile inserted the same tc entry we were
trying to, we re-link it creating an endless loop in the tc chain.
Fix from Daniel Borkmann.
3) There are two different WRITE bits in the MDIO address register for
the stmmac chip, depending upon the chip variant. Due to a bug we
could set them both, fix from Hock Leong Kweh.
4) Fix mlx4 bug in XDP_TX handling, from Tariq Toukan.
Please pull, thanks a lot!
The following changes since commit 50b17cfb1917b207612327d354e9043dbcbde431:
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2016-12-23 11:23:25 -0800)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
for you to fetch changes up to 5799fc905930f866c7d32aaf81b31f8027297506:
net: stmmac: fix incorrect bit set in gmac4 mdio addr register (2016-12-27 12:28:08 -0500)
----------------------------------------------------------------
Chun-Hao Lin (1):
r8169: add support for RTL8168 series add-on card.
Daniel Borkmann (1):
net, sched: fix soft lockup in tc_classify
Eric Dumazet (1):
ipvlan: fix various issues in ipvlan_process_multicast()
Florian Fainelli (1):
net: korina: Fix NAPI versus resources freeing
Haishuang Yan (1):
ipv4: Namespaceify tcp_tw_reuse knob
Jason Wang (1):
net: xdp: remove unused bfp_warn_invalid_xdp_buffer()
Jon Paul Maloy (1):
tipc: don't send FIN message from connectionless socket
Kweh, Hock Leong (1):
net: stmmac: fix incorrect bit set in gmac4 mdio addr register
Mahesh Bandewar (1):
ipvlan: fix multicast processing
Tariq Toukan (1):
net/mlx4_en: Fix user prio field in XDP forward
pravin shelar (1):
openvswitch: upcall: Fix vlan handling.
drivers/net/ethernet/korina.c | 8 ++++----
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++-
drivers/net/ethernet/realtek/r8169.c | 1 +
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 4 +++-
drivers/net/ipvlan/ipvlan.h | 5 +++++
drivers/net/ipvlan/ipvlan_core.c | 60 ++++++++++++++++++++++++++++++++++++++----------------------
drivers/net/ipvlan/ipvlan_main.c | 7 ++++++-
include/linux/filter.h | 1 -
include/net/netns/ipv4.h | 1 +
include/net/tcp.h | 1 -
net/core/filter.c | 6 ------
net/ipv4/sysctl_net_ipv4.c | 14 +++++++-------
net/ipv4/tcp_ipv4.c | 4 ++--
net/openvswitch/datapath.c | 1 -
net/openvswitch/flow.c | 54 +++++++++++++++++++++++++++---------------------------
net/sched/cls_api.c | 4 +++-
net/tipc/socket.c | 24 +++++++++++++-----------
17 files changed, 112 insertions(+), 86 deletions(-)
^ permalink raw reply
* Re: Re: [PATCH v2 1/2] drivers: net: ethernet: 3com: fix return value
From: David Dillow @ 2016-12-27 21:15 UTC (permalink / raw)
To: Thomas Preisner
Cc: sergei.shtylyov, netdev, linux-kernel, linux-kernel,
milan.stephan+linux
In-Reply-To: <1482625822-19658-1-git-send-email-thomas.preisner+linux@fau.de>
On Sun, 2016-12-25 at 01:30 +0100, Thomas Preisner wrote:
> Those spaces were actually left out purposely: The file in question (typhoon.c)
> is missing those spaces between the statements (if, for, while) and the
> following opening bracket pretty much always (except 2-3 times) and we figured
> that it might be better to keep the coding style consistent since this might
> aswell have been intended by the original author.
I'm not sure if we had the rule back then, or if I just missed it.
Either way, we should follow the rules for new code if we can.
I'm not sure it's worth fixing all of the instances -- usually
formatting-only changes are not worth the churn -- but I don't have a
strong opinion on the matter.
^ permalink raw reply
* Re: [PATCH v3 2/2] drivers: net: ethernet: 3com: fix return value
From: David Dillow @ 2016-12-27 21:17 UTC (permalink / raw)
To: Thomas Preisner
Cc: sergei.shtylyov, netdev, linux-kernel, linux-kernel,
milan.stephan+linux
In-Reply-To: <1482625822-19658-3-git-send-email-thomas.preisner+linux@fau.de>
On Sun, 2016-12-25 at 01:30 +0100, Thomas Preisner wrote:
> In some cases the return value of a failing function is not being used
> and the function typhoon_init_one() returns another negative error
> code instead.
I'm not sure these changes are especially valuable, since we'll need to
look at the dmesg log anyways to figure out what went wrong, but again I
don't feel strongly.
Fix up the subject issues and I'm happy to ack them.
^ permalink raw reply
* Re: [PATCH] staging: octeon: Call SET_NETDEV_DEV()
From: Florian Fainelli @ 2016-12-27 22:15 UTC (permalink / raw)
To: gregkh
Cc: devel, asbjorn, aaro.koskinen, netdev, nevola, linux-kernel,
jarod, bhaktipriya96, David Miller, tremyfr
In-Reply-To: <20161220.222020.1448709395205678279.davem@davemloft.net>
On 12/20/2016 07:20 PM, David Miller wrote:
> From: Florian Fainelli <f.fainelli@gmail.com>
> Date: Tue, 20 Dec 2016 17:02:37 -0800
>
>> On 12/14/2016 05:13 PM, Florian Fainelli wrote:
>>> The Octeon driver calls into PHYLIB which now checks for
>>> net_device->dev.parent, so make sure we do set it before calling into
>>> any MDIO/PHYLIB related function.
>>>
>>> Fixes: ec988ad78ed6 ("phy: Don't increment MDIO bus refcount unless it's a different owner")
>>> Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
>>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
>>
>> Greg, David, since this is a fix for a regression introduced in the net
>> tree, it may make sense that David take it via his tree.
>
> Since the change in question is in Linus's tree, it's equally valid
> for Greg to take it as well.
Sure, Greg, can you take this change? Thank you!
--
Florian
^ permalink raw reply
* Re: [RFC PATCH] i40e: enable PCIe relax ordering for SPARC
From: tndave @ 2016-12-27 22:27 UTC (permalink / raw)
To: maowenan, jeffrey.t.kirsher@intel.com,
intel-wired-lan@lists.osuosl.org
Cc: netdev@vger.kernel.org, weiyongjun (A), Dingtianhong
In-Reply-To: <F95AC9340317A84688A5F0DF0246F3F2015209E7@szxeml504-mbs.china.huawei.com>
On 12/26/2016 03:39 AM, maowenan wrote:
>
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>> On Behalf Of Tushar Dave
>> Sent: Tuesday, December 06, 2016 1:07 AM
>> To: jeffrey.t.kirsher@intel.com; intel-wired-lan@lists.osuosl.org
>> Cc: netdev@vger.kernel.org
>> Subject: [RFC PATCH] i40e: enable PCIe relax ordering for SPARC
>>
>> Unlike previous generation NIC (e.g. ixgbe) i40e doesn't seem to have standard
>> CSR where PCIe relaxed ordering can be set. Without PCIe relax ordering
>> enabled, i40e performance is significantly low on SPARC.
>>
> [Mao Wenan]Hi Tushar, you have referred to i40e doesn't seem to have standard CSR
> to set PCIe relaxed ordering, this CSR like TX&Rx DCA Control Register in 82599, right?
Yes.
i40e datasheet mentions some CSR that can be used to enable/disable PCIe
relaxed ordering in device; however I don't see the exact definition of
those register in datasheet.
(https://www.mail-archive.com/netdev@vger.kernel.org/msg117219.html).
> Is DMA_ATTR_WEAK_ORDERING the same as TX&RX control register in
> 82599?
No.
DMA_ATTR_WEAK_ORDERING applies to the PCIe root complex of the system.
-Tushar
>
> And to enable relax ordering mode in 82599 for SPARC using below codes:
> s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
> {
> u32 i;
>
> /* Clear the rate limiters */
> for (i = 0; i < hw->mac.max_tx_queues; i++) {
> IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
> IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0);
> }
> IXGBE_WRITE_FLUSH(hw);
>
> #ifndef CONFIG_SPARC
> /* Disable relaxed ordering */
> for (i = 0; i < hw->mac.max_tx_queues; i++) {
> u32 regval;
>
> regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
> regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
> IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
> }
>
> for (i = 0; i < hw->mac.max_rx_queues; i++) {
> u32 regval;
>
> regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
> regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
> IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
> IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
> }
> #endif
> return 0;
> }
>
>
>
>> This patch sets PCIe relax ordering for SPARC arch by setting dma attr
>> DMA_ATTR_WEAK_ORDERING for every tx and rx DMA map/unmap.
>> This has shown 10x increase in performance numbers.
>>
>> e.g.
>> iperf TCP test with 10 threads on SPARC S7
>>
>> Test 1: Without this patch
>>
>> [root@brm-snt1-03 net]# iperf -s
>> ------------------------------------------------------------
>> Server listening on TCP port 5001
>> TCP window size: 85.3 KByte (default)
>> ------------------------------------------------------------
>> [ 4] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40926
>> [ 5] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40934
>> [ 6] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40930
>> [ 7] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40928
>> [ 8] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40922
>> [ 9] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40932
>> [ 10] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40920
>> [ 11] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40924
>> [ 14] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40982
>> [ 12] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40980
>> [ ID] Interval Transfer Bandwidth
>> [ 4] 0.0-20.0 sec 566 MBytes 237 Mbits/sec
>> [ 5] 0.0-20.0 sec 532 MBytes 223 Mbits/sec
>> [ 6] 0.0-20.0 sec 537 MBytes 225 Mbits/sec
>> [ 8] 0.0-20.0 sec 546 MBytes 229 Mbits/sec
>> [ 11] 0.0-20.0 sec 592 MBytes 248 Mbits/sec
>> [ 7] 0.0-20.0 sec 539 MBytes 226 Mbits/sec
>> [ 9] 0.0-20.0 sec 572 MBytes 240 Mbits/sec
>> [ 10] 0.0-20.0 sec 604 MBytes 253 Mbits/sec
>> [ 14] 0.0-20.0 sec 567 MBytes 238 Mbits/sec
>> [ 12] 0.0-20.0 sec 511 MBytes 214 Mbits/sec
>> [SUM] 0.0-20.0 sec 5.44 GBytes 2.33 Gbits/sec
>>
>> Test 2: with this patch:
>>
>> [root@brm-snt1-03 net]# iperf -s
>> ------------------------------------------------------------
>> Server listening on TCP port 5001
>> TCP window size: 85.3 KByte (default)
>> ------------------------------------------------------------
>> TCP: request_sock_TCP: Possible SYN flooding on port 5001. Sending cookies.
>> Check SNMP counters.
>> [ 4] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46876
>> [ 5] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46874
>> [ 6] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46872
>> [ 7] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46880
>> [ 8] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46878
>> [ 9] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46884
>> [ 10] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46886
>> [ 11] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46890
>> [ 12] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46888
>> [ 13] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46882
>> [ ID] Interval Transfer Bandwidth
>> [ 4] 0.0-20.0 sec 7.45 GBytes 3.19 Gbits/sec
>> [ 5] 0.0-20.0 sec 7.48 GBytes 3.21 Gbits/sec
>> [ 7] 0.0-20.0 sec 7.34 GBytes 3.15 Gbits/sec
>> [ 8] 0.0-20.0 sec 7.42 GBytes 3.18 Gbits/sec
>> [ 9] 0.0-20.0 sec 7.24 GBytes 3.11 Gbits/sec
>> [ 10] 0.0-20.0 sec 7.40 GBytes 3.17 Gbits/sec
>> [ 12] 0.0-20.0 sec 7.49 GBytes 3.21 Gbits/sec
>> [ 6] 0.0-20.0 sec 7.30 GBytes 3.13 Gbits/sec
>> [ 11] 0.0-20.0 sec 7.44 GBytes 3.19 Gbits/sec
>> [ 13] 0.0-20.0 sec 7.22 GBytes 3.10 Gbits/sec
>> [SUM] 0.0-20.0 sec 73.8 GBytes 31.6 Gbits/sec
>>
>> NOTE: In my testing, this patch does _not_ show any harm to i40e performance
>> numbers on x86.
>>
>> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
>> ---
>> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 69 ++++++++++++++++++++---------
>> drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 +
>> 2 files changed, 49 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>> index 6287bf6..800dca7 100644
>> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>> @@ -551,15 +551,17 @@ static void
>> i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
>> else
>> dev_kfree_skb_any(tx_buffer->skb);
>> if (dma_unmap_len(tx_buffer, len))
>> - dma_unmap_single(ring->dev,
>> - dma_unmap_addr(tx_buffer, dma),
>> - dma_unmap_len(tx_buffer, len),
>> - DMA_TO_DEVICE);
>> + dma_unmap_single_attrs(ring->dev,
>> + dma_unmap_addr(tx_buffer, dma),
>> + dma_unmap_len(tx_buffer, len),
>> + DMA_TO_DEVICE,
>> + ring->dma_attrs);
>> } else if (dma_unmap_len(tx_buffer, len)) {
>> - dma_unmap_page(ring->dev,
>> - dma_unmap_addr(tx_buffer, dma),
>> - dma_unmap_len(tx_buffer, len),
>> - DMA_TO_DEVICE);
>> + dma_unmap_single_attrs(ring->dev,
>> + dma_unmap_addr(tx_buffer, dma),
>> + dma_unmap_len(tx_buffer, len),
>> + DMA_TO_DEVICE,
>> + ring->dma_attrs);
>> }
>>
>> tx_buffer->next_to_watch = NULL;
>> @@ -662,6 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>> struct i40e_tx_buffer *tx_buf;
>> struct i40e_tx_desc *tx_head;
>> struct i40e_tx_desc *tx_desc;
>> + dma_addr_t addr;
>> + size_t size;
>> unsigned int total_bytes = 0, total_packets = 0;
>> unsigned int budget = vsi->work_limit;
>>
>> @@ -696,10 +700,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>> napi_consume_skb(tx_buf->skb, napi_budget);
>>
>> /* unmap skb header data */
>> - dma_unmap_single(tx_ring->dev,
>> - dma_unmap_addr(tx_buf, dma),
>> - dma_unmap_len(tx_buf, len),
>> - DMA_TO_DEVICE);
>> + dma_unmap_single_attrs(tx_ring->dev,
>> + dma_unmap_addr(tx_buf, dma),
>> + dma_unmap_len(tx_buf, len),
>> + DMA_TO_DEVICE,
>> + tx_ring->dma_attrs);
>>
>> /* clear tx_buffer data */
>> tx_buf->skb = NULL;
>> @@ -717,12 +722,15 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>> tx_desc = I40E_TX_DESC(tx_ring, 0);
>> }
>>
>> + addr = dma_unmap_addr(tx_buf, dma);
>> + size = dma_unmap_len(tx_buf, len);
>> /* unmap any remaining paged data */
>> if (dma_unmap_len(tx_buf, len)) {
>> - dma_unmap_page(tx_ring->dev,
>> - dma_unmap_addr(tx_buf, dma),
>> - dma_unmap_len(tx_buf, len),
>> - DMA_TO_DEVICE);
>> + dma_unmap_single_attrs(tx_ring->dev,
>> + addr,
>> + size,
>> + DMA_TO_DEVICE,
>> + tx_ring->dma_attrs);
>> dma_unmap_len_set(tx_buf, len, 0);
>> }
>> }
>> @@ -1010,6 +1018,11 @@ int i40e_setup_tx_descriptors(struct i40e_ring
>> *tx_ring)
>> */
>> tx_ring->size += sizeof(u32);
>> tx_ring->size = ALIGN(tx_ring->size, 4096);
>> +#ifdef CONFIG_SPARC
>> + tx_ring->dma_attrs = DMA_ATTR_WEAK_ORDERING;
>> +#else
>> + tx_ring->dma_attrs = 0;
>> +#endif
>> tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
>> &tx_ring->dma, GFP_KERNEL);
>> if (!tx_ring->desc) {
>> @@ -1053,7 +1066,11 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
>> if (!rx_bi->page)
>> continue;
>>
>> - dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE,
>> DMA_FROM_DEVICE);
>> + dma_unmap_single_attrs(dev,
>> + rx_bi->dma,
>> + PAGE_SIZE,
>> + DMA_FROM_DEVICE,
>> + rx_ring->dma_attrs);
>> __free_pages(rx_bi->page, 0);
>>
>> rx_bi->page = NULL;
>> @@ -1113,6 +1130,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring
>> *rx_ring)
>> /* Round up to nearest 4K */
>> rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
>> rx_ring->size = ALIGN(rx_ring->size, 4096);
>> +#ifdef CONFIG_SPARC
>> + rx_ring->dma_attrs = DMA_ATTR_WEAK_ORDERING;
>> +#else
>> + rx_ring->dma_attrs = 0;
>> +#endif
>> rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
>> &rx_ring->dma, GFP_KERNEL);
>>
>> @@ -1182,7 +1204,8 @@ static bool i40e_alloc_mapped_page(struct
>> i40e_ring *rx_ring,
>> }
>>
>> /* map page for use */
>> - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
>> DMA_FROM_DEVICE);
>> + dma = dma_map_single_attrs(rx_ring->dev, page_address(page),
>> PAGE_SIZE,
>> + DMA_FROM_DEVICE, rx_ring->dma_attrs);
>>
>> /* if mapping failed free memory back to system since
>> * there isn't much point in holding memory we can't use @@ -1695,8
>> +1718,11 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
>> rx_ring->rx_stats.page_reuse_count++;
>> } else {
>> /* we are not reusing the buffer so unmap it */
>> - dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
>> - DMA_FROM_DEVICE);
>> + dma_unmap_single_attrs(rx_ring->dev,
>> + rx_buffer->dma,
>> + PAGE_SIZE,
>> + DMA_FROM_DEVICE,
>> + rx_ring->dma_attrs);
>> }
>>
>> /* clear contents of buffer_info */
>> @@ -2737,7 +2763,8 @@ static inline void i40e_tx_map(struct i40e_ring
>> *tx_ring, struct sk_buff *skb,
>> first->skb = skb;
>> first->tx_flags = tx_flags;
>>
>> - dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
>> + dma = dma_map_single_attrs(tx_ring->dev, skb->data, size,
>> + DMA_TO_DEVICE, tx_ring->dma_attrs);
>>
>> tx_desc = I40E_TX_DESC(tx_ring, i);
>> tx_bi = first;
>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> index 5088405..9a86212 100644
>> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> @@ -327,6 +327,7 @@ struct i40e_ring {
>>
>> unsigned int size; /* length of descriptor ring in bytes */
>> dma_addr_t dma; /* physical address of ring */
>> + unsigned long dma_attrs; /* DMA attributes */
>>
>> struct i40e_vsi *vsi; /* Backreference to associated VSI */
>> struct i40e_q_vector *q_vector; /* Backreference to associated vector
>> */
>> --
>> 1.9.1
>
>
^ permalink raw reply
* Re: driver r8169 suddenly failed
From: Francois Romieu @ 2016-12-27 23:12 UTC (permalink / raw)
To: Robert Grasso; +Cc: Realtek linux nic maintainers, netdev
In-Reply-To: <fd0ba97f-8730-4f71-9bd4-7a7552f673d2@modulonet.fr>
Robert Grasso <robert.grasso@modulonet.fr> :
[...]
> So, what is your opinion :
> - should I broaden my request for help to other teams than yours (kernel
> maintainers) ?
If I had to untangle this mess, I would check that my router is not
configured with an empty dhcp range. Then I would put each and every
interface facing it in promiscuous (tcpdump) capture mode until one
of those is able to negotiate a dhcp lease. I would thereafter replace
it with the r8169 interface and compare the traffic (+ ethtool byte/packet
counters).
--
Ueimor
^ permalink raw reply
* RE: [Intel-wired-lan] [RFC PATCH] i40e: enable PCIe relax ordering for SPARC
From: maowenan @ 2016-12-28 0:23 UTC (permalink / raw)
To: Alexander Duyck, Tushar Dave; +Cc: Jeff Kirsher, intel-wired-lan, Netdev
In-Reply-To: <CAKgT0UcyRSub0NCi7oHX2Vf+N4vQxzwdWmcc+V+jOX_J=RYgfg@mail.gmail.com>
> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
> On Behalf Of Alexander Duyck
> Sent: Tuesday, December 06, 2016 5:55 AM
> To: Tushar Dave
> Cc: Jeff Kirsher; intel-wired-lan; Netdev
> Subject: Re: [Intel-wired-lan] [RFC PATCH] i40e: enable PCIe relax ordering for
> SPARC
>
> On Mon, Dec 5, 2016 at 9:07 AM, Tushar Dave <tushar.n.dave@oracle.com>
> wrote:
> > Unlike previous generation NIC (e.g. ixgbe) i40e doesn't seem to have
> > standard CSR where PCIe relaxed ordering can be set. Without PCIe
> > relax ordering enabled, i40e performance is significantly low on SPARC.
> >
> > This patch sets PCIe relax ordering for SPARC arch by setting dma attr
> > DMA_ATTR_WEAK_ORDERING for every tx and rx DMA map/unmap.
> > This has shown 10x increase in performance numbers.
> >
> > e.g.
> > iperf TCP test with 10 threads on SPARC S7
> >
> > Test 1: Without this patch
> >
> > [root@brm-snt1-03 net]# iperf -s
> > ------------------------------------------------------------
> > Server listening on TCP port 5001
> > TCP window size: 85.3 KByte (default)
> > ------------------------------------------------------------
> > [ 4] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40926 [
> > 5] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40934 [ 6]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40930 [ 7]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40928 [ 8]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40922 [ 9]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40932 [ 10]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40920 [ 11]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40924 [ 14]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40982 [ 12]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40980
> > [ ID] Interval Transfer Bandwidth
> > [ 4] 0.0-20.0 sec 566 MBytes 237 Mbits/sec
> > [ 5] 0.0-20.0 sec 532 MBytes 223 Mbits/sec
> > [ 6] 0.0-20.0 sec 537 MBytes 225 Mbits/sec
> > [ 8] 0.0-20.0 sec 546 MBytes 229 Mbits/sec
> > [ 11] 0.0-20.0 sec 592 MBytes 248 Mbits/sec
> > [ 7] 0.0-20.0 sec 539 MBytes 226 Mbits/sec
> > [ 9] 0.0-20.0 sec 572 MBytes 240 Mbits/sec
> > [ 10] 0.0-20.0 sec 604 MBytes 253 Mbits/sec
> > [ 14] 0.0-20.0 sec 567 MBytes 238 Mbits/sec
> > [ 12] 0.0-20.0 sec 511 MBytes 214 Mbits/sec
> > [SUM] 0.0-20.0 sec 5.44 GBytes 2.33 Gbits/sec
> >
> > Test 2: with this patch:
> >
> > [root@brm-snt1-03 net]# iperf -s
> > ------------------------------------------------------------
> > Server listening on TCP port 5001
> > TCP window size: 85.3 KByte (default)
> > ------------------------------------------------------------
> > TCP: request_sock_TCP: Possible SYN flooding on port 5001. Sending
> > cookies. Check SNMP counters.
> > [ 4] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46876 [
> > 5] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46874 [ 6]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46872 [ 7]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46880 [ 8]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46878 [ 9]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46884 [ 10]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46886 [ 11]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46890 [ 12]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46888 [ 13]
> > local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46882
> > [ ID] Interval Transfer Bandwidth
> > [ 4] 0.0-20.0 sec 7.45 GBytes 3.19 Gbits/sec [ 5] 0.0-20.0 sec
> > 7.48 GBytes 3.21 Gbits/sec [ 7] 0.0-20.0 sec 7.34 GBytes 3.15
> > Gbits/sec [ 8] 0.0-20.0 sec 7.42 GBytes 3.18 Gbits/sec [ 9]
> > 0.0-20.0 sec 7.24 GBytes 3.11 Gbits/sec [ 10] 0.0-20.0 sec 7.40
> > GBytes 3.17 Gbits/sec [ 12] 0.0-20.0 sec 7.49 GBytes 3.21
> > Gbits/sec [ 6] 0.0-20.0 sec 7.30 GBytes 3.13 Gbits/sec [ 11]
> > 0.0-20.0 sec 7.44 GBytes 3.19 Gbits/sec [ 13] 0.0-20.0 sec 7.22
> > GBytes 3.10 Gbits/sec [SUM] 0.0-20.0 sec 73.8 GBytes 31.6
> > Gbits/sec
> >
> > NOTE: In my testing, this patch does _not_ show any harm to i40e
> > performance numbers on x86.
> >
> > Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
>
> You went through and replaced all of the dma_unmap/map_page calls with
> dma_map/unmap_single_attrs. I would prefer you didn't do that. I have
> patches to add the ability to map and unmap pages with attributes that should
> be available for 4.10-rc1 so if you could wait on this patch until then it would be
> preferred.
>
[Mao Wenan] Have you already sent out the related patches? I would like to refer
to them to see how to enable this ability, so that we can adopt the same approach
to configure relaxed ordering through the DCA control register on the 82599 device.
Thank you.
> > ---
> > drivers/net/ethernet/intel/i40e/i40e_txrx.c | 69
> > ++++++++++++++++++++---------
> > drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 +
> > 2 files changed, 49 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > index 6287bf6..800dca7 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > @@ -551,15 +551,17 @@ static void
> i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
> > else
> > dev_kfree_skb_any(tx_buffer->skb);
> > if (dma_unmap_len(tx_buffer, len))
> > - dma_unmap_single(ring->dev,
> > -
> dma_unmap_addr(tx_buffer, dma),
> > - dma_unmap_len(tx_buffer,
> len),
> > - DMA_TO_DEVICE);
> > + dma_unmap_single_attrs(ring->dev,
> > +
> dma_unmap_addr(tx_buffer, dma),
> > +
> dma_unmap_len(tx_buffer, len),
> > + DMA_TO_DEVICE,
> > + ring->dma_attrs);
> > } else if (dma_unmap_len(tx_buffer, len)) {
> > - dma_unmap_page(ring->dev,
> > - dma_unmap_addr(tx_buffer, dma),
> > - dma_unmap_len(tx_buffer, len),
> > - DMA_TO_DEVICE);
> > + dma_unmap_single_attrs(ring->dev,
> > + dma_unmap_addr(tx_buffer,
> dma),
> > + dma_unmap_len(tx_buffer,
> len),
> > + DMA_TO_DEVICE,
> > + ring->dma_attrs);
> > }
> >
> > tx_buffer->next_to_watch = NULL; @@ -662,6 +664,8 @@ static
> > bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > struct i40e_tx_buffer *tx_buf;
> > struct i40e_tx_desc *tx_head;
> > struct i40e_tx_desc *tx_desc;
> > + dma_addr_t addr;
> > + size_t size;
> > unsigned int total_bytes = 0, total_packets = 0;
> > unsigned int budget = vsi->work_limit;
> >
> > @@ -696,10 +700,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > napi_consume_skb(tx_buf->skb, napi_budget);
> >
> > /* unmap skb header data */
> > - dma_unmap_single(tx_ring->dev,
> > - dma_unmap_addr(tx_buf, dma),
> > - dma_unmap_len(tx_buf, len),
> > - DMA_TO_DEVICE);
> > + dma_unmap_single_attrs(tx_ring->dev,
> > + dma_unmap_addr(tx_buf,
> dma),
> > + dma_unmap_len(tx_buf, len),
> > + DMA_TO_DEVICE,
> > + tx_ring->dma_attrs);
> >
> > /* clear tx_buffer data */
> > tx_buf->skb = NULL;
> > @@ -717,12 +722,15 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > tx_desc = I40E_TX_DESC(tx_ring, 0);
> > }
> >
> > + addr = dma_unmap_addr(tx_buf, dma);
> > + size = dma_unmap_len(tx_buf, len);
>
> On some architectures this change could lead to issues since dma_unmap_len
> could be 0 meaning that addr would never be used.
>
> > /* unmap any remaining paged data */
> > if (dma_unmap_len(tx_buf, len)) {
> > - dma_unmap_page(tx_ring->dev,
> > -
> dma_unmap_addr(tx_buf, dma),
> > -
> dma_unmap_len(tx_buf, len),
> > - DMA_TO_DEVICE);
> > +
> dma_unmap_single_attrs(tx_ring->dev,
> > + addr,
> > + size,
> > +
> DMA_TO_DEVICE,
> > +
> > + tx_ring->dma_attrs);
> > dma_unmap_len_set(tx_buf, len, 0);
> > }
> > }
> > @@ -1010,6 +1018,11 @@ int i40e_setup_tx_descriptors(struct i40e_ring
> *tx_ring)
> > */
> > tx_ring->size += sizeof(u32);
> > tx_ring->size = ALIGN(tx_ring->size, 4096);
> > +#ifdef CONFIG_SPARC
> > + tx_ring->dma_attrs = DMA_ATTR_WEAK_ORDERING;
> > +#else
> > + tx_ring->dma_attrs = 0;
> > +#endif
> > tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
> > &tx_ring->dma,
> GFP_KERNEL);
> > if (!tx_ring->desc) {
>
> Also not a fan of adding yet another ring attribute. Is there any reason why you
> couldn't simply add a set of inline functions at the start of i40e_txrx.c that could
> replace the DMA map/unmap operations in this code but pass either 0 or
> DMA_ATTR_WEAK_ORDERING as needed for the drivers? Then the x86 code
> doesn't have to change while the SPARC code will be able to be passed the
> attribute.
>
> > @@ -1053,7 +1066,11 @@ void i40e_clean_rx_ring(struct i40e_ring
> *rx_ring)
> > if (!rx_bi->page)
> > continue;
> >
> > - dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE,
> DMA_FROM_DEVICE);
> > + dma_unmap_single_attrs(dev,
> > + rx_bi->dma,
> > + PAGE_SIZE,
> > + DMA_FROM_DEVICE,
> > + rx_ring->dma_attrs);
> > __free_pages(rx_bi->page, 0);
> >
> > rx_bi->page = NULL;
> > @@ -1113,6 +1130,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring
> *rx_ring)
> > /* Round up to nearest 4K */
> > rx_ring->size = rx_ring->count * sizeof(union
> i40e_32byte_rx_desc);
> > rx_ring->size = ALIGN(rx_ring->size, 4096);
> > +#ifdef CONFIG_SPARC
> > + rx_ring->dma_attrs = DMA_ATTR_WEAK_ORDERING;
> > +#else
> > + rx_ring->dma_attrs = 0;
> > +#endif
> > rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
> > &rx_ring->dma,
> GFP_KERNEL);
> >
> > @@ -1182,7 +1204,8 @@ static bool i40e_alloc_mapped_page(struct
> i40e_ring *rx_ring,
> > }
> >
> > /* map page for use */
> > - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
> DMA_FROM_DEVICE);
> > + dma = dma_map_single_attrs(rx_ring->dev, page_address(page),
> PAGE_SIZE,
> > + DMA_FROM_DEVICE,
> > + rx_ring->dma_attrs);
> >
> > /* if mapping failed free memory back to system since
> > * there isn't much point in holding memory we can't use @@
> > -1695,8 +1718,11 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring
> *rx_ring,
> > rx_ring->rx_stats.page_reuse_count++;
> > } else {
> > /* we are not reusing the buffer so unmap it */
> > - dma_unmap_page(rx_ring->dev, rx_buffer->dma,
> PAGE_SIZE,
> > - DMA_FROM_DEVICE);
> > + dma_unmap_single_attrs(rx_ring->dev,
> > + rx_buffer->dma,
> > + PAGE_SIZE,
> > + DMA_FROM_DEVICE,
> > + rx_ring->dma_attrs);
> > }
> >
> > /* clear contents of buffer_info */ @@ -2737,7 +2763,8 @@
> > static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
> > first->skb = skb;
> > first->tx_flags = tx_flags;
> >
> > - dma = dma_map_single(tx_ring->dev, skb->data, size,
> DMA_TO_DEVICE);
> > + dma = dma_map_single_attrs(tx_ring->dev, skb->data, size,
> > + DMA_TO_DEVICE,
> tx_ring->dma_attrs);
> >
> > tx_desc = I40E_TX_DESC(tx_ring, i);
> > tx_bi = first;
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > index 5088405..9a86212 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> > @@ -327,6 +327,7 @@ struct i40e_ring {
> >
> > unsigned int size; /* length of descriptor ring in
> bytes */
> > dma_addr_t dma; /* physical address of ring
> */
> > + unsigned long dma_attrs; /* DMA attributes */
> >
> > struct i40e_vsi *vsi; /* Backreference to associated
> VSI */
> > struct i40e_q_vector *q_vector; /* Backreference to associated
> > vector */
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Intel-wired-lan mailing list
> > Intel-wired-lan@lists.osuosl.org
> > http://lists.osuosl.org/mailman/listinfo/intel-wired-lan
^ permalink raw reply
* RE: [RFC PATCH] i40e: enable PCIe relax ordering for SPARC
From: maowenan @ 2016-12-28 0:40 UTC (permalink / raw)
To: tndave, jeffrey.t.kirsher@intel.com,
intel-wired-lan@lists.osuosl.org
Cc: netdev@vger.kernel.org, weiyongjun (A), Dingtianhong
In-Reply-To: <67edcc8a-93d2-8245-f24b-47dc13244879@oracle.com>
> -----Original Message-----
> From: tndave [mailto:tushar.n.dave@oracle.com]
> Sent: Wednesday, December 28, 2016 6:28 AM
> To: maowenan; jeffrey.t.kirsher@intel.com; intel-wired-lan@lists.osuosl.org
> Cc: netdev@vger.kernel.org; weiyongjun (A); Dingtianhong
> Subject: Re: [RFC PATCH] i40e: enable PCIe relax ordering for SPARC
>
>
>
> On 12/26/2016 03:39 AM, maowenan wrote:
> >
> >
> >> -----Original Message-----
> >> From: netdev-owner@vger.kernel.org
> >> [mailto:netdev-owner@vger.kernel.org]
> >> On Behalf Of Tushar Dave
> >> Sent: Tuesday, December 06, 2016 1:07 AM
> >> To: jeffrey.t.kirsher@intel.com; intel-wired-lan@lists.osuosl.org
> >> Cc: netdev@vger.kernel.org
> >> Subject: [RFC PATCH] i40e: enable PCIe relax ordering for SPARC
> >>
> >> Unlike previous generation NIC (e.g. ixgbe) i40e doesn't seem to have
> >> standard CSR where PCIe relaxed ordering can be set. Without PCIe
> >> relax ordering enabled, i40e performance is significantly low on SPARC.
> >>
> > [Mao Wenan]Hi Tushar, you have referred to i40e doesn't seem to have
> > standard CSR to set PCIe relaxed ordering, this CSR like TX&Rx DCA Control
> Register in 82599, right?
> Yes.
> i40e datasheet mentions some CSR that can be used to enable/disable PCIe
> relaxed ordering in device; however I don't see the exact definition of those
> register in datasheet.
> (https://www.mail-archive.com/netdev@vger.kernel.org/msg117219.html).
>
> > Is DMA_ATTR_WEAK_ORDERING the same as TX&RX control register in
> 82599?
> No.
> DMA_ATTR_WEAK_ORDERING applies to the PCIe root complex of the system.
>
> -Tushar
I understand that the PCIe Root Complex is the Host Bridge in the CPU that
connects the CPU and memory to the PCIe architecture. So this attribute
DMA_ATTR_WEAK_ORDERING is only applied on the CPU side (the SPARC in your
system); it can't be applied on the i40e itself, is that right?
And it is not the same as the 82599 DCA control register's relaxed ordering bits.
-Mao Wenan
> >
> > And to enable relax ordering mode in 82599 for SPARC using below codes:
> > s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) {
> > u32 i;
> >
> > /* Clear the rate limiters */
> > for (i = 0; i < hw->mac.max_tx_queues; i++) {
> > IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
> > IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0);
> > }
> > IXGBE_WRITE_FLUSH(hw);
> >
> > #ifndef CONFIG_SPARC
> > /* Disable relaxed ordering */
> > for (i = 0; i < hw->mac.max_tx_queues; i++) {
> > u32 regval;
> >
> > regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
> > regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
> > IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval);
> > }
> >
> > for (i = 0; i < hw->mac.max_rx_queues; i++) {
> > u32 regval;
> >
> > regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
> > regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN |
> > IXGBE_DCA_RXCTRL_HEAD_WRO_EN);
> > IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval);
> > }
> > #endif
> > return 0;
> > }
> >
> >
> >
> >> This patch sets PCIe relax ordering for SPARC arch by setting dma
> >> attr DMA_ATTR_WEAK_ORDERING for every tx and rx DMA map/unmap.
> >> This has shown 10x increase in performance numbers.
> >>
> >> e.g.
> >> iperf TCP test with 10 threads on SPARC S7
> >>
> >> Test 1: Without this patch
> >>
> >> [root@brm-snt1-03 net]# iperf -s
> >> ------------------------------------------------------------
> >> Server listening on TCP port 5001
> >> TCP window size: 85.3 KByte (default)
> >> ------------------------------------------------------------
> >> [ 4] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 40926 [
> >> 5] local
> >> 16.0.0.7 port 5001 connected with 16.0.0.1 port 40934 [ 6] local
> >> 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40930 [ 7] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40928 [ 8] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40922 [ 9] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40932 [ 10] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40920 [ 11] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40924 [ 14] local
> >> 16.0.0.7 port 5001 connected with 16.0.0.1 port 40982 [ 12] local
> >> 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 40980
> >> [ ID] Interval Transfer Bandwidth
> >> [ 4] 0.0-20.0 sec 566 MBytes 237 Mbits/sec
> >> [ 5] 0.0-20.0 sec 532 MBytes 223 Mbits/sec
> >> [ 6] 0.0-20.0 sec 537 MBytes 225 Mbits/sec
> >> [ 8] 0.0-20.0 sec 546 MBytes 229 Mbits/sec
> >> [ 11] 0.0-20.0 sec 592 MBytes 248 Mbits/sec
> >> [ 7] 0.0-20.0 sec 539 MBytes 226 Mbits/sec
> >> [ 9] 0.0-20.0 sec 572 MBytes 240 Mbits/sec
> >> [ 10] 0.0-20.0 sec 604 MBytes 253 Mbits/sec
> >> [ 14] 0.0-20.0 sec 567 MBytes 238 Mbits/sec
> >> [ 12] 0.0-20.0 sec 511 MBytes 214 Mbits/sec
> >> [SUM] 0.0-20.0 sec 5.44 GBytes 2.33 Gbits/sec
> >>
> >> Test 2: with this patch:
> >>
> >> [root@brm-snt1-03 net]# iperf -s
> >> ------------------------------------------------------------
> >> Server listening on TCP port 5001
> >> TCP window size: 85.3 KByte (default)
> >> ------------------------------------------------------------
> >> TCP: request_sock_TCP: Possible SYN flooding on port 5001. Sending
> cookies.
> >> Check SNMP counters.
> >> [ 4] local 16.0.0.7 port 5001 connected with 16.0.0.1 port 46876 [
> >> 5] local
> >> 16.0.0.7 port 5001 connected with 16.0.0.1 port 46874 [ 6] local
> >> 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46872 [ 7] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46880 [ 8] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46878 [ 9] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46884 [ 10] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46886 [ 11] local 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46890 [ 12] local
> >> 16.0.0.7 port 5001 connected with 16.0.0.1 port 46888 [ 13] local
> >> 16.0.0.7 port
> >> 5001 connected with 16.0.0.1 port 46882
> >> [ ID] Interval Transfer Bandwidth
> >> [ 4] 0.0-20.0 sec 7.45 GBytes 3.19 Gbits/sec [ 5] 0.0-20.0 sec
> >> 7.48 GBytes 3.21 Gbits/sec [ 7] 0.0-20.0 sec 7.34 GBytes 3.15
> >> Gbits/sec [ 8] 0.0-20.0 sec 7.42 GBytes 3.18 Gbits/sec [ 9]
> >> 0.0-20.0 sec 7.24 GBytes 3.11 Gbits/sec [ 10] 0.0-20.0 sec 7.40
> >> GBytes 3.17 Gbits/sec [ 12] 0.0-20.0 sec 7.49 GBytes 3.21
> >> Gbits/sec [ 6] 0.0-20.0 sec 7.30 GBytes 3.13 Gbits/sec [ 11]
> >> 0.0-20.0 sec 7.44 GBytes 3.19 Gbits/sec [ 13] 0.0-20.0 sec 7.22
> >> GBytes 3.10 Gbits/sec [SUM] 0.0-20.0 sec 73.8 GBytes 31.6
> >> Gbits/sec
> >>
> >> NOTE: In my testing, this patch does _not_ show any harm to i40e
> >> performance numbers on x86.
> >>
> >> Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
> >> ---
> >> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 69
> >> ++++++++++++++++++++---------
> >> ++++++++++++++++++++drivers/net/ethernet/intel/i40e/i40e_txrx.h |
> >> 1 +
> >> 2 files changed, 49 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >> b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >> index 6287bf6..800dca7 100644
> >> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >> @@ -551,15 +551,17 @@ static void
> >> i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
> >> else
> >> dev_kfree_skb_any(tx_buffer->skb);
> >> if (dma_unmap_len(tx_buffer, len))
> >> - dma_unmap_single(ring->dev,
> >> - dma_unmap_addr(tx_buffer, dma),
> >> - dma_unmap_len(tx_buffer, len),
> >> - DMA_TO_DEVICE);
> >> + dma_unmap_single_attrs(ring->dev,
> >> + dma_unmap_addr(tx_buffer, dma),
> >> + dma_unmap_len(tx_buffer, len),
> >> + DMA_TO_DEVICE,
> >> + ring->dma_attrs);
> >> } else if (dma_unmap_len(tx_buffer, len)) {
> >> - dma_unmap_page(ring->dev,
> >> - dma_unmap_addr(tx_buffer, dma),
> >> - dma_unmap_len(tx_buffer, len),
> >> - DMA_TO_DEVICE);
> >> + dma_unmap_single_attrs(ring->dev,
> >> + dma_unmap_addr(tx_buffer, dma),
> >> + dma_unmap_len(tx_buffer, len),
> >> + DMA_TO_DEVICE,
> >> + ring->dma_attrs);
> >> }
> >>
> >> tx_buffer->next_to_watch = NULL;
> >> @@ -662,6 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >> struct i40e_tx_buffer *tx_buf;
> >> struct i40e_tx_desc *tx_head;
> >> struct i40e_tx_desc *tx_desc;
> >> + dma_addr_t addr;
> >> + size_t size;
> >> unsigned int total_bytes = 0, total_packets = 0;
> >> unsigned int budget = vsi->work_limit;
> >>
> >> @@ -696,10 +700,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >> napi_consume_skb(tx_buf->skb, napi_budget);
> >>
> >> /* unmap skb header data */
> >> - dma_unmap_single(tx_ring->dev,
> >> - dma_unmap_addr(tx_buf, dma),
> >> - dma_unmap_len(tx_buf, len),
> >> - DMA_TO_DEVICE);
> >> + dma_unmap_single_attrs(tx_ring->dev,
> >> + dma_unmap_addr(tx_buf, dma),
> >> + dma_unmap_len(tx_buf, len),
> >> + DMA_TO_DEVICE,
> >> + tx_ring->dma_attrs);
> >>
> >> /* clear tx_buffer data */
> >> tx_buf->skb = NULL;
> >> @@ -717,12 +722,15 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >> tx_desc = I40E_TX_DESC(tx_ring, 0);
> >> }
> >>
> >> + addr = dma_unmap_addr(tx_buf, dma);
> >> + size = dma_unmap_len(tx_buf, len);
> >> /* unmap any remaining paged data */
> >> if (dma_unmap_len(tx_buf, len)) {
> >> - dma_unmap_page(tx_ring->dev,
> >> - dma_unmap_addr(tx_buf, dma),
> >> - dma_unmap_len(tx_buf, len),
> >> - DMA_TO_DEVICE);
> >> + dma_unmap_single_attrs(tx_ring->dev,
> >> + addr,
> >> + size,
> >> + DMA_TO_DEVICE,
> >> + tx_ring->dma_attrs);
> >> dma_unmap_len_set(tx_buf, len, 0);
> >> }
> >> }
> >> @@ -1010,6 +1018,11 @@ int i40e_setup_tx_descriptors(struct i40e_ring
> >> *tx_ring)
> >> */
> >> tx_ring->size += sizeof(u32);
> >> tx_ring->size = ALIGN(tx_ring->size, 4096);
> >> +#ifdef CONFIG_SPARC
> >> + tx_ring->dma_attrs = DMA_ATTR_WEAK_ORDERING;
> >> +#else
> >> + tx_ring->dma_attrs = 0;
> >> +#endif
> >> tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
> >> &tx_ring->dma, GFP_KERNEL);
> >> if (!tx_ring->desc) {
> >> @@ -1053,7 +1066,11 @@ void i40e_clean_rx_ring(struct i40e_ring
> *rx_ring)
> >> if (!rx_bi->page)
> >> continue;
> >>
> >> - dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE,
> >> DMA_FROM_DEVICE);
> >> + dma_unmap_single_attrs(dev,
> >> + rx_bi->dma,
> >> + PAGE_SIZE,
> >> + DMA_FROM_DEVICE,
> >> + rx_ring->dma_attrs);
> >> __free_pages(rx_bi->page, 0);
> >>
> >> rx_bi->page = NULL;
> >> @@ -1113,6 +1130,11 @@ int i40e_setup_rx_descriptors(struct i40e_ring
> >> *rx_ring)
> >> /* Round up to nearest 4K */
> >> rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
> >> rx_ring->size = ALIGN(rx_ring->size, 4096);
> >> +#ifdef CONFIG_SPARC
> >> + rx_ring->dma_attrs = DMA_ATTR_WEAK_ORDERING;
> >> +#else
> >> + rx_ring->dma_attrs = 0;
> >> +#endif
> >> rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
> >> &rx_ring->dma, GFP_KERNEL);
> >>
> >> @@ -1182,7 +1204,8 @@ static bool i40e_alloc_mapped_page(struct
> >> i40e_ring *rx_ring,
> >> }
> >>
> >> /* map page for use */
> >> - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
> >> DMA_FROM_DEVICE);
> >> + dma = dma_map_single_attrs(rx_ring->dev, page_address(page),
> >> PAGE_SIZE,
> >> + DMA_FROM_DEVICE, rx_ring->dma_attrs);
> >>
> >> /* if mapping failed free memory back to system since
> >> * there isn't much point in holding memory we can't use @@
> -1695,8
> >> +1718,11 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring
> >> +*rx_ring,
> >> rx_ring->rx_stats.page_reuse_count++;
> >> } else {
> >> /* we are not reusing the buffer so unmap it */
> >> - dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
> >> - DMA_FROM_DEVICE);
> >> + dma_unmap_single_attrs(rx_ring->dev,
> >> + rx_buffer->dma,
> >> + PAGE_SIZE,
> >> + DMA_FROM_DEVICE,
> >> + rx_ring->dma_attrs);
> >> }
> >>
> >> /* clear contents of buffer_info */ @@ -2737,7 +2763,8 @@ static
> >> inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff
> >> *skb,
> >> first->skb = skb;
> >> first->tx_flags = tx_flags;
> >>
> >> - dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
> >> + dma = dma_map_single_attrs(tx_ring->dev, skb->data, size,
> >> + DMA_TO_DEVICE, tx_ring->dma_attrs);
> >>
> >> tx_desc = I40E_TX_DESC(tx_ring, i);
> >> tx_bi = first;
> >> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> >> b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> >> index 5088405..9a86212 100644
> >> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> >> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> >> @@ -327,6 +327,7 @@ struct i40e_ring {
> >>
> >> unsigned int size; /* length of descriptor ring in bytes */
> >> dma_addr_t dma; /* physical address of ring */
> >> + unsigned long dma_attrs; /* DMA attributes */
> >>
> >> struct i40e_vsi *vsi; /* Backreference to associated VSI */
> >> struct i40e_q_vector *q_vector; /* Backreference to associated
> vector
> >> */
> >> --
> >> 1.9.1
> >
> >
^ permalink raw reply
* sunrpc: Illegal context switch in RCU read-side critical section!
From: Dave Jones @ 2016-12-28 0:55 UTC (permalink / raw)
To: netdev-u79uwXL29TY76Z2rM5mHXA; +Cc: linux-nfs-u79uwXL29TY76Z2rM5mHXA
Just noticed this on 4.9. Will try and repro on 4.10rc1 later, but hitting
unrelated boot problems on that machine right now.
===============================
[ INFO: suspicious RCU usage. ]
4.9.0-backup-debug+ #1 Not tainted
-------------------------------
./include/linux/rcupdate.h:557 Illegal context switch in RCU read-side critical section!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 1
5 locks held by kworker/4:1/66:
#0: ("%s"("ipv6_addrconf")){.+.+..}, at: [<ffffffff8e09dd14>] process_one_work+0x184/0x790
#1: ((addr_chk_work).work){+.+...}, at: [<ffffffff8e09dd14>] process_one_work+0x184/0x790
#2: (rtnl_mutex){+.+.+.}, at: [<ffffffff8e9eadc7>] rtnl_lock+0x17/0x20
#3: (rcu_read_lock_bh){......}, at: [<ffffffff8ea8b0a3>] addrconf_verify_rtnl+0x23/0x500
#4: (rcu_read_lock){......}, at: [<ffffffff8e0a6c65>] atomic_notifier_call_chain+0x5/0x110
stack backtrace:
CPU: 4 PID: 66 Comm: kworker/4:1 Not tainted 4.9.0-backup-debug+ #1
Workqueue: ipv6_addrconf addrconf_verify_work
ffffc90000273a28 ffffffff8e5b4ca5 ffff88042ae19780 0000000000000001
ffffc90000273a58 ffffffff8e0d530e 0000000000000000 ffffffff8efcc659
00000000000009a7 ffff8804180b8580 ffffc90000273a80 ffffffff8e0ad2b7
Call Trace:
[<ffffffff8e5b4ca5>] dump_stack+0x68/0x93
[<ffffffff8e0d530e>] lockdep_rcu_suspicious+0xce/0xf0
[<ffffffff8e0ad2b7>] ___might_sleep.part.103+0xa7/0x230
[<ffffffff8e0b74db>] __might_sleep+0x4b/0x90
[<ffffffff8e9b9ed2>] lock_sock_nested+0x32/0xb0
[<ffffffff8e9ba2cb>] sock_setsockopt+0x8b/0xa50
[<ffffffff8e088025>] ? __local_bh_enable_ip+0x65/0xb0
[<ffffffff8e9b5f39>] kernel_setsockopt+0x49/0x50
[<ffffffff8eae7d6a>] svc_tcp_kill_temp_xprt+0x4a/0x60
[<ffffffff8eafa9ef>] svc_age_temp_xprts_now+0x12f/0x1b0
[<ffffffff8e350e32>] nfsd_inet6addr_event+0x192/0x1f0
[<ffffffff8e350ca5>] ? nfsd_inet6addr_event+0x5/0x1f0
[<ffffffff8e0a69f9>] notifier_call_chain+0x39/0xa0
[<ffffffff8e0a6cce>] atomic_notifier_call_chain+0x6e/0x110
[<ffffffff8e0a6c65>] ? atomic_notifier_call_chain+0x5/0x110
[<ffffffff8eac58cb>] inet6addr_notifier_call_chain+0x1b/0x20
[<ffffffff8ea8a89c>] ipv6_del_addr+0x12c/0x200
[<ffffffff8ea8b497>] addrconf_verify_rtnl+0x417/0x500
[<ffffffff8ea8b0a3>] ? addrconf_verify_rtnl+0x23/0x500
[<ffffffff8ea8b593>] addrconf_verify_work+0x13/0x20
[<ffffffff8e09dd9b>] process_one_work+0x20b/0x790
[<ffffffff8e09dd14>] ? process_one_work+0x184/0x790
[<ffffffff8e09e36e>] worker_thread+0x4e/0x490
[<ffffffff8e09e320>] ? process_one_work+0x790/0x790
[<ffffffff8e09e320>] ? process_one_work+0x790/0x790
[<ffffffff8e0a4ecf>] kthread+0xff/0x120
[<ffffffff8e0a4dd0>] ? kthread_worker_fn+0x140/0x140
[<ffffffff8eb14517>] ret_from_fork+0x27/0x40
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH] net: Use kmemdup instead of kmalloc and memcpy
From: David Miller @ 2016-12-28 1:16 UTC (permalink / raw)
To: mayhs11saini; +Cc: netdev, virtualization, mst
In-Reply-To: <1482520498-28259-1-git-send-email-mayhs11saini@gmail.com>
From: Shyam Saini <mayhs11saini@gmail.com>
Date: Sat, 24 Dec 2016 00:44:58 +0530
> when some other buffer is immediately copied into allocated region.
> Replace calls to kmalloc followed by a memcpy with a direct
> call to kmemdup.
>
> Signed-off-by: Shyam Saini <mayhs11saini@gmail.com>
Applied.
^ permalink raw reply
* Re: [PATCH] fddi: skfp: Use more common logging styles
From: David Miller @ 2016-12-28 1:17 UTC (permalink / raw)
To: joe; +Cc: colin.king, linux-kernel, netdev
In-Reply-To: <f58b60bc732ef52d75e9554bff62cb287c8622ae.1482378824.git.joe@perches.com>
Applied.
^ permalink raw reply
* Re: [PATCH] skfp: hwmtm: Use proper logging macros, correct mismatches
From: David Miller @ 2016-12-28 1:17 UTC (permalink / raw)
To: joe; +Cc: colin.king, linux-kernel, netdev
In-Reply-To: <8e5313732124084fd2084df874eed70f8f1af810.1482367105.git.joe@perches.com>
From: Joe Perches <joe@perches.com>
Date: Wed, 21 Dec 2016 16:41:52 -0800
> Logging macros should allow format and argument validation.
> The DB_TX, DB_RX, and DB_GEN macros did not.
>
> Update the macros and uses and add no_printk validation to the
> previously compiled away #ifndef DEBUG variants.
>
> Done with coccinelle and some typing.
>
> Signed-off-by: Joe Perches <joe@perches.com>
Applied.
^ permalink raw reply
* RE: [PATCH v2] net: stmmac: bug fix to synchronize stmmac_open and stmmac_dvr_probe
From: Kweh, Hock Leong @ 2016-12-28 1:40 UTC (permalink / raw)
To: David Miller, f.fainelli@gmail.com
Cc: Joao.Pinto@synopsys.com, peppe.cavallaro@st.com,
seraphin.bonnaffe@st.com, alexandre.torgue@gmail.com,
manabian@gmail.com, niklas.cassel@axis.com, johan@kernel.org,
pavel@ucw.cz, lars.persson@axis.com, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20161227.113405.362102687632949941.davem@davemloft.net>
> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Wednesday, December 28, 2016 12:34 AM
> To: Kweh, Hock Leong <hock.leong.kweh@intel.com>
> Cc: Joao.Pinto@synopsys.com; peppe.cavallaro@st.com;
> seraphin.bonnaffe@st.com; f.fainelli@gmail.com;
> alexandre.torgue@gmail.com; manabian@gmail.com; niklas.cassel@axis.com;
> johan@kernel.org; pavel@ucw.cz; Ong, Boon Leong
> <boon.leong.ong@intel.com>; Voon, Weifeng <weifeng.voon@intel.com>;
> lars.persson@axis.com; netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v2] net: stmmac: bug fix to synchronize stmmac_open and
> stmmac_dvr_probe
>
> From: "Kweh, Hock Leong" <hock.leong.kweh@intel.com>
> Date: Tue, 27 Dec 2016 22:42:36 +0800
>
> > From: "Kweh, Hock Leong" <hock.leong.kweh@intel.com>
>
> You are not the author of this change, do not take credit for it.
>
> You have copied Florian's patch character by character, therefore
> he is the author.
>
> You also didn't CC: the netdev mailing list properly.
Noted & Thanks.
Hi Florian, could you submit this fix from your side so that you are the author?
I will help to test it.
Thanks & Regards,
Wilson
^ permalink raw reply
* [PATCH net] net: stmmac: Fix race between stmmac_drv_probe and stmmac_open
From: Florian Fainelli @ 2016-12-28 2:23 UTC (permalink / raw)
To: netdev
Cc: pavel, Joao.Pinto, seraphin.bonnaffe, alexandre.torgue, manabian,
niklas.cassel, johan, boon.leong.ong, weifeng.voon, lars.persson,
linux-kernel, Florian Fainelli, Giuseppe Cavallaro,
Alexandre Torgue
In-Reply-To: <F54AEECA5E2B9541821D670476DAE19C5A916C0E@PGSMSX102.gar.corp.intel.com>
There is currently a small window during which the network device registered by
stmmac can be made visible, yet all resources, including the clock and MDIO bus,
have not had a chance to be set up; this can cause the following error to
occur:
[ 473.919358] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized):
stmmac_dvr_probe: warning: cannot get CSR clock
[ 473.919382] stmmaceth 0000:01:00.0: no reset control found
[ 473.919412] stmmac - user ID: 0x10, Synopsys ID: 0x42
[ 473.919429] stmmaceth 0000:01:00.0: DMA HW capability register supported
[ 473.919436] stmmaceth 0000:01:00.0: RX Checksum Offload Engine supported
[ 473.919443] stmmaceth 0000:01:00.0: TX Checksum insertion supported
[ 473.919451] stmmaceth 0000:01:00.0 (unnamed net_device) (uninitialized):
Enable RX Mitigation via HW Watchdog Timer
[ 473.921395] libphy: PHY stmmac-1:00 not found
[ 473.921417] stmmaceth 0000:01:00.0 eth0: Could not attach to PHY
[ 473.921427] stmmaceth 0000:01:00.0 eth0: stmmac_open: Cannot attach to
PHY (error: -19)
[ 473.959710] libphy: stmmac: probed
[ 473.959724] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 0 IRQ POLL
(stmmac-1:00) active
[ 473.959728] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 1 IRQ POLL
(stmmac-1:01)
[ 473.959731] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 2 IRQ POLL
(stmmac-1:02)
[ 473.959734] stmmaceth 0000:01:00.0 eth0: PHY ID 01410cc2 at 3 IRQ POLL
(stmmac-1:03)
Fix this by making sure that register_netdev() is the last thing being done,
which guarantees that the clock and the MDIO bus are available.
Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove")
Reported-by: Kweh, Hock Leong <hock.leong.kweh@intel.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bb40382e205d..5910ea51f8f6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3339,13 +3339,6 @@ int stmmac_dvr_probe(struct device *device,
spin_lock_init(&priv->lock);
- ret = register_netdev(ndev);
- if (ret) {
- netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
- __func__, ret);
- goto error_netdev_register;
- }
-
/* If a specific clk_csr value is passed from the platform
* this means that the CSR Clock Range selection cannot be
* changed at run-time and it is fixed. Viceversa the driver'll try to
@@ -3372,11 +3365,14 @@ int stmmac_dvr_probe(struct device *device,
}
}
- return 0;
+ ret = register_netdev(ndev);
+ if (ret)
+ netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
+ __func__, ret);
+
+ return ret;
error_mdio_register:
- unregister_netdev(ndev);
-error_netdev_register:
netif_napi_del(&priv->napi);
error_hw_init:
clk_disable_unprepare(priv->pclk);
--
2.9.3
^ permalink raw reply related
* Re: [PATCH net] net: stmmac: Fix race between stmmac_drv_probe and stmmac_open
From: David Miller @ 2016-12-28 2:32 UTC (permalink / raw)
To: f.fainelli
Cc: netdev, pavel, Joao.Pinto, seraphin.bonnaffe, alexandre.torgue,
manabian, niklas.cassel, johan, boon.leong.ong, weifeng.voon,
lars.persson, linux-kernel, peppe.cavallaro, alexandre.torgue
In-Reply-To: <20161228022306.14442-1-f.fainelli@gmail.com>
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 27 Dec 2016 18:23:06 -0800
> There is currently a small window during which the network device registered by
> stmmac can be made visible, yet all resources, including the clock and MDIO bus,
> have not had a chance to be set up; this can cause the following error to
> occur:
...
> Fix this by making sure that register_netdev() is the last thing being done,
> which guarantees that the clock and the MDIO bus are available.
>
> Fixes: 4bfcbd7abce2 ("stmmac: Move the mdio_register/_unregister in probe/remove")
> Reported-by: Kweh, Hock Leong <hock.leong.kweh@intel.com>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Applied and queued up for -stable, thanks Florian.
^ permalink raw reply
* Re: [PATCH] net: handle no dst on skb in icmp6_send
From: David Miller @ 2016-12-28 2:36 UTC (permalink / raw)
To: hannes; +Cc: dsa, netdev, andreyknvl
In-Reply-To: <1481205857.4073547.812616921.223287BE@webmail.messagingengine.com>
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 08 Dec 2016 15:04:17 +0100
> Hello David,
>
> On Mon, Nov 28, 2016, at 22:13, David Miller wrote:
>> From: David Ahern <dsa@cumulusnetworks.com>
>> Date: Sun, 27 Nov 2016 18:52:53 -0800
>>
>> > Andrey reported the following while fuzzing the kernel with syzkaller:
>> ...
>> > icmp6_send / icmpv6_send is invoked for both rx and tx paths. In both
>> > cases the dst->dev should be preferred for determining the L3 domain
>> > if the dst has been set on the skb. Fallback to the skb->dev if it has
>> > not. This covers the case reported here where icmp6_send is invoked on
>> > Rx before the route lookup.
>> >
>> > Fixes: 5d41ce29e ("net: icmp6_send should use dst dev to determine L3 domain")
>> > Reported-by: Andrey Konovalov <andreyknvl@google.com>
>> > Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
>>
>> Applied, thanks David.
>
> could you queue this patch up for stable. You can remotely kill machines
> with this bug.
Sure, queued up, thanks.
^ permalink raw reply
* net-next is OPEN
From: David Miller @ 2016-12-28 2:34 UTC (permalink / raw)
To: netdev
I know you are all chomping at the bit to bomb me with net-next
changes. :-)
^ permalink raw reply
* Re: [PATCH 01/12] Make and configuration files.
From: Rami Rosen @ 2016-12-28 4:47 UTC (permalink / raw)
To: David VomLehn
Cc: Netdev, Simon Edelhaus, Dmitrii Tarakanov, Alexander Loktionov
In-Reply-To: <9cc1565a3a398b4f70248ca98d12991071142682.1482844556.git.vomlehn@texas.net>
Hi, David,
For the Makefile, you should follow the pattern which is common in
Linux Kernel Ethernet drivers, for example,
http://lxr.free-electrons.com/source/drivers/net/ethernet/intel/i40e/Makefile or
http://lxr.free-electrons.com/source/drivers/net/ethernet/mellanox/mlx5/core/Makefile
I don't think I have ever seen "-j" used in a kernel module Makefile.
Apart from that, "-j4" is tuned for one platform with a given number
of cores, and of course there are platforms with many more cores,
for which it is less suitable. You can pass "-j" when running
"make" from the command line; there is no justification for putting it in a
Makefile:
>+all:
>+ $(MAKE) -j4 CC=$(CC) -C $(BUILD_DIR) M=$(PWD) modules
>+
>+dox: .doxygen
>+ @doxygen $<
>+
>+clean:
>+ $(MAKE) -j4 -C $(BUILD_DIR) M=$(PWD) clean
I don't think I have ever encountered load/unload targets in Linux kernel
Makefiles (out-of-tree projects aside):
>+load:
>+ insmod ./$(TARGET).ko
>+
>+unload:
>+ rmmod ./$(TARGET).ko
Regards,
Rami Rosen
^ permalink raw reply
* Re: [PATCH 05/12] Support for NIC-specific code
From: Rami Rosen @ 2016-12-28 5:21 UTC (permalink / raw)
To: David VomLehn
Cc: Netdev, Simon Edelhaus, Dmitrii Tarakanov, Alexander Loktionov
In-Reply-To: <122d7e0633e8a8ddfbb0c233d6896254b2fa3eef.1482844668.git.vomlehn@texas.net>
Hi, David,
Several nitpicks and comments, from a brief overview:
The commented label //err_exit: should be removed
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
> @@ -0,0 +1,993 @@
> +//err_exit:
> +//err_exit:
Shouldn't aq_nic_rss_init() be static? Isn't it called only from
aq_nic_cfg_init_defaults()?
Also, it always returns 0, so shouldn't it be void as well? (If so, also
remove the check of its return code where it is invoked in
aq_nic_cfg_init_defaults().)
> +int aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
> +{
> + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
> + struct aq_receive_scale_parameters *rss_params = &cfg->aq_rss;
> + int i = 0;
> +
...
> + return 0;
> +}
Shouldn't aq_nic_ndev_alloc() be static ? Isn't it invoked only from
aq_nic_alloc_cold()?
> +struct net_device *aq_nic_ndev_alloc(void)
> +{
...
> +}
> +
> +static unsigned int aq_nic_map_skb_lso(struct aq_nic_s *self,
> + struct sk_buff *skb,
> + struct aq_ring_buff_s *dx)
> +{
> + unsigned int ret = 0U;
> +
> + dx->flags = 0U;
> + dx->len_pkt = skb->len;
> + dx->len_l2 = ETH_HLEN;
> + dx->len_l3 = ip_hdrlen(skb);
> + dx->len_l4 = tcp_hdrlen(skb);
> + dx->mss = skb_shinfo(skb)->gso_size;
> + dx->is_txc = 1U;
> + ret = 1U;
> +
Why not remove this "ret" variable and simply return 1? The method
always returns 1:
> + return ret;
> +}
> +
> +int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
> +{
> + struct aq_ring_s *ring = NULL;
> + unsigned int frags = 0U;
> + unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
> + unsigned int tc = 0U;
> + int err = 0;
> + bool is_nic_in_bad_state;
> + bool is_locked = false;
> + bool is_busy = false;
> + struct aq_ring_buff_s buffers[AQ_CFG_SKB_FRAGS_MAX];
> +
> + frags = skb_shinfo(skb)->nr_frags + 1;
> +
> + ring = self->aq_ring_tx[AQ_NIC_TCVEC2RING(self, tc, vec)];
> +
> + atomic_inc(&self->busy_count);
> + is_busy = true;
> +
> + if (frags > AQ_CFG_SKB_FRAGS_MAX) {
> + dev_kfree_skb_any(skb);
> + goto err_exit;
> + }
> +
> + is_nic_in_bad_state = AQ_OBJ_TST(self, AQ_NIC_FLAGS_IS_NOT_TX_READY) ||
> + (aq_ring_avail_dx(ring) < AQ_CFG_SKB_FRAGS_MAX);
> +
> + if (is_nic_in_bad_state) {
> + aq_nic_ndev_queue_stop(self, ring->idx);
> + err = NETDEV_TX_BUSY;
> + goto err_exit;
> + }
> +
Using a bare inner block like this is uncommon (except under #ifdef,
and even there it is not very common). I suggest moving "unsigned
int trys" to the variable definitions at the beginning of the method
and removing the opening and closing braces of the following block:
> + {
> + unsigned int trys = AQ_CFG_LOCK_TRYS;
> +
> + frags = aq_nic_map_skb(self, skb, &buffers[0]);
> +
> + do {
> + is_locked = spin_trylock(&ring->lock);
> + } while (--trys && !is_locked);
> + if (!(is_locked)) {
> + err = NETDEV_TX_BUSY;
> + goto err_exit;
> + }
> +
Usually you don't let the MTU be less than 68, for example:
http://lxr.free-electrons.com/source/drivers/net/ethernet/intel/i40e/i40e_main.c#L2246
See also RFC 791:
https://tools.ietf.org/html/rfc791
> +int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu)
> +{
> + int err = 0;
> +
> + if (new_mtu > self->aq_hw_caps.mtu) {
> + err = 0;
> + goto err_exit;
> + }
> + self->aq_nic_cfg.mtu = new_mtu;
> +
> +err_exit:
> + return err;
> +}
> +
> diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> new file mode 100644
> index 0000000..89958e7
> --- /dev/null
> +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
> @@ -0,0 +1,111 @@
> +/*
> + * Aquantia Corporation Network Driver
> + * Copyright (C) 2014-2016 Aquantia Corporation. All rights reserved
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + */
> +
> +/*
Should be, of course, aq_nic.h:
> + * File aq_nic.c: Declaration of common code for NIC.
> + */
> +
Regards,
Rami Rosen
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox