* [PATCH 2/3] r8169: RxFIFO overflow oddities with 8168 chipsets
From: Francois Romieu @ 2011-02-03 17:26 UTC (permalink / raw)
To: davem; +Cc: netdev, Ivan Vecera, Hayes Wang
Some experiment-based action to prevent my 8168 chipsets locking-up hard
in the irq handler under load (pktgen ~1Mpps). Apparently a reset is not
always mandatory (is it at all ?).
- RTL_GIGA_MAC_VER_12
- RTL_GIGA_MAC_VER_25
Missed ~55% packets. Note:
- this is an old SiS 965L motherboard
- the 8168 chipset emits (lots of) control frames towards the sender
- RTL_GIGA_MAC_VER_26
The chipset does not go into a frenzy of mac control pause when it
crashes yet but it can still be crashed. It needs more work.
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Cc: Ivan Vecera <ivecera@redhat.com>
Cc: Hayes <hayeswang@realtek.com>
---
drivers/net/r8169.c | 30 +++++++++++++++++++++---------
1 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 9ab3b43..40dabe2 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -973,7 +973,8 @@ static void __rtl8169_check_link_status(struct net_device *dev,
if (pm)
pm_request_resume(&tp->pci_dev->dev);
netif_carrier_on(dev);
- netif_info(tp, ifup, dev, "link up\n");
+ if (net_ratelimit())
+ netif_info(tp, ifup, dev, "link up\n");
} else {
netif_carrier_off(dev);
netif_info(tp, ifdown, dev, "link down\n");
@@ -4640,13 +4641,24 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
break;
}
- /* Work around for rx fifo overflow */
- if (unlikely(status & RxFIFOOver) &&
- (tp->mac_version == RTL_GIGA_MAC_VER_11 ||
- tp->mac_version == RTL_GIGA_MAC_VER_22)) {
- netif_stop_queue(dev);
- rtl8169_tx_timeout(dev);
- break;
+ if (unlikely(status & RxFIFOOver)) {
+ switch (tp->mac_version) {
+ /* Work around for rx fifo overflow */
+ case RTL_GIGA_MAC_VER_11:
+ case RTL_GIGA_MAC_VER_22:
+ case RTL_GIGA_MAC_VER_26:
+ netif_stop_queue(dev);
+ rtl8169_tx_timeout(dev);
+ goto done;
+ /* Experimental science. Pktgen proof. */
+ case RTL_GIGA_MAC_VER_12:
+ case RTL_GIGA_MAC_VER_25:
+ if (status == RxFIFOOver)
+ goto done;
+ break;
+ default:
+ break;
+ }
}
if (unlikely(status & SYSErr)) {
@@ -4682,7 +4694,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
(status & RxFIFOOver) ? (status | RxOverflow) : status);
status = RTL_R16(IntrStatus);
}
-
+done:
return IRQ_RETVAL(handled);
}
--
1.7.3.5
^ permalink raw reply related
* [PATCH 1/3] r8169: use RxFIFO overflow workaround for 8168c chipset
From: Francois Romieu @ 2011-02-03 17:26 UTC (permalink / raw)
To: davem; +Cc: netdev, Ivan Vecera, Hayes Wang
I found that one of the 8168c chipsets (concretely XID 1c4000c0) starts
generating RxFIFO overflow errors. The result is an infinite loop in
interrupt handler as the RxFIFOOver is handled only for ...MAC_VER_11.
With the workaround everything goes fine.
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
Cc: Hayes <hayeswang@realtek.com>
---
drivers/net/r8169.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index bde7d61..9ab3b43 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -3757,7 +3757,8 @@ static void rtl_hw_start_8168(struct net_device *dev)
RTL_W16(IntrMitigate, 0x5151);
/* Work around for RxFIFO overflow. */
- if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
+ if (tp->mac_version == RTL_GIGA_MAC_VER_11 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_22) {
tp->intr_event |= RxFIFOOver | PCSTimeout;
tp->intr_event &= ~RxOverflow;
}
@@ -4641,7 +4642,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
/* Work around for rx fifo overflow */
if (unlikely(status & RxFIFOOver) &&
- (tp->mac_version == RTL_GIGA_MAC_VER_11)) {
+ (tp->mac_version == RTL_GIGA_MAC_VER_11 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_22)) {
netif_stop_queue(dev);
rtl8169_tx_timeout(dev);
break;
--
1.7.3.5
^ permalink raw reply related
* [PATCH 0/3] r8169 driver fixes
From: Francois Romieu @ 2011-02-03 17:26 UTC (permalink / raw)
To: davem; +Cc: netdev, Ivan Vecera, Hayes Wang
The following series includes Ivan's RxFIFO overflow fix and similar
changes I made after testing with various 8168 chipsets.
The series is available as
git://git.kernel.org/pub/scm/linux/kernel/git/romieu/netdev-2.6.git r8169-davem
Distance from 'master' (2ba451421b23636c45fabfa522858c5c124b3673)
-----------------------------------------------------------------
0331b3b022b9724019e4c42fded4a01eaa6946f5
407786ae1960eef585f97bfa7625a36512925de0
e4b6fa3d3d3db4fac32904ae12efe199de8efed0
Diffstat
--------
drivers/net/r8169.c | 41 ++++++++++++++++++++++++++++++++---------
1 files changed, 32 insertions(+), 9 deletions(-)
Shortlog
--------
Francois Romieu (2):
r8169: RxFIFO overflow oddities with 8168 chipsets
r8169: prevent RxFIFO induced loops in the irq handler.
Ivan Vecera (1):
r8169: use RxFIFO overflow workaround for 8168c chipset
Patch
-----
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index bde7d61..59ccf0c 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -973,7 +973,8 @@ static void __rtl8169_check_link_status(struct net_device *dev,
if (pm)
pm_request_resume(&tp->pci_dev->dev);
netif_carrier_on(dev);
- netif_info(tp, ifup, dev, "link up\n");
+ if (net_ratelimit())
+ netif_info(tp, ifup, dev, "link up\n");
} else {
netif_carrier_off(dev);
netif_info(tp, ifdown, dev, "link down\n");
@@ -3757,7 +3758,8 @@ static void rtl_hw_start_8168(struct net_device *dev)
RTL_W16(IntrMitigate, 0x5151);
/* Work around for RxFIFO overflow. */
- if (tp->mac_version == RTL_GIGA_MAC_VER_11) {
+ if (tp->mac_version == RTL_GIGA_MAC_VER_11 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_22) {
tp->intr_event |= RxFIFOOver | PCSTimeout;
tp->intr_event &= ~RxOverflow;
}
@@ -4639,12 +4641,33 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
break;
}
- /* Work around for rx fifo overflow */
- if (unlikely(status & RxFIFOOver) &&
- (tp->mac_version == RTL_GIGA_MAC_VER_11)) {
- netif_stop_queue(dev);
- rtl8169_tx_timeout(dev);
- break;
+ if (unlikely(status & RxFIFOOver)) {
+ switch (tp->mac_version) {
+ /* Work around for rx fifo overflow */
+ case RTL_GIGA_MAC_VER_11:
+ case RTL_GIGA_MAC_VER_22:
+ case RTL_GIGA_MAC_VER_26:
+ netif_stop_queue(dev);
+ rtl8169_tx_timeout(dev);
+ goto done;
+ /* Testers needed. */
+ case RTL_GIGA_MAC_VER_17:
+ case RTL_GIGA_MAC_VER_19:
+ case RTL_GIGA_MAC_VER_20:
+ case RTL_GIGA_MAC_VER_21:
+ case RTL_GIGA_MAC_VER_23:
+ case RTL_GIGA_MAC_VER_24:
+ case RTL_GIGA_MAC_VER_27:
+ case RTL_GIGA_MAC_VER_28:
+ /* Experimental science. Pktgen proof. */
+ case RTL_GIGA_MAC_VER_12:
+ case RTL_GIGA_MAC_VER_25:
+ if (status == RxFIFOOver)
+ goto done;
+ break;
+ default:
+ break;
+ }
}
if (unlikely(status & SYSErr)) {
@@ -4680,7 +4703,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
(status & RxFIFOOver) ? (status | RxOverflow) : status);
status = RTL_R16(IntrStatus);
}
-
+done:
return IRQ_RETVAL(handled);
}
--
Ueimor
祝你 新年快乐,兔年吉祥,心想事成,事事顺利, 万事如意,恭喜发财 !
^ permalink raw reply related
* Re: Network performance with small packets
From: Shirley Ma @ 2011-02-03 17:18 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Krishna Kumar2, David Miller, kvm, mashirle, netdev, netdev-owner,
Sridhar Samudrala, Steve Dobbelstein
In-Reply-To: <20110203162042.GA10028@redhat.com>
On Thu, 2011-02-03 at 18:20 +0200, Michael S. Tsirkin wrote:
> Just a thought: does it help to make tx queue len of the
> virtio device smaller?
Yes, that's what I did before; reducing txqueuelen causes the qdisc to drop
packets early. But it's hard to tune txqueuelen for a
performance gain: I tried on different systems, and each required different
values.
Also, I tried another patch: instead of dropping packets, I used a
timer (2 jiffies) to enable/disable the queue on the guest without interrupt
notification. It gets better performance than the original but worse
performance than dropping packets, because netif stop/wake-up happens too
often.
vhost definitely needs to be improved for handling small message sizes.
It's unable to handle the small-message packet rate for queue size 256,
even with ring size 1024. QEMU does not seem to allow increasing the TX
ring size to 2K (qemu_kvm fails to start, with no errors), so I am not able
to test it out.
Thanks
Shirley
^ permalink raw reply
* Re: [PATCH] NETFILTER module xt_hmark new target for HASH MARK
From: Jan Engelhardt @ 2011-02-03 16:32 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: Hans Schillstrom, kaber@trash.net,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
hans@schillstrom.com
In-Reply-To: <4D4AD305.2060505@netfilter.org>
On Thursday 2011-02-03 17:08, Pablo Neira Ayuso wrote:
>>> Hm, this is actually not straight forward to implement, you'll have to
>>> use hook functions to avoid the module dependencies with conntrack and
>>> that's pretty annoying.
>>>
>>> I don't come up with a good solution for this.
>>
>> If it loads conntrack always, there is the option to shovel it
>> into xt_connmark.c.
>
>the problem is that Hans wants this not to depend on conntrack always.
Well, you probably won't get around the nf_conntrack module dependency,
but conntrack can still be disabled through CT --notrack
if one does not like the runtime cost.
^ permalink raw reply
* Re: Network performance with small packets
From: Michael S. Tsirkin @ 2011-02-03 16:20 UTC (permalink / raw)
To: Shirley Ma
Cc: Krishna Kumar2, David Miller, kvm, mashirle, netdev, netdev-owner,
Sridhar Samudrala, Steve Dobbelstein
In-Reply-To: <1296748680.25430.169.camel@localhost.localdomain>
On Thu, Feb 03, 2011 at 07:58:00AM -0800, Shirley Ma wrote:
> On Thu, 2011-02-03 at 08:13 +0200, Michael S. Tsirkin wrote:
> > > Initial TCP_STREAM performance results I got for guest to local
> > host
> > > 4.2Gb/s for 1K message size, (vs. 2.5Gb/s)
> > > 6.2Gb/s for 2K message size, and (vs. 3.8Gb/s)
> > > 9.8Gb/s for 4K message size. (vs.5.xGb/s)
> >
> > What is the average packet size, # bytes per ack, and the # of
> > interrupts
> > per packet? It could be that just slowing down transmission
> > makes GSO work better.
>
> There is no TX interrupts with dropping packet.
>
> GSO/TSO is the key for small message performance, w/o GSO/TSO, the
> performance is limited to about 2Gb/s no matter how big the message size
> it is. I think any work we try here will increase large packet size
> rate. BTW for dropping packet, TCP increased fast retrans, not slow
> start.
>
> I will collect tcpdump, netstart before and after data to compare packet
> size/rate w/o w/i the patch.
>
> Thanks
> Shirley
Just a thought: does it help to make tx queue len of the
virtio device smaller?
E.g. match the vq size?
--
MST
^ permalink raw reply
* Re: 2.6.38-rc3-git1: Reported regressions 2.6.36 -> 2.6.37
From: Takashi Iwai @ 2011-02-03 16:11 UTC (permalink / raw)
To: Linus Torvalds
Cc: Carlos R. Mafra, Keith Packard, Dave Airlie, Rafael J. Wysocki,
Linux Kernel Mailing List, Maciej Rutecki, Florian Mickler,
Andrew Morton, Kernel Testers List, Network Development,
Linux ACPI, Linux PM List, Linux SCSI List, Linux Wireless List,
DRI
In-Reply-To: <AANLkTikELiGS+fM5WguZS=Fo-sSXmej1UnKWqCFmnE0k@mail.gmail.com>
At Thu, 3 Feb 2011 07:42:05 -0800,
Linus Torvalds wrote:
>
> On Thu, Feb 3, 2011 at 3:23 AM, Carlos R. Mafra <crmafra2@gmail.com> wrote:
> > On Thu 3.Feb'11 at 1:03:41 +0100, Rafael J. Wysocki wrote:
> >>
> >> If you know of any other unresolved post-2.6.36 regressions, please let us know
> >> either and we'll add them to the list. Also, please let us know if any
> >> of the entries below are invalid.
> >
> > I'm sorry if I'm overlooking something, but as far as I can see the regression
> > reported here:
> >
> > https://lkml.org/lkml/2011/1/24/457
> >
> > is not in the list (update on that report: reverting that commit on top of
> > 2.6.37 fixes the issue).
>
> Ok, added Keith and Dave to the cc, since they are the signers of that commit.
>
> > After some time, I also ended up finding an earlier report in the kernel bugzilla
> > which I think is the same regression (it was bisected to the same commit):
> >
> > https://bugzilla.kernel.org/show_bug.cgi?id=24982
> >
> > but I do not see it in the list either, even though it's marked as a
> > regression in the bugzilla.
> >
> > The issue was also present in 2.6.38-rc2 last time I tested.
>
> Just to confirm, can you also check -rc3? I'm pretty sure nothing has
> changed, but there were a few drm patches after -rc2, so it's always
> good to double-check.
The problem I reported in the bugzilla above is still present in
2.6.38-rc3. I'm pretty sure that's the same issue as Carlos' case.
thanks,
Takashi
^ permalink raw reply
* Re: [PATCH] NETFILTER module xt_hmark new target for HASH MARK
From: Pablo Neira Ayuso @ 2011-02-03 16:08 UTC (permalink / raw)
To: Jan Engelhardt
Cc: Hans Schillstrom, kaber@trash.net,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
hans@schillstrom.com
In-Reply-To: <alpine.LNX.2.01.1102031706010.28180@obet.zrqbmnf.qr>
On 03/02/11 17:06, Jan Engelhardt wrote:
> On Thursday 2011-02-03 17:01, Pablo Neira Ayuso wrote:
>
>> On 03/02/11 16:42, Pablo Neira Ayuso wrote:
>>> On 03/02/11 15:23, Hans Schillstrom wrote:
>>>> On Thu, 2011-02-03 at 14:51 +0100, Pablo Neira Ayuso wrote:
>>>>> On 03/02/11 14:34, Hans Schillstrom wrote:
>>>>> this assumption is not valid in NAT handlings.
>>>>
>>>> That's true, because I want to avoid conntrack
>>>>
>>>>> If you want consistent hashing with NAT handlings you'll have to make
>>>>> this stateful and use the conntrack source and reply directions of the
>>>>> original tuples (thus making it stateful). That may be a problem because
>>>>> some people may want to use this without enabling connection tracking.
>>>>
>>>> What about a compilation switch or a sysctl ?
>>>
>>> or better some option for iptables.
>>
>> Hm, this is actually not straight forward to implement, you'll have to
>> use hook functions to avoid the module dependencies with conntrack and
>> that's pretty annoying.
>>
>> I don't come up with a good solution for this.
>
> If it loads conntrack always, there is the option to shovel it
> into xt_connmark.c.
the problem is that Hans wants this not to depend on conntrack always.
^ permalink raw reply
* Re: [PATCH] NETFILTER module xt_hmark new target for HASH MARK
From: Jan Engelhardt @ 2011-02-03 16:06 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: Hans Schillstrom, kaber@trash.net,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
hans@schillstrom.com
In-Reply-To: <4D4AD157.50707@netfilter.org>
On Thursday 2011-02-03 17:01, Pablo Neira Ayuso wrote:
>On 03/02/11 16:42, Pablo Neira Ayuso wrote:
>> On 03/02/11 15:23, Hans Schillstrom wrote:
>>> On Thu, 2011-02-03 at 14:51 +0100, Pablo Neira Ayuso wrote:
>>>> On 03/02/11 14:34, Hans Schillstrom wrote:
>>>> this assumption is not valid in NAT handlings.
>>>
>>> That's true, because I want to avoid conntrack
>>>
>>>> If you want consistent hashing with NAT handlings you'll have to make
>>>> this stateful and use the conntrack source and reply directions of the
>>>> original tuples (thus making it stateful). That may be a problem because
>>>> some people may want to use this without enabling connection tracking.
>>>
>>> What about a compilation switch or a sysctl ?
>>
>> or better some option for iptables.
>
>Hm, this is actually not straight forward to implement, you'll have to
>use hook functions to avoid the module dependencies with conntrack and
>that's pretty annoying.
>
>I don't come up with a good solution for this.
If it loads conntrack always, there is the option to shovel it
into xt_connmark.c.
^ permalink raw reply
* Re: [PATCH] NETFILTER module xt_hmark new target for HASH MARK
From: Pablo Neira Ayuso @ 2011-02-03 16:01 UTC (permalink / raw)
To: Hans Schillstrom
Cc: kaber@trash.net, jengelh@medozas.de,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
hans@schillstrom.com
In-Reply-To: <4D4ACCCB.8030902@netfilter.org>
On 03/02/11 16:42, Pablo Neira Ayuso wrote:
> On 03/02/11 15:23, Hans Schillstrom wrote:
>> On Thu, 2011-02-03 at 14:51 +0100, Pablo Neira Ayuso wrote:
>>> On 03/02/11 14:34, Hans Schillstrom wrote:
>>> this assumption is not valid in NAT handlings.
>>
>> That's true, because I want to avoid conntrack
>>
>>> If you want consistent hashing with NAT handlings you'll have to make
>>> this stateful and use the conntrack source and reply directions of the
>>> original tuples (thus making it stateful). That may be a problem because
>>> some people may want to use this without enabling connection tracking.
>>
>> What about a compilation switch or a sysctl ?
>
> or better some option for iptables.
Hm, this is actually not straightforward to implement; you'll have to
use hook functions to avoid the module dependencies with conntrack and
that's pretty annoying.
I can't come up with a good solution for this.
>>> Are you using this for (uplink) load balancing?
>>
>> Actually in both ways
>> - in front of a bunch of ipvs
to make some preliminary load-sharing between the load balancers?
>> - and in the payloads for outgoing traffic.
and then to select the uplink, right?
^ permalink raw reply
* Re: Network performance with small packets
From: Shirley Ma @ 2011-02-03 15:58 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Krishna Kumar2, David Miller, kvm, mashirle, netdev, netdev-owner,
Sridhar Samudrala, Steve Dobbelstein
In-Reply-To: <20110203061326.GC22230@redhat.com>
On Thu, 2011-02-03 at 08:13 +0200, Michael S. Tsirkin wrote:
> > Initial TCP_STREAM performance results I got for guest to local
> host
> > 4.2Gb/s for 1K message size, (vs. 2.5Gb/s)
> > 6.2Gb/s for 2K message size, and (vs. 3.8Gb/s)
> > 9.8Gb/s for 4K message size. (vs.5.xGb/s)
>
> What is the average packet size, # bytes per ack, and the # of
> interrupts
> per packet? It could be that just slowing down transmission
> makes GSO work better.
There is no TX interrupts with dropping packet.
GSO/TSO is the key for small message performance, w/o GSO/TSO, the
performance is limited to about 2Gb/s no matter how big the message size
it is. I think any work we try here will increase large packet size
rate. BTW for dropping packet, TCP increased fast retrans, not slow
start.
I will collect tcpdump and netstat data before and after, to compare packet
size/rate without and with the patch.
Thanks
Shirley
^ permalink raw reply
* Re: 2.6.38-rc3-git1: Reported regressions 2.6.36 -> 2.6.37
From: Linus Torvalds @ 2011-02-03 15:42 UTC (permalink / raw)
To: Carlos R. Mafra, Keith Packard, Dave Airlie
Cc: Linux SCSI List, Linux ACPI, Network Development,
Linux Wireless List, Linux Kernel Mailing List, DRI,
Rafael J. Wysocki, Florian Mickler, Andrew Morton,
Kernel Testers List, Linux PM List, Maciej Rutecki
In-Reply-To: <20110203112316.GA3718@linux-yscl.site>
On Thu, Feb 3, 2011 at 3:23 AM, Carlos R. Mafra <crmafra2@gmail.com> wrote:
> On Thu 3.Feb'11 at 1:03:41 +0100, Rafael J. Wysocki wrote:
>>
>> If you know of any other unresolved post-2.6.36 regressions, please let us know
>> either and we'll add them to the list. Also, please let us know if any
>> of the entries below are invalid.
>
> I'm sorry if I'm overlooking something, but as far as I can see the regression
> reported here:
>
> https://lkml.org/lkml/2011/1/24/457
>
> is not in the list (update on that report: reverting that commit on top of
> 2.6.37 fixes the issue).
Ok, added Keith and Dave to the cc, since they are the signers of that commit.
> After some time, I also ended up finding an earlier report in the kernel bugzilla
> which I think is the same regression (it was bisected to the same commit):
>
> https://bugzilla.kernel.org/show_bug.cgi?id=24982
>
> but I do not see it in the list either, even though it's marked as a
> regression in the bugzilla.
>
> The issue was also present in 2.6.38-rc2 last time I tested.
Just to confirm, can you also check -rc3? I'm pretty sure nothing has
changed, but there were a few drm patches after -rc2, so it's always
good to double-check.
Keithp?
Linus
^ permalink raw reply
* Re: [PATCH] NETFILTER module xt_hmark new target for HASH MARK
From: Pablo Neira Ayuso @ 2011-02-03 15:42 UTC (permalink / raw)
To: Hans Schillstrom
Cc: kaber@trash.net, jengelh@medozas.de,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
hans@schillstrom.com
In-Reply-To: <1296742995.6662.57.camel@seasc0214>
On 03/02/11 15:23, Hans Schillstrom wrote:
> On Thu, 2011-02-03 at 14:51 +0100, Pablo Neira Ayuso wrote:
>> On 03/02/11 14:34, Hans Schillstrom wrote:
>> this assumption is not valid in NAT handlings.
>
> That's true, because I want to avoid conntrack
>
>> If you want consistent hashing with NAT handlings you'll have to make
>> this stateful and use the conntrack source and reply directions of the
>> original tuples (thus making it stateful). That may be a problem because
>> some people may want to use this without enabling connection tracking.
>
> What about a compilation switch or a sysctl ?
or better some option for iptables.
>> Are you using this for (uplink) load balancing?
>
> Actually in both ways
> - in front of a bunch of ipvs
> - and in the payloads for outgoing traffic.
>
>> Could you also include one realistic example in the patch description on
>> how this is used?
> Sure, I guess you mean some nice ascii graphics,
> iptables and ip route commands
That would be great, for the record.
>> If this is accepted, I think this has to be merge with the (already
>> overloaded) MARK target.
>
> I have no opinion about that, others might have.
Better put it in the MARK target with a new revision. I think that
Patrick is going to ask you this.
I don't know why I had the impression that MARK is overloaded; it's
actually fine at first glance at the code.
^ permalink raw reply
* ipv6 doesn't choose correct source address
From: Stephen Clark @ 2011-02-03 15:10 UTC (permalink / raw)
To: Linux Kernel Network Developers
Hello,
I have a Linux 2.6.32.26-175.fc12 box with
two possible ipv6 default gateways. So I was trying
to have two different ipv6 addresses on my eth0 interface.
Then by simply changing my default ipv6 gateway have packets
routed appropriately.
But when I do this the source address in the packet always is the
first ipv6 address and not the one that should be associated with
the corresponding default route.
ip -6 addr sh
4: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qlen 1000
inet6 2001:470:34::129:91/48 scope global
valid_lft forever preferred_lft forever
inet6 2001:4830:28::129:91/48 scope global
valid_lft forever preferred_lft forever
inet6 fe80::21c:c0ff:fe94:3a12/64 scope link
valid_lft forever preferred_lft forever
ip -6 rout sh
default via 2001:4830:28::1 dev eth0 metric 1024 mtu 1500 advmss 1440
hoplimit 0
so I would expect to have packet going out the default address use the
source address
of 2001:4830:28::129:91
but:
09:50:28.887483 IP6 2001:470:34::129:91 > 2001:558:1002:5:68:87:64:48:
ICMP6, echo request, seq 1, length 64
I would have expected the source address to be: 2001:4830:28::129:91
This works correctly with ipv4.
ip addr sh eth0
4: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast
state UP qlen 1000
link/ether 00:1c:c0:94:3a:12 brd ff:ff:ff:ff:ff:ff
inet 10.0.129.91/17 brd 10.0.255.255 scope global eth0
inet 192.168.198.48/24 scope global eth0
inet 10.254.150.91/24 scope global eth0
inet 10.0.1.91/24 scope global eth0
inet 192.168.3.6/24 scope global eth0
inet 192.168.198.91/24 scope global secondary eth0
default via 192.168.198.252 dev eth0
09:52:06.968934 IP 192.168.198.48 > 68.87.64.48: ICMP echo request, id
34402, seq 1, length 64
Is this broken or a feature of ipv6 or a misconfiguration on my part?
Thanks,
Steve
--
"They that give up essential liberty to obtain temporary safety,
deserve neither liberty nor safety." (Ben Franklin)
"The course of history shows that as a government grows, liberty
decreases." (Thomas Jefferson)
^ permalink raw reply
* Re: [PATCH] bonding: added 802.3ad round-robin hashing policy for single TCP session balancing
From: Oleg V. Ukhno @ 2011-02-03 14:54 UTC (permalink / raw)
To: Jay Vosburgh
Cc: Nicolas de Pesloüan, John Fastabend, netdev@vger.kernel.org
In-Reply-To: <32505.1296669453@death>
On 02/02/2011 08:57 PM, Jay Vosburgh wrote:
> Nicolas de Pesloüan<nicolas.2p.debian@gmail.com> wrote:
>> I just propose the following option and option values : "src_mac_select"
>> (instead of mac_select), with "default" and "slave_mac" (instead of
>> slave_src_mac) as possible values. In the future, we might need a
>> "dst_mac_select" option... :-)
>
> I originally thought of using the nomenclature you propose; my
> thinking for doing it the way I ended up with is to minimize the number
> of tunable knobs that bonding has (so, the dst_mac would be a setting
> for mac_select). That works as long as there aren't a lot of settings
> that would be turned on simultaneously, since each combination would
> have to be a separate option, or the options parser would have to handle
> multiple settings (e.g., mac_select=src+dst or something like that).
>
> Anyway, after thinking about it some more, in the long run it's
> probably safer to separate these two, so, Oleg, use the above naming
> ("src_mac_select" with "default" and "slave_mac").
>
>> Also, are there any risks that this kind of session load-balancing won't
>> properly cooperate with multiqueue (as explained in "Overriding
>> Configuration for Special Cases" in Documentation/networking/bonding.txt)?
>> I think it is important to ensure we keep the ability to fine tune the
>> egress path selection
>
> I think the logic for the mac_select (or src_mac_select or
> whatever) just has to be done last, after the slave selection is done by
> the multiqueue stuff. That's probably a good tidbit to put in the
> documentation as well.
>
> -J
>
> ---
> -Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
Thanks, everyone, for the comments.
I'll resubmit modified patch after it is ready and tested, in about a
week or two I think.
Oleg
>
--
Best regards,
Oleg Ukhno
^ permalink raw reply
* Re: [PATCH] tcp: Increase the initial congestion window to 10.
From: Hagen Paul Pfeifer @ 2011-02-03 14:43 UTC (permalink / raw)
To: Hagen Paul Pfeifer; +Cc: David Miller, netdev, dccp, therbert
In-Reply-To: <47859e9273bb0c1b2c32fc1adfa450ee@localhost>
On Thu, 03 Feb 2011 15:00:28 +0100, Hagen Paul Pfeifer wrote:
> Davem, there is _no_ research how this huge IW will behave in
environments
> with a small BDP. Believe it or not, but there are networks out there
with a
> BDP similar to ~1980. Why for heaven's sake should this work in these
> environments? There are only _two_ extensive analysis one from Cherry
and
s/Cherry/Jerry/ ;-)
Hagen
^ permalink raw reply
* Re: [PATCH] tcp: Increase the initial congestion window to 10.
From: Hagen Paul Pfeifer @ 2011-02-03 14:26 UTC (permalink / raw)
To: John Heffner; +Cc: David Miller, netdev, dccp, therbert
In-Reply-To: <AANLkTinEXYXeZs1=wVQRAPDQ53YY0ne9NgCZN3x1hbaA@mail.gmail.com>
On Thu, 3 Feb 2011 09:17:14 -0500, John Heffner wrote:
> There's already a per-route tunable, right (RTAX_INITCWND)?
Yes, __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
I was a little bit nervous ... ;)
HGN
^ permalink raw reply
* Re: [PATCH] NETFILTER module xt_hmark new target for HASH MARK
From: Hans Schillstrom @ 2011-02-03 14:23 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: kaber@trash.net, jengelh@medozas.de,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
hans@schillstrom.com
In-Reply-To: <4D4AB2D6.7070302@netfilter.org>
On Thu, 2011-02-03 at 14:51 +0100, Pablo Neira Ayuso wrote:
> On 03/02/11 14:34, Hans Schillstrom wrote:
> > +/*
> > + * Calc hash value, special casre is taken on icmp and fragmented messages
> > + * i.e. fragmented messages don't use ports.
> > + */
> > +static __u32 get_hash(struct sk_buff *skb, struct xt_hmark_info *info)
> > +{
> [...]
> > + ip_proto &= info->prmask;
> > + /* get a consistent hash (same value on both flow directions) */
> > + if (addr2 < addr1)
> > + swap(addr1, addr2);
>
> this assumption is not valid in NAT handlings.
That's true, because I want to avoid conntrack
>
> If you want consistent hashing with NAT handlings you'll have to make
> this stateful and use the conntrack source and reply directions of the
> original tuples (thus making it stateful). That may be a problem because
> some people may want to use this without enabling connection tracking.
What about a compilation switch or a sysctl ?
>
> Are you using this for (uplink) load balancing?
Actually in both ways
- in front of a bunch of ipvs
- and in the payloads for outgoing traffic.
> Could you also include one realistic example in the patch description on
> how this is used?
Sure, I guess you mean some nice ascii graphics,
iptables and ip route commands
>
> If this is accepted, I think this has to be merge with the (already
> overloaded) MARK target.
I have no opinion about that, others might have.
Thanks
Hans
^ permalink raw reply
* [PATCH v4 5/5] loopback: convert to hw_features
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
In-Reply-To: <cover.1296741561.git.mirq-linux@rere.qmqm.pl>
This also enables TSOv6, TSO-ECN, and UFO as loopback clearly can handle them.
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
drivers/net/loopback.c | 9 ++++-----
1 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 2d9663a..97b116b 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -129,10 +129,6 @@ static u32 always_on(struct net_device *dev)
static const struct ethtool_ops loopback_ethtool_ops = {
.get_link = always_on,
- .set_tso = ethtool_op_set_tso,
- .get_tx_csum = always_on,
- .get_sg = always_on,
- .get_rx_csum = always_on,
};
static int loopback_dev_init(struct net_device *dev)
@@ -169,9 +165,12 @@ static void loopback_setup(struct net_device *dev)
dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
dev->flags = IFF_LOOPBACK;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ dev->hw_features = NETIF_F_ALL_TSO;
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
- | NETIF_F_TSO
+ | NETIF_F_ALL_TSO
+ | NETIF_F_UFO
| NETIF_F_NO_CSUM
+ | NETIF_F_RXCSUM
| NETIF_F_HIGHDMA
| NETIF_F_LLTX
| NETIF_F_NETNS_LOCAL;
--
1.7.2.3
^ permalink raw reply related
* [PATCH v2] ethtool: implement G/SFEATURES calls
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
In-Reply-To: <cover.1296741561.git.mirq-linux@rere.qmqm.pl>
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
Changes from v1:
- removed deprecation of -k/-K
- mark never-changeable features in userspace
---
ethtool-copy.h | 89 +++++++++++++++++++++++-
ethtool.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 296 insertions(+), 8 deletions(-)
diff --git a/ethtool-copy.h b/ethtool-copy.h
index 75c3ae7..6430dbd 100644
--- a/ethtool-copy.h
+++ b/ethtool-copy.h
@@ -251,6 +251,7 @@ enum ethtool_stringset {
ETH_SS_STATS,
ETH_SS_PRIV_FLAGS,
ETH_SS_NTUPLE_FILTERS,
+ ETH_SS_FEATURES,
};
/* for passing string sets for data tagging */
@@ -523,6 +524,87 @@ struct ethtool_flash {
char data[ETHTOOL_FLASH_MAX_FILENAME];
};
+/* for returning and changing feature sets */
+
+/**
+ * struct ethtool_get_features_block - block with state of 32 features
+ * @available: mask of changeable features
+ * @requested: mask of features requested to be enabled if possible
+ * @active: mask of currently enabled features
+ * @never_changed: mask of features not changeable for any device
+ */
+struct ethtool_get_features_block {
+ __u32 available;
+ __u32 requested;
+ __u32 active;
+ __u32 never_changed;
+};
+
+/**
+ * struct ethtool_gfeatures - command to get state of device's features
+ * @cmd: command number = %ETHTOOL_GFEATURES
+ * @size: in: number of elements in the features[] array;
+ * out: number of elements in features[] needed to hold all features
+ * @features: state of features
+ */
+struct ethtool_gfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_get_features_block features[0];
+};
+
+/**
+ * struct ethtool_set_features_block - block with request for 32 features
+ * @valid: mask of features to be changed
+ * @requested: values of features to be changed
+ */
+struct ethtool_set_features_block {
+ __u32 valid;
+ __u32 requested;
+};
+
+/**
+ * struct ethtool_sfeatures - command to request change in device's features
+ * @cmd: command number = %ETHTOOL_SFEATURES
+ * @size: array size of the features[] array
+ * @features: feature change masks
+ */
+struct ethtool_sfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_set_features_block features[0];
+};
+
+/*
+ * %ETHTOOL_SFEATURES changes features present in features[].valid to the
+ * values of corresponding bits in features[].requested. Bits in .requested
+ * not set in .valid or not changeable are ignored.
+ *
+ * Returns %EINVAL when .valid contains undefined or never-changeable bits
+ * or size is not equal to required number of features words (32-bit blocks).
+ * Returns >= 0 if request was completed; bits set in the value mean:
+ * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not
+ * changeable (not present in %ETHTOOL_GFEATURES' features[].available);
+ * those bits were ignored.
+ * %ETHTOOL_F_WISH - some or all changes requested were recorded but the
+ * resulting state of bits masked by .valid is not equal to .requested.
+ * Probably there are other device-specific constraints on some features
+ * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered
+ * here as though ignored bits were cleared.
+ *
+ * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of
+ * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands
+ * for ETH_SS_FEATURES string set. First entry in the table corresponds to least
+ * significant bit in features[0] fields. Empty strings mark undefined features.
+ */
+enum ethtool_sfeatures_retval_bits {
+ ETHTOOL_F_UNSUPPORTED__BIT,
+ ETHTOOL_F_WISH__BIT,
+};
+
+#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT)
+#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT)
+
/* CMDs currently supported */
#define ETHTOOL_GSET 0x00000001 /* Get settings. */
@@ -534,7 +616,9 @@ struct ethtool_flash {
#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */
#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */
#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */
-#define ETHTOOL_GLINK 0x0000000a /* Get link status (ethtool_value) */
+/* Get link status for host, i.e. whether the interface *and* the
+ * physical port (if there is one) are up (ethtool_value). */
+#define ETHTOOL_GLINK 0x0000000a
#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */
#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */
#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */
@@ -585,6 +669,9 @@ struct ethtool_flash {
#define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */
#define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */
+#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */
+#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */
+
/* compatibility with older code */
#define SPARC_ETH_GSET ETHTOOL_GSET
#define SPARC_ETH_SSET ETHTOOL_SSET
diff --git a/ethtool.c b/ethtool.c
index 1afdfe4..af43e47 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -39,6 +39,7 @@
#include <limits.h>
#include <ctype.h>
+#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
@@ -97,6 +98,9 @@ static int do_gcoalesce(int fd, struct ifreq *ifr);
static int do_scoalesce(int fd, struct ifreq *ifr);
static int do_goffload(int fd, struct ifreq *ifr);
static int do_soffload(int fd, struct ifreq *ifr);
+static void parse_sfeatures_args(int argc, char **argp, int argi);
+static int do_gfeatures(int fd, struct ifreq *ifr);
+static int do_sfeatures(int fd, struct ifreq *ifr);
static int do_gstats(int fd, struct ifreq *ifr);
static int rxflow_str_to_type(const char *str);
static int parse_rxfhashopts(char *optstr, u32 *data);
@@ -133,6 +137,8 @@ static enum {
MODE_SRING,
MODE_GOFFLOAD,
MODE_SOFFLOAD,
+ MODE_GFEATURES,
+ MODE_SFEATURES,
MODE_GSTATS,
MODE_GNFC,
MODE_SNFC,
@@ -211,6 +217,10 @@ static struct option {
" [ ntuple on|off ]\n"
" [ rxhash on|off ]\n"
},
+ { "-w", "--show-features", MODE_GFEATURES, "Get offload status" },
+ { "-W", "--request-features", MODE_SFEATURES, "Set requested offload",
+ " [ feature-name on|off [...] ]\n"
+ " see --show-features output for feature-name strings\n" },
{ "-i", "--driver", MODE_GDRV, "Show driver information" },
{ "-d", "--register-dump", MODE_GREGS, "Do a register dump",
" [ raw on|off ]\n"
@@ -743,8 +753,10 @@ static void parse_generic_cmdline(int argc, char **argp,
break;
}
}
- if( !found)
+ if( !found) {
+ fprintf(stdout, "bad param: %s\n", argp[i]);
show_usage(1);
+ }
}
}
@@ -829,6 +841,8 @@ static void parse_cmdline(int argc, char **argp)
(mode == MODE_SRING) ||
(mode == MODE_GOFFLOAD) ||
(mode == MODE_SOFFLOAD) ||
+ (mode == MODE_GFEATURES) ||
+ (mode == MODE_SFEATURES) ||
(mode == MODE_GSTATS) ||
(mode == MODE_GNFC) ||
(mode == MODE_SNFC) ||
@@ -919,6 +933,11 @@ static void parse_cmdline(int argc, char **argp)
i = argc;
break;
}
+ if (mode == MODE_SFEATURES) {
+ parse_sfeatures_args(argc, argp, i);
+ i = argc;
+ break;
+ }
if (mode == MODE_SNTUPLE) {
if (!strcmp(argp[i], "flow-type")) {
i += 1;
@@ -1947,21 +1966,30 @@ static int dump_rxfhash(int fhash, u64 val)
return 0;
}
-static int doit(void)
+static int get_control_socket(struct ifreq *ifr)
{
- struct ifreq ifr;
int fd;
/* Setup our control structures. */
- memset(&ifr, 0, sizeof(ifr));
- strcpy(ifr.ifr_name, devname);
+ memset(ifr, 0, sizeof(*ifr));
+ strcpy(ifr->ifr_name, devname);
/* Open control socket. */
fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0) {
+ if (fd < 0)
perror("Cannot get control socket");
+
+ return fd;
+}
+
+static int doit(void)
+{
+ struct ifreq ifr;
+ int fd;
+
+ fd = get_control_socket(&ifr);
+ if (fd < 0)
return 70;
- }
/* all of these are expected to populate ifr->ifr_data as needed */
if (mode == MODE_GDRV) {
@@ -1998,6 +2026,10 @@ static int doit(void)
return do_goffload(fd, &ifr);
} else if (mode == MODE_SOFFLOAD) {
return do_soffload(fd, &ifr);
+ } else if (mode == MODE_GFEATURES) {
+ return do_gfeatures(fd, &ifr);
+ } else if (mode == MODE_SFEATURES) {
+ return do_sfeatures(fd, &ifr);
} else if (mode == MODE_GSTATS) {
return do_gstats(fd, &ifr);
} else if (mode == MODE_GNFC) {
@@ -2435,6 +2467,175 @@ static int do_soffload(int fd, struct ifreq *ifr)
return 0;
}
+static int get_feature_strings(int fd, struct ifreq *ifr,
+ struct ethtool_gstrings **strs)
+{
+ struct ethtool_sset_info *sset_info;
+ struct ethtool_gstrings *strings;
+ int sz_str, n_strings, err;
+
+ sset_info = malloc(sizeof(struct ethtool_sset_info) + sizeof(u32));
+ sset_info->cmd = ETHTOOL_GSSET_INFO;
+ sset_info->sset_mask = (1ULL << ETH_SS_FEATURES);
+ ifr->ifr_data = (caddr_t)sset_info;
+ err = send_ioctl(fd, ifr);
+
+ if ((err < 0) ||
+ (!(sset_info->sset_mask & (1ULL << ETH_SS_FEATURES)))) {
+ perror("Cannot get driver strings info");
+ return -100;
+ }
+
+ n_strings = sset_info->data[0];
+ free(sset_info);
+ sz_str = n_strings * ETH_GSTRING_LEN;
+
+ strings = calloc(1, sz_str + sizeof(struct ethtool_gstrings));
+ if (!strings) {
+ fprintf(stderr, "no memory available\n");
+ return -95;
+ }
+
+ strings->cmd = ETHTOOL_GSTRINGS;
+ strings->string_set = ETH_SS_FEATURES;
+ strings->len = n_strings;
+ ifr->ifr_data = (caddr_t) strings;
+ err = send_ioctl(fd, ifr);
+ if (err < 0) {
+ perror("Cannot get feature strings information");
+ free(strings);
+ return -96;
+ }
+
+ *strs = strings;
+ return n_strings;
+}
+
+struct ethtool_sfeatures *features_req;
+
+static void parse_sfeatures_args(int argc, char **argp, int argi)
+{
+ struct cmdline_info *cmdline_desc, *cp;
+ struct ethtool_gstrings *strings;
+ struct ifreq ifr;
+ int n_strings, sz_features, i;
+ int fd, changed = 0;
+
+ fd = get_control_socket(&ifr);
+ if (fd < 0)
+ exit(100);
+
+ n_strings = get_feature_strings(fd, &ifr, &strings);
+ if (n_strings < 0)
+ exit(-n_strings);
+
+ sz_features = sizeof(*features_req->features) * ((n_strings + 31) / 32);
+
+ cp = cmdline_desc = calloc(n_strings, sizeof(*cmdline_desc));
+ features_req = calloc(1, sizeof(*features_req) + sz_features);
+ if (!cmdline_desc || !features_req) {
+ fprintf(stderr, "no memory available\n");
+ exit(95);
+ }
+
+ features_req->size = (n_strings + 31) / 32;
+
+ for (i = 0; i < n_strings; ++i) {
+ if (!strings->data[i*ETH_GSTRING_LEN])
+ continue;
+
+ strings->data[i*ETH_GSTRING_LEN + ETH_GSTRING_LEN-1] = 0;
+ cp->name = (const char *)strings->data + i * ETH_GSTRING_LEN;
+ cp->type = CMDL_FLAG;
+ cp->flag_val = 1 << (i % 32);
+ cp->wanted_val = &features_req->features[i / 32].requested;
+ cp->seen_val = &features_req->features[i / 32].valid;
+ ++cp;
+ }
+
+ parse_generic_cmdline(argc, argp, argi, &changed,
+ cmdline_desc, cp - cmdline_desc);
+
+ free(cmdline_desc);
+ free(strings);
+ close(fd);
+
+ if (!changed) {
+ free(features_req);
+ features_req = NULL;
+ }
+}
+
+static int do_gfeatures(int fd, struct ifreq *ifr)
+{
+ struct ethtool_gstrings *strings;
+ struct ethtool_gfeatures *features;
+ int n_strings, sz_features, err, i;
+
+ n_strings = get_feature_strings(fd, ifr, &strings);
+ if (n_strings < 0)
+ return -n_strings;
+
+ sz_features = sizeof(*features->features) * ((n_strings + 31) / 32);
+ features = calloc(1, sz_features + sizeof(*features));
+ if (!features) {
+ fprintf(stderr, "no memory available\n");
+ return 95;
+ }
+
+ features->cmd = ETHTOOL_GFEATURES;
+ features->size = (n_strings + 31) / 32;
+ ifr->ifr_data = (caddr_t) features;
+ err = send_ioctl(fd, ifr);
+ if (err < 0) {
+ perror("Cannot get feature status");
+ free(strings);
+ free(features);
+ return 97;
+ }
+
+ fprintf(stdout, "Offload state: (name: enabled,wanted,changable)\n");
+ for (i = 0; i < n_strings; i++) {
+ if (!strings->data[i * ETH_GSTRING_LEN])
+ continue; /* empty */
+#define P_FLAG(f) \
+ (features->features[i / 32].f & (1 << (i % 32))) ? "yes" : " no"
+#define PN_FLAG(f) \
+ (features->features[i / 32].never_changed & (1 << (i % 32))) ? "---" : P_FLAG(f)
+ fprintf(stdout, " %-*.*s %s,%s,%s\n",
+ ETH_GSTRING_LEN, ETH_GSTRING_LEN,
+ &strings->data[i * ETH_GSTRING_LEN],
+ P_FLAG(active), PN_FLAG(requested), PN_FLAG(available));
+#undef P_FLAG
+ }
+ free(strings);
+ free(features);
+
+ return 0;
+}
+
+static int do_sfeatures(int fd, struct ifreq *ifr)
+{
+ int err;
+
+ if (!features_req) {
+ fprintf(stderr, "no features changed\n");
+ return 97;
+ }
+
+ features_req->cmd = ETHTOOL_SFEATURES;
+ ifr->ifr_data = (caddr_t) features_req;
+ err = send_ioctl(fd, ifr);
+ if (err < 0) {
+ perror("Cannot change features");
+ free(features_req);
+ return 97;
+ }
+
+ return 0;
+}
+
+
static int do_gset(int fd, struct ifreq *ifr)
{
int err;
--
1.7.2.3
^ permalink raw reply related
* [PATCH v4 1/5] net: Introduce new feature setting ops
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
In-Reply-To: <cover.1296741561.git.mirq-linux@rere.qmqm.pl>
This introduces a new framework to handle device features setting.
It consists of:
- new fields in struct net_device:
+ hw_features - features that hw/driver supports toggling
+ wanted_features - features that user wants enabled, when possible
- new netdev_ops:
+ feat = ndo_fix_features(dev, feat) - API checking constraints for
enabling features or their combinations
+ ndo_set_features(dev) - API updating hardware state to match
changed dev->features
- new ethtool commands:
+ ETHTOOL_GFEATURES/ETHTOOL_SFEATURES: get/set dev->wanted_features
and trigger device reconfiguration if resulting dev->features
changed
+ ETHTOOL_GSTRINGS(ETH_SS_FEATURES): get feature bits names (meaning)
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
include/linux/ethtool.h | 85 +++++++++++++++++++++++++
include/linux/netdevice.h | 45 +++++++++++++-
net/core/dev.c | 49 +++++++++++++--
net/core/ethtool.c | 154 +++++++++++++++++++++++++++++++++++++++++----
4 files changed, 314 insertions(+), 19 deletions(-)
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 1908929..806e716 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -251,6 +251,7 @@ enum ethtool_stringset {
ETH_SS_STATS,
ETH_SS_PRIV_FLAGS,
ETH_SS_NTUPLE_FILTERS,
+ ETH_SS_FEATURES,
};
/* for passing string sets for data tagging */
@@ -523,6 +524,87 @@ struct ethtool_flash {
char data[ETHTOOL_FLASH_MAX_FILENAME];
};
+/* for returning and changing feature sets */
+
+/**
+ * struct ethtool_get_features_block - block with state of 32 features
+ * @available: mask of changeable features
+ * @requested: mask of features requested to be enabled if possible
+ * @active: mask of currently enabled features
+ * @never_changed: mask of features not changeable for any device
+ */
+struct ethtool_get_features_block {
+ __u32 available;
+ __u32 requested;
+ __u32 active;
+ __u32 never_changed;
+};
+
+/**
+ * struct ethtool_gfeatures - command to get state of device's features
+ * @cmd: command number = %ETHTOOL_GFEATURES
+ * @size: in: number of elements in the features[] array;
+ * out: number of elements in features[] needed to hold all features
+ * @features: state of features
+ */
+struct ethtool_gfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_get_features_block features[0];
+};
+
+/**
+ * struct ethtool_set_features_block - block with request for 32 features
+ * @valid: mask of features to be changed
+ * @requested: values of features to be changed
+ */
+struct ethtool_set_features_block {
+ __u32 valid;
+ __u32 requested;
+};
+
+/**
+ * struct ethtool_sfeatures - command to request change in device's features
+ * @cmd: command number = %ETHTOOL_SFEATURES
+ * @size: array size of the features[] array
+ * @features: feature change masks
+ */
+struct ethtool_sfeatures {
+ __u32 cmd;
+ __u32 size;
+ struct ethtool_set_features_block features[0];
+};
+
+/*
+ * %ETHTOOL_SFEATURES changes features present in features[].valid to the
+ * values of corresponding bits in features[].requested. Bits in .requested
+ * not set in .valid or not changeable are ignored.
+ *
+ * Returns %EINVAL when .valid contains undefined or never-changeable bits
+ * or size is not equal to required number of features words (32-bit blocks).
+ * Returns >= 0 if request was completed; bits set in the value mean:
+ * %ETHTOOL_F_UNSUPPORTED - there were bits set in .valid that are not
+ * changeable (not present in %ETHTOOL_GFEATURES' features[].available);
+ * those bits were ignored.
+ * %ETHTOOL_F_WISH - some or all changes requested were recorded but the
+ * resulting state of bits masked by .valid is not equal to .requested.
+ * Probably there are other device-specific constraints on some features
+ * in the set. When %ETHTOOL_F_UNSUPPORTED is set, .valid is considered
+ * here as though ignored bits were cleared.
+ *
+ * Meaning of bits in the masks are obtained by %ETHTOOL_GSSET_INFO (number of
+ * bits in the arrays - always multiple of 32) and %ETHTOOL_GSTRINGS commands
+ * for ETH_SS_FEATURES string set. First entry in the table corresponds to least
+ * significant bit in features[0] fields. Empty strings mark undefined features.
+ */
+enum ethtool_sfeatures_retval_bits {
+ ETHTOOL_F_UNSUPPORTED__BIT,
+ ETHTOOL_F_WISH__BIT,
+};
+
+#define ETHTOOL_F_UNSUPPORTED (1 << ETHTOOL_F_UNSUPPORTED__BIT)
+#define ETHTOOL_F_WISH (1 << ETHTOOL_F_WISH__BIT)
+
#ifdef __KERNEL__
#include <linux/rculist.h>
@@ -744,6 +826,9 @@ struct ethtool_ops {
#define ETHTOOL_GRXFHINDIR 0x00000038 /* Get RX flow hash indir'n table */
#define ETHTOOL_SRXFHINDIR 0x00000039 /* Set RX flow hash indir'n table */
+#define ETHTOOL_GFEATURES 0x0000003a /* Get device offload settings */
+#define ETHTOOL_SFEATURES 0x0000003b /* Change device offload settings */
+
/* compatibility with older code */
#define SPARC_ETH_GSET ETHTOOL_GSET
#define SPARC_ETH_SSET ETHTOOL_SSET
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c7d7074..4a3e554 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -783,6 +783,17 @@ struct netdev_tc_txq {
* Set hardware filter for RFS. rxq_index is the target queue index;
* flow_id is a flow ID to be passed to rps_may_expire_flow() later.
* Return the filter ID on success, or a negative error code.
+ *
+ * u32 (*ndo_fix_features)(struct net_device *dev, u32 features);
+ * Adjusts the requested feature flags according to device-specific
+ * constraints, and returns the resulting flags. Must not modify
+ * the device state.
+ *
+ * int (*ndo_set_features)(struct net_device *dev, u32 features);
+ * Called to update device configuration to new features. Passed
+ * feature set might be less than what was returned by ndo_fix_features().
+ * Must return >0 or -errno if it changed dev->features itself.
+ *
*/
#define HAVE_NET_DEVICE_OPS
struct net_device_ops {
@@ -862,6 +873,10 @@ struct net_device_ops {
u16 rxq_index,
u32 flow_id);
#endif
+ u32 (*ndo_fix_features)(struct net_device *dev,
+ u32 features);
+ int (*ndo_set_features)(struct net_device *dev,
+ u32 features);
};
/*
@@ -913,12 +928,18 @@ struct net_device {
struct list_head napi_list;
struct list_head unreg_list;
- /* Net device features */
+ /* currently active device features */
u32 features;
-
+ /* user-changeable features */
+ u32 hw_features;
+ /* user-requested features */
+ u32 wanted_features;
/* VLAN feature mask */
u32 vlan_features;
+ /* Net device feature bits; if you change something,
+ * also update netdev_features_strings[] in ethtool.c */
+
#define NETIF_F_SG 1 /* Scatter/gather IO. */
#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */
#define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */
@@ -954,6 +975,12 @@ struct net_device {
#define NETIF_F_TSO6 (SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
#define NETIF_F_FSO (SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
+ /* Features valid for ethtool to change */
+ /* = all defined minus driver/device-class-related */
+#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \
+ NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
+#define NETIF_F_ETHTOOL_BITS (0x1f3fffff & ~NETIF_F_NEVER_CHANGE)
+
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \
NETIF_F_TSO6 | NETIF_F_UFO)
@@ -964,6 +991,12 @@ struct net_device {
#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
+#define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+
+#define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \
+ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+ NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC)
+
/*
* If one device supports one of these features, then enable them
* for all in netdev_increment_features.
@@ -972,6 +1005,9 @@ struct net_device {
NETIF_F_SG | NETIF_F_HIGHDMA | \
NETIF_F_FRAGLIST)
+ /* changeable features with no special hardware requirements */
+#define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO)
+
/* Interface index. Unique device identifier */
int ifindex;
int iflink;
@@ -2405,8 +2441,13 @@ extern char *netdev_drivername(const struct net_device *dev, char *buffer, int l
extern void linkwatch_run_queue(void);
+static inline u32 netdev_get_wanted_features(struct net_device *dev)
+{
+ return (dev->features & ~dev->hw_features) | dev->wanted_features;
+}
u32 netdev_increment_features(u32 all, u32 one, u32 mask);
u32 netdev_fix_features(struct net_device *dev, u32 features);
+void netdev_update_features(struct net_device *dev);
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9109e26..92c690e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5246,6 +5246,12 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
features &= ~NETIF_F_TSO;
}
+ /* Software GSO depends on SG. */
+ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
+ netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
+ features &= ~NETIF_F_GSO;
+ }
+
/* UFO needs SG and checksumming */
if (features & NETIF_F_UFO) {
/* maybe split UFO into V4 and V6? */
@@ -5268,6 +5274,37 @@ u32 netdev_fix_features(struct net_device *dev, u32 features)
}
EXPORT_SYMBOL(netdev_fix_features);
+void netdev_update_features(struct net_device *dev)
+{
+ u32 features;
+ int err = 0;
+
+ features = netdev_get_wanted_features(dev);
+
+ if (dev->netdev_ops->ndo_fix_features)
+ features = dev->netdev_ops->ndo_fix_features(dev, features);
+
+ /* driver might be less strict about feature dependencies */
+ features = netdev_fix_features(dev, features);
+
+ if (dev->features == features)
+ return;
+
+ netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n",
+ dev->features, features);
+
+ if (dev->netdev_ops->ndo_set_features)
+ err = dev->netdev_ops->ndo_set_features(dev, features);
+
+ if (!err)
+ dev->features = features;
+ else if (err < 0)
+ netdev_err(dev,
+ "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
+ err, features, dev->features);
+}
+EXPORT_SYMBOL(netdev_update_features);
+
/**
* netif_stacked_transfer_operstate - transfer operstate
* @rootdev: the root or lower level device to transfer state from
@@ -5402,11 +5439,13 @@ int register_netdevice(struct net_device *dev)
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
- dev->features = netdev_fix_features(dev, dev->features);
-
- /* Enable software GSO if SG is supported. */
- if (dev->features & NETIF_F_SG)
- dev->features |= NETIF_F_GSO;
+ /* Transfer changeable features to wanted_features and enable
+ * software offloads (GSO and GRO).
+ */
+ dev->hw_features |= NETIF_F_SOFT_FEATURES;
+ dev->wanted_features = (dev->features & dev->hw_features)
+ | NETIF_F_SOFT_FEATURES;
+ netdev_update_features(dev);
/* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
* vlan_dev_init() will do the dev->features check, so these features
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 5984ee0..2f1b448 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -55,6 +55,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
return 0;
}
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
{
@@ -171,6 +172,136 @@ EXPORT_SYMBOL(ethtool_ntuple_flush);
/* Handlers for each ethtool command */
+#define ETHTOOL_DEV_FEATURE_WORDS 1
+
+static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_gfeatures cmd = {
+ .cmd = ETHTOOL_GFEATURES,
+ .size = ETHTOOL_DEV_FEATURE_WORDS,
+ };
+ struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = {
+ {
+ .available = dev->hw_features,
+ .requested = dev->wanted_features,
+ .active = dev->features,
+ .never_changed = NETIF_F_NEVER_CHANGE,
+ },
+ };
+ u32 __user *sizeaddr;
+ u32 copy_size;
+
+ sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
+ if (get_user(copy_size, sizeaddr))
+ return -EFAULT;
+
+ if (copy_size > ETHTOOL_DEV_FEATURE_WORDS)
+ copy_size = ETHTOOL_DEV_FEATURE_WORDS;
+
+ if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
+ return -EFAULT;
+ useraddr += sizeof(cmd);
+ if (copy_to_user(useraddr, features, copy_size * sizeof(*features)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_sfeatures cmd;
+ struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS];
+ int ret = 0;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+ useraddr += sizeof(cmd);
+
+ if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS)
+ return -EINVAL;
+
+ if (copy_from_user(features, useraddr, sizeof(features)))
+ return -EFAULT;
+
+ if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
+ return -EINVAL;
+
+ if (features[0].valid & ~dev->hw_features) {
+ features[0].valid &= dev->hw_features;
+ ret |= ETHTOOL_F_UNSUPPORTED;
+ }
+
+ dev->wanted_features &= ~features[0].valid;
+ dev->wanted_features |= features[0].valid & features[0].requested;
+ netdev_update_features(dev);
+
+ if ((dev->wanted_features ^ dev->features) & features[0].valid)
+ ret |= ETHTOOL_F_WISH;
+
+ return ret;
+}
+
+static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
+ /* NETIF_F_SG */ "tx-scatter-gather",
+ /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
+ /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded",
+ /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic",
+ /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6",
+ /* NETIF_F_HIGHDMA */ "highdma",
+ /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist",
+ /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert",
+
+ /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse",
+ /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter",
+ /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged",
+ /* NETIF_F_GSO */ "tx-generic-segmentation",
+ /* NETIF_F_LLTX */ "tx-lockless",
+ /* NETIF_F_NETNS_LOCAL */ "netns-local",
+ /* NETIF_F_GRO */ "rx-gro",
+ /* NETIF_F_LRO */ "rx-lro",
+
+ /* NETIF_F_TSO */ "tx-tcp-segmentation",
+ /* NETIF_F_UFO */ "tx-udp-fragmentation",
+ /* NETIF_F_GSO_ROBUST */ "tx-gso-robust",
+ /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation",
+ /* NETIF_F_TSO6 */ "tx-tcp6-segmentation",
+ /* NETIF_F_FSO */ "tx-fcoe-segmentation",
+ "",
+ "",
+
+ /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc",
+ /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp",
+ /* NETIF_F_FCOE_MTU */ "fcoe-mtu",
+ /* NETIF_F_NTUPLE */ "rx-ntuple-filter",
+ /* NETIF_F_RXHASH */ "rx-hashing",
+ "",
+ "",
+ "",
+};
+
+static int __ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (sset == ETH_SS_FEATURES)
+ return ARRAY_SIZE(netdev_features_strings);
+ else if (ops && ops->get_sset_count)
+ return ops->get_sset_count(dev, sset);
+ else
+ return -EINVAL;
+}
+
+static void __ethtool_get_strings(struct net_device *dev,
+ u32 stringset, u8 *data)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+
+ if (stringset == ETH_SS_FEATURES)
+ memcpy(data, netdev_features_strings,
+ sizeof(netdev_features_strings));
+ else if (ops && ops->get_strings)
+ ops->get_strings(dev, stringset, data);
+}
+
static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
@@ -251,14 +382,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_sset_info info;
- const struct ethtool_ops *ops = dev->ethtool_ops;
u64 sset_mask;
int i, idx = 0, n_bits = 0, ret, rc;
u32 *info_buf = NULL;
- if (!ops->get_sset_count)
- return -EOPNOTSUPP;
-
if (copy_from_user(&info, useraddr, sizeof(info)))
return -EFAULT;
@@ -285,7 +412,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
if (!(sset_mask & (1ULL << i)))
continue;
- rc = ops->get_sset_count(dev, i);
+ rc = __ethtool_get_sset_count(dev, i);
if (rc >= 0) {
info.sset_mask |= (1ULL << i);
info_buf[idx++] = rc;
@@ -1287,17 +1414,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gstrings gstrings;
- const struct ethtool_ops *ops = dev->ethtool_ops;
u8 *data;
int ret;
- if (!ops->get_strings || !ops->get_sset_count)
- return -EOPNOTSUPP;
-
if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
return -EFAULT;
- ret = ops->get_sset_count(dev, gstrings.string_set);
+ ret = __ethtool_get_sset_count(dev, gstrings.string_set);
if (ret < 0)
return ret;
@@ -1307,7 +1430,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
if (!data)
return -ENOMEM;
- ops->get_strings(dev, gstrings.string_set, data);
+ __ethtool_get_strings(dev, gstrings.string_set, data);
ret = -EFAULT;
if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
@@ -1317,7 +1440,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
goto out;
ret = 0;
- out:
+out:
kfree(data);
return ret;
}
@@ -1500,6 +1623,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GRXCLSRLCNT:
case ETHTOOL_GRXCLSRULE:
case ETHTOOL_GRXCLSRLALL:
+ case ETHTOOL_GFEATURES:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -1693,6 +1817,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXFHINDIR:
rc = ethtool_set_rxfh_indir(dev, useraddr);
break;
+ case ETHTOOL_GFEATURES:
+ rc = ethtool_get_features(dev, useraddr);
+ break;
+ case ETHTOOL_SFEATURES:
+ rc = ethtool_set_features(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH v4 2/5] net: ethtool: use ndo_fix_features for offload setting
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
In-Reply-To: <cover.1296741561.git.mirq-linux@rere.qmqm.pl>
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
---
net/core/ethtool.c | 262 ++++++++++++++++++++++++----------------------------
1 files changed, 119 insertions(+), 143 deletions(-)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 2f1b448..555accf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -240,6 +240,96 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
return ret;
}
+static u32 ethtool_get_feature_mask(u32 eth_cmd)
+{
+ /* feature masks of legacy discrete ethtool ops */
+
+ switch (eth_cmd) {
+ case ETHTOOL_GTXCSUM:
+ case ETHTOOL_STXCSUM:
+ return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+ case ETHTOOL_GSG:
+ case ETHTOOL_SSG:
+ return NETIF_F_SG;
+ case ETHTOOL_GTSO:
+ case ETHTOOL_STSO:
+ return NETIF_F_ALL_TSO;
+ case ETHTOOL_GUFO:
+ case ETHTOOL_SUFO:
+ return NETIF_F_UFO;
+ case ETHTOOL_GGSO:
+ case ETHTOOL_SGSO:
+ return NETIF_F_GSO;
+ case ETHTOOL_GGRO:
+ case ETHTOOL_SGRO:
+ return NETIF_F_GRO;
+ default:
+ BUG();
+ }
+}
+
+static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr,
+ u32 ethcmd)
+{
+ struct ethtool_value edata = {
+ .cmd = ethcmd,
+ .data = !!(dev->features & ethtool_get_feature_mask(ethcmd)),
+ };
+
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
+static int __ethtool_set_sg(struct net_device *dev, u32 data);
+static int __ethtool_set_tso(struct net_device *dev, u32 data);
+static int __ethtool_set_ufo(struct net_device *dev, u32 data);
+
+static int ethtool_set_one_feature(struct net_device *dev,
+ void __user *useraddr, u32 ethcmd)
+{
+ struct ethtool_value edata;
+ u32 mask;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+
+ mask = ethtool_get_feature_mask(ethcmd);
+ mask &= dev->hw_features;
+ if (mask) {
+ if (edata.data)
+ dev->wanted_features |= mask;
+ else
+ dev->wanted_features &= ~mask;
+
+ netdev_update_features(dev);
+ return 0;
+ }
+
+ /* Driver is not converted to ndo_fix_features or does not
+ * support changing this offload. In the latter case it won't
+ * have corresponding ethtool_ops field set.
+ *
+ * Following part is to be removed after all drivers advertise
+ * their changeable features in netdev->hw_features and stop
+ * using discrete offload setting ops.
+ */
+
+ switch (ethcmd) {
+ case ETHTOOL_STXCSUM:
+ return __ethtool_set_tx_csum(dev, edata.data);
+ case ETHTOOL_SSG:
+ return __ethtool_set_sg(dev, edata.data);
+ case ETHTOOL_STSO:
+ return __ethtool_set_tso(dev, edata.data);
+ case ETHTOOL_SUFO:
+ return __ethtool_set_ufo(dev, edata.data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = {
/* NETIF_F_SG */ "tx-scatter-gather",
/* NETIF_F_IP_CSUM */ "tx-checksum-ipv4",
@@ -1218,6 +1308,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
{
int err;
+ if (data && !(dev->features & NETIF_F_ALL_CSUM))
+ return -EINVAL;
+
if (!data && dev->ethtool_ops->set_tso) {
err = dev->ethtool_ops->set_tso(dev, 0);
if (err)
@@ -1232,26 +1325,21 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data)
return dev->ethtool_ops->set_sg(dev, data);
}
-static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
int err;
if (!dev->ethtool_ops->set_tx_csum)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (!edata.data && dev->ethtool_ops->set_sg) {
+ if (!data && dev->ethtool_ops->set_sg) {
err = __ethtool_set_sg(dev, 0);
if (err)
return err;
}
- return dev->ethtool_ops->set_tx_csum(dev, edata.data);
+ return dev->ethtool_ops->set_tx_csum(dev, data);
}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
{
@@ -1269,108 +1357,28 @@ static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_rx_csum(dev, edata.data);
}
-static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_tso(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
-
- if (!dev->ethtool_ops->set_sg)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (edata.data &&
- !(dev->features & NETIF_F_ALL_CSUM))
- return -EINVAL;
-
- return __ethtool_set_sg(dev, edata.data);
-}
-
-static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
if (!dev->ethtool_ops->set_tso)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (edata.data && !(dev->features & NETIF_F_SG))
+ if (data && !(dev->features & NETIF_F_SG))
return -EINVAL;
- return dev->ethtool_ops->set_tso(dev, edata.data);
+ return dev->ethtool_ops->set_tso(dev, data);
}
-static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_ufo(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
-
if (!dev->ethtool_ops->set_ufo)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
- if (edata.data && !(dev->features & NETIF_F_SG))
+ if (data && !(dev->features & NETIF_F_SG))
return -EINVAL;
- if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) ||
+ if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
return -EINVAL;
- return dev->ethtool_ops->set_ufo(dev, edata.data);
-}
-
-static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GGSO };
-
- edata.data = dev->features & NETIF_F_GSO;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
- if (edata.data)
- dev->features |= NETIF_F_GSO;
- else
- dev->features &= ~NETIF_F_GSO;
- return 0;
-}
-
-static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata = { ETHTOOL_GGRO };
-
- edata.data = dev->features & NETIF_F_GRO;
- if (copy_to_user(useraddr, &edata, sizeof(edata)))
- return -EFAULT;
- return 0;
-}
-
-static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
-{
- struct ethtool_value edata;
-
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (edata.data) {
- u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
- dev->ethtool_ops->get_rx_csum(dev) :
- ethtool_op_get_rx_csum(dev);
-
- if (!rxcsum)
- return -EINVAL;
- dev->features |= NETIF_F_GRO;
- } else
- dev->features &= ~NETIF_F_GRO;
-
- return 0;
+ return dev->ethtool_ops->set_ufo(dev, data);
}
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
@@ -1703,33 +1711,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXCSUM:
rc = ethtool_set_rx_csum(dev, useraddr);
break;
- case ETHTOOL_GTXCSUM:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_tx_csum ?
- dev->ethtool_ops->get_tx_csum :
- ethtool_op_get_tx_csum));
- break;
- case ETHTOOL_STXCSUM:
- rc = ethtool_set_tx_csum(dev, useraddr);
- break;
- case ETHTOOL_GSG:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_sg ?
- dev->ethtool_ops->get_sg :
- ethtool_op_get_sg));
- break;
- case ETHTOOL_SSG:
- rc = ethtool_set_sg(dev, useraddr);
- break;
- case ETHTOOL_GTSO:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_tso ?
- dev->ethtool_ops->get_tso :
- ethtool_op_get_tso));
- break;
- case ETHTOOL_STSO:
- rc = ethtool_set_tso(dev, useraddr);
- break;
case ETHTOOL_TEST:
rc = ethtool_self_test(dev, useraddr);
break;
@@ -1745,21 +1726,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GPERMADDR:
rc = ethtool_get_perm_addr(dev, useraddr);
break;
- case ETHTOOL_GUFO:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_ufo ?
- dev->ethtool_ops->get_ufo :
- ethtool_op_get_ufo));
- break;
- case ETHTOOL_SUFO:
- rc = ethtool_set_ufo(dev, useraddr);
- break;
- case ETHTOOL_GGSO:
- rc = ethtool_get_gso(dev, useraddr);
- break;
- case ETHTOOL_SGSO:
- rc = ethtool_set_gso(dev, useraddr);
- break;
case ETHTOOL_GFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
(dev->ethtool_ops->get_flags ?
@@ -1790,12 +1756,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXCLSRLINS:
rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
break;
- case ETHTOOL_GGRO:
- rc = ethtool_get_gro(dev, useraddr);
- break;
- case ETHTOOL_SGRO:
- rc = ethtool_set_gro(dev, useraddr);
- break;
case ETHTOOL_FLASHDEV:
rc = ethtool_flash_device(dev, useraddr);
break;
@@ -1823,6 +1783,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SFEATURES:
rc = ethtool_set_features(dev, useraddr);
break;
+ case ETHTOOL_GTXCSUM:
+ case ETHTOOL_GSG:
+ case ETHTOOL_GTSO:
+ case ETHTOOL_GUFO:
+ case ETHTOOL_GGSO:
+ case ETHTOOL_GGRO:
+ rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
+ break;
+ case ETHTOOL_STXCSUM:
+ case ETHTOOL_SSG:
+ case ETHTOOL_STSO:
+ case ETHTOOL_SUFO:
+ case ETHTOOL_SGSO:
+ case ETHTOOL_SGRO:
+ rc = ethtool_set_one_feature(dev, useraddr, ethcmd);
+ break;
default:
rc = -EOPNOTSUPP;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH v4 4/5] net: introduce NETIF_F_RXCSUM
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
In-Reply-To: <cover.1296741561.git.mirq-linux@rere.qmqm.pl>
Introduce NETIF_F_RXCSUM to replace device-private flags for RX checksum
offload. Integrate it with ndo_fix_features.
ethtool_op_get_rx_csum() is removed altogether as nothing in-tree uses it.
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
---
include/linux/ethtool.h | 1 -
include/linux/netdevice.h | 5 ++++-
net/core/ethtool.c | 36 ++++++++++++------------------------
3 files changed, 16 insertions(+), 26 deletions(-)
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 806e716..54d776c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -625,7 +625,6 @@ struct net_device;
/* Some generic methods drivers may use in their ethtool_ops */
u32 ethtool_op_get_link(struct net_device *dev);
-u32 ethtool_op_get_rx_csum(struct net_device *dev);
u32 ethtool_op_get_tx_csum(struct net_device *dev);
int ethtool_op_set_tx_csum(struct net_device *dev, u32 data);
int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4a3e554..45080bb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -964,6 +964,7 @@ struct net_device {
#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
#define NETIF_F_NTUPLE (1 << 27) /* N-tuple filters supported */
#define NETIF_F_RXHASH (1 << 28) /* Receive hashing offload */
+#define NETIF_F_RXCSUM (1 << 29) /* Receive checksumming offload */
/* Segmentation offload features */
#define NETIF_F_GSO_SHIFT 16
@@ -979,7 +980,7 @@ struct net_device {
/* = all defined minus driver/device-class-related */
#define NETIF_F_NEVER_CHANGE (NETIF_F_HIGHDMA | NETIF_F_VLAN_CHALLENGED | \
NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
-#define NETIF_F_ETHTOOL_BITS (0x1f3fffff & ~NETIF_F_NEVER_CHANGE)
+#define NETIF_F_ETHTOOL_BITS (0x3f3fffff & ~NETIF_F_NEVER_CHANGE)
/* List of features with software fallbacks. */
#define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | \
@@ -2501,6 +2502,8 @@ static inline int dev_ethtool_get_settings(struct net_device *dev,
static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev)
{
+ if (dev->hw_features & NETIF_F_RXCSUM)
+ return !!(dev->features & NETIF_F_RXCSUM);
if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum)
return 0;
return dev->ethtool_ops->get_rx_csum(dev);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6e7c6f2..52e4272 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev)
}
EXPORT_SYMBOL(ethtool_op_get_link);
-u32 ethtool_op_get_rx_csum(struct net_device *dev)
-{
- return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_rx_csum);
-
u32 ethtool_op_get_tx_csum(struct net_device *dev)
{
return (dev->features & NETIF_F_ALL_CSUM) != 0;
@@ -276,6 +270,9 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
case ETHTOOL_GTXCSUM:
case ETHTOOL_STXCSUM:
return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM;
+ case ETHTOOL_GRXCSUM:
+ case ETHTOOL_SRXCSUM:
+ return NETIF_F_RXCSUM;
case ETHTOOL_GSG:
case ETHTOOL_SSG:
return NETIF_F_SG;
@@ -310,6 +307,7 @@ static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr
}
static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
+static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
static int __ethtool_set_sg(struct net_device *dev, u32 data);
static int __ethtool_set_tso(struct net_device *dev, u32 data);
static int __ethtool_set_ufo(struct net_device *dev, u32 data);
@@ -347,6 +345,8 @@ static int ethtool_set_one_feature(struct net_device *dev,
switch (ethcmd) {
case ETHTOOL_STXCSUM:
return __ethtool_set_tx_csum(dev, edata.data);
+ case ETHTOOL_SRXCSUM:
+ return __ethtool_set_rx_csum(dev, edata.data);
case ETHTOOL_SSG:
return __ethtool_set_sg(dev, edata.data);
case ETHTOOL_STSO:
@@ -391,7 +391,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS
/* NETIF_F_FCOE_MTU */ "fcoe-mtu",
/* NETIF_F_NTUPLE */ "rx-ntuple-filter",
/* NETIF_F_RXHASH */ "rx-hashing",
- "",
+ /* NETIF_F_RXCSUM */ "rx-checksum",
"",
"",
};
@@ -1369,20 +1369,15 @@ static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
return dev->ethtool_ops->set_tx_csum(dev, data);
}
-static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
+static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
{
- struct ethtool_value edata;
-
if (!dev->ethtool_ops->set_rx_csum)
return -EOPNOTSUPP;
- if (copy_from_user(&edata, useraddr, sizeof(edata)))
- return -EFAULT;
-
- if (!edata.data && dev->ethtool_ops->set_sg)
+ if (!data)
dev->features &= ~NETIF_F_GRO;
- return dev->ethtool_ops->set_rx_csum(dev, edata.data);
+ return dev->ethtool_ops->set_rx_csum(dev, data);
}
static int __ethtool_set_tso(struct net_device *dev, u32 data)
@@ -1730,15 +1725,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SPAUSEPARAM:
rc = ethtool_set_pauseparam(dev, useraddr);
break;
- case ETHTOOL_GRXCSUM:
- rc = ethtool_get_value(dev, useraddr, ethcmd,
- (dev->ethtool_ops->get_rx_csum ?
- dev->ethtool_ops->get_rx_csum :
- ethtool_op_get_rx_csum));
- break;
- case ETHTOOL_SRXCSUM:
- rc = ethtool_set_rx_csum(dev, useraddr);
- break;
case ETHTOOL_TEST:
rc = ethtool_self_test(dev, useraddr);
break;
@@ -1811,6 +1797,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_set_features(dev, useraddr);
break;
case ETHTOOL_GTXCSUM:
+ case ETHTOOL_GRXCSUM:
case ETHTOOL_GSG:
case ETHTOOL_GTSO:
case ETHTOOL_GUFO:
@@ -1819,6 +1806,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_get_one_feature(dev, useraddr, ethcmd);
break;
case ETHTOOL_STXCSUM:
+ case ETHTOOL_SRXCSUM:
case ETHTOOL_SSG:
case ETHTOOL_STSO:
case ETHTOOL_SUFO:
--
1.7.2.3
^ permalink raw reply related
* [PATCH v4 0/5] net: Unified offload configuration
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
Here's a v4 of the ethtool unification patch series.
What's in it?
1:
the patch - implement unified ethtool setting ops
2..3:
implement interoperation between old and new ethtool ops
4:
include RX checksum in features and plug it into new framework
5:
convert loopback device to new framework
What is it good for?
- unifies driver behaviour wrt hardware offloads
- removes a lot of boilerplate code from drivers
- allows better fine-grained control over used offloads
I'm testing this on the ARM Gemini arch now. A patch to the ethtool userspace
tool will follow this series. I'm not fond of the GFEATURES output I
implemented - please suggest improvements to it if you can.
Driver conversions stay the same as in v2 - as with v3, I'll hold off on
resending them until after the core code gets accepted.
Patches 2,4,5 are unchanged from v3.
Best Regards,
Michał Mirosław
v1: http://marc.info/?l=linux-netdev&m=129245188832643&w=3
Changes from v3:
- fixed kernel-doc and other comments
- added HIGHDMA to never-changeable features
- changed GFEATURES .size interpretation
- changed feature strings
- changed __ethtool_set_flags() to reject invalid changes
Changes from v2:
- rebase to net-next after merging v2 leading patches
- fix missing comma in feature name table
- force NETIF_F_SOFT_FEATURES in hw_features for simpler code
(fixes a bug that disallowed changing GSO and GRO state)
Changes from v1:
- split structures for GFEATURES/SFEATURES
- naming of feature bits using GSTRINGS ETH_SS_FEATURES
- strict checking of bits used in SFEATURES call
- more comments and kernel-doc
- rebased to net-next after 2.6.37
---
Michał Mirosław (5):
net: Introduce new feature setting ops
net: ethtool: use ndo_fix_features for offload setting
net: use ndo_fix_features for ethtool_ops->set_flags
net: introduce NETIF_F_RXCSUM
loopback: convert to hw_features
drivers/net/loopback.c | 9 +-
include/linux/ethtool.h | 86 ++++++++-
include/linux/netdevice.h | 48 +++++-
net/core/dev.c | 49 ++++-
net/core/ethtool.c | 481 ++++++++++++++++++++++++++++-----------------
5 files changed, 480 insertions(+), 193 deletions(-)
--
1.7.2.3
^ permalink raw reply
* [PATCH v4 3/5] net: use ndo_fix_features for ethtool_ops->set_flags
From: Michał Mirosław @ 2011-02-03 14:21 UTC (permalink / raw)
To: netdev; +Cc: Ben Hutchings
In-Reply-To: <cover.1296741561.git.mirq-linux@rere.qmqm.pl>
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
net/core/ethtool.c | 31 +++++++++++++++++++++++++++++--
1 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 555accf..6e7c6f2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -240,6 +240,34 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
return ret;
}
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
+{
+ u32 changed;
+
+ if (data & ~flags_dup_features)
+ return -EINVAL;
+
+ /* legacy set_flags() op */
+ if (dev->ethtool_ops->set_flags) {
+ if (unlikely(dev->hw_features & flags_dup_features))
+ netdev_warn(dev,
+ "driver BUG: mixed hw_features and set_flags()\n");
+ return dev->ethtool_ops->set_flags(dev, data);
+ }
+
+ /* allow changing only bits set in hw_features */
+ changed = (data ^ dev->wanted_features) & flags_dup_features;
+ if (changed & ~dev->hw_features)
+ return -EOPNOTSUPP;
+
+ dev->wanted_features =
+ (dev->wanted_features & ~changed) | data;
+
+ netdev_update_features(dev);
+
+ return 0;
+}
+
static u32 ethtool_get_feature_mask(u32 eth_cmd)
{
/* feature masks of legacy discrete ethtool ops */
@@ -1733,8 +1761,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
ethtool_op_get_flags));
break;
case ETHTOOL_SFLAGS:
- rc = ethtool_set_value(dev, useraddr,
- dev->ethtool_ops->set_flags);
+ rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
break;
case ETHTOOL_GPFLAGS:
rc = ethtool_get_value(dev, useraddr, ethcmd,
--
1.7.2.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox