Netdev List
 help / color / mirror / Atom feed
* Re: 3.5.0+ - Linus GIT - WARNING: at net/sched/sch_generic.c:255 dev_watchdog+0xeb/0x15f()
From: Miles Lane @ 2012-06-08 15:50 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: LKML, Andrew Morton, Wim Van Sebroeck, Jay Cliburn, Chris Snook,
	netdev, Huang Xiong
In-Reply-To: <1339053257.26966.100.camel@edumazet-glaptop>

On Thu, Jun 7, 2012 at 3:14 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2012-06-07 at 08:39 +0200, Eric Dumazet wrote:
>> On Thu, 2012-06-07 at 02:16 -0400, Miles Lane wrote:
>> > WARNING: at net/sched/sch_generic.c:255 dev_watchdog+0xeb/0x15f()
>> > Hardware name: UL50VT
>> > NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out
>> > Modules linked in: hfsplus hfs vfat msdos fat snd_hrtimer ipv6
>> > snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep
>> > snd_pcm_oss snd_seq_dummy snd_mixer_oss uvcvideo videobuf2_core
>> > snd_pcm videodev snd_seq_oss snd_seq_midi snd_rawmidi media
>> > snd_seq_midi_event acpi_cpufreq videobuf2_vmalloc videobuf2_memops
>> > snd_seq iwlwifi snd_timer snd_seq_device asus_laptop mac80211
>> > sparse_keymap snd cfg80211 coretemp soundcore psmouse snd_page_alloc
>> > rtc_cmos mperf processor evdev rfkill battery led_class input_polldev
>> > ac i915 nouveau sr_mod cdrom sd_mod ehci_hcd atl1c uhci_hcd intel_agp
>> > ttm usbcore intel_gtt usb_common drm_kms_helper thermal video
>> > thermal_sys hwmon button
>> > Pid: 3025, comm: hud-service Not tainted 3.5.0-rc1+ #128
>> > Call Trace:
>> >  <IRQ>  [<ffffffff8102d42f>] warn_slowpath_common+0x7e/0x97
>> >  [<ffffffff8102d4dc>] warn_slowpath_fmt+0x41/0x43
>> >  [<ffffffff81360f1c>] dev_watchdog+0xeb/0x15f
>> >  [<ffffffff8103af44>] run_timer_softirq+0x20e/0x356
>> >  [<ffffffff8103ae7e>] ? run_timer_softirq+0x148/0x356
>> >  [<ffffffff81360e31>] ? netif_tx_unlock+0x57/0x57
>> >  [<ffffffff810344f8>] __do_softirq+0x103/0x239
>> >  [<ffffffff8107122a>] ? clockevents_program_event+0x9c/0xb9
>> >  [<ffffffff8140a4cc>] call_softirq+0x1c/0x30
>> >  [<ffffffff81003bb9>] do_softirq+0x37/0x82
>> >  [<ffffffff81034888>] irq_exit+0x4c/0xb1
>> >  [<ffffffff8101ba71>] smp_apic_timer_interrupt+0x76/0x84
>> >  [<ffffffff81409adc>] apic_timer_interrupt+0x6c/0x80
>> >  <EOI>  [<ffffffff81105161>] ? fget_raw_light+0x4c/0x7d
>> >  [<ffffffff81105161>] ? fget_raw_light+0x4c/0x7d
>> >  [<ffffffff8111153b>] sys_fcntl+0x23/0x53b
>> >  [<ffffffff81004b68>] ? print_context_stack+0x44/0xb1
>> >  [<ffffffff81408fe2>] system_call_fastpath+0x16/0x1b
>> > ---[ end trace c1f284d9c873031d ]---
>>
>> CC netdev and Huang Xiong
>>
>> Atheros drivers are known to have buggy tx completion, it's incredible...
>>
>> You could try the following patch, not a 'perfect' solution, but a fix.
>
> And if you feel lucky, you could try the following one as well, a step
> in the right direction:
>
>  drivers/net/ethernet/atheros/atl1c/atl1c_main.c |   86 ++++----------
>  1 file changed, 30 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
> index 9cc1570..44940f4 100644
> --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
> +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
> @@ -1528,6 +1528,16 @@ static inline void atl1c_clear_phy_int(struct atl1c_adapter *adapter)
>        spin_unlock(&adapter->mdio_lock);
>  }
>
> +static inline u16 atl1c_tpd_avail(const struct atl1c_tpd_ring *tpd_ring)
> +{
> +       u16 next_to_use = tpd_ring->next_to_use;
> +       u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean);
> +
> +       return (u16)(next_to_clean > next_to_use) ?
> +               (next_to_clean - next_to_use - 1) :
> +               (tpd_ring->count + next_to_clean - next_to_use - 1);
> +}
> +
>  static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter,
>                                enum atl1c_trans_queue type)
>  {
> @@ -1551,10 +1561,14 @@ static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter,
>                atomic_set(&tpd_ring->next_to_clean, next_to_clean);
>        }
>
> +       spin_lock(&adapter->tx_lock);
> +
>        if (netif_queue_stopped(adapter->netdev) &&
> -                       netif_carrier_ok(adapter->netdev)) {
> +           netif_carrier_ok(adapter->netdev) &&
> +           atl1c_tpd_avail(tpd_ring) >= tpd_ring->count / 4)
>                netif_wake_queue(adapter->netdev);
> -       }
> +
> +       spin_unlock(&adapter->tx_lock);
>
>        return true;
>  }
> @@ -1856,20 +1870,6 @@ static void atl1c_netpoll(struct net_device *netdev)
>  }
>  #endif
>
> -static inline u16 atl1c_tpd_avail(struct atl1c_adapter *adapter, enum atl1c_trans_queue type)
> -{
> -       struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
> -       u16 next_to_use = 0;
> -       u16 next_to_clean = 0;
> -
> -       next_to_clean = atomic_read(&tpd_ring->next_to_clean);
> -       next_to_use   = tpd_ring->next_to_use;
> -
> -       return (u16)(next_to_clean > next_to_use) ?
> -               (next_to_clean - next_to_use - 1) :
> -               (tpd_ring->count + next_to_clean - next_to_use - 1);
> -}
> -
>  /*
>  * get next usable tpd
>  * Note: should call atl1c_tdp_avail to make sure
> @@ -1899,24 +1899,6 @@ atl1c_get_tx_buffer(struct atl1c_adapter *adapter, struct atl1c_tpd_desc *tpd)
>                        (struct atl1c_tpd_desc *)tpd_ring->desc];
>  }
>
> -/* Calculate the transmit packet descript needed*/
> -static u16 atl1c_cal_tpd_req(const struct sk_buff *skb)
> -{
> -       u16 tpd_req;
> -       u16 proto_hdr_len = 0;
> -
> -       tpd_req = skb_shinfo(skb)->nr_frags + 1;
> -
> -       if (skb_is_gso(skb)) {
> -               proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
> -               if (proto_hdr_len < skb_headlen(skb))
> -                       tpd_req++;
> -               if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
> -                       tpd_req++;
> -       }
> -       return tpd_req;
> -}
> -
>  static int atl1c_tso_csum(struct atl1c_adapter *adapter,
>                          struct sk_buff *skb,
>                          struct atl1c_tpd_desc **tpd,
> @@ -2099,10 +2081,10 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter,
>        buffer_info->skb = skb;
>  }
>
> -static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb,
> -                          struct atl1c_tpd_desc *tpd, enum atl1c_trans_queue type)
> +static void atl1c_tx_queue(const struct atl1c_adapter *adapter,
> +                          const struct atl1c_tpd_ring *tpd_ring,
> +                          enum atl1c_trans_queue type)
>  {
> -       struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
>        u16 reg;
>
>        reg = type == atl1c_trans_high ? REG_TPD_PRI1_PIDX : REG_TPD_PRI0_PIDX;
> @@ -2113,35 +2095,19 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
>                                          struct net_device *netdev)
>  {
>        struct atl1c_adapter *adapter = netdev_priv(netdev);
> -       unsigned long flags;
> -       u16 tpd_req = 1;
>        struct atl1c_tpd_desc *tpd;
>        enum atl1c_trans_queue type = atl1c_trans_normal;
> +       const struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type];
>
>        if (test_bit(__AT_DOWN, &adapter->flags)) {
>                dev_kfree_skb_any(skb);
>                return NETDEV_TX_OK;
>        }
>
> -       tpd_req = atl1c_cal_tpd_req(skb);
> -       if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
> -               if (netif_msg_pktdata(adapter))
> -                       dev_info(&adapter->pdev->dev, "tx locked\n");
> -               return NETDEV_TX_LOCKED;
> -       }
> -
> -       if (atl1c_tpd_avail(adapter, type) < tpd_req) {
> -               /* no enough descriptor, just stop queue */
> -               netif_stop_queue(netdev);
> -               spin_unlock_irqrestore(&adapter->tx_lock, flags);
> -               return NETDEV_TX_BUSY;
> -       }
> -
>        tpd = atl1c_get_tpd(adapter, type);
>
>        /* do TSO and check sum */
>        if (atl1c_tso_csum(adapter, skb, &tpd, type) != 0) {
> -               spin_unlock_irqrestore(&adapter->tx_lock, flags);
>                dev_kfree_skb_any(skb);
>                return NETDEV_TX_OK;
>        }
> @@ -2160,9 +2126,17 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb,
>                tpd->word1 |= 1 << TPD_ETH_TYPE_SHIFT; /* Ethernet frame */
>
>        atl1c_tx_map(adapter, skb, tpd, type);
> -       atl1c_tx_queue(adapter, skb, tpd, type);
> +       atl1c_tx_queue(adapter, tpd_ring, type);
> +
> +       if (atl1c_tpd_avail(tpd_ring) < MAX_SKB_FRAGS + 4) {
> +               unsigned long flags;
> +
> +               spin_lock_irqsave(&adapter->tx_lock, flags);
> +               if (atl1c_tpd_avail(tpd_ring) < MAX_SKB_FRAGS + 4)
> +                       netif_stop_queue(netdev);
> +               spin_unlock_irqrestore(&adapter->tx_lock, flags);
> +       }
>
> -       spin_unlock_irqrestore(&adapter->tx_lock, flags);
>        return NETDEV_TX_OK;
>  }
>
>
>

With this patch applied to Linus' GIT tree (updated last night), I get
this warning:

[  187.346706] WARNING: at net/sched/sch_generic.c:255 dev_watchdog+0xeb/0x15f()
[  187.346709] Hardware name: UL50VT
[  187.346712] NETDEV WATCHDOG: eth0 (atl1c): transmit queue 0 timed out
[  187.346825] Modules linked in: snd_hrtimer ipv6
snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep
snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_oss
snd_seq_midi snd_rawmidi uvcvideo videobuf2_core videodev acpi_cpufreq
media snd_seq_midi_event iwlwifi videobuf2_vmalloc snd_seq mac80211
asus_laptop rtc_cmos cfg80211 videobuf2_memops sparse_keymap snd_timer
snd_seq_device mperf battery ac led_class psmouse snd input_polldev
coretemp processor soundcore snd_page_alloc rfkill evdev acpi_call(O)
i915 nouveau fbcon tileblit ttm font bitblit softcursor drm_kms_helper
intel_agp intel_gtt drm agpgart sd_mod sr_mod cdrom uhci_hcd fb fbdev
i2c_algo_bit ehci_hcd i2c_core cfbcopyarea atl1c usbcore mxm_wmi
cfbimgblt usb_common cfbfillrect thermal video backlight thermal_sys
hwmon wmi button
[  187.346832] Pid: 2899, comm: compiz Tainted: G           O 3.5.0-rc1+ #131
[  187.346834] Call Trace:
[  187.346843]  <IRQ>  [<ffffffff8102d42f>] warn_slowpath_common+0x7e/0x97
[  187.346847]  [<ffffffff8102d4dc>] warn_slowpath_fmt+0x41/0x43
[  187.346863]  [<ffffffff81302028>] dev_watchdog+0xeb/0x15f
[  187.346869]  [<ffffffff8103af44>] run_timer_softirq+0x20e/0x356
[  187.346873]  [<ffffffff8103ae7e>] ? run_timer_softirq+0x148/0x356
[  187.346878]  [<ffffffff81301f3d>] ? netif_tx_unlock+0x57/0x57
[  187.346883]  [<ffffffff810344f8>] __do_softirq+0x103/0x239
[  187.346889]  [<ffffffff8107123a>] ? clockevents_program_event+0x9c/0xb9
[  187.346894]  [<ffffffff813ab38c>] call_softirq+0x1c/0x30
[  187.346899]  [<ffffffff81003bb9>] do_softirq+0x37/0x82
[  187.346903]  [<ffffffff81034888>] irq_exit+0x4c/0xb1
[  187.346909]  [<ffffffff8101ba71>] smp_apic_timer_interrupt+0x76/0x84
[  187.346914]  [<ffffffff813aa99c>] apic_timer_interrupt+0x6c/0x80
[  187.346921]  <EOI>  [<ffffffff813a9ec7>] ? sysret_check+0x1b/0x56
[  187.346925] ---[ end trace 954b24373ae625e3 ]---

^ permalink raw reply

* Re: Deadlock, L2TP over IP are not working, 3.4.1
From: Benjamin LaHaise @ 2012-06-08 15:41 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Francois Romieu, Denys Fedoryshchenko, davem, netdev,
	linux-kernel
In-Reply-To: <1339134438.6001.13.camel@edumazet-glaptop>

On Fri, Jun 08, 2012 at 07:47:18AM +0200, Eric Dumazet wrote:
> I have no idea how many l2tp_eth devices are setup at once in typical
> conf.

Depends on the usage scenario.  L2TP is commonly used for terminating 
customer connections by wholesale ISPs.  In that kind of edge routing 
use-case, tens of thousands of interfaces are easily possible.

		-ben
-- 
"Thought is the essence of where you are now."

^ permalink raw reply

* RE: e1000e, 3.4.1, , jumbo frames are not working
From: Allan, Bruce W @ 2012-06-08 15:38 UTC (permalink / raw)
  To: Denys Fedoryshchenko, Kirsher, Jeffrey T, rkagan@parallels.com,
	stable@vger.kernel.org, Brandeburg, Jesse, Wyborny, Carolyn,
	Skidmore, Donald C, Rose, Gregory V, Waskiewicz Jr, Peter P,
	Duyck, Alexander H, Ronciak, John, dnelson@redhat.com,
	e1000-devel@lists.sourceforge.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <87ed034d58606650514bdd3b88b0b002@visp.net.lb>

> -----Original Message-----
> From: Denys Fedoryshchenko [mailto:denys@visp.net.lb]
> Sent: Friday, June 08, 2012 7:59 AM
> To: Kirsher, Jeffrey T; rkagan@parallels.com; stable@vger.kernel.org;
> Brandeburg, Jesse; Allan, Bruce W; Wyborny, Carolyn; Skidmore, Donald
> C; Rose, Gregory V; Waskiewicz Jr, Peter P; Duyck, Alexander H;
> Ronciak, John; dnelson@redhat.com; e1000-devel@lists.sourceforge.net;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: e1000e, 3.4.1, , jumbo frames are not working
> 
> Just tried to enable a larger MTU on the interface, and it failed, even
> after I tried to disable both of the offloads mentioned in dmesg.
> 
> 05:00.0 Ethernet controller: Intel Corporation 80003ES2LAN Gigabit
> Ethernet Controller (Copper) (rev 01)
> 05:00.1 Ethernet controller: Intel Corporation 80003ES2LAN Gigabit
> Ethernet Controller (Copper) (rev 01)
> 
> L2TP ~ # ethtool -i eth0
> driver: e1000e
> version: 1.9.5-k
> firmware-version: 1.0-0
> bus-info: 0000:05:00.0
> supports-statistics: yes
> supports-test: yes
> supports-eeprom-access: yes
> supports-register-dump: yes
> 
> Here is what i did:
> ifconfig eth1 mtu 2000
> got dmesg:
> [ 9160.679354] e1000e 0000:05:00.0: eth0: Jumbo frames cannot be
> enabled when both receive checksum offload and receive hashing are
> enabled.  Disable one of the receive offload features before enabling
> jumbos.
> 
> ethtool -K eth1 rxhash off
> dmesg:
> [ 9194.208856] e1000e 0000:05:00.1: eth1: Reset adapter
> [ 9197.295425] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow
> Control: None
> 
> Lost connectivity for a few seconds, then again I tried:
> ifconfig eth1 mtu 2000
> 
> dmesg:
> [ 9207.797616] e1000e 0000:05:00.0: eth0: Jumbo frames cannot be
> enabled when both receive checksum offload and receive hashing are
> enabled.  Disable one of the receive offload features before enabling
> jumbos.
> 
> ethtool -K eth1 rx off
> [ 9222.398034] e1000e 0000:05:00.1: eth1: Reset adapter
> [ 9225.497550] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow
> Control: None
> 
> Again, tried: ifconfig eth1 mtu 2000
> dmesg:
> [ 9254.795445] e1000e 0000:05:00.0: eth0: Jumbo frames cannot be
> enabled when both receive checksum offload and receive hashing are
> enabled.  Disable one of the receive offload features before enabling
> jumbos.
> 
> L2TP ~ # ethtool -k eth1
> Offload parameters for eth1:
> rx-checksumming: off
> tx-checksumming: on
> scatter-gather: on
> tcp-segmentation-offload: on
> udp-fragmentation-offload: off
> generic-segmentation-offload: on
> generic-receive-offload: on
> large-receive-offload: off
> rx-vlan-offload: on
> tx-vlan-offload: on
> ntuple-filters: off
> receive-hashing: off
> 
> L2TP ~ # ethtool -d eth1
> MAC Registers
> -------------
> 0x00000: CTRL (Device control register)  0x40144241
>        Endian mode (buffers):             little
>        Link reset:                        normal
>        Set link up:                       1
>        Invert Loss-Of-Signal:             no
>        Receive flow control:              disabled
>        Transmit flow control:             disabled
>        VLAN mode:                         enabled
>        Auto speed detect:                 disabled
>        Speed select:                      1000Mb/s
>        Force speed:                       no
>        Force duplex:                      no
> 0x00008: STATUS (Device status register) 0x02080787
>        Duplex:                            full
>        Link up:                           link config
>        TBI mode:                          disabled
>        Link speed:                        1000Mb/s
>        Bus type:                          PCI Express
>        Port number:                       1
> 0x00100: RCTL (Receive control register) 0x04008002
>        Receiver:                          enabled
>        Store bad packets:                 disabled
>        Unicast promiscuous:               disabled
>        Multicast promiscuous:             disabled
>        Long packet:                       disabled
>        Descriptor minimum threshold size: 1/2
>        Broadcast accept mode:             accept
>        VLAN filter:                       disabled
>        Canonical form indicator:          disabled
>        Discard pause frames:              filtered
>        Pass MAC control frames:           don't pass
>        Receive buffer size:               2048
> 0x02808: RDLEN (Receive desc length)     0x00001000
> 0x02810: RDH   (Receive desc head)       0x00000046
> 0x02818: RDT   (Receive desc tail)       0x00000040
> 0x02820: RDTR  (Receive delay timer)     0x00000020
> 0x00400: TCTL (Transmit ctrl register)   0x3103F0FA
>        Transmitter:                       enabled
>        Pad short packets:                 enabled
>        Software XOFF Transmission:        disabled
>        Re-transmit on late collision:     enabled
> 0x03808: TDLEN (Transmit desc length)    0x00001000
> 0x03810: TDH   (Transmit desc head)      0x00000098
> 0x03818: TDT   (Transmit desc tail)      0x00000099
> 0x03820: TIDV  (Transmit delay timer)    0x00000008
> PHY type:                                unknown
> 
> 
> ---
> Denys Fedoryshchenko, Network Engineer, Virtual ISP S.A.L.

Thanks for the report.  We are aware of the issue and have already begun
working on a patch to resolve the issue.

Bruce.


^ permalink raw reply

* Re: [PATCH] usbnet: Activate the halt interrupt endpoint to fix endless "XactErr" error
From: Huajun Li @ 2012-06-08 15:24 UTC (permalink / raw)
  To: Alan Stern; +Cc: Ming Lei, David S. Miller, linux-usb, netdev
In-Reply-To: <Pine.LNX.4.44L0.1206080940350.1360-100000@iolanthe.rowland.org>

On Fri, Jun 8, 2012 at 9:43 PM, Alan Stern <stern@rowland.harvard.edu> wrote:
> On Fri, 8 Jun 2012, Huajun Li wrote:
>
>> > If so, looks mistaken value is returned from the host controller driver,
>> > but not sure if your device is buggy. What is your host controller?
>> >
>> Nothing related to HC.
>> I tried to find out the endpoint state, but found it was halted. I think
>> this is the root cause.
>
> No, it isn't.  Endpoint halt causes a -EPIPE error, not -EPROTO.
> -EPROTO indicates that the device's firmware has crashed.
>
>> What's your opinion to handle "-EPROTO" error in usbnet.c?
>> Please check usbnet.c again, when "-EPROTO" occurs, it just prints
>> error msg and re-submits the interrupt URB, and then causes endless
>> "XactErr" error msg.
>
> One possibility is to wait for a little while before resubmitting the
> URB, and after 10 failures in a row, attempt a reset.
>
Alan, thanks for your proposal.
You mean reset the device after 10 failures, right ?

BTW, I already tried sleeping for several seconds before submitting the 1st
interrupt URB, but it did not work.

^ permalink raw reply

* Re: [PATCH] bonding: Fix corrupted queue_mapping
From: Eric Dumazet @ 2012-06-08 15:11 UTC (permalink / raw)
  To: Tom Herbert; +Cc: David Miller, netdev
In-Reply-To: <CA+mtBx9xpy6Ly=3kiBc+sdHqFfsO5KRK8O=VAXTb3rofAgm2ow@mail.gmail.com>

On Fri, 2012-06-08 at 08:04 -0700, Tom Herbert wrote:

> Maybe the fundamental issue is that the queue mappings only allow for
> one level of multi queue device.  It might be better if bonding didn't
> have one and dev_pick_tx did the right thing (use xps on bonding
> maybe).

bonding misuses multiqueue infrastructure to divert frames on selected
slaves, or maybe I am wrong.

^ permalink raw reply

* Re: [PATCH] bonding: Fix corrupted queue_mapping
From: Tom Herbert @ 2012-06-08 15:04 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev
In-Reply-To: <1339140238.6001.42.camel@edumazet-glaptop>

> I must say I don't understand dev_pick_tx() anymore.
>
> It seems to ignore skb->queue_mapping (unless device provides its own
> ndo_select_queue() and this function is aware of skb->queue_mapping, as
> correctly done in ixgbe)
>
> So commit fff3269907897ee (tcp: reflect SYN queue_mapping into SYNACK
> packets) works on ixgbe, but probably not on other multiqueue devices.
>
> This sounds like a regression to me.
>
Maybe the fundamental issue is that the queue mappings only allow for
one level of multi queue device.  It might be better if bonding didn't
have one and dev_pick_tx did the right thing (use xps on bonding
maybe).

Tom

>
>

^ permalink raw reply

* [PATCH net-next] af_unix: speedup /proc/net/unix
From: Eric Dumazet @ 2012-06-08 15:03 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Steven Whitehouse, Pavel Emelyanov

From: Eric Dumazet <edumazet@google.com>

/proc/net/unix has quadratic behavior, and can hold unix_table_lock for
a while if a high number of unix sockets are alive. (90 ms for 200k
sockets...)

We already have a hash table, so it's quite easy to use it.

The problem is that unbound sockets are still hashed in a single hash slot
(unix_socket_table[UNIX_HASH_SIZE])

This patch also spreads unbound sockets to 256 hash slots, to speedup
both /proc/net/unix and unix_diag.

Time to read /proc/net/unix with 200k unix sockets :
(time dd if=/proc/net/unix of=/dev/null bs=4k)

before : 520 secs
after : 2 secs

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
---
 include/net/af_unix.h |    3 -
 net/unix/af_unix.c    |  110 +++++++++++++++++++++++-----------------
 net/unix/diag.c       |    6 +-
 3 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 2ee33da..b5f8988 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -14,10 +14,11 @@ extern struct sock *unix_get_socket(struct file *filp);
 extern struct sock *unix_peer_get(struct sock *);
 
 #define UNIX_HASH_SIZE	256
+#define UNIX_HASH_BITS	8
 
 extern unsigned int unix_tot_inflight;
 extern spinlock_t unix_table_lock;
-extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 
 struct unix_address {
 	atomic_t	refcnt;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 641f2e4..cf83f6b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -115,15 +115,24 @@
 #include <net/checksum.h>
 #include <linux/security.h>
 
-struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 EXPORT_SYMBOL_GPL(unix_socket_table);
 DEFINE_SPINLOCK(unix_table_lock);
 EXPORT_SYMBOL_GPL(unix_table_lock);
 static atomic_long_t unix_nr_socks;
 
-#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])
 
-#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
+static struct hlist_head *unix_sockets_unbound(void *addr)
+{
+	unsigned long hash = (unsigned long)addr;
+
+	hash ^= hash >> 16;
+	hash ^= hash >> 8;
+	hash %= UNIX_HASH_SIZE;
+	return &unix_socket_table[UNIX_HASH_SIZE + hash];
+}
+
+#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 
 #ifdef CONFIG_SECURITY_NETWORK
 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
@@ -645,7 +654,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
-	unix_insert_socket(unix_sockets_unbound, sk);
+	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
 		atomic_long_dec(&unix_nr_socks);
@@ -2239,47 +2248,58 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 }
 
 #ifdef CONFIG_PROC_FS
-static struct sock *first_unix_socket(int *i)
-{
-	for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
-	}
-	return NULL;
-}
 
-static struct sock *next_unix_socket(int *i, struct sock *s)
-{
-	struct sock *next = sk_next(s);
-	/* More in this chain? */
-	if (next)
-		return next;
-	/* Look for next non-empty chain. */
-	for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
-		if (!hlist_empty(&unix_socket_table[*i]))
-			return __sk_head(&unix_socket_table[*i]);
-	}
-	return NULL;
-}
+#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
 
 struct unix_iter_state {
 	struct seq_net_private p;
-	int i;
 };
 
-static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
+static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
 {
-	struct unix_iter_state *iter = seq->private;
-	loff_t off = 0;
-	struct sock *s;
+	unsigned long offset = get_offset(*pos);
+	unsigned long bucket = get_bucket(*pos);
+	struct sock *sk;
+	unsigned long count = 0;
 
-	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
-		if (sock_net(s) != seq_file_net(seq))
+	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
+		if (sock_net(sk) != seq_file_net(seq))
 			continue;
-		if (off == pos)
-			return s;
-		++off;
+		if (++count == offset)
+			break;
 	}
+
+	return sk;
+}
+
+static struct sock *unix_next_socket(struct seq_file *seq,
+				     struct sock *sk,
+				     loff_t *pos)
+{
+	unsigned long bucket;
+
+	while (sk > (struct sock *)SEQ_START_TOKEN) {
+		sk = sk_next(sk);
+		if (!sk)
+			goto next_bucket;
+		if (sock_net(sk) == seq_file_net(seq))
+			return sk;
+	}
+
+	do {
+		sk = unix_from_bucket(seq, pos);
+		if (sk)
+			return sk;
+
+next_bucket:
+		bucket = get_bucket(*pos) + 1;
+		*pos = set_bucket_offset(bucket, 1);
+	} while (bucket < ARRAY_SIZE(unix_socket_table));
+
 	return NULL;
 }
 
@@ -2287,22 +2307,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(unix_table_lock)
 {
 	spin_lock(&unix_table_lock);
-	return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
+		return NULL;
+
+	return unix_next_socket(seq, NULL, pos);
 }
 
 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct unix_iter_state *iter = seq->private;
-	struct sock *sk = v;
 	++*pos;
-
-	if (v == SEQ_START_TOKEN)
-		sk = first_unix_socket(&iter->i);
-	else
-		sk = next_unix_socket(&iter->i, sk);
-	while (sk && (sock_net(sk) != seq_file_net(seq)))
-		sk = next_unix_socket(&iter->i, sk);
-	return sk;
+	return unix_next_socket(seq, v, pos);
 }
 
 static void unix_seq_stop(struct seq_file *seq, void *v)
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 47d3002..7e8a24b 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -195,7 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	num = s_num = cb->args[1];
 
 	spin_lock(&unix_table_lock);
-	for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) {
+	for (slot = s_slot;
+	     slot < ARRAY_SIZE(unix_socket_table);
+	     s_num = 0, slot++) {
 		struct sock *sk;
 		struct hlist_node *node;
 
@@ -228,7 +230,7 @@ static struct sock *unix_lookup_by_ino(int ino)
 	struct sock *sk;
 
 	spin_lock(&unix_table_lock);
-	for (i = 0; i <= UNIX_HASH_SIZE; i++) {
+	for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) {
 		struct hlist_node *node;
 
 		sk_for_each(sk, node, &unix_socket_table[i])

^ permalink raw reply related

* e1000e, 3.4.1, , jumbo frames are not working
From: Denys Fedoryshchenko @ 2012-06-08 14:59 UTC (permalink / raw)
  To: jeffrey.t.kirsher, rkagan, stable, jesse.brandeburg,
	bruce.w.allan, carolyn.wyborny, donald.c.skidmore, gregory.v.rose,
	peter.p.waskiewicz.jr, alexander.h.duyck, john.ronciak, dnelson,
	e1000-devel, netdev, linux-kernel

I just tried to enable a larger MTU on the interface, and it failed even after
I disabled both of the offloads mentioned in dmesg.

05:00.0 Ethernet controller: Intel Corporation 80003ES2LAN Gigabit 
Ethernet Controller (Copper) (rev 01)
05:00.1 Ethernet controller: Intel Corporation 80003ES2LAN Gigabit 
Ethernet Controller (Copper) (rev 01)

L2TP ~ # ethtool -i eth0
driver: e1000e
version: 1.9.5-k
firmware-version: 1.0-0
bus-info: 0000:05:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: yes
supports-register-dump: yes

Here is what i did:
ifconfig eth1 mtu 2000
got dmesg:
[ 9160.679354] e1000e 0000:05:00.0: eth0: Jumbo frames cannot be 
enabled when both receive checksum offload and receive hashing are 
enabled.  Disable one of the receive offload features before enabling 
jumbos.

ethtool -K eth1 rxhash off
dmesg:
[ 9194.208856] e1000e 0000:05:00.1: eth1: Reset adapter
[ 9197.295425] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow 
Control: None

Lost connectivity for few seconds, then again i tried to:
ifconfig eth1 mtu 2000

dmesg:
[ 9207.797616] e1000e 0000:05:00.0: eth0: Jumbo frames cannot be 
enabled when both receive checksum offload and receive hashing are 
enabled.  Disable one of the receive offload features before enabling 
jumbos.

ethtool -K eth1 rx off
[ 9222.398034] e1000e 0000:05:00.1: eth1: Reset adapter
[ 9225.497550] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow 
Control: None

Again, tried: ifconfig eth1 mtu 2000
dmesg:
[ 9254.795445] e1000e 0000:05:00.0: eth0: Jumbo frames cannot be 
enabled when both receive checksum offload and receive hashing are 
enabled.  Disable one of the receive offload features before enabling 
jumbos.

L2TP ~ # ethtool -k eth1
Offload parameters for eth1:
rx-checksumming: off
tx-checksumming: on
scatter-gather: on
tcp-segmentation-offload: on
udp-fragmentation-offload: off
generic-segmentation-offload: on
generic-receive-offload: on
large-receive-offload: off
rx-vlan-offload: on
tx-vlan-offload: on
ntuple-filters: off
receive-hashing: off

L2TP ~ # ethtool -d eth1
MAC Registers
-------------
0x00000: CTRL (Device control register)  0x40144241
       Endian mode (buffers):             little
       Link reset:                        normal
       Set link up:                       1
       Invert Loss-Of-Signal:             no
       Receive flow control:              disabled
       Transmit flow control:             disabled
       VLAN mode:                         enabled
       Auto speed detect:                 disabled
       Speed select:                      1000Mb/s
       Force speed:                       no
       Force duplex:                      no
0x00008: STATUS (Device status register) 0x02080787
       Duplex:                            full
       Link up:                           link config
       TBI mode:                          disabled
       Link speed:                        1000Mb/s
       Bus type:                          PCI Express
       Port number:                       1
0x00100: RCTL (Receive control register) 0x04008002
       Receiver:                          enabled
       Store bad packets:                 disabled
       Unicast promiscuous:               disabled
       Multicast promiscuous:             disabled
       Long packet:                       disabled
       Descriptor minimum threshold size: 1/2
       Broadcast accept mode:             accept
       VLAN filter:                       disabled
       Canonical form indicator:          disabled
       Discard pause frames:              filtered
       Pass MAC control frames:           don't pass
       Receive buffer size:               2048
0x02808: RDLEN (Receive desc length)     0x00001000
0x02810: RDH   (Receive desc head)       0x00000046
0x02818: RDT   (Receive desc tail)       0x00000040
0x02820: RDTR  (Receive delay timer)     0x00000020
0x00400: TCTL (Transmit ctrl register)   0x3103F0FA
       Transmitter:                       enabled
       Pad short packets:                 enabled
       Software XOFF Transmission:        disabled
       Re-transmit on late collision:     enabled
0x03808: TDLEN (Transmit desc length)    0x00001000
0x03810: TDH   (Transmit desc head)      0x00000098
0x03818: TDT   (Transmit desc tail)      0x00000099
0x03820: TIDV  (Transmit delay timer)    0x00000008
PHY type:                                unknown


---
Denys Fedoryshchenko, Network Engineer, Virtual ISP S.A.L.

^ permalink raw reply

* Re: Difficulties to get 1Gbps on be2net ethernet card
From: Jean-Michel Hautbois @ 2012-06-08 14:53 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Sathya.Perla, netdev
In-Reply-To: <1339143759.6001.53.camel@edumazet-glaptop>

2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
> On Fri, 2012-06-08 at 10:14 +0200, Jean-Michel Hautbois wrote:
>> 2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
>> > On Thu, 2012-06-07 at 14:54 +0200, Jean-Michel Hautbois wrote:
>> >
>> >> eth1      Link encap:Ethernet  HWaddr 68:b5:99:b9:8d:d4
>> >>           UP BROADCAST RUNNING SLAVE MULTICAST  MTU:4096  Metric:1
>> >>           RX packets:0 errors:0 dropped:0 overruns:0 frame:0
>> >>           TX packets:15215387 errors:0 dropped:0 overruns:0 carrier:0
>> >>           collisions:0 txqueuelen:1000
>> >>           RX bytes:0 (0.0 B)  TX bytes:61476524359 (57.2 GiB)
>> >
>> >> qdisc mq 0: dev eth1 root
>> >>  Sent 61476524359 bytes 15215387 pkt (dropped 45683472, overlimits 0
>> >> requeues 17480)
>> >
>> > OK, and "tc -s -d cl show dev eth1"
>> >
>> > (How many queues are really used)
>> >
>> >
>> >
>>
>> tc -s -d cl show dev eth1
>> class mq :1 root
>>  Sent 9798071746 bytes 2425410 pkt (dropped 3442405, overlimits 0 requeues 2747)
>>  backlog 0b 0p requeues 2747
>> class mq :2 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> class mq :3 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> class mq :4 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> class mq :5 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> class mq :6 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> class mq :7 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>> class mq :8 root
>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>  backlog 0b 0p requeues 0
>
>
> Do you have the same distribution on old kernels as well ?
> (ie only queue 0 is used)
>
>
>

On the old kernel, there is nothing returned by this command.

JM

^ permalink raw reply

* Re: [PATCH] usbnet: Activate the halt interrupt endpoint to fix endless "XactErr" error
From: Ming Lei @ 2012-06-08 13:56 UTC (permalink / raw)
  To: Huajun Li; +Cc: David S. Miller, linux-usb, netdev
In-Reply-To: <CA+v9cxbogzS7sVChqkRMgDUcs=aJgNDRH8ydoQ-_htfS2t52gQ@mail.gmail.com>

On Fri, Jun 8, 2012 at 2:24 PM, Huajun Li <huajun.li.lee@gmail.com> wrote:
> On Fri, Jun 8, 2012 at 1:22 PM, Ming Lei <tom.leiming@gmail.com> wrote:
>> If so, looks mistaken value is returned from the host controller driver,
>> but not sure if your device is buggy. What is your host controller?
>>
> Nothing related to HC.
> I tried to find out the endpoint state, but found it was halt. I think
> this is the root cause.

I mean that your HCD should not return -EPROTO if only the interrupt
endpoint's HALT feature is set, and it should return -EPIPE.

>
>> Also suppose your device is buggy, and the correct solution should
>> be adding a quirk for the driver to clear halt before the 1st submit status
>> urb.
>>
> I ever worked out a patch just as you said and it could work.
> However, if this can be fixed by common framework just like usbnet.c,
> and there is no sideeffect, then why not.

There is side effect, at least sending out the command of
clear feature(HALT) is mistaken in logic if  -EPROTO is returned for
the endpoint.

>>
>> I just have tried to switch configuration by sysfs interface on the g_multi
>> and don't trigger the error.
>>
> The driver is common one, but not just for a specific device.

The problem is that your device is a specific buggy device, and the interrupt
endpoint shouldn't be set HALT after SetConfiguration(), see 9.4.5 of USB 2.0
spec.

So it is reasonable to add a quirk to fix the problem for the device, that has
document benefits, also considered that the device is a very specific case.

>
>>>
>>>> Is the "XactErr" msg printed just after switching to cdc_ether interface
>>>> by changing configuration?
>>>>
>>>
>>> Yes, just as I mentioned in my original email.
>>> And it did not work even I removed the driver and re-installed it.
>>>
>>>>> Maybe this is a common issue, so fix it by activating the endpoint
>>>>> once the error occurs.
>>>>>
>>>>> Signed-off-by: Huajun Li <huajun.li.lee@gmail.com>
>>>>> ---
>>>>>  drivers/net/usb/usbnet.c   |   33 +++++++++++++++++++++++++++++++++
>>>>>  include/linux/usb/usbnet.h |   15 ++++++++-------
>>>>>  2 files changed, 41 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
>>>>> index 9f58330..f13922b 100644
>>>>> --- a/drivers/net/usb/usbnet.c
>>>>> +++ b/drivers/net/usb/usbnet.c
>>>>> @@ -537,6 +537,11 @@ static void intr_complete (struct urb *urb)
>>>>>                          "intr shutdown, code %d\n", status);
>>>>>                return;
>>>>>
>>>>> +       case -EPIPE:
>>>>> +       case -EPROTO:
>>>>
>>>> It is good to handle EPIPE error here, but looks it is no sense to
>>>> clear halt for
>>>> bus transfer failure. At least, no clear halt is done for returning -EPROTO from
>>>> rx/tx transfer currently.
>>>
>>> Just as I said above, there is different issue can cause -EPROTO, at
>>> least, for my case it is because the interrupt endpoint is not active.
>>> If the error occurs, the driver need try to fix it instead of just
>>> printing an error msg.
>>
>> One problem in your patch is that if the  -EPROTO is caused by bad cable
>> or interference, clean halt may not be sent to device successfully, and will
>> cause -EPROTO further.
>
> What's your opinion to handle "-EPROTO" error in usbnet.c?
> Please check usbnet.c again, when "-EPROTO" occurs, it just prints
> error msg and re-submit the interrupt URB, and then causes endless
> "XactErr" error msg.

Yes, it should be bug, but clear feature(HALT) is not correct for the situation.

>
> At least, this patch lets the driver try to fix the error before
> resubmit the URB.
>
>>
>>>
>>>>
>>>>> +               usbnet_defer_kevent(dev, EVENT_STS_HALT);
>>>>> +               return;
>>>>> +
>>>>>        /* NOTE:  not throttling like RX/TX, since this endpoint
>>>>>         * already polls infrequently
>>>>>         */
>>>>> @@ -967,6 +972,34 @@ fail_halt:
>>>>>                }
>>>>>        }
>>>>>
>>>>> +       if (test_bit(EVENT_STS_HALT, &dev->flags)) {
>>>>> +               unsigned pipe;
>>>>> +               struct usb_endpoint_descriptor *desc;
>>>>> +
>>>>> +               desc = &dev->status->desc;
>>>>> +               pipe = usb_rcvintpipe(dev->udev,
>>>>> +                       desc->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
>>>>> +               status = usb_autopm_get_interface(dev->intf);
>>>>> +               if (status < 0)
>>>>> +                       goto fail_sts;
>>>>> +               status = usb_clear_halt(dev->udev, pipe);
>>>>> +               usb_autopm_put_interface(dev->intf);
>>>>> +
>>>>> +               if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) {
>>>>> +fail_sts:
>>>>> +                       netdev_err(dev->net,
>>>>> +                               "can't clear intr halt, status %d\n", status);
>>>>> +               } else {
>>>>> +                       clear_bit(EVENT_STS_HALT, &dev->flags);
>>>>> +                       memset(dev->interrupt->transfer_buffer, 0,
>>>>> +                               dev->interrupt->transfer_buffer_length);
>>>>
>>>> The above is not necessary.
>>>
>>> Ming, do you mean the above one line, or others ?
>>
>> Yes, it is the above line.
>>
>
> Then not sure whether the buffer will be tainted without this line.

It isn't necessary,  the buffer should include valid data if URB->status
returns zero.


Thanks,
-- 
Ming Lei

^ permalink raw reply

* Re: [PATCH] usbnet: Activate the halt interrupt endpoint to fix endless "XactErr" error
From: Alan Stern @ 2012-06-08 13:43 UTC (permalink / raw)
  To: Huajun Li
  Cc: Ming Lei, David S. Miller, linux-usb-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <CA+v9cxbogzS7sVChqkRMgDUcs=aJgNDRH8ydoQ-_htfS2t52gQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

On Fri, 8 Jun 2012, Huajun Li wrote:

> > If so, looks mistaken value is returned from the host controller driver,
> > but not sure if your device is buggy. What is your host controller?
> >
> Nothing related to HC.
> I tried to find out the endpoint state, but found it was halt. I think
> this is the root cause.

No, it isn't.  Endpoint halt causes a -EPIPE error, not -EPROTO.  
-EPROTO indicates that the device's firmware has crashed.

> What's your opinion to handle "-EPROTO" error in usbnet.c?
> Please check usbnet.c again, when "-EPROTO" occurs, it just prints
> error msg and re-submit the interrupt URB, and then causes endless
> "XactErr" error msg.

One possibility is to wait for a little while before resubmitting the 
URB, and after 10 failures in a row, attempt a reset.

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next v3 1/2] inetpeer: add namespace support for inetpeer
From: Gao feng @ 2012-06-08 13:26 UTC (permalink / raw)
  To: Gao feng; +Cc: davem, eric.dumazet, steffen.klassert, netdev, containers
In-Reply-To: <1339154441-4513-1-git-send-email-gaofeng@cn.fujitsu.com>

于 2012年06月08日 19:20, Gao feng 写道:
> now inetpeer doesn't support namespace,the information will
> be leaking across namespace.
> 
> this patch move the global vars v4_peers and v6_peers to
> netns_ipv4 and netns_ipv6 as a field peers.
> 
> add struct pernet_operations inetpeer_ops to initial pernet
> inetpeer data.
> 
> and change family_to_base and inet_getpeer to support namespace.
> 
> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
> ---

It seems I sent two v3 patches.
Please ignore this duplicate. There must be something wrong with my mail client.

Thanks Eric for reminding me this.

^ permalink raw reply

* re: bnx2x: Added EEE support
From: Dan Carpenter @ 2012-06-08 13:09 UTC (permalink / raw)
  To: yuvalmin; +Cc: netdev

Hello Yuval Mintz,

The patch c8c60d88c59c: "bnx2x: Added EEE support" from Jun 6, 2012, 
leads to the following warning:
drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c:10121 
bnx2x_848x3_config_init()
	 error: buffer overflow 'params->req_duplex' 2 <= 4

drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
 10032          actual_phy_selection = bnx2x_phy_selection(params);
 10033  
 10034          switch (actual_phy_selection) {
 10035          case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT:
 10036                  /* Do nothing. Essentially this is like the priority copper */
 10037                  break;
 10038          case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
 10039                  val |= MDIO_CTL_REG_84823_MEDIA_PRIORITY_COPPER;
 10040                  break;
 10041          case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
We assume "actual_phy_selection" can be 4 here.

 10042                  val |= MDIO_CTL_REG_84823_MEDIA_PRIORITY_FIBER;
 10043                  break;

[snip]

 10121                  if ((params->req_duplex[actual_phy_selection] == DUPLEX_FULL) &&
                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This array only has 2 elements so 4 is beyond the end.

 10122                      (params->eee_mode & EEE_MODE_ADV_LPI) &&
 10123                      (bnx2x_eee_calc_timer(params) ||
 10124                       !(params->eee_mode & EEE_MODE_ENABLE_LPI)))
 10125                          rc = bnx2x_8483x_enable_eee(phy, params, vars);
 10126                  else

regards,
dan carpenter

^ permalink raw reply

* Re: [patch] net/ethernet: ks8851_mll unregister_netdev() before freeing
From: Dan Carpenter @ 2012-06-08 12:53 UTC (permalink / raw)
  To: David Miller; +Cc: raffaele.recalcati, netdev, kernel-janitors
In-Reply-To: <20120607.131544.346298142594197233.davem@davemloft.net>

On Thu, Jun 07, 2012 at 01:15:44PM -0700, David Miller wrote:
> From: Dan Carpenter <dan.carpenter@oracle.com>
> Date: Wed, 6 Jun 2012 09:31:29 +0300
> 
> > We added another error condition here, but if we were to hit it then
> > we need to unregister_netdev() before doing the free_netdev().
> > Otherwise we would hit the BUG_ON() in free_netdev():
> > 
> > 	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
> > 
> > Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
> 
> Applied, but please be explicit that your patch is against one
> tree or another.  This one was for net-next only, but I tried
> initially to apply it to net which failed.

Sorry about that.  Will be more careful in the future.

regards,
dan carpenter

^ permalink raw reply

* [PATCH net-next v3 2/2] inetpeer: add parameter net for inet_getpeer_v4, v6
From: Gao feng @ 2012-06-08 11:21 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
In-Reply-To: <1339154441-4513-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>

add struct net as a parameter of inet_getpeer_v[4,6],
use net to replace &init_net.

and modify some places to provide net for inet_getpeer_v[4,6]

Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 include/net/inetpeer.h   |   12 ++++++++----
 net/ipv4/inet_fragment.c |    2 +-
 net/ipv4/ip_fragment.c   |    6 +++++-
 net/ipv4/route.c         |    8 +++++---
 net/ipv4/tcp_ipv4.c      |    6 ++++--
 net/ipv6/route.c         |    3 ++-
 net/ipv6/tcp_ipv6.c      |    6 ++++--
 7 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index fef9dfa..20e67db 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -79,22 +79,26 @@ struct inet_peer *inet_getpeer(struct net *net,
 			       const struct inetpeer_addr *daddr,
 			       int create);
 
-static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
+static inline struct inet_peer *inet_getpeer_v4(struct net *net,
+						__be32 v4daddr,
+						int create)
 {
 	struct inetpeer_addr daddr;
 
 	daddr.addr.a4 = v4daddr;
 	daddr.family = AF_INET;
-	return inet_getpeer(&init_net, &daddr, create);
+	return inet_getpeer(net, &daddr, create);
 }
 
-static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
+static inline struct inet_peer *inet_getpeer_v6(struct net *net,
+						const struct in6_addr *v6daddr,
+						int create)
 {
 	struct inetpeer_addr daddr;
 
 	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(&init_net, &daddr, create);
+	return inet_getpeer(net, &daddr, create);
 }
 
 /* can be called from BH context or outside */
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ff2a51..85190e6 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	if (q == NULL)
 		return NULL;
 
+	q->net = nf;
 	f->constructor(q, arg);
 	atomic_add(f->qsize, &nf->mem);
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	q->net = nf;
 
 	return q;
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9dbd3dd..22c6bab 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
+	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
+					       frags);
+	struct net *net = container_of(ipv4, struct net, ipv4);
+
 	struct ip4_create_arg *arg = a;
 
 	qp->protocol = arg->iph->protocol;
@@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
 	qp->peer = sysctl_ipfrag_max_dist ?
-		inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
+		inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL;
 }
 
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 006c21c..2c9f73f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1328,9 +1328,10 @@ static u32 rt_peer_genid(void)
 
 void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(daddr, create);
+	peer = inet_getpeer_v4(net, daddr, create);
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
@@ -1694,7 +1695,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
 	unsigned short est_mtu = 0;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(iph->daddr, 1);
+	peer = inet_getpeer_v4(net, iph->daddr, 1);
 	if (peer) {
 		unsigned short mtu = new_mtu;
 
@@ -1935,6 +1936,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			    struct fib_info *fi)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 	int create = 0;
 
@@ -1944,7 +1946,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
 		create = 1;
 
-	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+	rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create);
 	if (peer) {
 		rt->rt_peer_genid = rt_peer_genid();
 		if (inet_metrics_new(peer))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3d9c1a4..f485b45 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1824,11 +1824,12 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
 {
 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct inet_peer *peer;
 
 	if (!rt ||
 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
-		peer = inet_getpeer_v4(inet->inet_daddr, 1);
+		peer = inet_getpeer_v4(net, inet->inet_daddr, 1);
 		*release_it = true;
 	} else {
 		if (!rt->peer)
@@ -1844,8 +1845,9 @@ EXPORT_SYMBOL(tcp_v4_get_peer);
 void *tcp_v4_tw_get_peer(struct sock *sk)
 {
 	const struct inet_timewait_sock *tw = inet_twsk(sk);
+	struct net *net = sock_net(sk);
 
-	return inet_getpeer_v4(tw->tw_daddr, 1);
+	return inet_getpeer_v4(net, tw->tw_daddr, 1);
 }
 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 999a982..4eca013 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -306,9 +306,10 @@ static u32 rt6_peer_genid(void)
 
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+	peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
 	else
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8075825..1a9cdd0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1736,11 +1736,12 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 {
 	struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
 	struct inet_peer *peer;
 
 	if (!rt ||
 	    !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
-		peer = inet_getpeer_v6(&np->daddr, 1);
+		peer = inet_getpeer_v6(net, &np->daddr, 1);
 		*release_it = true;
 	} else {
 		if (!rt->rt6i_peer)
@@ -1756,11 +1757,12 @@ static void *tcp_v6_tw_get_peer(struct sock *sk)
 {
 	const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
 	const struct inet_timewait_sock *tw = inet_twsk(sk);
+	struct net *net = sock_net(sk);
 
 	if (tw->tw_family == AF_INET)
 		return tcp_v4_tw_get_peer(sk);
 
-	return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+	return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1);
 }
 
 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next v3 1/2] inetpeer: add namespace support for inetpeer
From: Gao feng @ 2012-06-08 11:20 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Currently inetpeer doesn't support namespaces, so the information
leaks across namespaces.

this patch move the global vars v4_peers and v6_peers to
netns_ipv4 and netns_ipv6 as a field peers.

add struct pernet_operations inetpeer_ops to initial pernet
inetpeer data.

and change family_to_base and inet_getpeer to support namespace.

Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 include/net/inetpeer.h   |   10 ++++---
 include/net/netns/ipv4.h |    2 +-
 include/net/netns/ipv6.h |    1 +
 net/ipv4/inetpeer.c      |   68 +++++++++++++++++++++++++++++++++------------
 net/ipv4/route.c         |    2 +-
 5 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 2040bff..fef9dfa 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -75,7 +75,9 @@ static inline bool inet_metrics_new(const struct inet_peer *p)
 }
 
 /* can be called with or without local BH being disabled */
-struct inet_peer	*inet_getpeer(const struct inetpeer_addr *daddr, int create);
+struct inet_peer *inet_getpeer(struct net *net,
+			       const struct inetpeer_addr *daddr,
+			       int create);
 
 static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
 {
@@ -83,7 +85,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
 
 	daddr.addr.a4 = v4daddr;
 	daddr.family = AF_INET;
-	return inet_getpeer(&daddr, create);
+	return inet_getpeer(&init_net, &daddr, create);
 }
 
 static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
@@ -92,14 +94,14 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 
 	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(&daddr, create);
+	return inet_getpeer(&init_net, &daddr, create);
 }
 
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
-extern void inetpeer_invalidate_tree(int family);
+extern void inetpeer_invalidate_tree(struct net *net, int family);
 
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index bbd023a..227f0cd 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -30,7 +30,7 @@ struct netns_ipv4 {
 
 	struct sock		**icmp_sk;
 	struct sock		*tcp_sock;
-
+	struct inet_peer_base	*peers;
 	struct netns_frags	frags;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*iptable_filter;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b42be53..df0a545 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -33,6 +33,7 @@ struct netns_ipv6 {
 	struct netns_sysctl_ipv6 sysctl;
 	struct ipv6_devconf	*devconf_all;
 	struct ipv6_devconf	*devconf_dflt;
+	struct inet_peer_base	*peers;
 	struct netns_frags	frags;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*ip6table_filter;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index dfba343..1c85273 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -88,18 +88,6 @@ struct inet_peer_base {
 	int		total;
 };
 
-static struct inet_peer_base v4_peers = {
-	.root		= peer_avl_empty_rcu,
-	.lock		= __SEQLOCK_UNLOCKED(v4_peers.lock),
-	.total		= 0,
-};
-
-static struct inet_peer_base v6_peers = {
-	.root		= peer_avl_empty_rcu,
-	.lock		= __SEQLOCK_UNLOCKED(v6_peers.lock),
-	.total		= 0,
-};
-
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
 /* Exported for sysctl_net_ipv4.  */
@@ -153,6 +141,46 @@ static void inetpeer_gc_worker(struct work_struct *work)
 	schedule_delayed_work(&gc_work, gc_delay);
 }
 
+static int __net_init inetpeer_net_init(struct net *net)
+{
+	net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base),
+				  GFP_KERNEL);
+	if (net->ipv4.peers == NULL)
+		return -ENOMEM;
+
+	net->ipv4.peers->root = peer_avl_empty_rcu;
+	seqlock_init(&net->ipv4.peers->lock);
+
+	net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base),
+				  GFP_KERNEL);
+	if (net->ipv6.peers == NULL)
+		goto out_ipv6;
+
+	net->ipv6.peers->root = peer_avl_empty_rcu;
+	seqlock_init(&net->ipv6.peers->lock);
+
+	return 0;
+out_ipv6:
+	kfree(net->ipv4.peers);
+	return -ENOMEM;
+}
+
+static void __net_exit inetpeer_net_exit(struct net *net)
+{
+	inetpeer_invalidate_tree(net, AF_INET);
+	kfree(net->ipv4.peers);
+	net->ipv4.peers = NULL;
+
+	inetpeer_invalidate_tree(net, AF_INET6);
+	kfree(net->ipv6.peers);
+	net->ipv6.peers = NULL;
+}
+
+static struct pernet_operations inetpeer_ops = {
+	.init = inetpeer_net_init,
+	.exit = inetpeer_net_exit,
+};
+
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -177,6 +205,7 @@ void __init inet_initpeers(void)
 			NULL);
 
 	INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
+	register_pernet_subsys(&inetpeer_ops);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -401,9 +430,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 	call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 
-static struct inet_peer_base *family_to_base(int family)
+static struct inet_peer_base *family_to_base(struct net *net,
+					     int family)
 {
-	return family == AF_INET ? &v4_peers : &v6_peers;
+	return family == AF_INET ? net->ipv4.peers : net->ipv6.peers;
 }
 
 /* perform garbage collect on all items stacked during a lookup */
@@ -443,10 +473,12 @@ static int inet_peer_gc(struct inet_peer_base *base,
 	return cnt;
 }
 
-struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
+struct inet_peer *inet_getpeer(struct net *net,
+			       const struct inetpeer_addr *daddr,
+			       int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer_base *base = family_to_base(daddr->family);
+	struct inet_peer_base *base = family_to_base(net, daddr->family);
 	struct inet_peer *p;
 	unsigned int sequence;
 	int invalidated, gccnt = 0;
@@ -571,10 +603,10 @@ static void inetpeer_inval_rcu(struct rcu_head *head)
 	schedule_delayed_work(&gc_work, gc_delay);
 }
 
-void inetpeer_invalidate_tree(int family)
+void inetpeer_invalidate_tree(struct net *net, int family)
 {
 	struct inet_peer *old, *new, *prev;
-	struct inet_peer_base *base = family_to_base(family);
+	struct inet_peer_base *base = family_to_base(net, family);
 
 	write_seqlock_bh(&base->lock);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98b30d0..006c21c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -938,7 +938,7 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	inetpeer_invalidate_tree(AF_INET);
+	inetpeer_invalidate_tree(net, AF_INET);
 }
 
 /*
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: Thomas Graf @ 2012-06-08 10:18 UTC (permalink / raw)
  To: davem; +Cc: netdev

Routing of 127/8 is traditionally forbidden, we consider
packets from that address block martian when routing and do
not process corresponding ARP requests.

This is a sane default but renders a huge address space
practically unusable.

The RFC states that no address within the 127/8 block should
ever appear on any network anywhere but it does not forbid
the use of such addresses outside of the loopback device in
particular. They could be used, for example, to address a pool
of virtual guests behind a load balancer.

This patch adds a new interface option 'route_localnet'
enabling routing of the 127/8 address block and processing
of ARP requests on a specific interface.

Note that for the feature to work, the default local route
covering 127/8 dev lo needs to be removed.

Example:
  $ sysctl -w net.ipv4.conf.eth0.route_localnet=1
  $ ip route del 127.0.0.0/8 dev lo table local
  $ ip addr add 127.1.0.1/16 dev eth0
  $ ip route flush cache

Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
 Documentation/networking/ip-sysctl.txt |    5 +++++
 include/linux/inetdevice.h             |    2 ++
 net/ipv4/arp.c                         |    3 ++-
 net/ipv4/devinet.c                     |    5 ++++-
 net/ipv4/route.c                       |   30 +++++++++++++++++++++---------
 5 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 6f896b9..99d0e05 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -862,6 +862,11 @@ accept_local - BOOLEAN
 	local interfaces over the wire and have them accepted properly.
 	default FALSE
 
+route_localnet - BOOLEAN
+	Do not consider loopback addresses as martian source or destination
+	while routing. This enables the use of 127/8 for local routing purposes.
+	default FALSE
+
 rp_filter - INTEGER
 	0 - No source validation.
 	1 - Strict mode as defined in RFC3704 Strict Reverse Path
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 597f4a9..67f9dda 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -38,6 +38,7 @@ enum
 	IPV4_DEVCONF_ACCEPT_LOCAL,
 	IPV4_DEVCONF_SRC_VMARK,
 	IPV4_DEVCONF_PROXY_ARP_PVLAN,
+	IPV4_DEVCONF_ROUTE_LOCALNET,
 	__IPV4_DEVCONF_MAX
 };
 
@@ -131,6 +132,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_PROMOTE_SECONDARIES(in_dev) \
 					IN_DEV_ORCONF((in_dev), \
 						      PROMOTE_SECONDARIES)
+#define IN_DEV_ROUTE_LOCALNET(in_dev)	IN_DEV_ORCONF(in_dev, ROUTE_LOCALNET)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cda37be..2e560f0 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -790,7 +790,8 @@ static int arp_process(struct sk_buff *skb)
  *	Check for bad requests for 127.x.x.x and requests for multicast
  *	addresses.  If this is one such, delete it.
  */
-	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
+	if (ipv4_is_multicast(tip) ||
+	    (!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
 		goto out;
 
 /*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 10e15a1..378c28b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1500,7 +1500,8 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 
 		if (cnf == net->ipv4.devconf_dflt)
 			devinet_copy_dflt_conf(net, i);
-		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
+		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
+		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
 			if ((new_value == 0) && (old_value != 0))
 				rt_cache_flush(net, 0);
 	}
@@ -1617,6 +1618,8 @@ static struct devinet_sysctl_table {
 					      "force_igmp_version"),
 		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
 					      "promote_secondaries"),
+		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
+					      "route_localnet"),
 	},
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98b30d0..7509acc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2023,9 +2023,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		return -EINVAL;
 
 	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
-	    ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
+	    skb->protocol != htons(ETH_P_IP))
 		goto e_inval;
 
+	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
+		if (ipv4_is_loopback(saddr))
+			goto e_inval;
+
 	if (ipv4_is_zeronet(saddr)) {
 		if (!ipv4_is_local_multicast(daddr))
 			goto e_inval;
@@ -2266,8 +2270,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	   by fib_lookup.
 	 */
 
-	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
-	    ipv4_is_loopback(saddr))
+	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
 		goto martian_source;
 
 	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
@@ -2279,9 +2282,17 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_zeronet(saddr))
 		goto martian_source;
 
-	if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
+	if (ipv4_is_zeronet(daddr))
 		goto martian_destination;
 
+	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
+		if (ipv4_is_loopback(daddr))
+			goto martian_destination;
+
+		if (ipv4_is_loopback(saddr))
+			goto martian_source;
+	}
+
 	/*
 	 *	Now we are ready to route packet.
 	 */
@@ -2520,9 +2531,14 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	u16 type = res->type;
 	struct rtable *rth;
 
-	if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+	in_dev = __in_dev_get_rcu(dev_out);
+	if (!in_dev)
 		return ERR_PTR(-EINVAL);
 
+	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
+		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+			return ERR_PTR(-EINVAL);
+
 	if (ipv4_is_lbcast(fl4->daddr))
 		type = RTN_BROADCAST;
 	else if (ipv4_is_multicast(fl4->daddr))
@@ -2533,10 +2549,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;
 
-	in_dev = __in_dev_get_rcu(dev_out);
-	if (!in_dev)
-		return ERR_PTR(-EINVAL);
-
 	if (type == RTN_BROADCAST) {
 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
 		fi = NULL;

^ permalink raw reply related

* RE: [PATCH] be2net: fix a race in be_xmit()
From: Sathya.Perla @ 2012-06-08 10:06 UTC (permalink / raw)
  To: eric.dumazet, davem; +Cc: netdev
In-Reply-To: <1339145999.6001.62.camel@edumazet-glaptop>


>-----Original Message-----
>From: Eric Dumazet <edumazet@google.com>
>
>As soon as hardware is notified of a transmit, we no longer can assume
>skb can be dereferenced, as TX completion might have freed the packet.
>
>Signed-off-by: Eric Dumazet <edumazet@google.com>
>Cc: Sathya Perla <sathya.perla@emulex.com>

Good catch. Thanks!
Acked-by: Sathya Perla <sathya.perla@emulex.com>

^ permalink raw reply

* Re: 3.4.1 and 3.5-rc1 Packet lost at 250Mb/s
From: Nieścierowicz Adam @ 2012-06-08  9:31 UTC (permalink / raw)
  To: Eric Dumazet, Netdev

W dniu 08.06.2012 11:41, Eric Dumazet napisał(a):

> On Fri, 2012-06-08 at 10:58 +0200, Nieścierowicz Adam wrote:
>
>> Hello, recently we changed on the router kernel from 2.6.38.1 to 
>> 3.4.1
>> and noticed 30% packet loss when traffic increases up to 250MB / s.
>> Similar is for kernel 3.5-rc1 Here a link to ifstat
>> http://wklej.org/id/767577/ [2]
>
> You should give as much as possible details on your setup (hardware,
> software)
>
> lspci
> cat /proc/cpuinfo
> cat /proc/interrupts
> ifconfig -a
> tc -s -d qdisc
> dmesg
> netstat -s

currently running on 2.6.38.1 and traffic is 100Mb / s

lspci: http://wklej.org/id/769102/
/proc/cpuinfo: http://wklej.org/id/769104/
/proc/interrupts: http://wklej.org/id/769106/
ifconfig -a: http://wklej.org/id/769108/
tc -s -d qdisc: http://wklej.org/id/769109/
dmesg: here are some logs from iptables
netstat -s: http://wklej.org/id/769110/
lsmod: http://wklej.org/id/769117/
/proc/net/softnet_stat: http://wklej.org/id/769116/

^ permalink raw reply

* Re: 3.4.1 and 3.5-rc1 Packet lost at 250Mb/s
From: Eric Dumazet @ 2012-06-08  9:43 UTC (permalink / raw)
  To: adam.niescierowicz; +Cc: Netdev
In-Reply-To: <1339148515.6001.65.camel@edumazet-glaptop>

On Fri, 2012-06-08 at 11:41 +0200, Eric Dumazet wrote:
> On Fri, 2012-06-08 at 10:58 +0200, Nieścierowicz Adam wrote:

> lspci
> cat /proc/cpuinfo
> cat /proc/interrupts
> ifconfig -a
> tc -s -d qdisc
> dmesg
> netstat -s
> 

cat /proc/net/softnet_stat
lsmod

^ permalink raw reply

* Re: 3.4.1 and 3.5-rc1 Packet lost at 250Mb/s
From: Eric Dumazet @ 2012-06-08  9:41 UTC (permalink / raw)
  To: adam.niescierowicz; +Cc: Netdev
In-Reply-To: <ddae4973ddff0015d239dd66551be209@justnet.pl>

On Fri, 2012-06-08 at 10:58 +0200, Nieścierowicz Adam wrote:
> Hello,
> 
> recently we changed on the router kernel from 2.6.38.1 to 3.4.1 and 
> noticed
> 30% packet loss when traffic increases up to 250MB / s.
> 
> Similar is for kernel 3.5-rc1
> 
> Here a link to ifstat http://wklej.org/id/767577/

You should give as much as possible details on your setup (hardware,
software)

lspci
cat /proc/cpuinfo
cat /proc/interrupts
ifconfig -a
tc -s -d qdisc
dmesg
netstat -s

^ permalink raw reply

* [PATCH v2] net: stmmac: Fix clock en-/disable calls
From: Stefan Roese @ 2012-06-08  9:21 UTC (permalink / raw)
  To: netdev; +Cc: Viresh Kumar, Giuseppe Cavallaro

clk_{un}prepare is mandatory for platforms using common clock framework.
Since these drivers are used by SPEAr platform, which supports common
clock framework, add clk_{un}prepare() support for them. Otherwise
the clocks are not correctly en-/disabled and ethernet support doesn't
work.

Signed-off-by: Stefan Roese <sr@denx.de>
Cc: Viresh Kumar <viresh.linux@gmail.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
v2:
- Patch description added. Sorry for missing this in the 1st patch
  revision.

 drivers/net/ethernet/stmicro/stmmac/stmmac.h |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 6b5d060..f46d8d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -109,7 +109,7 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 static inline int stmmac_clk_enable(struct stmmac_priv *priv)
 {
 	if (!IS_ERR(priv->stmmac_clk))
-		return clk_enable(priv->stmmac_clk);
+		return clk_prepare_enable(priv->stmmac_clk);
 
 	return 0;
 }
@@ -119,7 +119,7 @@ static inline void stmmac_clk_disable(struct stmmac_priv *priv)
 	if (IS_ERR(priv->stmmac_clk))
 		return;
 
-	clk_disable(priv->stmmac_clk);
+	clk_disable_unprepare(priv->stmmac_clk);
 }
 static inline int stmmac_clk_get(struct stmmac_priv *priv)
 {
-- 
1.7.10.4

^ permalink raw reply related

* 3.4.1 and 3.5-rc1 Packet lost at 250Mb/s
From: Nieścierowicz Adam @ 2012-06-08  8:58 UTC (permalink / raw)
  To: Netdev

Hello,

recently we changed on the router kernel from 2.6.38.1 to 3.4.1 and 
noticed
30% packet loss when traffic increases up to 250MB / s.

Similar is for kernel 3.5-rc1

Here a link to ifstat http://wklej.org/id/767577/

Regards

^ permalink raw reply

* [PATCH] be2net: fix a race in be_xmit()
From: Eric Dumazet @ 2012-06-08  8:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Sathya Perla

From: Eric Dumazet <edumazet@google.com>

As soon as hardware is notified of a transmit, we no longer can assume
skb can be dereferenced, as TX completion might have freed the packet.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Sathya Perla <sathya.perla@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 08efd30..fdb50ce 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -736,6 +736,8 @@ static netdev_tx_t be_xmit(struct sk_buff *skb,
 
 	copied = make_tx_wrbs(adapter, txq, skb, wrb_cnt, dummy_wrb);
 	if (copied) {
+		int gso_segs = skb_shinfo(skb)->gso_segs;
+
 		/* record the sent skb in the sent_skb table */
 		BUG_ON(txo->sent_skb_list[start]);
 		txo->sent_skb_list[start] = skb;
@@ -753,8 +755,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb,
 
 		be_txq_notify(adapter, txq->id, wrb_cnt);
 
-		be_tx_stats_update(txo, wrb_cnt, copied,
-				skb_shinfo(skb)->gso_segs, stopped);
+		be_tx_stats_update(txo, wrb_cnt, copied, gso_segs, stopped);
 	} else {
 		txq->head = start;
 		dev_kfree_skb_any(skb);

^ permalink raw reply related

* Re: Difficulties to get 1Gbps on be2net ethernet card
From: Eric Dumazet @ 2012-06-08  8:22 UTC (permalink / raw)
  To: Jean-Michel Hautbois; +Cc: Sathya.Perla, netdev
In-Reply-To: <CAL8zT=jPUEF62-V_0n9o3=n2Mo6y4M_kGgK+OwdXg0aLDazDgQ@mail.gmail.com>

On Fri, 2012-06-08 at 10:14 +0200, Jean-Michel Hautbois wrote:
> 2012/6/8 Eric Dumazet <eric.dumazet@gmail.com>:
> > On Thu, 2012-06-07 at 14:54 +0200, Jean-Michel Hautbois wrote:
> >
> >> eth1      Link encap:Ethernet  HWaddr 68:b5:99:b9:8d:d4
> >>           UP BROADCAST RUNNING SLAVE MULTICAST  MTU:4096  Metric:1
> >>           RX packets:0 errors:0 dropped:0 overruns:0 frame:0
> >>           TX packets:15215387 errors:0 dropped:0 overruns:0 carrier:0
> >>           collisions:0 txqueuelen:1000
> >>           RX bytes:0 (0.0 B)  TX bytes:61476524359 (57.2 GiB)
> >
> >> qdisc mq 0: dev eth1 root
> >>  Sent 61476524359 bytes 15215387 pkt (dropped 45683472, overlimits 0
> >> requeues 17480)
> >
> > OK, and "tc -s -d cl show dev eth1"
> >
> > (How many queues are really used)
> >
> >
> >
> 
> tc -s -d cl show dev eth1
> class mq :1 root
>  Sent 9798071746 bytes 2425410 pkt (dropped 3442405, overlimits 0 requeues 2747)
>  backlog 0b 0p requeues 2747
> class mq :2 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq :3 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq :4 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq :5 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq :6 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq :7 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq :8 root
>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0


Do you have the same distribution on old kernels as well ?
(ie only queue 0 is used)

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox