* [PATCH net-next] ixgbe: fix stats handling @ 2010-10-11 12:17 Eric Dumazet 2010-10-12 0:13 ` Jeff Kirsher 2010-10-27 22:35 ` Tantilov, Emil S 0 siblings, 2 replies; 10+ messages in thread From: Eric Dumazet @ 2010-10-11 12:17 UTC (permalink / raw) To: David Miller, Peter Waskiewicz, Emil Tantilov, Jeff Kirsher; +Cc: netdev Hi I am sending this patch for Intel people review/test and acknowledge. Thanks ! [PATCH net-next] ixgbe: fix stats handling Current ixgbe stats have following problems : - Not 64 bit safe (on 32bit arches) - Not safe in ixgbe_clean_rx_irq() : All cpus dirty a common location (netdev->stats.rx_bytes & netdev->stats.rx_packets) without proper synchronization. This slow down a bit multiqueue operations, and possibly miss some updates. Fixes : Implement ndo_get_stats64() method to provide accurate 64bit rx|tx bytes/packets counters, using 64bit safe infrastructure. ixgbe_get_ethtool_stats() also use this infrastructure to provide 64bit safe counters. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> CC: Peter Waskiewicz <peter.p.waskiewicz.jr@intel.com> CC: Emil Tantilov <emil.s.tantilov@intel.com> CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ixgbe/ixgbe.h | 3 +- drivers/net/ixgbe/ixgbe_ethtool.c | 29 +++++++++++--------- drivers/net/ixgbe/ixgbe_main.c | 40 +++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index a8c47b0..944d9e2 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -180,8 +180,9 @@ struct ixgbe_ring { */ struct ixgbe_queue_stats stats; - unsigned long reinit_state; + struct u64_stats_sync syncp; int numa_node; + unsigned long reinit_state; u64 rsc_count; /* stat for coalesced packets */ u64 rsc_flush; /* stats for flushed packets */ u32 restart_queue; /* track tx queue restarts */ diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index d4ac943..3c7f15d 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -999,12 +999,11 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - u64 *queue_stat; - int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64); struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *net_stats; - int j, k; - int i; + unsigned int start; + struct ixgbe_ring *ring; + int i, j; char *p = NULL; ixgbe_update_stats(adapter); @@ -1025,16 +1024,22 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } for (j = 0; j < adapter->num_tx_queues; j++) { - queue_stat = (u64 *)&adapter->tx_ring[j]->stats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - i += k; + ring = adapter->tx_ring[j]; + do { + start = u64_stats_fetch_begin_bh(&ring->syncp); + data[i] = ring->stats.packets; + data[i+1] = ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + i += 2; } for (j = 0; j < adapter->num_rx_queues; j++) { - queue_stat = (u64 *)&adapter->rx_ring[j]->stats; - for (k = 0; k < stat_count; k++) - data[i + k] = queue_stat[k]; - i += k; + ring = adapter->rx_ring[j]; + do { + start = u64_stats_fetch_begin_bh(&ring->syncp); + data[i] = ring->stats.packets; + data[i+1] = ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + i += 2; } if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 95dbf60..1efbcde 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -824,8 +824,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, tx_ring->total_bytes += total_bytes; tx_ring->total_packets += total_packets; + u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.packets += total_packets; tx_ring->stats.bytes += total_bytes; + u64_stats_update_end(&tx_ring->syncp); return count < tx_ring->work_limit; } @@ -1172,7 +1174,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, int *work_done, int work_to_do) { struct ixgbe_adapter *adapter = q_vector->adapter; - struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union ixgbe_adv_rx_desc *rx_desc, *next_rxd; struct ixgbe_rx_buffer *rx_buffer_info, *next_buffer; @@ -1298,8 +1299,10 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, rx_ring->rsc_count++; rx_ring->rsc_flush++; } + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets++; rx_ring->stats.bytes += skb->len; + u64_stats_update_end(&rx_ring->syncp); } else { if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) { rx_buffer_info->skb = next_buffer->skb; @@ -1375,8 +1378,6 @@ next_desc: rx_ring->total_packets += total_rx_packets; rx_ring->total_bytes += total_rx_bytes; - netdev->stats.rx_bytes += total_rx_bytes; - netdev->stats.rx_packets += total_rx_packets; return cleaned; } @@ -6559,6 +6560,38 @@ static void ixgbe_netpoll(struct net_device *netdev) } #endif +static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; + + /* accurate rx/tx bytes/packets stats */ + dev_txq_stats_fold(netdev, stats); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *ring = adapter->rx_ring[i]; + u64 bytes, packets; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + stats->rx_packets += packets; + stats->rx_bytes += bytes; + } + + /* following stats updated by ixgbe_watchdog_task() */ + stats->multicast = netdev->stats.multicast; + stats->rx_errors = netdev->stats.rx_errors; + stats->rx_length_errors = netdev->stats.rx_length_errors; + stats->rx_crc_errors = netdev->stats.rx_crc_errors; + stats->rx_missed_errors = netdev->stats.rx_missed_errors; + return stats; +} + + static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, @@ -6578,6 +6611,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, .ndo_set_vf_tx_rate = ixgbe_ndo_set_vf_bw, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, + .ndo_get_stats64 = ixgbe_get_stats64, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH net-next] ixgbe: fix stats handling 2010-10-11 12:17 [PATCH net-next] ixgbe: fix stats handling Eric Dumazet @ 2010-10-12 0:13 ` Jeff Kirsher 2010-10-27 22:35 ` Tantilov, Emil S 1 sibling, 0 replies; 10+ messages in thread From: Jeff Kirsher @ 2010-10-12 0:13 UTC (permalink / raw) To: Eric Dumazet; +Cc: David Miller, Peter Waskiewicz, Emil Tantilov, netdev On Mon, Oct 11, 2010 at 05:17, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Hi > > I am sending this patch for Intel people review/test and acknowledge. > > Thanks ! > > [PATCH net-next] ixgbe: fix stats handling > > Current ixgbe stats have following problems : > > - Not 64 bit safe (on 32bit arches) > > - Not safe in ixgbe_clean_rx_irq() : > All cpus dirty a common location (netdev->stats.rx_bytes & > netdev->stats.rx_packets) without proper synchronization. > This slow down a bit multiqueue operations, and possibly miss some > updates. > > Fixes : > > Implement ndo_get_stats64() method to provide accurate 64bit rx|tx > bytes/packets counters, using 64bit safe infrastructure. > > ixgbe_get_ethtool_stats() also use this infrastructure to provide 64bit > safe counters. > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> > CC: Peter Waskiewicz <peter.p.waskiewicz.jr@intel.com> > CC: Emil Tantilov <emil.s.tantilov@intel.com> > CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> > --- > drivers/net/ixgbe/ixgbe.h | 3 +- > drivers/net/ixgbe/ixgbe_ethtool.c | 29 +++++++++++--------- > drivers/net/ixgbe/ixgbe_main.c | 40 +++++++++++++++++++++++++--- > 3 files changed, 56 insertions(+), 16 deletions(-) > Thanks Eric, I have added the patch to my queue. -- Cheers, Jeff ^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH net-next] ixgbe: fix stats handling 2010-10-11 12:17 [PATCH net-next] ixgbe: fix stats handling Eric Dumazet 2010-10-12 0:13 ` Jeff Kirsher @ 2010-10-27 22:35 ` Tantilov, Emil S 2010-10-28 4:24 ` Eric Dumazet 1 sibling, 1 reply; 10+ messages in thread From: Tantilov, Emil S @ 2010-10-27 22:35 UTC (permalink / raw) To: Eric Dumazet, David Miller, Waskiewicz Jr, Peter P, Kirsher, Jeffrey T Cc: netdev >-----Original Message----- >From: Eric Dumazet [mailto:eric.dumazet@gmail.com] >Sent: Monday, October 11, 2010 5:17 AM >To: David Miller; Waskiewicz Jr, Peter P; Tantilov, Emil S; Kirsher, >Jeffrey T >Cc: netdev >Subject: [PATCH net-next] ixgbe: fix stats handling > >Hi > >I am sending this patch for Intel people review/test and acknowledge. > >Thanks ! > >[PATCH net-next] ixgbe: fix stats handling > >Current ixgbe stats have following problems : > >- Not 64 bit safe (on 32bit arches) > >- Not safe in ixgbe_clean_rx_irq() : > All cpus dirty a common location (netdev->stats.rx_bytes & >netdev->stats.rx_packets) without proper synchronization. > This slow down a bit multiqueue operations, and possibly miss some >updates. > >Fixes : > >Implement ndo_get_stats64() method to provide accurate 64bit rx|tx >bytes/packets counters, using 64bit safe infrastructure. > >ixgbe_get_ethtool_stats() also use this infrastructure to provide 64bit >safe counters. > >Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> >CC: Peter Waskiewicz <peter.p.waskiewicz.jr@intel.com> >CC: Emil Tantilov <emil.s.tantilov@intel.com> >CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> >--- > drivers/net/ixgbe/ixgbe.h | 3 +- > drivers/net/ixgbe/ixgbe_ethtool.c | 29 +++++++++++--------- > drivers/net/ixgbe/ixgbe_main.c | 40 +++++++++++++++++++++++++--- > 3 files changed, 56 insertions(+), 16 deletions(-) > >diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h >index a8c47b0..944d9e2 100644 >--- a/drivers/net/ixgbe/ixgbe.h >+++ b/drivers/net/ixgbe/ixgbe.h >@@ -180,8 +180,9 @@ struct ixgbe_ring { > */ > > struct ixgbe_queue_stats stats; >- unsigned long reinit_state; >+ struct u64_stats_sync syncp; > int numa_node; >+ unsigned long reinit_state; > u64 rsc_count; /* stat for coalesced packets */ > u64 rsc_flush; /* stats for flushed packets */ > u32 restart_queue; /* track tx queue restarts */ >diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c >b/drivers/net/ixgbe/ixgbe_ethtool.c >index d4ac943..3c7f15d 100644 >--- a/drivers/net/ixgbe/ixgbe_ethtool.c >+++ b/drivers/net/ixgbe/ixgbe_ethtool.c >@@ -999,12 +999,11 @@ static void ixgbe_get_ethtool_stats(struct net_device >*netdev, > struct ethtool_stats *stats, u64 >*data) > { > struct ixgbe_adapter *adapter = netdev_priv(netdev); >- u64 *queue_stat; >- int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64); > struct rtnl_link_stats64 temp; > const struct rtnl_link_stats64 *net_stats; >- int j, k; >- int i; >+ unsigned int start; >+ struct ixgbe_ring *ring; >+ int i, j; > char *p = NULL; > > ixgbe_update_stats(adapter); >@@ -1025,16 +1024,22 @@ static void ixgbe_get_ethtool_stats(struct >net_device *netdev, > sizeof(u64)) ? *(u64 *)p : *(u32 *)p; > } > for (j = 0; j < adapter->num_tx_queues; j++) { >- queue_stat = (u64 *)&adapter->tx_ring[j]->stats; >- for (k = 0; k < stat_count; k++) >- data[i + k] = queue_stat[k]; >- i += k; >+ ring = adapter->tx_ring[j]; >+ do { >+ start = u64_stats_fetch_begin_bh(&ring->syncp); >+ data[i] = ring->stats.packets; >+ data[i+1] = ring->stats.bytes; >+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); >+ i += 2; > } > for (j = 0; j < adapter->num_rx_queues; j++) { >- queue_stat = (u64 *)&adapter->rx_ring[j]->stats; >- for (k = 0; k < stat_count; k++) >- data[i + k] = queue_stat[k]; >- i += k; >+ ring = adapter->rx_ring[j]; >+ do { >+ start = u64_stats_fetch_begin_bh(&ring->syncp); >+ data[i] = ring->stats.packets; >+ data[i+1] = ring->stats.bytes; >+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); >+ i += 2; > } > if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { > for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { >diff --git a/drivers/net/ixgbe/ixgbe_main.c >b/drivers/net/ixgbe/ixgbe_main.c >index 95dbf60..1efbcde 100644 >--- a/drivers/net/ixgbe/ixgbe_main.c >+++ b/drivers/net/ixgbe/ixgbe_main.c >@@ -824,8 +824,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector >*q_vector, > > tx_ring->total_bytes += total_bytes; > tx_ring->total_packets += total_packets; >+ u64_stats_update_begin(&tx_ring->syncp); > tx_ring->stats.packets += total_packets; > tx_ring->stats.bytes += total_bytes; >+ u64_stats_update_end(&tx_ring->syncp); > return count < tx_ring->work_limit; > } > >@@ -1172,7 +1174,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector >*q_vector, > int *work_done, int work_to_do) > { > struct ixgbe_adapter *adapter = q_vector->adapter; >- struct net_device *netdev = adapter->netdev; > struct pci_dev *pdev = adapter->pdev; > union ixgbe_adv_rx_desc *rx_desc, *next_rxd; > struct ixgbe_rx_buffer *rx_buffer_info, *next_buffer; >@@ -1298,8 +1299,10 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector >*q_vector, > rx_ring->rsc_count++; > rx_ring->rsc_flush++; > } >+ u64_stats_update_begin(&rx_ring->syncp); > rx_ring->stats.packets++; > rx_ring->stats.bytes += skb->len; >+ u64_stats_update_end(&rx_ring->syncp); > } else { > if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) { > rx_buffer_info->skb = next_buffer->skb; >@@ -1375,8 +1378,6 @@ next_desc: > > rx_ring->total_packets += total_rx_packets; > rx_ring->total_bytes += total_rx_bytes; >- netdev->stats.rx_bytes += total_rx_bytes; >- netdev->stats.rx_packets += total_rx_packets; > > return cleaned; > } >@@ -6559,6 +6560,38 @@ static void ixgbe_netpoll(struct net_device *netdev) > } > #endif > >+static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device >*netdev, >+ struct rtnl_link_stats64 *stats) >+{ >+ struct ixgbe_adapter *adapter = netdev_priv(netdev); >+ int i; >+ >+ /* accurate rx/tx bytes/packets stats */ >+ dev_txq_stats_fold(netdev, stats); >+ for (i = 0; i < adapter->num_rx_queues; i++) { >+ struct ixgbe_ring *ring = adapter->rx_ring[i]; >+ u64 bytes, packets; >+ unsigned int start; >+ >+ do { >+ start = u64_stats_fetch_begin_bh(&ring->syncp); >+ packets = ring->stats.packets; >+ bytes = ring->stats.bytes; >+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); >+ stats->rx_packets += packets; >+ stats->rx_bytes += bytes; >+ } >+ >+ /* following stats updated by ixgbe_watchdog_task() */ >+ stats->multicast = netdev->stats.multicast; >+ stats->rx_errors = netdev->stats.rx_errors; >+ stats->rx_length_errors = netdev->stats.rx_length_errors; >+ stats->rx_crc_errors = netdev->stats.rx_crc_errors; >+ stats->rx_missed_errors = netdev->stats.rx_missed_errors; >+ return stats; >+} >+ >+ > static const struct net_device_ops ixgbe_netdev_ops = { > .ndo_open = ixgbe_open, > .ndo_stop = ixgbe_close, >@@ -6578,6 +6611,7 @@ static const struct net_device_ops ixgbe_netdev_ops = >{ > .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, > .ndo_set_vf_tx_rate = ixgbe_ndo_set_vf_bw, > .ndo_get_vf_config = ixgbe_ndo_get_vf_config, >+ .ndo_get_stats64 = ixgbe_get_stats64, > #ifdef CONFIG_NET_POLL_CONTROLLER > .ndo_poll_controller = ixgbe_netpoll, > #endif > Eric, We are seeing intermittent hangs on ia32 arch which seem to point to this patch: BUG: unable to handle kernel NULL pointer dereference at 00000040 IP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] *pdpt = 000000002dc83001 *pde = 000000032d7e5067 Oops: 0000 [#2] SMP last sysfs file: /sys/devices/system/cpu/cpu23/cache/index2/shared_cpu_map Modules linked in: act_skbedit cls_u32 sch_multiq ixgbe mdio netconsole configfs autofs4 8021q garp stp llc sunrpc ipv6 e xt3 jbd dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath power_meter hwmon sg ses enclosure dcdbas pcspkr serio_raw iTCO_wdt iTCO_vendor_support io atdma dca i7core_edac edac_core bnx2 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix megaraid_sas dm_mod [last unloaded: mperf ] Pid: 1939, comm: irqbalance Tainted: G D W 2.6.36-rc7-upstream-net-next-2.6-ixgbe-queue-i386-g55e1a84 #1 09CGW2/Po werEdge T610 EIP: 0060:[<f7f6b537>] EFLAGS: 00010206 CPU: 0 EIP is at ixgbe_get_stats64+0x47/0x120 [ixgbe] EAX: 00000000 EBX: ecc45e4c ECX: ebea0400 EDX: 00000000 ESI: ebea0000 EDI: 00000018 EBP: f7f846a0 ESP: ecc45d88 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process irqbalance (pid: 1939, ti=ecc44000 task=efc63a50 task.ti=ecc44000) Stack: ecc45e4c 00000000 ebea0400 ebea0000 ecc45e4c ebea0000 ecc45f04 f7f846a0 <0> c0750593 ebea0000 edfec340 ebea0000 000002b5 c075063a edfec340 c0993eec <0> ebe78000 0000021c 00000000 00000009 00000000 00000000 00000000 00000000 Call Trace: [<c0750593>] ? dev_get_stats+0x33/0xc0 [<c075063a>] ? dev_seq_printf_stats+0x1a/0x180 [<c07507aa>] ? dev_seq_show+0xa/0x20 [<c052398f>] ? seq_read+0x22f/0x3d0 [<c0523760>] ? seq_read+0x0/0x3d0 [<c054fdde>] ? proc_reg_read+0x5e/0x90 [<c054fd80>] ? proc_reg_read+0x0/0x90 [<c050a1dd>] ? vfs_read+0x9d/0x160 [<c049d4ef>] ? audit_syscall_entry+0x20f/0x230 [<c050a971>] ? sys_read+0x41/0x70 [<c0409cdf>] ? sysenter_do_call+0x12/0x28 Code: 60 4f 7e c8 8b 44 24 08 8b b8 20 06 00 00 85 ff 7e 63 c7 44 24 04 00 00 00 00 66 90 8b 54 24 04 8b 4c 24 08 8b 84 9 1 00 05 00 00 <8b> 50 40 eb 06 8d 74 26 00 89 ca f6 c2 01 0f 85 ae 00 00 00 8b EIP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] SS:ESP 0068:ecc45d88 CR2: 0000000000000040 ---[ end trace 51ea89f4e57f54f1 ]--- Emil ^ permalink raw reply [flat|nested] 10+ messages in thread
* RE: [PATCH net-next] ixgbe: fix stats handling 2010-10-27 22:35 ` Tantilov, Emil S @ 2010-10-28 4:24 ` Eric Dumazet 2010-10-28 12:02 ` [PATCH] ixgbe: delay rx_ring freeing Eric Dumazet 0 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2010-10-28 4:24 UTC (permalink / raw) To: Tantilov, Emil S Cc: David Miller, Waskiewicz Jr, Peter P, Kirsher, Jeffrey T, Brattain, Ross B, netdev Le mercredi 27 octobre 2010 à 16:35 -0600, Tantilov, Emil S a écrit : > >-----Original Message----- > >From: Eric Dumazet [mailto:eric.dumazet@gmail.com] > >Sent: Monday, October 11, 2010 5:17 AM > >To: David Miller; Waskiewicz Jr, Peter P; Tantilov, Emil S; Kirsher, > >Jeffrey T > >Cc: netdev > >Subject: [PATCH net-next] ixgbe: fix stats handling > > > >Hi > > > >I am sending this patch for Intel people review/test and acknowledge. > > > >Thanks ! > > > >[PATCH net-next] ixgbe: fix stats handling > > > >Current ixgbe stats have following problems : > > > >- Not 64 bit safe (on 32bit arches) > > > >- Not safe in ixgbe_clean_rx_irq() : > > All cpus dirty a common location (netdev->stats.rx_bytes & > >netdev->stats.rx_packets) without proper synchronization. > > This slow down a bit multiqueue operations, and possibly miss some > >updates. > > > >Fixes : > > > >Implement ndo_get_stats64() method to provide accurate 64bit rx|tx > >bytes/packets counters, using 64bit safe infrastructure. > > > >ixgbe_get_ethtool_stats() also use this infrastructure to provide 64bit > >safe counters. > > > >Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> > >CC: Peter Waskiewicz <peter.p.waskiewicz.jr@intel.com> > >CC: Emil Tantilov <emil.s.tantilov@intel.com> > >CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com> > >--- > > drivers/net/ixgbe/ixgbe.h | 3 +- > > drivers/net/ixgbe/ixgbe_ethtool.c | 29 +++++++++++--------- > > drivers/net/ixgbe/ixgbe_main.c | 40 +++++++++++++++++++++++++--- > > 3 files changed, 56 insertions(+), 16 deletions(-) > > > >diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h > >index a8c47b0..944d9e2 100644 > >--- a/drivers/net/ixgbe/ixgbe.h > >+++ b/drivers/net/ixgbe/ixgbe.h > >@@ -180,8 +180,9 @@ struct ixgbe_ring { > > */ > > > > struct ixgbe_queue_stats stats; > >- unsigned long reinit_state; > >+ struct u64_stats_sync syncp; > > int numa_node; > >+ unsigned long reinit_state; > > u64 rsc_count; /* stat for coalesced packets */ > > u64 rsc_flush; /* stats for flushed packets */ > > u32 restart_queue; /* track tx queue restarts */ > >diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c > >b/drivers/net/ixgbe/ixgbe_ethtool.c > >index d4ac943..3c7f15d 100644 > >--- a/drivers/net/ixgbe/ixgbe_ethtool.c > >+++ b/drivers/net/ixgbe/ixgbe_ethtool.c > >@@ -999,12 +999,11 @@ static void ixgbe_get_ethtool_stats(struct net_device > >*netdev, > > struct ethtool_stats *stats, u64 > >*data) > > { > > struct ixgbe_adapter *adapter = netdev_priv(netdev); > >- u64 *queue_stat; > >- int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64); > > struct rtnl_link_stats64 temp; > > const struct rtnl_link_stats64 *net_stats; > >- int j, k; > >- int i; > >+ unsigned int start; > >+ struct ixgbe_ring *ring; > >+ int i, j; > > char *p = NULL; > > > > ixgbe_update_stats(adapter); > >@@ -1025,16 +1024,22 @@ static void ixgbe_get_ethtool_stats(struct > >net_device *netdev, > > sizeof(u64)) ? *(u64 *)p : *(u32 *)p; > > } > > for (j = 0; j < adapter->num_tx_queues; j++) { > >- queue_stat = (u64 *)&adapter->tx_ring[j]->stats; > >- for (k = 0; k < stat_count; k++) > >- data[i + k] = queue_stat[k]; > >- i += k; > >+ ring = adapter->tx_ring[j]; > >+ do { > >+ start = u64_stats_fetch_begin_bh(&ring->syncp); > >+ data[i] = ring->stats.packets; > >+ data[i+1] = ring->stats.bytes; > >+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); > >+ i += 2; > > } > > for (j = 0; j < adapter->num_rx_queues; j++) { > >- queue_stat = (u64 *)&adapter->rx_ring[j]->stats; > >- for (k = 0; k < stat_count; k++) > >- data[i + k] = queue_stat[k]; > >- i += k; > >+ ring = adapter->rx_ring[j]; > >+ do { > >+ start = u64_stats_fetch_begin_bh(&ring->syncp); > >+ data[i] = ring->stats.packets; > >+ data[i+1] = ring->stats.bytes; > >+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); > >+ i += 2; > > } > > if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { > > for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { > >diff --git a/drivers/net/ixgbe/ixgbe_main.c > >b/drivers/net/ixgbe/ixgbe_main.c > >index 95dbf60..1efbcde 100644 > >--- a/drivers/net/ixgbe/ixgbe_main.c > >+++ b/drivers/net/ixgbe/ixgbe_main.c > >@@ -824,8 +824,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector > >*q_vector, > > > > tx_ring->total_bytes += total_bytes; > > tx_ring->total_packets += total_packets; > >+ u64_stats_update_begin(&tx_ring->syncp); > > tx_ring->stats.packets += total_packets; > > tx_ring->stats.bytes += total_bytes; > >+ u64_stats_update_end(&tx_ring->syncp); > > return count < tx_ring->work_limit; > > } > > > >@@ -1172,7 +1174,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector > >*q_vector, > > int *work_done, int work_to_do) > > { > > struct ixgbe_adapter *adapter = q_vector->adapter; > >- struct net_device *netdev = adapter->netdev; > > struct pci_dev *pdev = adapter->pdev; > > union ixgbe_adv_rx_desc *rx_desc, *next_rxd; > > struct ixgbe_rx_buffer *rx_buffer_info, *next_buffer; > >@@ -1298,8 +1299,10 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector > >*q_vector, > > rx_ring->rsc_count++; > > rx_ring->rsc_flush++; > > } > >+ u64_stats_update_begin(&rx_ring->syncp); > > rx_ring->stats.packets++; > > rx_ring->stats.bytes += skb->len; > >+ u64_stats_update_end(&rx_ring->syncp); > > } else { > > if (rx_ring->flags & IXGBE_RING_RX_PS_ENABLED) { > > rx_buffer_info->skb = next_buffer->skb; > >@@ -1375,8 +1378,6 @@ next_desc: > > > > rx_ring->total_packets += total_rx_packets; > > rx_ring->total_bytes += total_rx_bytes; > >- netdev->stats.rx_bytes += total_rx_bytes; > >- netdev->stats.rx_packets += total_rx_packets; > > > > return cleaned; > > } > >@@ -6559,6 +6560,38 @@ static void ixgbe_netpoll(struct net_device *netdev) > > } > > #endif > > > >+static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device > >*netdev, > >+ struct rtnl_link_stats64 *stats) > >+{ > >+ struct ixgbe_adapter *adapter = netdev_priv(netdev); > >+ int i; > >+ > >+ /* accurate rx/tx bytes/packets stats */ > >+ dev_txq_stats_fold(netdev, stats); > >+ for (i = 0; i < adapter->num_rx_queues; i++) { > >+ struct ixgbe_ring *ring = adapter->rx_ring[i]; > >+ u64 bytes, packets; > >+ unsigned int start; > >+ > >+ do { > >+ start = u64_stats_fetch_begin_bh(&ring->syncp); > >+ packets = ring->stats.packets; > >+ bytes = ring->stats.bytes; > >+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); > >+ stats->rx_packets += packets; > >+ stats->rx_bytes += bytes; > >+ } > >+ > >+ /* following stats updated by ixgbe_watchdog_task() */ > >+ stats->multicast = netdev->stats.multicast; > >+ stats->rx_errors = netdev->stats.rx_errors; > >+ stats->rx_length_errors = netdev->stats.rx_length_errors; > >+ stats->rx_crc_errors = netdev->stats.rx_crc_errors; > >+ stats->rx_missed_errors = netdev->stats.rx_missed_errors; > >+ return stats; > >+} > >+ > >+ > > static const struct net_device_ops ixgbe_netdev_ops = { > > .ndo_open = ixgbe_open, > > .ndo_stop = ixgbe_close, > >@@ -6578,6 +6611,7 @@ static const struct net_device_ops ixgbe_netdev_ops = > >{ > > .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, > > .ndo_set_vf_tx_rate = ixgbe_ndo_set_vf_bw, > > .ndo_get_vf_config = ixgbe_ndo_get_vf_config, > >+ .ndo_get_stats64 = ixgbe_get_stats64, > > #ifdef CONFIG_NET_POLL_CONTROLLER > > .ndo_poll_controller = ixgbe_netpoll, > > #endif > > > > Eric, > > We are seeing intermittent hangs on ia32 arch which seem to point to this patch: > > BUG: unable to handle kernel NULL pointer dereference at 00000040 > IP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] > *pdpt = 000000002dc83001 *pde = 000000032d7e5067 > Oops: 0000 [#2] SMP > last sysfs file: /sys/devices/system/cpu/cpu23/cache/index2/shared_cpu_map > Modules linked in: act_skbedit cls_u32 sch_multiq ixgbe mdio netconsole configfs autofs4 8021q garp stp llc sunrpc ipv6 e > xt3 jbd dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath power_meter hwmon sg ses enclosure dcdbas pcspkr serio_raw iTCO_wdt iTCO_vendor_support io > atdma dca i7core_edac edac_core bnx2 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix megaraid_sas dm_mod [last unloaded: mperf > ] > > Pid: 1939, comm: irqbalance Tainted: G D W 2.6.36-rc7-upstream-net-next-2.6-ixgbe-queue-i386-g55e1a84 #1 09CGW2/Po > werEdge T610 > EIP: 0060:[<f7f6b537>] EFLAGS: 00010206 CPU: 0 > EIP is at ixgbe_get_stats64+0x47/0x120 [ixgbe] > EAX: 00000000 EBX: ecc45e4c ECX: ebea0400 EDX: 00000000 > ESI: ebea0000 EDI: 00000018 EBP: f7f846a0 ESP: ecc45d88 > DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 > Process irqbalance (pid: 1939, ti=ecc44000 task=efc63a50 task.ti=ecc44000) > Stack: > ecc45e4c 00000000 ebea0400 ebea0000 ecc45e4c ebea0000 ecc45f04 f7f846a0 > <0> c0750593 ebea0000 edfec340 ebea0000 000002b5 c075063a edfec340 c0993eec > <0> ebe78000 0000021c 00000000 00000009 00000000 00000000 00000000 00000000 > Call Trace: > [<c0750593>] ? dev_get_stats+0x33/0xc0 > [<c075063a>] ? dev_seq_printf_stats+0x1a/0x180 > [<c07507aa>] ? dev_seq_show+0xa/0x20 > [<c052398f>] ? seq_read+0x22f/0x3d0 > [<c0523760>] ? seq_read+0x0/0x3d0 > [<c054fdde>] ? proc_reg_read+0x5e/0x90 > [<c054fd80>] ? proc_reg_read+0x0/0x90 > [<c050a1dd>] ? vfs_read+0x9d/0x160 > [<c049d4ef>] ? audit_syscall_entry+0x20f/0x230 > [<c050a971>] ? sys_read+0x41/0x70 > [<c0409cdf>] ? sysenter_do_call+0x12/0x28 > Code: 60 4f 7e c8 8b 44 24 08 8b b8 20 06 00 00 85 ff 7e 63 c7 44 24 04 00 00 00 00 66 90 8b 54 24 04 8b 4c 24 08 8b 84 9 > 1 00 05 00 00 <8b> 50 40 eb 06 8d 74 26 00 89 ca f6 c2 01 0f 85 ae 00 00 00 8b > EIP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] SS:ESP 0068:ecc45d88 > CR2: 0000000000000040 > ---[ end trace 51ea89f4e57f54f1 ]--- > > Emil Code disassembly makes no sense to me : this doesnt match C code at all 60 4f 7e c8 8b 44 24 08 mov 0x08(%esp),%eax 8b b8 20 06 00 00 mov 0x620(%eax),%edi 85 ff test %edi,%edi 7e 63 jle +0x63 c7 44 24 04 00 00 00 00 movl $0,0x4(%esp) 66 90 xchg %ax,%ax 8b 54 24 04 mov 0x04(%esp),%edx (0 in your report) 8b 4c 24 08 mov 0x08(%esp),%ecx (ebea0400 in your rep) 8b 84 91 00 05 00 00 mov 0x500(,),eax <8b> 50 40 mov 0x40(%eax),%edx NULL pointer deref eb 06 jmp +6 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi 89 ca mov %ecx,%edx f6 c2 01 test $0x1,%dl 0f 85 ae 00 00 00 jne 8b Could you disassemble your ixgbe_get_stats64() function ? Thanks Mine is : 00000e29 <ixgbe_get_stats64>: e29: 55 push %ebp e2a: 89 e5 mov %esp,%ebp e2c: 57 push %edi e2d: 56 push %esi e2e: 89 c6 mov %eax,%esi e30: 53 push %ebx e31: 89 d3 mov %edx,%ebx e33: 83 ec 18 sub $0x18,%esp e36: 8d b8 40 03 00 00 lea 0x340(%eax),%edi e3c: e8 fc ff ff ff call dev_txq_stats_fold e41: 31 c0 xor %eax,%eax e43: eb 54 jmp e99 <ixgbe_get_stats64+0x70> e45: 8b 94 87 40 0d 00 00 mov 0xd40(%edi,%eax,4),%edx e4c: 89 75 e0 mov %esi,-0x20(%ebp) e4f: 89 5d dc mov %ebx,-0x24(%ebp) e52: 8b 4a 34 mov 0x34(%edx),%ecx e55: f6 c1 01 test $0x1,%cl e58: 74 04 je e5e <ixgbe_get_stats64+0x35> e5a: f3 90 pause e5c: eb f4 jmp e52 <ixgbe_get_stats64+0x29> e5e: 8b 5a 24 mov 0x24(%edx),%ebx e61: 8b 72 28 mov 0x28(%edx),%esi e64: 89 5d ec mov %ebx,-0x14(%ebp) e67: 8b 5a 2c mov 0x2c(%edx),%ebx e6a: 89 75 f0 mov %esi,-0x10(%ebp) e6d: 8b 72 30 mov 0x30(%edx),%esi e70: 89 5d e4 mov %ebx,-0x1c(%ebp) e73: 89 75 e8 mov %esi,-0x18(%ebp) e76: 39 4a 34 cmp %ecx,0x34(%edx) e79: 75 d7 jne e52 <ixgbe_get_stats64+0x29> e7b: 8b 5d dc mov -0x24(%ebp),%ebx e7e: 8b 55 ec mov -0x14(%ebp),%edx e81: 8b 75 e0 mov -0x20(%ebp),%esi e84: 8b 4d f0 mov -0x10(%ebp),%ecx e87: 01 13 add %edx,(%ebx) e89: 8b 55 e4 mov -0x1c(%ebp),%edx e8c: 11 4b 04 adc %ecx,0x4(%ebx) e8f: 01 53 10 add %edx,0x10(%ebx) e92: 8b 4d e8 mov -0x18(%ebp),%ecx e95: 11 4b 14 adc %ecx,0x14(%ebx) e98: 40 inc %eax e99: 3b 87 40 0e 00 00 cmp 0xe40(%edi),%eax e9f: 7c a4 jl e45 <ixgbe_get_stats64+0x1c> ea1: 8b 8e 90 00 00 00 mov 0x90(%esi),%ecx ea7: c7 43 44 00 00 00 00 movl $0x0,0x44(%ebx) eae: 89 4b 40 mov %ecx,0x40(%ebx) eb1: 8b 86 80 00 00 00 mov 0x80(%esi),%eax eb7: c7 43 24 00 00 00 00 movl $0x0,0x24(%ebx) ebe: 89 43 20 mov %eax,0x20(%ebx) ec1: 8b 96 98 00 00 00 mov 0x98(%esi),%edx ec7: c7 43 54 00 00 00 00 movl $0x0,0x54(%ebx) ece: 89 53 50 mov %edx,0x50(%ebx) ed1: 8b 8e a0 00 00 00 mov 0xa0(%esi),%ecx ed7: c7 43 64 00 00 00 00 movl $0x0,0x64(%ebx) ede: 89 4b 60 mov %ecx,0x60(%ebx) ee1: 8b 86 ac 00 00 00 mov 0xac(%esi),%eax ee7: c7 43 7c 00 00 00 00 movl $0x0,0x7c(%ebx) eee: 89 43 78 mov %eax,0x78(%ebx) ef1: 83 c4 18 add $0x18,%esp ef4: 89 d8 mov %ebx,%eax ef6: 5b pop %ebx ef7: 5e pop %esi ef8: 5f pop %edi ef9: 5d pop %ebp efa: c3 ret No 0x40(ptr) deref for me Could adapter->rx_ring[i] becomes NULL sometime in your driver ? If yes, a lock/protection is needed (same for ethtool stats I guess) ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] ixgbe: delay rx_ring freeing 2010-10-28 4:24 ` Eric Dumazet @ 2010-10-28 12:02 ` Eric Dumazet 2010-10-28 12:21 ` Jeff Kirsher 0 siblings, 1 reply; 10+ messages in thread From: Eric Dumazet @ 2010-10-28 12:02 UTC (permalink / raw) To: Tantilov, Emil S Cc: David Miller, Waskiewicz Jr, Peter P, Kirsher, Jeffrey T, Brattain, Ross B, netdev Le jeudi 28 octobre 2010 à 06:24 +0200, Eric Dumazet a écrit : > Le mercredi 27 octobre 2010 à 16:35 -0600, Tantilov, Emil S a écrit : > > Eric, > > > > We are seeing intermittent hangs on ia32 arch which seem to point to this patch: > > > > BUG: unable to handle kernel NULL pointer dereference at 00000040 > > IP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] > > *pdpt = 000000002dc83001 *pde = 000000032d7e5067 > > Oops: 0000 [#2] SMP > > last sysfs file: /sys/devices/system/cpu/cpu23/cache/index2/shared_cpu_map > > Modules linked in: act_skbedit cls_u32 sch_multiq ixgbe mdio netconsole configfs autofs4 8021q garp stp llc sunrpc ipv6 e > > xt3 jbd dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath power_meter hwmon sg ses enclosure dcdbas pcspkr serio_raw iTCO_wdt iTCO_vendor_support io > > atdma dca i7core_edac edac_core bnx2 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix megaraid_sas dm_mod [last unloaded: mperf > > ] > > > > Pid: 1939, comm: irqbalance Tainted: G D W 2.6.36-rc7-upstream-net-next-2.6-ixgbe-queue-i386-g55e1a84 #1 09CGW2/Po > > werEdge T610 > > EIP: 0060:[<f7f6b537>] EFLAGS: 00010206 CPU: 0 > > EIP is at ixgbe_get_stats64+0x47/0x120 [ixgbe] > > EAX: 00000000 EBX: ecc45e4c ECX: ebea0400 EDX: 00000000 > > ESI: ebea0000 EDI: 00000018 EBP: f7f846a0 ESP: ecc45d88 > > DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 > > Process irqbalance (pid: 1939, ti=ecc44000 task=efc63a50 task.ti=ecc44000) > > Stack: > > ecc45e4c 00000000 ebea0400 ebea0000 ecc45e4c ebea0000 ecc45f04 f7f846a0 > > <0> c0750593 ebea0000 edfec340 ebea0000 000002b5 c075063a edfec340 c0993eec > > <0> ebe78000 0000021c 00000000 00000009 00000000 00000000 00000000 00000000 > > Call Trace: > > [<c0750593>] ? dev_get_stats+0x33/0xc0 > > [<c075063a>] ? dev_seq_printf_stats+0x1a/0x180 > > [<c07507aa>] ? dev_seq_show+0xa/0x20 > > [<c052398f>] ? seq_read+0x22f/0x3d0 > > [<c0523760>] ? seq_read+0x0/0x3d0 > > [<c054fdde>] ? proc_reg_read+0x5e/0x90 > > [<c054fd80>] ? proc_reg_read+0x0/0x90 > > [<c050a1dd>] ? vfs_read+0x9d/0x160 > > [<c049d4ef>] ? audit_syscall_entry+0x20f/0x230 > > [<c050a971>] ? sys_read+0x41/0x70 > > [<c0409cdf>] ? sysenter_do_call+0x12/0x28 > > Code: 60 4f 7e c8 8b 44 24 08 8b b8 20 06 00 00 85 ff 7e 63 c7 44 24 04 00 00 00 00 66 90 8b 54 24 04 8b 4c 24 08 8b 84 9 > > 1 00 05 00 00 <8b> 50 40 eb 06 8d 74 26 00 89 ca f6 c2 01 0f 85 ae 00 00 00 8b > > EIP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] SS:ESP 0068:ecc45d88 > > CR2: 0000000000000040 > > ---[ end trace 51ea89f4e57f54f1 ]--- > > > > Emil I believe I now understand the problem, please try following patch. Thanks [PATCH] ixgbe: delay rx_ring freeing "cat /proc/net/dev" uses RCU protection only. Its quite possible we call a driver get_stats() method while device is dismantling and freeing its data structures. So get_stats() methods must be very careful not accessing driver private data without appropriate locking. In ixgbe case, we access rx_ring pointers. These pointers are freed in ixgbe_clear_interrupt_scheme() and set to NULL, this can trigger NULL dereference in ixgbe_get_stats64() A possible fix is to use RCU locking in ixgbe_get_stats64() and defer rx_ring freeing after a grace period in ixgbe_clear_interrupt_scheme() Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Reported-by: Tantilov, Emil S <emil.s.tantilov@intel.com> --- drivers/net/ixgbe/ixgbe.h | 1 drivers/net/ixgbe/ixgbe_main.c | 34 +++++++++++++++++++++---------- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index ed8703c..018e143 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -192,6 +192,7 @@ struct ixgbe_ring { unsigned int size; /* length in bytes */ dma_addr_t dma; /* phys. address of descriptor ring */ + struct rcu_head rcu; } ____cacheline_internodealigned_in_smp; enum ixgbe_ring_f_enum { diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index f856312..1b16292 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4742,6 +4742,11 @@ err_set_interrupt: return err; } +static void ring_free_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct ixgbe_ring, rcu)); +} + /** * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings * @adapter: board private structure to clear interrupt scheme on @@ -4758,7 +4763,12 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter) adapter->tx_ring[i] = NULL; } for (i = 0; i < adapter->num_rx_queues; i++) { - kfree(adapter->rx_ring[i]); + struct ixgbe_ring *ring = adapter->rx_ring[i]; + + /* ixgbe_get_stats64() might access this ring, we must wait + * a grace period before freeing it. + */ + call_rcu(&ring->rcu, ring_free_rcu); adapter->rx_ring[i] = NULL; } @@ -6551,20 +6561,23 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, /* accurate rx/tx bytes/packets stats */ dev_txq_stats_fold(netdev, stats); + rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = adapter->rx_ring[i]; + struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]); u64 bytes, packets; unsigned int start; - do { - start = u64_stats_fetch_begin_bh(&ring->syncp); - packets = ring->stats.packets; - bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); - stats->rx_packets += packets; - stats->rx_bytes += bytes; + if (ring) { + do { + start = u64_stats_fetch_begin_bh(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + stats->rx_packets += packets; + stats->rx_bytes += bytes; + } } - + rcu_read_unlock(); /* following stats updated by ixgbe_watchdog_task() */ stats->multicast = netdev->stats.multicast; stats->rx_errors = netdev->stats.rx_errors; @@ -7270,6 +7283,7 @@ static void __exit ixgbe_exit_module(void) dca_unregister_notify(&dca_notifier); #endif pci_unregister_driver(&ixgbe_driver); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } #ifdef CONFIG_IXGBE_DCA ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] ixgbe: delay rx_ring freeing 2010-10-28 12:02 ` [PATCH] ixgbe: delay rx_ring freeing Eric Dumazet @ 2010-10-28 12:21 ` Jeff Kirsher 2010-10-29 3:45 ` [PATCH] ixgbe: refactor ixgbe_alloc_queues() Eric Dumazet 0 siblings, 1 reply; 10+ messages in thread From: Jeff Kirsher @ 2010-10-28 12:21 UTC (permalink / raw) To: Eric Dumazet Cc: Tantilov, Emil S, David Miller, Waskiewicz Jr, Peter P, Brattain, Ross B, netdev [-- Attachment #1: Type: text/plain, Size: 3956 bytes --] On Thu, 2010-10-28 at 05:02 -0700, Eric Dumazet wrote: > Le jeudi 28 octobre 2010 à 06:24 +0200, Eric Dumazet a écrit : > > Le mercredi 27 octobre 2010 à 16:35 -0600, Tantilov, Emil S a écrit : > > > > Eric, > > > > > > We are seeing intermittent hangs on ia32 arch which seem to point to this patch: > > > > > > BUG: unable to handle kernel NULL pointer dereference at 00000040 > > > IP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] > > > *pdpt = 000000002dc83001 *pde = 000000032d7e5067 > > > Oops: 0000 [#2] SMP > > > last sysfs file: /sys/devices/system/cpu/cpu23/cache/index2/shared_cpu_map > > > Modules linked in: act_skbedit cls_u32 sch_multiq ixgbe mdio netconsole configfs autofs4 8021q garp stp llc sunrpc ipv6 e > > > xt3 jbd dm_mirror dm_region_hash dm_log dm_round_robin dm_multipath power_meter hwmon sg ses enclosure dcdbas pcspkr serio_raw iTCO_wdt iTCO_vendor_support io > > > atdma dca i7core_edac edac_core bnx2 ext4 mbcache jbd2 sr_mod cdrom sd_mod crc_t10dif pata_acpi ata_generic ata_piix megaraid_sas dm_mod [last unloaded: mperf > > > ] > > > > > > Pid: 1939, comm: irqbalance Tainted: G D W 2.6.36-rc7-upstream-net-next-2.6-ixgbe-queue-i386-g55e1a84 #1 09CGW2/Po > > > werEdge T610 > > > EIP: 0060:[<f7f6b537>] EFLAGS: 00010206 CPU: 0 > > > EIP is at ixgbe_get_stats64+0x47/0x120 [ixgbe] > > > EAX: 00000000 EBX: ecc45e4c ECX: ebea0400 EDX: 00000000 > > > ESI: ebea0000 EDI: 00000018 EBP: f7f846a0 ESP: ecc45d88 > > > DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 > > > Process irqbalance (pid: 1939, ti=ecc44000 task=efc63a50 task.ti=ecc44000) > > > Stack: > > > ecc45e4c 00000000 ebea0400 ebea0000 ecc45e4c ebea0000 ecc45f04 f7f846a0 > > > <0> c0750593 ebea0000 edfec340 ebea0000 000002b5 c075063a edfec340 c0993eec > > > <0> ebe78000 0000021c 00000000 00000009 00000000 00000000 00000000 00000000 > > > Call Trace: > > > [<c0750593>] ? dev_get_stats+0x33/0xc0 > > > [<c075063a>] ? dev_seq_printf_stats+0x1a/0x180 > > > [<c07507aa>] ? dev_seq_show+0xa/0x20 > > > [<c052398f>] ? seq_read+0x22f/0x3d0 > > > [<c0523760>] ? seq_read+0x0/0x3d0 > > > [<c054fdde>] ? proc_reg_read+0x5e/0x90 > > > [<c054fd80>] ? proc_reg_read+0x0/0x90 > > > [<c050a1dd>] ? vfs_read+0x9d/0x160 > > > [<c049d4ef>] ? audit_syscall_entry+0x20f/0x230 > > > [<c050a971>] ? sys_read+0x41/0x70 > > > [<c0409cdf>] ? sysenter_do_call+0x12/0x28 > > > Code: 60 4f 7e c8 8b 44 24 08 8b b8 20 06 00 00 85 ff 7e 63 c7 44 24 04 00 00 00 00 66 90 8b 54 24 04 8b 4c 24 08 8b 84 9 > > > 1 00 05 00 00 <8b> 50 40 eb 06 8d 74 26 00 89 ca f6 c2 01 0f 85 ae 00 00 00 8b > > > EIP: [<f7f6b537>] ixgbe_get_stats64+0x47/0x120 [ixgbe] SS:ESP 0068:ecc45d88 > > > CR2: 0000000000000040 > > > ---[ end trace 51ea89f4e57f54f1 ]--- > > > > > > Emil > > I believe I now understand the problem, please try following patch. > > Thanks > > > > [PATCH] ixgbe: delay rx_ring freeing > > "cat /proc/net/dev" uses RCU protection only. > > Its quite possible we call a driver get_stats() method while device is > dismantling and freeing its data structures. > > So get_stats() methods must be very careful not accessing driver private > data without appropriate locking. > > In ixgbe case, we access rx_ring pointers. These pointers are freed in > ixgbe_clear_interrupt_scheme() and set to NULL, this can trigger NULL > dereference in ixgbe_get_stats64() > > A possible fix is to use RCU locking in ixgbe_get_stats64() and defer > rx_ring freeing after a grace period in ixgbe_clear_interrupt_scheme() > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> > Reported-by: Tantilov, Emil S <emil.s.tantilov@intel.com> > --- > drivers/net/ixgbe/ixgbe.h | 1 > drivers/net/ixgbe/ixgbe_main.c | 34 +++++++++++++++++++++---------- > 2 files changed, 25 insertions(+), 10 deletions(-) > Thanks Eric, I have added this patch to my queue. [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 490 bytes --] ^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH] ixgbe: refactor ixgbe_alloc_queues() 2010-10-28 12:21 ` Jeff Kirsher @ 2010-10-29 3:45 ` Eric Dumazet 2010-10-29 7:46 ` Jeff Kirsher 2010-10-29 8:38 ` Jeff Kirsher 0 siblings, 2 replies; 10+ messages in thread From: Eric Dumazet @ 2010-10-29 3:45 UTC (permalink / raw) To: jeffrey.t.kirsher Cc: Tantilov, Emil S, David Miller, Waskiewicz Jr, Peter P, Brattain, Ross B, netdev Note : compiled only patch, not tested. Thanks ! [PATCH] ixgbe: refactor ixgbe_alloc_queues() I noticed ring variable was initialized before allocations, and that memory node management was a bit ugly. We also leak memory in case of ring allocations error. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> --- drivers/net/ixgbe/ixgbe_main.c | 69 ++++++++++++------------------- 1 file changed, 28 insertions(+), 41 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 2bd3eb4..e89da9a 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4470,65 +4470,52 @@ static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) **/ static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter) { - int i; - int orig_node = adapter->node; + int rx = 0, tx = 0, nid = adapter->node; - for (i = 0; i < adapter->num_tx_queues; i++) { - struct ixgbe_ring *ring = adapter->tx_ring[i]; - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct ixgbe_ring), GFP_KERNEL, - adapter->node); + if (nid < 0 || !node_online(nid)) + nid = first_online_node; + + for (; tx < adapter->num_tx_queues; tx++) { + struct ixgbe_ring *ring; + + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, nid); if (!ring) - ring = kzalloc(sizeof(struct ixgbe_ring), GFP_KERNEL); + ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) - goto err_tx_ring_allocation; + goto err_allocation; ring->count = adapter->tx_ring_count; - ring->queue_index = i; - ring->numa_node = adapter->node; + ring->queue_index = tx; + ring->numa_node = nid; - adapter->tx_ring[i] = ring; + adapter->tx_ring[tx] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; + for (; rx < adapter->num_rx_queues; rx++) { + struct ixgbe_ring *ring; - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = adapter->rx_ring[i]; - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct ixgbe_ring), GFP_KERNEL, - adapter->node); + ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, nid); if (!ring) - ring = kzalloc(sizeof(struct ixgbe_ring), GFP_KERNEL); + ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) - goto err_rx_ring_allocation; + goto err_allocation; ring->count = adapter->rx_ring_count; - ring->queue_index = i; - ring->numa_node = adapter->node; + ring->queue_index = rx; + ring->numa_node = nid; - adapter->rx_ring[i] = ring; + adapter->rx_ring[rx] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; - ixgbe_cache_ring_register(adapter); return 0; -err_rx_ring_allocation: - for (i = 0; i < adapter->num_tx_queues; i++) - kfree(adapter->tx_ring[i]); -err_tx_ring_allocation: +err_allocation: + while (tx) + kfree(adapter->tx_ring[--tx]); + + while (rx) + kfree(adapter->tx_ring[--rx]); + return -ENOMEM; } ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] ixgbe: refactor ixgbe_alloc_queues() 2010-10-29 3:45 ` [PATCH] ixgbe: refactor ixgbe_alloc_queues() Eric Dumazet @ 2010-10-29 7:46 ` Jeff Kirsher 2010-10-29 8:38 ` Jeff Kirsher 1 sibling, 0 replies; 10+ messages in thread From: Jeff Kirsher @ 2010-10-29 7:46 UTC (permalink / raw) To: Eric Dumazet Cc: Tantilov, Emil S, David Miller, Waskiewicz Jr, Peter P, Brattain, Ross B, netdev On Thu, Oct 28, 2010 at 20:45, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Note : compiled only patch, not tested. > > Thanks ! > > [PATCH] ixgbe: refactor ixgbe_alloc_queues() > > I noticed ring variable was initialized before allocations, and that > memory node management was a bit ugly. We also leak memory in case of > ring allocations error. > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> > --- > drivers/net/ixgbe/ixgbe_main.c | 69 ++++++++++++------------------- > 1 file changed, 28 insertions(+), 41 deletions(-) > Thanks Eric. I will add this to my queue of patches. -- Cheers, Jeff ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] ixgbe: refactor ixgbe_alloc_queues() 2010-10-29 3:45 ` [PATCH] ixgbe: refactor ixgbe_alloc_queues() Eric Dumazet 2010-10-29 7:46 ` Jeff Kirsher @ 2010-10-29 8:38 ` Jeff Kirsher 2010-10-29 8:48 ` Eric Dumazet 1 sibling, 1 reply; 10+ messages in thread From: Jeff Kirsher @ 2010-10-29 8:38 UTC (permalink / raw) To: Eric Dumazet Cc: Tantilov, Emil S, David Miller, Waskiewicz Jr, Peter P, Brattain, Ross B, netdev On Thu, Oct 28, 2010 at 20:45, Eric Dumazet <eric.dumazet@gmail.com> wrote: > Note : compiled only patch, not tested. > > Thanks ! > > [PATCH] ixgbe: refactor ixgbe_alloc_queues() > > I noticed ring variable was initialized before allocations, and that > memory node management was a bit ugly. We also leak memory in case of > ring allocations error. > > Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> > --- > drivers/net/ixgbe/ixgbe_main.c | 69 ++++++++++++------------------- > 1 file changed, 28 insertions(+), 41 deletions(-) > I noticed a typo when applying the patch and I fixed it in the patch in my queue. The following code: > + while (rx) > + kfree(adapter->tx_ring[--rx]); > + should be: > + while (rx) > + kfree(adapter->rx_ring[--rx]); > + -- Cheers, Jeff ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] ixgbe: refactor ixgbe_alloc_queues() 2010-10-29 8:38 ` Jeff Kirsher @ 2010-10-29 8:48 ` Eric Dumazet 0 siblings, 0 replies; 10+ messages in thread From: Eric Dumazet @ 2010-10-29 8:48 UTC (permalink / raw) To: Jeff Kirsher Cc: Tantilov, Emil S, David Miller, Waskiewicz Jr, Peter P, Brattain, Ross B, netdev Le vendredi 29 octobre 2010 à 01:38 -0700, Jeff Kirsher a écrit : > > I noticed a typo when applying the patch and I fixed it in the patch > in my queue. > > The following code: > > + while (rx) > > + kfree(adapter->tx_ring[--rx]); > > + > > should be: > > + while (rx) > > + kfree(adapter->rx_ring[--rx]); > > + > Good catch, thanks ! ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2010-10-29 8:49 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-10-11 12:17 [PATCH net-next] ixgbe: fix stats handling Eric Dumazet 2010-10-12 0:13 ` Jeff Kirsher 2010-10-27 22:35 ` Tantilov, Emil S 2010-10-28 4:24 ` Eric Dumazet 2010-10-28 12:02 ` [PATCH] ixgbe: delay rx_ring freeing Eric Dumazet 2010-10-28 12:21 ` Jeff Kirsher 2010-10-29 3:45 ` [PATCH] ixgbe: refactor ixgbe_alloc_queues() Eric Dumazet 2010-10-29 7:46 ` Jeff Kirsher 2010-10-29 8:38 ` Jeff Kirsher 2010-10-29 8:48 ` Eric Dumazet
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).