* [RFC][PATCH 1/2] bgmac: separate RX desc setup code into new function @ 2013-08-11 17:49 Rafał Miłecki 2013-08-11 17:49 ` [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it Rafał Miłecki 0 siblings, 1 reply; 8+ messages in thread From: Rafał Miłecki @ 2013-08-11 17:49 UTC (permalink / raw) To: netdev, openwrt-devel, Hauke Mehrtens, Florian Fainelli, Jonas Gorski, Robert Bradley --- drivers/net/ethernet/broadcom/bgmac.c | 41 ++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 1b8e7a3..e70ee43 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -268,6 +268,26 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac, return 0; } +static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac, + struct bgmac_dma_ring *ring, int desc_idx) +{ + struct bgmac_dma_desc *dma_desc = ring->cpu_base + desc_idx; + u32 ctl0 = 0, ctl1 = 0; + + if (desc_idx == ring->num_slots - 1) + ctl0 |= BGMAC_DESC_CTL0_EOT; + ctl1 |= BGMAC_RX_BUF_SIZE & BGMAC_DESC_CTL1_LEN; + /* Is there any BGMAC device that requires extension? 
*/ + /* ctl1 |= (addrext << B43_DMA64_DCTL1_ADDREXT_SHIFT) & + * B43_DMA64_DCTL1_ADDREXT_MASK; + */ + + dma_desc->addr_low = cpu_to_le32(lower_32_bits(ring->slots[desc_idx].dma_addr)); + dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr)); + dma_desc->ctl0 = cpu_to_le32(ctl0); + dma_desc->ctl1 = cpu_to_le32(ctl1); +} + static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, int weight) { @@ -484,8 +504,6 @@ err_dma_free: static void bgmac_dma_init(struct bgmac *bgmac) { struct bgmac_dma_ring *ring; - struct bgmac_dma_desc *dma_desc; - u32 ctl0, ctl1; int i; for (i = 0; i < BGMAC_MAX_TX_RINGS; i++) { @@ -514,23 +532,8 @@ static void bgmac_dma_init(struct bgmac *bgmac) bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_RINGHI, upper_32_bits(ring->dma_base)); - for (j = 0, dma_desc = ring->cpu_base; j < ring->num_slots; - j++, dma_desc++) { - ctl0 = ctl1 = 0; - - if (j == ring->num_slots - 1) - ctl0 |= BGMAC_DESC_CTL0_EOT; - ctl1 |= BGMAC_RX_BUF_SIZE & BGMAC_DESC_CTL1_LEN; - /* Is there any BGMAC device that requires extension? */ - /* ctl1 |= (addrext << B43_DMA64_DCTL1_ADDREXT_SHIFT) & - * B43_DMA64_DCTL1_ADDREXT_MASK; - */ - - dma_desc->addr_low = cpu_to_le32(lower_32_bits(ring->slots[j].dma_addr)); - dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[j].dma_addr)); - dma_desc->ctl0 = cpu_to_le32(ctl0); - dma_desc->ctl1 = cpu_to_le32(ctl1); - } + for (j = 0; j < ring->num_slots; j++) + bgmac_dma_rx_setup_desc(bgmac, ring, j); bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX, ring->num_slots * sizeof(struct bgmac_dma_desc)); -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-11 17:49 [RFC][PATCH 1/2] bgmac: separate RX desc setup code into new function Rafał Miłecki @ 2013-08-11 17:49 ` Rafał Miłecki 2013-08-15 11:36 ` Rafał Miłecki 0 siblings, 1 reply; 8+ messages in thread From: Rafał Miłecki @ 2013-08-11 17:49 UTC (permalink / raw) To: netdev, openwrt-devel, Hauke Mehrtens, Florian Fainelli, Jonas Gorski, Robert Bradley Cc: Rafał Miłecki It makes more sense to allocate new (empty) skb and pass it to the hardware. That way we avoid copying whole packet into new skb which should result in better performance. --- drivers/net/ethernet/broadcom/bgmac.c | 74 ++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index e70ee43..425fe81 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -304,9 +304,9 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, struct device *dma_dev = bgmac->core->dma_dev; struct bgmac_slot_info *slot = &ring->slots[ring->start]; struct sk_buff *skb = slot->skb; - struct sk_buff *new_skb; struct bgmac_rx_header *rx; u16 len, flags; + bool resync_skb = true; /* Unmap buffer to make it accessible to the CPU */ dma_sync_single_for_cpu(dma_dev, slot->dma_addr, @@ -317,36 +317,70 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, len = le16_to_cpu(rx->len); flags = le16_to_cpu(rx->flags); - /* Check for poison and drop or pass the packet */ - if (len == 0xdead && flags == 0xbeef) { - bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n", - ring->start); - } else { + do { + /* Check for poisoned packet */ + if (len == 0xdead && flags == 0xbeef) { + bgmac_err(bgmac, "Found poisoned packet at slot %d, DMA issue!\n", + ring->start); + break; + } + /* Omit CRC. 
*/ + len -= ETH_FCS_LEN; - new_skb = netdev_alloc_skb_ip_align(bgmac->net_dev, len); - if (new_skb) { + /* Use skb_copy for small packets only */ + if (len > 1) { + dma_addr_t old_dma_addr = slot->dma_addr; + int err; + + /* Prepare new skb for further packets */ + err = bgmac_dma_rx_skb_for_slot(bgmac, slot); + if (err) { + bgmac_err(bgmac, "Couldn't allocate new skb for slot %d!\n", + ring->start); + bgmac->net_dev->stats.rx_dropped++; + break; + } + bgmac_dma_rx_setup_desc(bgmac, ring, + ring->start); + + /* Unmap old skb, we'll pass it to the netif */ + dma_unmap_single(dma_dev, old_dma_addr, + BGMAC_RX_BUF_SIZE, + DMA_FROM_DEVICE); + resync_skb = false; + + skb_put(skb, BGMAC_RX_FRAME_OFFSET + len); + skb_pull(skb, BGMAC_RX_FRAME_OFFSET); + } else { + struct sk_buff *new_skb; + + /* Poison the old skb */ + rx->len = cpu_to_le16(0xdead); + rx->flags = cpu_to_le16(0xbeef); + + new_skb = netdev_alloc_skb_ip_align(bgmac->net_dev, len); + if (!new_skb) { + bgmac_err(bgmac, "Allocation of skb for copying packet failed!\n"); + bgmac->net_dev->stats.rx_dropped++; + break; + } + skb_put(new_skb, len); skb_copy_from_linear_data_offset(skb, BGMAC_RX_FRAME_OFFSET, new_skb->data, len); - skb_checksum_none_assert(skb); - new_skb->protocol = - eth_type_trans(new_skb, bgmac->net_dev); - netif_receive_skb(new_skb); - handled++; - } else { - bgmac->net_dev->stats.rx_dropped++; - bgmac_err(bgmac, "Allocation of skb for copying packet failed!\n"); + skb = new_skb; } - /* Poison the old skb */ - rx->len = cpu_to_le16(0xdead); - rx->flags = cpu_to_le16(0xbeef); - } + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, bgmac->net_dev); + netif_receive_skb(skb); + handled++; + } while (0); /* Make it back accessible to the hardware */ + if (resync_skb) dma_sync_single_for_device(dma_dev, slot->dma_addr, BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); -- 1.7.10.4 ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-11 17:49 ` [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it Rafał Miłecki @ 2013-08-15 11:36 ` Rafał Miłecki 2013-08-15 11:47 ` Felix Fietkau 0 siblings, 1 reply; 8+ messages in thread From: Rafał Miłecki @ 2013-08-15 11:36 UTC (permalink / raw) To: Network Development, OpenWrt Development List, Hauke Mehrtens, Florian Fainelli, Jonas Gorski, Robert Bradley 2013/8/11 Rafał Miłecki <zajec5@gmail.com>: > It makes more sense to allocate new (empty) skb and pass it to the > hardware. That way we avoid copying whole packet into new skb which > should result in better performance. I did some testing of this patch using "perf" tool and iperf -s running on the OpenWrt machine (with bgmac supported hardware). Here are the results: No network usage: 64.93% [kernel] [k] arch_cpu_idle 16.10% [kernel] [k] arch_local_irq_restore 11.52% [kernel] [k] cpu_startup_entry Running iperf on PC: iperf -c 192.168.1.1 23.57% [kernel] [k] __copy_user_common 10.57% [kernel] [k] csum_partial 8.87% [kernel] [k] arch_cpu_idle 4.74% [kernel] [k] arch_local_irq_restore 4.30% [ip_tables] [k] ipt_do_table 2.91% [nf_conntrack] [k] nf_conntrack_in 2.44% [kernel] [k] __netif_receive_skb_core 2.36% [kernel] [k] r4k_dma_cache_inv 2.33% [nf_conntrack] [k] nf_conntrack_proto_fini With 0002-bgmac-pass-received-packet-to-the-netif-instead-of-c.patch 14.83% [kernel] [k] __copy_user_common 14.81% [kernel] [k] csum_partial 4.24% [ip_tables] [k] ipt_do_table 3.69% [kernel] [k] arch_local_irq_restore 3.54% [kernel] [k] __netif_receive_skb_core 3.38% [kernel] [k] r4k_dma_cache_inv 3.24% [nf_conntrack] [k] nf_conntrack_in 2.95% [xt_conntrack] [k] 0x0000018c 2.88% [nf_conntrack] [k] nf_conntrack_proto_fini 2.58% [iptable_nat] [k] 0x00000008 2.32% [bgmac] [k] 0x00000d9c 2.18% [nf_conntrack_ipv4] [k] need_ipv4_conntrack So you can see that __copy_user_common usage has really decreased with 
this patch! Unfortunately it didn't result in better performance... no idea why :( -- Rafał ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-15 11:36 ` Rafał Miłecki @ 2013-08-15 11:47 ` Felix Fietkau 2013-08-15 20:21 ` Rafał Miłecki 0 siblings, 1 reply; 8+ messages in thread From: Felix Fietkau @ 2013-08-15 11:47 UTC (permalink / raw) To: OpenWrt Development List Cc: Network Development, Jonas Gorski, Hauke Mehrtens On 2013-08-15 1:36 PM, Rafał Miłecki wrote: > 2013/8/11 Rafał Miłecki <zajec5@gmail.com>: >> It makes more sense to allocate new (empty) skb and pass it to the >> hardware. That way we avoid copying whole packet into new skb which >> should result in better performance. > > I did some testing of this patch using "perf" tool and iperf -s > running on the OpenWrt machine (with bgmac supported hardware). > > So you can see that __copy_user_common usage has really decreased with > this patch! > > Unfortunately it didn't result in better performance... no idea why :( Running iperf on the router is not useful as an indicator of routing performance. Please focus on tests where you only push traffic through the router, not directly to it. - Felix _______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-15 11:47 ` Felix Fietkau @ 2013-08-15 20:21 ` Rafał Miłecki 2013-08-18 21:06 ` [OpenWrt-Devel] " Rafał Miłecki 2013-08-19 4:36 ` Felix Fietkau 0 siblings, 2 replies; 8+ messages in thread From: Rafał Miłecki @ 2013-08-15 20:21 UTC (permalink / raw) To: Felix Fietkau Cc: Network Development, Jonas Gorski, Hauke Mehrtens, OpenWrt Development List 2013/8/15 Felix Fietkau <nbd@openwrt.org>: > On 2013-08-15 1:36 PM, Rafał Miłecki wrote: >> 2013/8/11 Rafał Miłecki <zajec5@gmail.com>: >>> It makes more sense to allocate new (empty) skb and pass it to the >>> hardware. That way we avoid copying whole packet into new skb which >>> should result in better performance. >> >> I did some testing of this patch using "perf" tool and iperf -s >> running on the OpenWrt machine (with bgmac supported hardware). >> >> So you can see that __copy_user_common usage has really decreased with >> this patch! >> >> Unfortunately it didn't result in better performance... no idea why :( > Running iperf on the router is not useful as an indicator of routing > performance. Please focus on tests where you only push traffic through > the router, not directly to it. OK, so I started "iperf -s" on notebook plugged into WAN port, and then played with "iperf -c" on notebook connected to LAN#2. 
With some old 3.6.11 based OpenWrt build I got: [ 4] 0.0-60.0 sec 690 MBytes 96.4 Mbits/sec With very recent 3.10.4 based OpenWrt build: [ 4] 0.0-60.0 sec 667 MBytes 93.2 Mbits/sec After applying my patch on top of that 3.10.4: [ 5] 0.0-60.0 sec 759 MBytes 106 Mbits/sec And some dumps from "perf top": 3.10.4 6.75% [kernel] [k] __copy_user_common 6.73% [ip_tables] [k] ipt_do_table 4.33% [kernel] [k] arch_cpu_idle 3.96% [kernel] [k] arch_local_irq_restore 3.42% [bgmac] [k] 0x000007e0 3.35% [nf_conntrack] [k] nf_conntrack_proto_fini 2.72% [nf_conntrack] [k] nf_conntrack_in 2.50% [kernel] [k] __netif_receive_skb_core 2.42% [kernel] [k] r4k_dma_cache_inv 2.38% [kernel] [k] fib_table_lookup 2.20% [kernel] [k] dev_queue_xmit 2.11% [xt_conntrack] [k] 0x00000360 2.10% [kernel] [k] ip_route_input_noref 2.06% [nf_conntrack_ipv4] [k] need_ipv4_conntrack 3.10.4 + 0002-bgmac-pass-received-packet-to-the-netif-instead-of-c.patch 6.09% [ip_tables] [k] ipt_do_table 4.71% [kernel] [k] arch_cpu_idle 4.48% [bgmac] [k] 0x00000d7c 3.50% [nf_conntrack] [k] nf_conntrack_in 3.22% [kernel] [k] arch_local_irq_restore 3.16% [nf_conntrack] [k] nf_conntrack_proto_fini 2.88% [kernel] [k] __netif_receive_skb_core 2.78% [xt_conntrack] [k] 0x0000011c 2.69% [kernel] [k] r4k_dma_cache_inv 2.67% [iptable_nat] [k] 0x000002a0 2.36% [kernel] [k] ip_route_input_noref 2.27% [kernel] [k] ip_rcv 2.25% [nf_conntrack_ipv4] [k] need_ipv4_conntrack 2.23% [kernel] [k] nf_iterate I've compiled bgmac into the kernel and it seems that the magic 0xd7c was bgmac_poll. I'm afraid this "perf top" output doesn't really tell us where to look for optimizations :| I'll still try Felix ideas tomorrow, but I'm not sure if they help, since there isn't __copy_user_common anymore in the "perf top" output... 
-- Rafał _______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [OpenWrt-Devel] [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-15 20:21 ` Rafał Miłecki @ 2013-08-18 21:06 ` Rafał Miłecki 2013-08-19 4:36 ` Felix Fietkau 1 sibling, 0 replies; 8+ messages in thread From: Rafał Miłecki @ 2013-08-18 21:06 UTC (permalink / raw) To: Felix Fietkau Cc: OpenWrt Development List, Network Development, Hauke Mehrtens, Florian Fainelli, Jonas Gorski, Robert Bradley [-- Attachment #1: Type: text/plain, Size: 6730 bytes --] I performed some tests using Netgear WNDR4500 (BCM4706 based) using: WNDR4500-V1.0.1.36_1.0.63.chk On IRC we were wondering if the switch does routing for the CPU, so I did some trivial tests using "ifconfig". Short introduction of available interfaces: # brctl show bridge name bridge id STP enabled interfaces br0 8000.204e7fab3aa8 no vlan1 eth1 eth2 So br0 bridge connects: vlan1, eth1, eth2. I don't really know why there are eth2 and vlan2 I was using 192.168.5.2 for my WAN port I was using 192.168.1.1 for my LAN ports 192.168.5.1 was a "gate" PC with iperf -s running. On my 192.168.1.X machine I started iperf -c. So it was transfer of packets from LAN vlan to WAN vlan. iperf.exe -c 192.168.5.1 -t 60 ------------------------------------------------------------ Client connecting to 192.168.5.1, TCP port 5001 TCP window size: 64.0 KByte (default) ------------------------------------------------------------ [ 3] local 192.168.1.3 port 55244 connected with 192.168.5.1 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0-60.0 sec 4.02 GBytes 576 Mbits/sec As you can see, I got ~500Mb/s in comparison to ~105Mb/s when using OpenWrt. 
Now the most interesting part (I hope my mailer won't fail on formatting): # ifconfig; sleep 10; ifconfig br0 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet addr:192.168.1.1 Bcast:192.168.1.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:3639 errors:0 dropped:0 overruns:0 frame:0 TX packets:4001 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:368045 (359.4 KiB) TX bytes:3516213 (3.3 MiB) eth0 Link encap:Ethernet HWaddr 11:22:33:44:55:A9 inet addr:192.168.5.2 Bcast:192.168.5.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa9/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:309084 errors:0 dropped:0 overruns:0 frame:0 TX packets:3126985 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:19789406 (18.8 MiB) TX bytes:377801895 (360.2 MiB) Interrupt:4 Base address:0x2000 eth1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:424 TX packets:0 errors:65 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:3 Base address:0x8000 eth2 Link encap:Ethernet HWaddr 11:22:33:44:55:A7 inet6 addr: fe80::224e:7fff:feab:3aa7/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:54 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:5 Base address:0x8000 lo Link encap:Local Loopback inet addr:127.0.0.1 Mask:255.0.0.0 inet6 addr: ::1/128 Scope:Host UP LOOPBACK RUNNING MULTICAST MTU:16436 Metric:1 RX packets:107 errors:0 dropped:0 overruns:0 frame:0 TX packets:107 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:11408 (11.1 KiB) TX 
bytes:11408 (11.1 KiB) vlan1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:3127849 errors:0 dropped:0 overruns:0 frame:0 TX packets:310049 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:390393723 (372.3 MiB) TX bytes:20643844 (19.6 MiB) vlan2 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) br0 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet addr:192.168.1.1 Bcast:192.168.1.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:3670 errors:0 dropped:0 overruns:0 frame:0 TX packets:4004 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:370146 (361.4 KiB) TX bytes:3516858 (3.3 MiB) eth0 Link encap:Ethernet HWaddr 11:22:33:44:55:A9 inet addr:192.168.5.2 Bcast:192.168.5.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa9/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:362946 errors:0 dropped:0 overruns:0 frame:0 TX packets:3671812 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:23236574 (22.1 MiB) TX bytes:1191974445 (1.1 GiB) Interrupt:4 Base address:0x2000 eth1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:424 TX packets:0 errors:65 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:3 Base address:0x8000 eth2 Link encap:Ethernet HWaddr 
11:22:33:44:55:A7 inet6 addr: fe80::224e:7fff:feab:3aa7/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:54 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:5 Base address:0x8000 lo Link encap:Local Loopback inet addr:127.0.0.1 Mask:255.0.0.0 inet6 addr: ::1/128 Scope:Host UP LOOPBACK RUNNING MULTICAST MTU:16436 Metric:1 RX packets:119 errors:0 dropped:0 overruns:0 frame:0 TX packets:119 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:12532 (12.2 KiB) TX bytes:12532 (12.2 KiB) vlan1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:3672697 errors:0 dropped:0 overruns:0 frame:0 TX packets:363914 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:1206747346 (1.1 GiB) TX bytes:24091669 (22.9 MiB) vlan2 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) So over 10s Netgear's firmware received ~700MiB of packets on vlan1 And it transferred ~700MiB of packets to eth0. I believe it proves that Netgear's firmware doesn't use any built in routing feature of the BCM53125 switch. 
[-- Attachment #2: netgear.wndr4500.orig.firmware.ifconfig.txt --] [-- Type: text/plain, Size: 6423 bytes --] # ifconfig; sleep 10; ifconfig br0 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet addr:192.168.1.1 Bcast:192.168.1.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:3639 errors:0 dropped:0 overruns:0 frame:0 TX packets:4001 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:368045 (359.4 KiB) TX bytes:3516213 (3.3 MiB) eth0 Link encap:Ethernet HWaddr 11:22:33:44:55:A9 inet addr:192.168.5.2 Bcast:192.168.5.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa9/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:309084 errors:0 dropped:0 overruns:0 frame:0 TX packets:3126985 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:19789406 (18.8 MiB) TX bytes:377801895 (360.2 MiB) Interrupt:4 Base address:0x2000 eth1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:424 TX packets:0 errors:65 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:3 Base address:0x8000 eth2 Link encap:Ethernet HWaddr 11:22:33:44:55:A7 inet6 addr: fe80::224e:7fff:feab:3aa7/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:54 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:5 Base address:0x8000 lo Link encap:Local Loopback inet addr:127.0.0.1 Mask:255.0.0.0 inet6 addr: ::1/128 Scope:Host UP LOOPBACK RUNNING MULTICAST MTU:16436 Metric:1 RX packets:107 errors:0 dropped:0 overruns:0 frame:0 TX packets:107 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 
RX bytes:11408 (11.1 KiB) TX bytes:11408 (11.1 KiB) vlan1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:3127849 errors:0 dropped:0 overruns:0 frame:0 TX packets:310049 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:390393723 (372.3 MiB) TX bytes:20643844 (19.6 MiB) vlan2 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) br0 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet addr:192.168.1.1 Bcast:192.168.1.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:3670 errors:0 dropped:0 overruns:0 frame:0 TX packets:4004 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:370146 (361.4 KiB) TX bytes:3516858 (3.3 MiB) eth0 Link encap:Ethernet HWaddr 11:22:33:44:55:A9 inet addr:192.168.5.2 Bcast:192.168.5.255 Mask:255.255.255.0 inet6 addr: fe80::224e:7fff:feab:3aa9/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:362946 errors:0 dropped:0 overruns:0 frame:0 TX packets:3671812 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:23236574 (22.1 MiB) TX bytes:1191974445 (1.1 GiB) Interrupt:4 Base address:0x2000 eth1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:424 TX packets:0 errors:65 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:3 Base address:0x8000 eth2 Link 
encap:Ethernet HWaddr 11:22:33:44:55:A7 inet6 addr: fe80::224e:7fff:feab:3aa7/64 Scope:Link UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:54 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:1000 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) Interrupt:5 Base address:0x8000 lo Link encap:Local Loopback inet addr:127.0.0.1 Mask:255.0.0.0 inet6 addr: ::1/128 Scope:Host UP LOOPBACK RUNNING MULTICAST MTU:16436 Metric:1 RX packets:119 errors:0 dropped:0 overruns:0 frame:0 TX packets:119 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:12532 (12.2 KiB) TX bytes:12532 (12.2 KiB) vlan1 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:3672697 errors:0 dropped:0 overruns:0 frame:0 TX packets:363914 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:1206747346 (1.1 GiB) TX bytes:24091669 (22.9 MiB) vlan2 Link encap:Ethernet HWaddr 11:22:33:44:55:A8 inet6 addr: fe80::224e:7fff:feab:3aa8/64 Scope:Link UP BROADCAST RUNNING PROMISC ALLMULTI MULTICAST MTU:1500 Metric:1 RX packets:0 errors:0 dropped:0 overruns:0 frame:0 TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 collisions:0 txqueuelen:0 RX bytes:0 (0.0 B) TX bytes:0 (0.0 B) ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-15 20:21 ` Rafał Miłecki 2013-08-18 21:06 ` [OpenWrt-Devel] " Rafał Miłecki @ 2013-08-19 4:36 ` Felix Fietkau 2013-08-19 17:20 ` Rafał Miłecki 1 sibling, 1 reply; 8+ messages in thread From: Felix Fietkau @ 2013-08-19 4:36 UTC (permalink / raw) To: Rafał Miłecki Cc: Network Development, Jonas Gorski, Hauke Mehrtens, OpenWrt Development List On 2013-08-15 10:21 PM, Rafał Miłecki wrote: > 2013/8/15 Felix Fietkau <nbd@openwrt.org>: >> On 2013-08-15 1:36 PM, Rafał Miłecki wrote: >>> 2013/8/11 Rafał Miłecki <zajec5@gmail.com>: >>>> It makes more sense to allocate new (empty) skb and pass it to the >>>> hardware. That way we avoid copying whole packet into new skb which >>>> should result in better performance. >>> >>> I did some testing of this patch using "perf" tool and iperf -s >>> running on the OpenWrt machine (with bgmac supported hardware). >>> >>> So you can see that __copy_user_common usage has really decreased with >>> this patch! >>> >>> Unfortunately it didn't result in better performance... no idea why :( >> Running iperf on the router is not useful as an indicator of routing >> performance. Please focus on tests where you only push traffic through >> the router, not directly to it. > > OK, so I started "iperf -s" on notebook plugged into WAN port, and > then played with "iperf -c" on notebook connected to LAN#2. 
> > With some old 3.6.11 based OpenWrt build I got: > [ 4] 0.0-60.0 sec 690 MBytes 96.4 Mbits/sec > > With very recent 3.10.4 based OpenWrt build: > [ 4] 0.0-60.0 sec 667 MBytes 93.2 Mbits/sec > > After applying my patch on top of that 3.10.4: > [ 5] 0.0-60.0 sec 759 MBytes 106 Mbits/sec > > And some dumps from "perf top": > > 3.10.4 > 6.75% [kernel] [k] __copy_user_common > 6.73% [ip_tables] [k] ipt_do_table > 4.33% [kernel] [k] arch_cpu_idle > 3.96% [kernel] [k] arch_local_irq_restore > 3.42% [bgmac] [k] 0x000007e0 > 3.35% [nf_conntrack] [k] nf_conntrack_proto_fini > 2.72% [nf_conntrack] [k] nf_conntrack_in > 2.50% [kernel] [k] __netif_receive_skb_core > 2.42% [kernel] [k] r4k_dma_cache_inv > 2.38% [kernel] [k] fib_table_lookup > 2.20% [kernel] [k] dev_queue_xmit > 2.11% [xt_conntrack] [k] 0x00000360 > 2.10% [kernel] [k] ip_route_input_noref > 2.06% [nf_conntrack_ipv4] [k] need_ipv4_conntrack > > 3.10.4 + 0002-bgmac-pass-received-packet-to-the-netif-instead-of-c.patch > 6.09% [ip_tables] [k] ipt_do_table > 4.71% [kernel] [k] arch_cpu_idle > 4.48% [bgmac] [k] 0x00000d7c > 3.50% [nf_conntrack] [k] nf_conntrack_in > 3.22% [kernel] [k] arch_local_irq_restore > 3.16% [nf_conntrack] [k] nf_conntrack_proto_fini > 2.88% [kernel] [k] __netif_receive_skb_core > 2.78% [xt_conntrack] [k] 0x0000011c > 2.69% [kernel] [k] r4k_dma_cache_inv > 2.67% [iptable_nat] [k] 0x000002a0 > 2.36% [kernel] [k] ip_route_input_noref > 2.27% [kernel] [k] ip_rcv > 2.25% [nf_conntrack_ipv4] [k] need_ipv4_conntrack > 2.23% [kernel] [k] nf_iterate > > I've compiled bgmac into the kernel and it seems that the magic 0xd7c > was bgmac_poll. > > I'm afraid this "perf top" output doesn't really tell us where to look > for optimizations :| I'll still try Felix ideas tomorrow, but I'm not > sure if they help, since there isn't __copy_user_common anymore in the > "perf top" output... What's the CPU load while passing traffic without running perf? Have you tested bridging performance? 
- Felix _______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it 2013-08-19 4:36 ` Felix Fietkau @ 2013-08-19 17:20 ` Rafał Miłecki 0 siblings, 0 replies; 8+ messages in thread From: Rafał Miłecki @ 2013-08-19 17:20 UTC (permalink / raw) To: Felix Fietkau Cc: Network Development, Jonas Gorski, Hauke Mehrtens, OpenWrt Development List 2013/8/19 Felix Fietkau <nbd@openwrt.org>: > On 2013-08-15 10:21 PM, Rafał Miłecki wrote: >> I'm afraid this "perf top" output doesn't really tell us where to look >> for optimizations :| I'll still try Felix ideas tomorrow, but I'm not >> sure if they help, since there isn't __copy_user_common anymore in the >> "perf top" output... > What's the CPU load while passing traffic without running perf? During "ping 192.168.5.1" # top Mem: 19564K used, 107076K free, 0K shrd, 0K buff, 12916K cached CPU: 0% usr 0% sys 0% nic 99% idle 0% io 0% irq 0% sirq Load average: 0.17 0.10 0.04 1/24 810 # perf top PerfTop: 261 irqs/sec kernel:100.0% exact: 0.0% [4000Hz cycles], (all, 1 CPU) ------------------------------------------------------------------------------- 64.21% [kernel] [k] arch_cpu_idle 16.38% [kernel] [k] arch_local_irq_restore 12.01% [kernel] [k] cpu_startup_entry 2.16% [kernel] [k] rcu_idle_exit During "iperf -c 192.168.5.1" # top Mem: 19572K used, 107068K free, 0K shrd, 0K buff, 12916K cached CPU: 0% usr 0% sys 0% nic 3% idle 0% io 0% irq 96% sirq Load average: 0.29 0.11 0.04 1/24 809 PID PPID USER STAT VSZ %VSZ %CPU COMMAND 3 2 root RW 0 0% 9% [ksoftirqd/0] 809 376 root R 1496 1% 2% top 716 1 nobody S 964 1% 1% /usr/sbin/dnsmasq -C /var/etc/dnsmasq # perf top PerfTop: 265 irqs/sec kernel:100.0% exact: 0.0% [4000Hz cycles], (all, 1 CPU) ------------------------------------------------------------------------------- 6.41% [ip_tables] [k] ipt_do_table 4.01% [bgmac] [k] 0x000006ec 3.43% [kernel] [k] arch_cpu_idle 3.30% [kernel] [k] arch_local_irq_restore 3.25% [kernel] [k] ip_rcv 3.11% [nf_conntrack] [k] 
nf_conntrack_proto_fini 3.11% [nf_conntrack] [k] nf_conntrack_in > Have you tested bridging performance? Do you mean connecting both machines to the same VLAN? Like 2 LAN ports in the standard configuration? It gives me ~600Mb/s using OpenWrt. It's pretty much the same performance I got with the original firmware for transfer between 2 VLANs. [ 5] local 192.168.1.218 port 5001 connected with 192.168.1.131 port 59463 [ 5] 0.0-60.0 sec 4.16 GBytes 596 Mbits/sec -- Rafał _______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2013-08-19 17:20 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2013-08-11 17:49 [RFC][PATCH 1/2] bgmac: separate RX desc setup code into new function Rafał Miłecki 2013-08-11 17:49 ` [RFC][PATCH 2/2] bgmac: pass received packet to the netif instead of copying it Rafał Miłecki 2013-08-15 11:36 ` Rafał Miłecki 2013-08-15 11:47 ` Felix Fietkau 2013-08-15 20:21 ` Rafał Miłecki 2013-08-18 21:06 ` [OpenWrt-Devel] " Rafał Miłecki 2013-08-19 4:36 ` Felix Fietkau 2013-08-19 17:20 ` Rafał Miłecki
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).