Ming Lei, we noticed that commit 74e72f894 ("lib/percpu_counter.c: fix __percpu_counter_add()") introduces a -47.6% regression in aim7 brk_test on a 2S SNB server, compared to its parent commit: 5a610fcc7390ee6 74e72f894d56eb9d2e1218530 --------------- ------------------------- 137340 -47.6% 71965 aim7.2000.jobs-per-min 8852 +112.1% 18775 sched_debug.cpu#30.nr_switches 8676 +109.7% 18192 sched_debug.cpu#14.nr_switches 9016 +110.2% 18955 sched_debug.cpu#21.nr_switches 9004 +120.1% 19822 sched_debug.cpu#20.nr_switches 8947 +107.3% 18551 sched_debug.cpu#6.nr_switches 8555 +113.2% 18238 sched_debug.cpu#24.nr_switches 8689 +107.7% 18049 sched_debug.cpu#23.nr_switches 9041 +110.3% 19008 sched_debug.cpu#16.nr_switches 9616 +101.9% 19415 sched_debug.cpu#2.nr_load_updates 9043 +102.0% 18269 sched_debug.cpu#15.nr_switches 9560 +103.2% 19422 sched_debug.cpu#3.nr_load_updates 621789 +102.1% 1256664 interrupts.LOC 9194 +101.4% 18519 sched_debug.cpu#30.nr_load_updates 9215 +102.4% 18648 sched_debug.cpu#29.nr_switches 9206 +103.5% 18733 sched_debug.cpu#28.nr_switches 9273 +100.1% 18556 sched_debug.cpu#22.nr_load_updates 9263 +101.9% 18700 sched_debug.cpu#16.nr_load_updates 9222 +101.4% 18574 sched_debug.cpu#28.nr_load_updates 9140 +101.9% 18452 sched_debug.cpu#31.nr_load_updates 9483 +98.9% 18860 sched_debug.cpu#12.nr_load_updates 9258 +100.9% 18604 sched_debug.cpu#14.nr_load_updates 9262 +100.5% 18575 sched_debug.cpu#21.nr_load_updates 9576 +97.8% 18942 sched_debug.cpu#1.nr_load_updates 9156 +101.7% 18469 sched_debug.cpu#24.nr_load_updates 9304 +100.4% 18650 sched_debug.cpu#8.nr_load_updates 9311 +100.2% 18643 sched_debug.cpu#15.nr_load_updates 9196 +101.3% 18509 sched_debug.cpu#29.nr_load_updates 9358 +98.6% 18587 sched_debug.cpu#9.nr_load_updates 9538 +94.6% 18564 sched_debug.cpu#27.nr_load_updates 9185 +101.2% 18485 sched_debug.cpu#23.nr_load_updates 9351 +99.7% 18670 sched_debug.cpu#19.nr_load_updates 9605 +94.7% 18696 sched_debug.cpu#13.nr_load_updates 9249 +102.6% 18740 
sched_debug.cpu#20.nr_load_updates 9390 +99.1% 18697 sched_debug.cpu#6.nr_load_updates 9439 +97.9% 18680 sched_debug.cpu#26.nr_load_updates 9526 +97.3% 18791 sched_debug.cpu#4.nr_load_updates 9499 +96.0% 18620 sched_debug.cpu#7.nr_load_updates 9991 +93.5% 19337 sched_debug.cpu#12.nr_switches 9541 +95.9% 18691 sched_debug.cpu#5.nr_load_updates 9634 +95.3% 18813 sched_debug.cpu#11.nr_load_updates 9904 +90.5% 18866 sched_debug.cpu#10.nr_load_updates 9855 +94.8% 19198 sched_debug.cpu#25.nr_load_updates 10068 +93.5% 19481 sched_debug.cpu#4.nr_switches 9848 +89.6% 18676 sched_debug.cpu#0.nr_load_updates 9740 +94.9% 18981 sched_debug.cpu#17.nr_load_updates 345357 +92.5% 664755 softirqs.TIMER 9724 +96.1% 19072 sched_debug.cpu#19.nr_switches 9869 +90.9% 18837 sched_debug.cpu#18.nr_load_updates 10945 +93.1% 21139 sched_debug.cpu#10.nr_switches 172077 +85.9% 319881 softirqs.RCU 421 +80.9% 761 proc-vmstat.nr_written 12881 +74.0% 22416 sched_debug.cpu#1.nr_switches 114 +69.2% 192 uptime.boot 67011 +54.7% 103666 sched_debug.ktime 67811 +53.7% 104200 sched_debug.cpu#5.clock 67800 +53.7% 104188 sched_debug.cpu#0.clock 67811 +53.7% 104201 sched_debug.cpu#6.clock 67811 +53.7% 104196 sched_debug.cpu#8.clock 67810 +53.7% 104196 sched_debug.cpu#7.clock 67820 +53.7% 104209 sched_debug.cpu#4.clock 67817 +53.6% 104196 sched_debug.cpu#9.clock 67905 +53.7% 104343 sched_debug.cpu#16.clock 67888 +53.7% 104314 sched_debug.cpu#14.clock 67898 +53.6% 104324 sched_debug.cpu#15.clock 67807 +53.6% 104185 sched_debug.cpu#1.clock 67822 +53.6% 104208 sched_debug.cpu#3.clock 67819 +53.7% 104205 sched_debug.cpu#2.clock 67944 +53.7% 104400 sched_debug.cpu#22.clock 67928 +53.6% 104371 sched_debug.cpu#18.clock 67941 +53.7% 104393 sched_debug.cpu#21.clock 67943 +53.7% 104404 sched_debug.cpu#23.clock 67830 +53.6% 104213 sched_debug.sched_clk 67830 +53.6% 104213 sched_debug.cpu_clk 67933 +53.7% 104391 sched_debug.cpu#20.clock 67932 +53.6% 104376 sched_debug.cpu#19.clock 67930 +53.6% 104370 
sched_debug.cpu#17.clock 67953 +53.6% 104409 sched_debug.cpu#24.clock 67882 +53.7% 104305 sched_debug.cpu#13.clock 67877 +53.6% 104287 sched_debug.cpu#12.clock 67981 +53.6% 104439 sched_debug.cpu#26.clock 67974 +53.6% 104436 sched_debug.cpu#25.clock 67862 +53.6% 104252 sched_debug.cpu#11.clock 67850 +53.6% 104222 sched_debug.cpu#10.clock 68004 +53.6% 104461 sched_debug.cpu#28.clock 67993 +53.6% 104455 sched_debug.cpu#27.clock 68030 +53.6% 104503 sched_debug.cpu#31.clock 68016 +53.6% 104482 sched_debug.cpu#29.clock 68021 +53.6% 104483 sched_debug.cpu#30.clock 1059405 +51.9% 1609746 sched_debug.cfs_rq[25]:/.tg_load_avg 1059109 +52.0% 1609825 sched_debug.cfs_rq[26]:/.tg_load_avg 273220 +45.9% 398742 proc-vmstat.numa_local 273220 +45.9% 398742 proc-vmstat.numa_hit 281699 +45.0% 408394 proc-vmstat.pgfree 1057479 +52.0% 1607333 sched_debug.cfs_rq[27]:/.tg_load_avg 1056538 +52.2% 1607894 sched_debug.cfs_rq[28]:/.tg_load_avg 1056470 +52.2% 1607708 sched_debug.cfs_rq[29]:/.tg_load_avg 1056049 +52.2% 1607807 sched_debug.cfs_rq[30]:/.tg_load_avg 1069001 +50.6% 1609825 sched_debug.cfs_rq[9]:/.tg_load_avg 1068875 +50.6% 1609533 sched_debug.cfs_rq[6]:/.tg_load_avg 1068926 +50.5% 1609163 sched_debug.cfs_rq[5]:/.tg_load_avg 1068799 +50.6% 1609577 sched_debug.cfs_rq[7]:/.tg_load_avg 1068772 +50.6% 1609624 sched_debug.cfs_rq[8]:/.tg_load_avg 1069130 +50.5% 1609138 sched_debug.cfs_rq[4]:/.tg_load_avg 1068724 +50.6% 1609260 sched_debug.cfs_rq[10]:/.tg_load_avg 1061774 +51.7% 1610512 sched_debug.cfs_rq[23]:/.tg_load_avg 1056373 +52.2% 1607696 sched_debug.cfs_rq[31]:/.tg_load_avg 1069580 +50.4% 1609078 sched_debug.cfs_rq[3]:/.tg_load_avg 1069620 +50.4% 1609105 sched_debug.cfs_rq[1]:/.tg_load_avg 1069611 +50.4% 1609079 sched_debug.cfs_rq[2]:/.tg_load_avg 1062332 +51.7% 1611027 sched_debug.cfs_rq[24]:/.tg_load_avg 1069709 +50.4% 1609095 sched_debug.cfs_rq[0]:/.tg_load_avg 866818 +44.5% 1252728 proc-vmstat.pgfault 1067497 +50.6% 1607684 sched_debug.cfs_rq[11]:/.tg_load_avg 1067443 +50.8% 
1609519 sched_debug.cfs_rq[12]:/.tg_load_avg 1063258 +51.5% 1610462 sched_debug.cfs_rq[22]:/.tg_load_avg 1068168 +50.6% 1608865 sched_debug.cfs_rq[13]:/.tg_load_avg 1065421 +51.1% 1610375 sched_debug.cfs_rq[21]:/.tg_load_avg 1068679 +50.6% 1609429 sched_debug.cfs_rq[14]:/.tg_load_avg 1066627 +51.0% 1610136 sched_debug.cfs_rq[18]:/.tg_load_avg 1068857 +50.6% 1609901 sched_debug.cfs_rq[15]:/.tg_load_avg 1066325 +51.0% 1610606 sched_debug.cfs_rq[19]:/.tg_load_avg 1065623 +51.1% 1610266 sched_debug.cfs_rq[20]:/.tg_load_avg 1069096 +50.6% 1609945 sched_debug.cfs_rq[16]:/.tg_load_avg 1067277 +50.8% 1609907 sched_debug.cfs_rq[17]:/.tg_load_avg 260935 +43.2% 373559 proc-vmstat.pgalloc_normal 1155 +45.1% 1677 vmstat.procs.r 37727 +45.7% 54979 softirqs.SCHED 6124 +22.0% 7475 proc-vmstat.nr_tlb_local_flush_all 815 +10.5% 900 sched_debug.cfs_rq[3]:/.tg_runnable_contrib 37773 +9.9% 41515 sched_debug.cfs_rq[24]:/.avg->runnable_avg_sum 37998 +9.9% 41763 sched_debug.cfs_rq[3]:/.avg->runnable_avg_sum 37880 +9.7% 41556 sched_debug.cfs_rq[12]:/.avg->runnable_avg_sum 37849 +9.8% 41542 sched_debug.cfs_rq[5]:/.avg->runnable_avg_sum 37801 +9.6% 41436 sched_debug.cfs_rq[6]:/.avg->runnable_avg_sum 37831 +9.8% 41521 sched_debug.cfs_rq[8]:/.avg->runnable_avg_sum 2315 +110.3% 4869 time.system_time 598041 +106.9% 1237057 time.involuntary_context_switches 87.54 +90.7% 166.91 time.elapsed_time 152 -10.5% 136 time.user_time 7012 +7.3% 7525 vmstat.system.in 7343 +6.7% 7836 vmstat.system.cs 2818 +6.4% 2998 time.percent_of_cpu_this_job_got time.system_time 5000 ++------------------------------------------------------------------+ O O O O O O O O O O O O O O O O O O O O O O O O O O O O 4500 ++ | | | | | 4000 ++ | | | 3500 ++ | | | 3000 ++ | | | | | 2500 ++ | *..*.*..*.*..*.*..*.*..*.*..*.*..*.*..*.*..*.* | 2000 ++------------------------------------------------------------------+ time.percent_of_cpu_this_job_got 3000 O+-O-O--O-O--O-O--O-O--O-O--O-O--O-O--O-O--O-O--O-O--O-O--O-O--O-O--O | | | | 2950 
++ | | | | | 2900 ++ | | | 2850 ++ | | | *..*.*.. .*..*.*..*.*..*.*..*.*..*.*..*.* | 2800 ++ *.*..* | | | | | 2750 ++------------------------------------------------------------------+ time.elapsed_time 170 ++---------------------O----O-------O-O--O-------O-O--O-O----O--O-O--O O O O O O O O O O O O O O O O | 160 ++ | 150 ++ | | | 140 ++ | 130 ++ | | | 120 ++ | 110 ++ | | | 100 ++ | 90 ++ | *..*.*..*.*..*.*..*.*..*..*.*..*.*..*.*..*.*..* | 80 ++-------------------------------------------------------------------+ time.involuntary_context_switches 1.3e+06 ++---------------------------------------------------------------+ O O O O O O O O O O O O O O O O O O O O O O O O O O O O 1.2e+06 ++ | | | 1.1e+06 ++ | 1e+06 ++ | | | 900000 ++ | | | 800000 ++ | 700000 ++ | | | 600000 *+*..*.*..*.*.*..*.*..*.*.*..*.*..*.*..*.*.* | | | 500000 ++---------------------------------------------------------------+ aim7.2000.jobs-per-min 140000 ++----------*-------*------*--------------------------------------+ *.*..*.*..* *.*. *.*. *..*.*..*.*..*.* | 130000 ++ | | | 120000 ++ | | | 110000 ++ | | | 100000 ++ | | | 90000 ++ | | | 80000 ++ | | O | 70000 O+O--O-O--O-O--O----O-O-O--O-O--O-O--O-O--O-O-O--O-O--O-O--O-O--O-O vmstat.system.in 8800 ++----------------*-------------------------------------------------+ 8600 ++ : | | : | 8400 ++ :: | 8200 ++ : : | | : : | 8000 ++ : : | 7800 ++ : : | 7600 ++ : : | O O O O O O O: O:O O O O O O O O O O O O O O O O O O O O 7400 ++ : : | 7200 ++ : : | | : : .*.. | 7000 *+.*.*..*.*..*.* *..*.*..*.*..*.*..* *.* | 6800 ++------------------------------------------------------------------+ vmstat.system.cs 7900 ++------------------------------------------------------------------+ | O O O O O O O O O O O O O O O O O O 7800 O+ O O O O O O O O O | | | 7700 ++ | | | 7600 ++ | | | 7500 ++ | | *. | 7400 +++ * .*.. .*.. .*. .* *. | |+ : .* * .*. *. : + *..*.* | 7300 *+ : *. * : + | | : + * | 7200 ++------*-----------------------------------------------------------+