All of lore.kernel.org
 help / color / mirror / Atom feed
* "perf test 103: perf all metrics test" failing on hybrid i7 14700k
@ 2024-01-27 19:24 Arnaldo Carvalho de Melo
  2024-01-27 19:25 ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 3+ messages in thread
From: Arnaldo Carvalho de Melo @ 2024-01-27 19:24 UTC (permalink / raw)
  To: Ian Rogers, Kan Liang
  Cc: Adrian Hunter, Jiri Olsa, Namhyung Kim, Linux Kernel Mailing List,
	linux-perf-users

FYI:

root@number:~# grep -m1 "model name" /proc/cpuinfo 
model name	: Intel(R) Core(TM) i7-14700K

The output from another hybrid machine, a Lenovo ThinkPad X1 Carbon, follows further below:

root@x1:~# grep -m1 "model name" /proc/cpuinfo
model name	: 13th Gen Intel(R) Core(TM) i7-1365U

Back to the i7-14700K:

root@number:~# perf test -v 103
103: perf all metrics test                                           :
--- start ---
test child forked, pid 149355
Testing tma_core_bound
Testing tma_info_core_ilp
Testing tma_info_memory_l2mpki
Testing tma_memory_bound
Testing tma_info_bad_spec_branch_misprediction_cost
Testing tma_info_bad_spec_ipmisp_cond_ntaken
Testing tma_info_bad_spec_ipmisp_cond_taken
Testing tma_info_bad_spec_ipmisp_indirect
Testing tma_info_bad_spec_ipmisp_ret
Testing tma_info_bad_spec_ipmispredict
Testing tma_info_bottleneck_mispredictions
Testing tma_info_branches_callret
Testing tma_info_branches_cond_nt
Testing tma_info_branches_cond_tk
Testing tma_info_branches_jump
Testing tma_info_branches_other_branches
Testing tma_branch_mispredicts
Testing tma_clears_resteers
Testing tma_machine_clears
Testing tma_mispredicts_resteers
Testing tma_icache_misses
Testing tma_info_bottleneck_big_code
Testing tma_itlb_misses
Testing tma_unknown_branches
Testing tma_info_inst_mix_bptkbranch
Testing tma_info_inst_mix_ipbranch
Testing tma_info_inst_mix_ipcall
Testing tma_info_inst_mix_iptb
Testing tma_info_system_ipfarbranch
Testing tma_info_thread_uptb
Testing tma_info_memory_fb_hpki
Testing tma_info_memory_l1mpki
Testing tma_info_memory_l1mpki_load
Testing tma_info_memory_l2hpki_all
Testing tma_info_memory_l2hpki_load
Testing tma_info_memory_l2mpki_all
Testing tma_info_memory_l2mpki_load
Testing tma_info_memory_l3mpki
Testing tma_l1_bound
Testing tma_l2_bound
Testing tma_l3_bound
Testing tma_fp_scalar
Testing tma_fp_vector
Testing tma_fp_vector_128b
Testing tma_fp_vector_256b
Testing tma_int_vector_128b
Testing tma_int_vector_256b
Testing tma_port_0
Testing tma_x87_use
Testing tma_info_botlnk_l0_core_bound_likely
Testing tma_info_core_fp_arith_utilization
Testing tma_info_pipeline_execute
Testing tma_info_system_gflops
Testing tma_info_thread_execute_per_issue
Testing tma_dsb
Testing tma_info_frontend_dsb_coverage
Testing tma_decoder0_alone
Testing tma_dsb_switches
Testing tma_info_botlnk_l2_dsb_misses
Testing tma_info_frontend_dsb_switch_cost
Testing tma_info_frontend_ipdsb_miss_ret
Testing tma_mite
Testing tma_contested_accesses
Testing tma_false_sharing
Testing tma_backend_bound
Testing tma_backend_bound_aux
Testing tma_bad_speculation
Testing tma_frontend_bound
Testing tma_retiring
Testing tma_info_botlnk_l2_ic_misses
Testing tma_info_bottleneck_instruction_fetch_bw
Testing tma_info_frontend_fetch_upc
Testing tma_info_frontend_icache_miss_latency
Testing tma_info_frontend_ipunknown_branch
Testing tma_info_frontend_lsd_coverage
Testing tma_info_memory_tlb_code_stlb_mpki
Testing tma_fetch_bandwidth
Testing tma_lsd
Testing tma_branch_resteers
Testing tma_lcp
Testing tma_ms_switches
Testing tma_info_core_flopc
Testing tma_info_inst_mix_iparith
Testing tma_info_inst_mix_iparith_avx128
Testing tma_info_inst_mix_iparith_avx256
Testing tma_info_inst_mix_iparith_scalar_dp
Testing tma_info_inst_mix_iparith_scalar_sp
Testing tma_info_inst_mix_ipflop
Testing tma_fetch_latency
Testing tma_avx_assists
Testing tma_fp_arith
Testing tma_fp_assists
Testing tma_info_system_cpu_utilization
Testing tma_info_system_dram_bw_use
Testing tma_shuffles
Testing tma_info_frontend_l2mpki_code
Testing tma_info_frontend_l2mpki_code_all
Testing tma_info_inst_mix_ipload
Testing tma_info_inst_mix_ipstore
Testing tma_info_bottleneck_memory_bandwidth
Testing tma_info_bottleneck_memory_data_tlbs
Testing tma_info_bottleneck_memory_latency
Testing tma_info_memory_core_l1d_cache_fill_bw
Testing tma_info_memory_core_l2_cache_fill_bw
Testing tma_info_memory_core_l3_cache_access_bw
Testing tma_info_memory_core_l3_cache_fill_bw
Testing tma_info_memory_load_miss_real_latency
Testing tma_info_memory_mlp
Testing tma_info_memory_thread_l1d_cache_fill_bw_1t
Testing tma_info_memory_thread_l2_cache_fill_bw_1t
Testing tma_info_memory_thread_l3_cache_access_bw_1t
Testing tma_info_memory_thread_l3_cache_fill_bw_1t
Testing tma_info_memory_tlb_load_stlb_mpki
Testing tma_info_memory_tlb_page_walks_utilization
Testing tma_info_memory_tlb_store_stlb_mpki
Testing tma_info_system_mem_parallel_reads
Testing tma_info_system_mem_read_latency
Testing tma_info_system_mem_request_latency
Testing tma_info_thread_cpi
Testing tma_fb_full
Testing tma_mem_bandwidth
Testing tma_sq_full
Testing tma_streaming_stores
Testing tma_dram_bound
Testing tma_store_bound
Testing tma_l3_hit_latency
Testing tma_mem_latency
Testing tma_store_latency
Testing tma_dtlb_load
Testing tma_dtlb_store
Testing tma_load_stlb_hit
Testing tma_load_stlb_miss
Testing tma_store_stlb_hit
Testing tma_store_stlb_miss
Testing tma_info_memory_oro_data_l2_mlp
Testing tma_info_memory_oro_load_l2_mlp
Testing tma_info_memory_oro_load_l2_miss_latency
Testing tma_info_memory_oro_load_l3_miss_latency
Testing tma_microcode_sequencer
Testing tma_info_core_clks
Testing tma_info_core_clks_p
Testing tma_info_core_cpi
Testing tma_info_core_ipc
Testing tma_info_core_slots
Testing tma_info_core_upi
Testing tma_info_frontend_inst_miss_cost_dramhit_percent
Testing tma_info_frontend_inst_miss_cost_l2hit_percent
Testing tma_info_frontend_inst_miss_cost_l3hit_percent
Testing tma_info_inst_mix_branch_mispredict_ratio
Testing tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio
Testing tma_info_inst_mix_fpdiv_uop_ratio
Testing tma_info_inst_mix_idiv_uop_ratio
Testing tma_info_inst_mix_ipfarbranch
Testing tma_info_inst_mix_ipmisp_cond_ntaken
Testing tma_info_inst_mix_ipmisp_cond_taken
Testing tma_info_inst_mix_ipmisp_indirect
Testing tma_info_inst_mix_ipmisp_ret
Testing tma_info_inst_mix_ipmispredict
Testing tma_info_inst_mix_microcode_uop_ratio
Testing tma_info_inst_mix_x87_uop_ratio
Testing tma_info_l1_bound_address_alias_blocks
Testing tma_info_l1_bound_load_splits
Testing tma_info_l1_bound_store_fwd_blocks
Testing tma_info_memory_cycles_per_demand_load_dram_hit
Testing tma_info_memory_cycles_per_demand_load_l2_hit
Testing tma_info_memory_cycles_per_demand_load_l3_hit
Testing tma_info_memory_memloadpki
Testing tma_info_system_kernel_cpi
Testing tma_info_system_kernel_utilization
Testing tma_data_sharing
Testing tma_lock_latency
Testing tma_fused_instructions
Testing tma_info_pipeline_ipassist
Testing tma_info_pipeline_retire
Testing tma_info_pipeline_strings_cycles
Testing tma_info_thread_clks
Testing tma_info_thread_uoppi
Testing tma_int_operations
Testing tma_memory_operations
Testing tma_non_fused_branches
Testing tma_nop_instructions
Testing tma_other_light_ops
Testing tma_ports_utilization
Testing tma_ports_utilized_0
Testing tma_ports_utilized_1
Metric 'tma_ports_utilized_1' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 69.150 usec (+- 0.224 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.411 usec
  Average data synthesis took: 56.534 usec (+- 0.025 usec)
  Average num. events: 255.000 (+- 0.000)
  Average time per event 0.222 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.482356148 seconds time elapsed

       0.553851000 seconds user
       0.915151000 seconds sys
Testing tma_ports_utilized_2
Metric 'tma_ports_utilized_2' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 52.913 usec (+- 0.026 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.080 usec
  Average data synthesis took: 56.637 usec (+- 0.024 usec)
  Average num. events: 256.000 (+- 0.000)
  Average time per event 0.221 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.241983709 seconds time elapsed

       0.457768000 seconds user
       0.778312000 seconds sys
Testing tma_ports_utilized_3m
Metric 'tma_ports_utilized_3m' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 52.617 usec (+- 0.026 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.074 usec
  Average data synthesis took: 56.215 usec (+- 0.022 usec)
  Average num. events: 258.000 (+- 0.000)
  Average time per event 0.218 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/UOPS_EXECUTED.CYCLES_GE_3/                                        (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.235490934 seconds time elapsed

       0.432792000 seconds user
       0.796901000 seconds sys
Testing tma_serializing_operation
Testing C10_Pkg_Residency
Testing C1_Core_Residency
Testing C2_Pkg_Residency
Testing C3_Pkg_Residency
Testing C6_Core_Residency
Testing C6_Pkg_Residency
Testing C7_Core_Residency
Testing C7_Pkg_Residency
Testing C8_Pkg_Residency
Testing C9_Pkg_Residency
Testing tma_info_system_average_frequency
Testing tma_info_system_turbo_utilization
Testing tma_info_inst_mix_ipswpf
Testing tma_info_bottleneck_branching_overhead
Testing tma_info_core_coreipc
Testing tma_info_thread_ipc
Testing tma_heavy_operations
Testing tma_light_operations
Testing tma_info_core_core_clks
Testing tma_info_system_smt_2t_utilization
Testing tma_info_thread_slots_utilization
Testing UNCORE_FREQ
Testing tma_info_system_socket_clks
Testing tma_info_inst_mix_instructions
Testing tma_info_thread_slots
Testing tma_base
Testing tma_ms_uops
Testing tma_resource_bound
Testing tma_alloc_restriction
Testing tma_branch_detect
Testing tma_branch_resteer
Testing tma_cisc
Testing tma_decode
Testing tma_divider
Testing tma_fast_nuke
Testing tma_few_uops_instructions
Testing tma_fpdiv_uops
Testing tma_mem_scheduler
Testing tma_non_mem_scheduler
Testing tma_nuke
Testing tma_other_fb
Testing tma_other_load_store
Testing tma_other_ret
Testing tma_predecode
Testing tma_register
Testing tma_reorder_buffer
Testing tma_serialization
Testing tma_assists
Testing tma_disambiguation
Testing tma_fp_assist
Testing tma_ld_buffer
Testing tma_memory_ordering
Testing tma_other_l1
Testing tma_page_fault
Testing tma_rsv
Testing tma_smc
Testing tma_split_loads
Testing tma_split_stores
Testing tma_st_buffer
Testing tma_stlb_hit
Testing tma_stlb_miss
Testing tma_store_fwd_blk
Testing tma_alu_op_utilization
Testing tma_load_op_utilization
Testing tma_mixing_vectors
Testing tma_page_faults
Testing tma_store_op_utilization
Testing tma_memory_fence
Metric 'tma_memory_fence' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 52.176 usec (+- 0.040 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.065 usec
  Average data synthesis took: 57.213 usec (+- 0.091 usec)
  Average num. events: 254.000 (+- 0.000)
  Average time per event 0.225 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/MISC2_RETIRED.LFENCE/                                          (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.240821329 seconds time elapsed

       0.450033000 seconds user
       0.783502000 seconds sys
Testing tma_port_1
Testing tma_port_6
Testing tma_slow_pause
Metric 'tma_slow_pause' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 53.172 usec (+- 0.028 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.085 usec
  Average data synthesis took: 59.325 usec (+- 0.107 usec)
  Average num. events: 257.000 (+- 0.000)
  Average time per event 0.231 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.PAUSE/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.272733971 seconds time elapsed

       0.486217000 seconds user
       0.779214000 seconds sys
Testing smi_cycles
Testing smi_num
Testing tsx_aborted_cycles
Testing tsx_cycles_per_elision
Testing tsx_cycles_per_transaction
Testing tsx_transactional_cycles
test child finished with -1
---- end ----
perf all metrics test: FAILED!
root@number:~#


And on another hybrid machine, a Lenovo ThinkPad X1 Carbon:

root@x1:~# grep -m1 "model name" /proc/cpuinfo
model name	: 13th Gen Intel(R) Core(TM) i7-1365U
root@x1:~# perf test -v 103
103: perf all metrics test                                           :
--- start ---
test child forked, pid 248913
Testing tma_core_bound
Testing tma_info_core_ilp
Testing tma_info_memory_l2mpki
Testing tma_memory_bound
Testing tma_info_bad_spec_branch_misprediction_cost
Testing tma_info_bad_spec_ipmisp_cond_ntaken
Testing tma_info_bad_spec_ipmisp_cond_taken
Testing tma_info_bad_spec_ipmisp_indirect
Testing tma_info_bad_spec_ipmisp_ret
Testing tma_info_bad_spec_ipmispredict
Testing tma_info_bottleneck_mispredictions
Testing tma_info_branches_callret
Testing tma_info_branches_cond_nt
Testing tma_info_branches_cond_tk
Testing tma_info_branches_jump
Testing tma_info_branches_other_branches
Testing tma_branch_mispredicts
Testing tma_clears_resteers
Testing tma_machine_clears
Testing tma_mispredicts_resteers
Testing tma_icache_misses
Testing tma_info_bottleneck_big_code
Testing tma_itlb_misses
Testing tma_unknown_branches
Testing tma_info_inst_mix_bptkbranch
Testing tma_info_inst_mix_ipbranch
Testing tma_info_inst_mix_ipcall
Testing tma_info_inst_mix_iptb
Testing tma_info_system_ipfarbranch
Testing tma_info_thread_uptb
Testing tma_info_memory_fb_hpki
Testing tma_info_memory_l1mpki
Testing tma_info_memory_l1mpki_load
Testing tma_info_memory_l2hpki_all
Testing tma_info_memory_l2hpki_load
Testing tma_info_memory_l2mpki_all
Testing tma_info_memory_l2mpki_load
Testing tma_info_memory_l3mpki
Testing tma_l1_bound
Testing tma_l2_bound
Testing tma_l3_bound
Testing tma_fp_scalar
Testing tma_fp_vector
Testing tma_fp_vector_128b
Testing tma_fp_vector_256b
Testing tma_int_vector_128b
Testing tma_int_vector_256b
Testing tma_port_0
Testing tma_x87_use
Testing tma_info_botlnk_l0_core_bound_likely
Testing tma_info_core_fp_arith_utilization
Testing tma_info_pipeline_execute
Testing tma_info_system_gflops
Testing tma_info_thread_execute_per_issue
Testing tma_dsb
Testing tma_info_frontend_dsb_coverage
Testing tma_decoder0_alone
Testing tma_dsb_switches
Testing tma_info_botlnk_l2_dsb_misses
Testing tma_info_frontend_dsb_switch_cost
Testing tma_info_frontend_ipdsb_miss_ret
Testing tma_mite
Testing tma_contested_accesses
Testing tma_false_sharing
Testing tma_backend_bound
Testing tma_backend_bound_aux
Testing tma_bad_speculation
Testing tma_frontend_bound
Testing tma_retiring
Testing tma_info_botlnk_l2_ic_misses
Testing tma_info_bottleneck_instruction_fetch_bw
Testing tma_info_frontend_fetch_upc
Testing tma_info_frontend_icache_miss_latency
Testing tma_info_frontend_ipunknown_branch
Testing tma_info_frontend_lsd_coverage
Testing tma_info_memory_tlb_code_stlb_mpki
Testing tma_fetch_bandwidth
Testing tma_lsd
Testing tma_branch_resteers
Testing tma_lcp
Testing tma_ms_switches
Testing tma_info_core_flopc
Testing tma_info_inst_mix_iparith
Testing tma_info_inst_mix_iparith_avx128
Testing tma_info_inst_mix_iparith_avx256
Testing tma_info_inst_mix_iparith_scalar_dp
Testing tma_info_inst_mix_iparith_scalar_sp
Testing tma_info_inst_mix_ipflop
Testing tma_fetch_latency
Testing tma_avx_assists
Testing tma_fp_arith
Testing tma_fp_assists
Testing tma_info_system_cpu_utilization
Testing tma_info_system_dram_bw_use
Testing tma_shuffles
Testing tma_info_frontend_l2mpki_code
Testing tma_info_frontend_l2mpki_code_all
Testing tma_info_inst_mix_ipload
Testing tma_info_inst_mix_ipstore
Testing tma_info_bottleneck_memory_bandwidth
Testing tma_info_bottleneck_memory_data_tlbs
Testing tma_info_bottleneck_memory_latency
Testing tma_info_memory_core_l1d_cache_fill_bw
Testing tma_info_memory_core_l2_cache_fill_bw
Testing tma_info_memory_core_l3_cache_access_bw
Testing tma_info_memory_core_l3_cache_fill_bw
Testing tma_info_memory_load_miss_real_latency
Testing tma_info_memory_mlp
Testing tma_info_memory_thread_l1d_cache_fill_bw_1t
Testing tma_info_memory_thread_l2_cache_fill_bw_1t
Testing tma_info_memory_thread_l3_cache_access_bw_1t
Testing tma_info_memory_thread_l3_cache_fill_bw_1t
Testing tma_info_memory_tlb_load_stlb_mpki
Testing tma_info_memory_tlb_page_walks_utilization
Testing tma_info_memory_tlb_store_stlb_mpki
Testing tma_info_system_mem_parallel_reads
Testing tma_info_system_mem_read_latency
Testing tma_info_system_mem_request_latency
Testing tma_info_thread_cpi
Testing tma_fb_full
Testing tma_mem_bandwidth
Testing tma_sq_full
Testing tma_streaming_stores
Testing tma_dram_bound
Testing tma_store_bound
Testing tma_l3_hit_latency
Testing tma_mem_latency
Testing tma_store_latency
Testing tma_dtlb_load
Testing tma_dtlb_store
Testing tma_load_stlb_hit
Testing tma_load_stlb_miss
Testing tma_store_stlb_hit
Testing tma_store_stlb_miss
Testing tma_info_memory_oro_data_l2_mlp
Testing tma_info_memory_oro_load_l2_mlp
Testing tma_info_memory_oro_load_l2_miss_latency
Testing tma_info_memory_oro_load_l3_miss_latency
Testing tma_microcode_sequencer
Testing tma_info_core_clks
Testing tma_info_core_clks_p
Testing tma_info_core_cpi
Testing tma_info_core_ipc
Testing tma_info_core_slots
Testing tma_info_core_upi
Testing tma_info_frontend_inst_miss_cost_dramhit_percent
Testing tma_info_frontend_inst_miss_cost_l2hit_percent
Testing tma_info_frontend_inst_miss_cost_l3hit_percent
Testing tma_info_inst_mix_branch_mispredict_ratio
Testing tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio
Testing tma_info_inst_mix_fpdiv_uop_ratio
Testing tma_info_inst_mix_idiv_uop_ratio
Testing tma_info_inst_mix_ipfarbranch
Testing tma_info_inst_mix_ipmisp_cond_ntaken
Testing tma_info_inst_mix_ipmisp_cond_taken
Testing tma_info_inst_mix_ipmisp_indirect
Testing tma_info_inst_mix_ipmisp_ret
Testing tma_info_inst_mix_ipmispredict
Testing tma_info_inst_mix_microcode_uop_ratio
Testing tma_info_inst_mix_x87_uop_ratio
Testing tma_info_l1_bound_address_alias_blocks
Testing tma_info_l1_bound_load_splits
Testing tma_info_l1_bound_store_fwd_blocks
Testing tma_info_memory_cycles_per_demand_load_dram_hit
Testing tma_info_memory_cycles_per_demand_load_l2_hit
Testing tma_info_memory_cycles_per_demand_load_l3_hit
Testing tma_info_memory_memloadpki
Testing tma_info_system_kernel_cpi
Testing tma_info_system_kernel_utilization
Testing tma_data_sharing
Testing tma_lock_latency
Testing tma_fused_instructions
Testing tma_info_pipeline_ipassist
Testing tma_info_pipeline_retire
Testing tma_info_pipeline_strings_cycles
Testing tma_info_thread_clks
Testing tma_info_thread_uoppi
Testing tma_int_operations
Testing tma_memory_operations
Testing tma_non_fused_branches
Testing tma_nop_instructions
Testing tma_other_light_ops
Testing tma_ports_utilization
Testing tma_ports_utilized_0
Testing tma_ports_utilized_1
Metric 'tma_ports_utilized_1' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 75.810 usec (+- 0.039 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.547 usec
  Average data synthesis took: 81.631 usec (+- 0.033 usec)
  Average num. events: 257.000 (+- 0.000)
  Average time per event 0.318 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.786980972 seconds time elapsed

       0.649608000 seconds user
       1.128060000 seconds sys
Testing tma_ports_utilized_2
Metric 'tma_ports_utilized_2' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 77.342 usec (+- 0.042 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.578 usec
  Average data synthesis took: 85.115 usec (+- 0.033 usec)
  Average num. events: 256.000 (+- 0.000)
  Average time per event 0.332 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.829972757 seconds time elapsed

       0.651383000 seconds user
       1.146739000 seconds sys
Testing tma_ports_utilized_3m
Metric 'tma_ports_utilized_3m' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 76.760 usec (+- 0.043 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.567 usec
  Average data synthesis took: 82.118 usec (+- 0.031 usec)
  Average num. events: 257.000 (+- 0.000)
  Average time per event 0.320 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/UOPS_EXECUTED.CYCLES_GE_3/                                        (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.798830992 seconds time elapsed

       0.626191000 seconds user
       1.159087000 seconds sys
Testing tma_serializing_operation
Testing C10_Pkg_Residency
Testing C1_Core_Residency
Testing C2_Pkg_Residency
Testing C3_Pkg_Residency
Testing C6_Core_Residency
Testing C6_Pkg_Residency
Testing C7_Core_Residency
Testing C7_Pkg_Residency
Testing C8_Pkg_Residency
Testing C9_Pkg_Residency
Testing tma_info_system_average_frequency
Testing tma_info_system_turbo_utilization
Testing tma_info_inst_mix_ipswpf
Testing tma_info_bottleneck_branching_overhead
Testing tma_info_core_coreipc
Testing tma_info_thread_ipc
Testing tma_heavy_operations
Testing tma_light_operations
Testing tma_info_core_core_clks
Testing tma_info_system_smt_2t_utilization
Testing tma_info_thread_slots_utilization
Testing UNCORE_FREQ
Testing tma_info_system_socket_clks
Testing tma_info_inst_mix_instructions
Testing tma_info_thread_slots
Testing tma_base
Testing tma_ms_uops
Testing tma_resource_bound
Testing tma_alloc_restriction
Testing tma_branch_detect
Testing tma_branch_resteer
Testing tma_cisc
Testing tma_decode
Testing tma_divider
Testing tma_fast_nuke
Testing tma_few_uops_instructions
Testing tma_fpdiv_uops
Testing tma_mem_scheduler
Testing tma_non_mem_scheduler
Testing tma_nuke
Testing tma_other_fb
Testing tma_other_load_store
Testing tma_other_ret
Testing tma_predecode
Testing tma_register
Testing tma_reorder_buffer
Testing tma_serialization
Testing tma_assists
Testing tma_disambiguation
Testing tma_fp_assist
Testing tma_ld_buffer
Testing tma_memory_ordering
Testing tma_other_l1
Testing tma_page_fault
Testing tma_rsv
Testing tma_smc
Testing tma_split_loads
Testing tma_split_stores
Testing tma_st_buffer
Testing tma_stlb_hit
Testing tma_stlb_miss
Testing tma_store_fwd_blk
Testing tma_alu_op_utilization
Testing tma_load_op_utilization
Testing tma_mixing_vectors
Testing tma_page_faults
Testing tma_store_op_utilization
Testing tma_memory_fence
Metric 'tma_memory_fence' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 74.959 usec (+- 0.035 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.530 usec
  Average data synthesis took: 80.259 usec (+- 0.030 usec)
  Average num. events: 255.000 (+- 0.000)
  Average time per event 0.315 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/MISC2_RETIRED.LFENCE/                                          (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.781118961 seconds time elapsed

       0.643719000 seconds user
       1.127896000 seconds sys
Testing tma_port_1
Testing tma_port_6
Testing tma_slow_pause
Metric 'tma_slow_pause' not printed in:
# Running 'internals/synthesize' benchmark:
Computing performance of single threaded perf event synthesis by
synthesizing events on the perf process itself:
  Average synthesis took: 75.389 usec (+- 0.044 usec)
  Average num. events: 49.000 (+- 0.000)
  Average time per event 1.539 usec
  Average data synthesis took: 80.662 usec (+- 0.031 usec)
  Average num. events: 256.000 (+- 0.000)
  Average time per event 0.315 usec

 Performance counter stats for 'perf bench internals synthesize':

     <not counted>      cpu_core/TOPDOWN.SLOTS/                                                 (0.00%)
     <not counted>      cpu_core/topdown-retiring/                                              (0.00%)
     <not counted>      cpu_core/topdown-mem-bound/                                             (0.00%)
     <not counted>      cpu_core/topdown-bad-spec/                                              (0.00%)
     <not counted>      cpu_core/topdown-fe-bound/                                              (0.00%)
     <not counted>      cpu_core/topdown-be-bound/                                              (0.00%)
     <not counted>      cpu_core/RESOURCE_STALLS.SCOREBOARD/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.1_PORTS_UTIL/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.PAUSE/                                        (0.00%)
     <not counted>      cpu_core/CYCLE_ACTIVITY.STALLS_TOTAL/                                        (0.00%)
     <not counted>      cpu_core/CPU_CLK_UNHALTED.THREAD/                                        (0.00%)
     <not counted>      cpu_core/ARITH.DIV_ACTIVE/                                              (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.2_PORTS_UTIL,umask=0xc/                                        (0.00%)
     <not counted>      cpu_core/EXE_ACTIVITY.3_PORTS_UTIL,umask=0x80/                                        (0.00%)

       1.765949472 seconds time elapsed

       0.636513000 seconds user
       1.119875000 seconds sys
Testing smi_cycles
Testing smi_num
Testing tsx_aborted_cycles
Testing tsx_cycles_per_elision
Testing tsx_cycles_per_transaction
Testing tsx_transactional_cycles
test child finished with -1
---- end ----
perf all metrics test: FAILED!
root@x1:~#

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-01-27 19:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-27 19:24 "perf test 103: perf all metrics test" failing on hybrid i7 14700k Arnaldo Carvalho de Melo
2024-01-27 19:25 ` Arnaldo Carvalho de Melo
2024-01-27 19:30   ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.