From 17b442426b98fe9ac60a27d4a1bace0e1fff0084 Mon Sep 17 00:00:00 2001 From: jharper5 Date: Tue, 25 Jun 2024 16:39:48 -0700 Subject: [PATCH] changes for version 1.4.0 --- README.md | 10 + _version.txt | 2 +- events/icx_nofixedtma.txt | 148 +++++++++++ events/metric_icx_nofixedtma.json | 329 ++++++++++++++++++++++++ events/metric_spr_emr_nofixedtma.json | 349 ++++++++++++++++++++++++++ events/metric_srf.json | 267 +++++++++++++++++++- events/spr_emr_nofixedtma.txt | 138 ++++++++++ events/srf.txt | 102 +++++++- perf-collect.py | 272 +++++++++++++++----- perf-collect.spec | 2 +- perf-postprocess.py | 41 ++- src/perf_helpers.py | 75 ++++-- src/prepare_perf_events.py | 78 +++--- 13 files changed, 1684 insertions(+), 129 deletions(-) create mode 100644 events/icx_nofixedtma.txt create mode 100644 events/metric_icx_nofixedtma.json create mode 100644 events/metric_spr_emr_nofixedtma.json create mode 100644 events/spr_emr_nofixedtma.txt diff --git a/README.md b/README.md index da17d57..f7d7739 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,16 @@ sudo ./perf-collect --timeout 10 ./perf-postprocess ``` +## Running perf-collect as a non-root user +As seen in the examples above, `sudo` is the standard approach to running perf-collect with elevated privileges. If `sudo` is not possible and running as the root user is not possible, then a user may request the following changes be made to the system by an administrator: +- sysctl -w kernel.perf_event_paranoid=0 +- sysctl -w kernel.nmi_watchdog=0 +- write '125' to all perf_event_mux_interval_ms files found under /sys/devices/*. + +`for i in $(find /sys/devices -name perf_event_mux_interval_ms); do echo 125 > $i; done` + +Recommend returning these settings to their prior values when analysis with PerfSpect is complete. 
+ ## Output perf-collect outputs: diff --git a/_version.txt b/_version.txt index 17e63e7..88c5fb8 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.11 +1.4.0 diff --git a/events/icx_nofixedtma.txt b/events/icx_nofixedtma.txt new file mode 100644 index 0000000..00c16cc --- /dev/null +++ b/events/icx_nofixedtma.txt @@ -0,0 +1,148 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# Icelake event list for platforms that don't have support for the fixed counter TMA events, e.g., some AWS +# VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. + +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xc3,umask=0x01,cmask=0x01,edge=0x01,period=100003,name='MACHINE_CLEARS.COUNT'/, +cpu/event=0xc5,umask=0x00,period=50021,name='BR_MISP_RETIRED.ALL_BRANCHES'/, +cpu/event=0xf1,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu/event=0x48,umask=0x02,period=1000003,name='L1D_PEND_MISS.FB_FULL_PERIODS'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, 
+cpu/event=0x80,umask=0x04,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,name='ICACHE_TAG.STALLS'/, +cpu/event=0x79,umask=0x30,name='IDQ.MS_SWITCHES'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, +cpu/event=0x0d,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xab,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0x0D,umask=0x01,name='INT_MISC.CLEARS_COUNT'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xd0,umask=0x83,name='MEM_INST_RETIRED.ANY'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu/event=0x9c,umask=0x01,cmask=0x05,period=1000003,name='IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0xa3,umask=0x0C,cmask=0x0C,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa3,umask=0x14,cmask=0x14,period=2000003,name='CYCLE_ACTIVITY.STALLS_MEM_ANY'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa3,umask=0x04,cmask=0x04,period=1000003,name='CYCLE_ACTIVITY.STALLS_TOTAL'/, +cpu/event=0xa6,umask=0x02,period=2000003,name='EXE_ACTIVITY.1_PORTS_UTIL'/, +cpu/event=0xa6,umask=0x04,period=2000003,name='EXE_ACTIVITY.2_PORTS_UTIL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd0,umask=0x21,cmask=0x00,period=100007,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,period=100007,name='MEM_LOAD_RETIRED.FB_HIT'/, +cpu/event=0xd1,umask=0x08,period=200003,name='MEM_LOAD_RETIRED.L1_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xa3,umask=0x05,cmask=0x05,period=1000003,name='CYCLE_ACTIVITY.STALLS_L2_MISS'/, 
+cpu/event=0xa3,umask=0x06,cmask=0x06,period=1000003,name='CYCLE_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0xa3,umask=0x0c,cmask=0x0c,period=1000003,name='CYCLE_ACTIVITY.STALLS_L1D_MISS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x05,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x05,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0x14,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIVIDER_ACTIVE'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x30,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0x56,umask=0x01,period=100003,name='UOPS_DECODED.DEC0'/, +cpu/event=0x56,umask=0x01,cmask=0x01,period=100003,name='UOPS_DECODED.DEC0:c1'/, +cpu/event=0x0e,umask=0x01,period=2000003,name='UOPS_ISSUED.ANY'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +# OCR +cpu/event=0xb7,umask=0x01,offcore_rsp=0x104000477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x85,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x08,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xb7,umask=0x01,offcore_rsp=0x1030000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x830000477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x730000477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0xb7,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, 
+cpu/event=0x08,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x49,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu-cycles, +ref-cycles, +instructions; + +# C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +# UPI +upi/event=0x2,umask=0xf,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +# CHA +cha/event=0x00,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0xC8177E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; +cha/event=0x35,umask=0xc88ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/, +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x36,umask=0xC816FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +# memory read/writes +imc/event=0x04,umask=0x0f,name='UNC_M_CAS_COUNT.RD'/, +imc/event=0x04,umask=0x30,name='UNC_M_CAS_COUNT.WR'/; + +# power +power/energy-pkg/, +power/energy-ram/; diff --git a/events/metric_icx_nofixedtma.json b/events/metric_icx_nofixedtma.json new file mode 100644 index 0000000..5325629 --- /dev/null +++ b/events/metric_icx_nofixedtma.json @@ -0,0 +1,329 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, 
+ { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / 
[instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + 
"name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy 
decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "(([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000) / 1" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "(([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * 
([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [IDQ.MS_SWITCHES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( 5 ) * [IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] - 
[INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( [IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( ( [BR_MISP_RETIRED.ALL_BRANCHES] / ( [BR_MISP_RETIRED.ALL_BRANCHES] + [MACHINE_CLEARS.COUNT] ) ) * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * 
[INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] - [CYCLE_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L2_MISS] - [CYCLE_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [CYCLE_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) + ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) - ( ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) / ( ( [MEM_LOAD_RETIRED.L2_HIT] * ( 1 + ( [MEM_LOAD_RETIRED.FB_HIT] / [MEM_LOAD_RETIRED.L1_MISS] ) ) ) + [L1D_PEND_MISS.FB_FULL_PERIODS] ) ) * ( ( [CYCLE_ACTIVITY.STALLS_L1D_MISS] - [CYCLE_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) ) ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", 
+ "expression": "100 * ( max( 0 , ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [CYCLE_ACTIVITY.STALLS_MEM_ANY] + [EXE_ACTIVITY.BOUND_ON_STORES] ) / ( [CYCLE_ACTIVITY.STALLS_TOTAL] + ( [EXE_ACTIVITY.1_PORTS_UTIL] + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * [EXE_ACTIVITY.2_PORTS_UTIL] ) + [EXE_ACTIVITY.BOUND_ON_STORES] ) ) * ( ( [TOPDOWN.BACKEND_BOUND_SLOTS] + ( 5 ) * [INT_MISC.CLEARS_COUNT] ) / ( [TOPDOWN.SLOTS_P] ) ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIVIDER_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [MEM_INST_RETIRED.ANY] / [instructions] )" + }, + { + "name": "metric_TMA_....Branch_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) ) ) * [BR_INST_RETIRED.ALL_BRANCHES] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": 
"metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( ( ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [UOPS_DECODED.DEC0] - [UOPS_DECODED.DEC0:c1] ) / [IDQ.MITE_UOPS] ) - ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) * [IDQ.MS_UOPS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git a/events/metric_spr_emr_nofixedtma.json b/events/metric_spr_emr_nofixedtma.json new file mode 100644 index 0000000..587d6b1 --- /dev/null +++ b/events/metric_spr_emr_nofixedtma.json @@ -0,0 +1,349 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": "[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + 
"origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "origin": "perfmon website" + }, + { + "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[L2_LINES_IN.ALL] / [instructions]", + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + }, + { + "name": "metric_L2 demand data read MPI", + "name-txn": "metric_L2 demand data read misses per txn", + "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + }, + { + "name": "metric_L2 demand code MPI", + "name-txn": "metric_L2 demand code misses per txn", + 
"expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for LOCAL requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL] / 
[UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand data read miss latency for REMOTE requests (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE] / [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_UPI Data transmit BW (MB/sec) (only data)", + "expression": "([UNC_UPI_TxL_FLITS.ALL_DATA] * (64 / 9.0) / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_% Uops delivered from decoded Icache (DSB)", + "expression": "100 * ([IDQ.DSB_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_% Uops delivered from legacy decode pipeline (MITE)", + "expression": "100 * ([IDQ.MITE_UOPS] / ([IDQ.DSB_UOPS] + [IDQ.MITE_UOPS] + [IDQ.MS_UOPS] + [LSD.UOPS]) )" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.LOCAL_DRAM] + [OCR.HWPF_L3.L3_MISS_LOCAL]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_core initiated remote dram read bandwidth (MB/sec)", + "expression": "([OCR.READS_TO_CORE.REMOTE_DRAM] + [OCR.HWPF_L3.REMOTE]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.RD] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth 
write (MB/sec)", + "expression": "([UNC_M_CAS_COUNT.WR] * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT.RD] + [UNC_M_CAS_COUNT.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_NUMA %_Reads addressed to local DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": "metric_NUMA %_Reads addressed to remote DRAM", + "expression": "100 * ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE]) / ([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE])" + }, + { + "name": 
"metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100 * ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( [ICACHE_DATA.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [ICACHE_TAG.STALLS] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....MS_Switches(%)", + "expression": "100 * ( ( 3 ) * [UOPS_RETIRED.MS:c1:e1] / ( [UOPS_RETIRED.SLOTS] / [UOPS_ISSUED.ANY] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....LCP(%)", + "expression": "100 * ( [DECODE.LCP] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DSB_Switches(%)", + "expression": "100 * ( [DSB2MITE_SWITCHES.PENALTY_CYCLES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100 * ( max( 0 , ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) - ( ( [IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] * ( 6 ) - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....MITE(%)", + "expression": "100 * ( ( [IDQ.MITE_CYCLES_ANY] - [IDQ.MITE_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_....DSB(%)", + "expression": "100 * ( ( 
[IDQ.DSB_CYCLES_ANY] - [IDQ.DSB_CYCLES_OK] ) / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) / 2 )" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100 * ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100 * ( max( 0 , ( max( 1 - ( ( ( [IDQ_UOPS_NOT_DELIVERED.CORE] - [INT_MISC.UOP_DROPPING] ) / ( [TOPDOWN.SLOTS_P] ) ) + ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) + ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) , 0 ) ) - ( [TOPDOWN.BR_MISPREDICT_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100 * ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100 * ( max( ( [EXE_ACTIVITY.BOUND_ON_LOADS] - [MEMORY_ACTIVITY.STALLS_L1D_MISS] ) / ( [cpu-cycles] ) , 0 ) )" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L1D_MISS] - [MEMORY_ACTIVITY.STALLS_L2_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L2_MISS] - [MEMORY_ACTIVITY.STALLS_L3_MISS] ) / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....DRAM_Bound(%)", + "expression": "100 * ( ( [MEMORY_ACTIVITY.STALLS_L3_MISS] / ( [cpu-cycles] ) ) )" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + "expression": "100 * ( [EXE_ACTIVITY.BOUND_ON_STORES] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + 
"expression": "100 * ( max( 0 , ( [TOPDOWN.BACKEND_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [TOPDOWN.MEMORY_BOUND_SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Divider(%)", + "expression": "100 * ( [ARITH.DIV_ACTIVE] / ( [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....AMX_Busy(%)", + "expression": "100 * ( [EXE.AMX_BUSY] / ( [CPU_CLK_UNHALTED.DISTRIBUTED] ) )" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_..Light_Operations(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + }, + { + "name": "metric_TMA_....Memory_Operations(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [MEM_UOP_RETIRED.ANY] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Fused_Instructions(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * [INST_RETIRED.MACRO_FUSED] / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_....Non_Fused_Branches(%)", + "expression": "100 * ( ( max( 0 , ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) ) ) * ( [BR_INST_RETIRED.ALL_BRANCHES] - [INST_RETIRED.MACRO_FUSED] ) / ( ( [UOPS_RETIRED.SLOTS] / ( [TOPDOWN.SLOTS_P] ) ) * ( [TOPDOWN.SLOTS_P] ) ) )" + }, + { + "name": "metric_TMA_..Heavy_Operations(%)", + "expression": "100 * ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) )" + }, + { + "name": "metric_TMA_....Few_Uops_Instructions(%)", + "expression": "100 * ( max( 0 , ( [UOPS_RETIRED.HEAVY] / ( [TOPDOWN.SLOTS_P] ) ) - ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) ) ) )" + 
}, + { + "name": "metric_TMA_....Microcode_Sequencer(%)", + "expression": "100 * ( [UOPS_RETIRED.MS] / ( [TOPDOWN.SLOTS_P] ) )" + } +] \ No newline at end of file diff --git a/events/metric_srf.json b/events/metric_srf.json index 1f38f6e..615d432 100644 --- a/events/metric_srf.json +++ b/events/metric_srf.json @@ -36,5 +36,270 @@ "name": "metric_giga_instructions_per_sec", "expression": "[instructions] / 1000000000", "origin": "perfspect" + }, + { + "name": "metric_locks retired per instr", + "name-txn": "metric_locks retired per txn", + "expression": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [instructions]", + "expression-txn": "[MEM_UOPS_RETIRED.LOCK_LOADS] / [TXN]" + }, + { + "name": "metric_L1D demand data read MPI", + "name-txn": "metric_L1D demand data read misses per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_MISS] / [TXN]" + }, + { + "name": "metric_L1D demand data read hits per instr", + "name-txn": "metric_L1D demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L1_HIT] / [TXN]" + }, + { + "name": "metric_L1-I code read misses (w/ prefetches) per instr", + "name-txn": "metric_L1-I code read misses (w/ prefetches) per txn", + "expression": "[ICACHE.MISSES] / [instructions]", + "expression-txn": "[ICACHE.MISSES] / [TXN]" + }, + { + "name": "metric_L2 demand data read hits per instr", + "name-txn": "metric_L2 demand data read hits per txn", + "expression": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [instructions]", + "expression-txn": "[MEM_LOAD_UOPS_RETIRED.L2_HIT] / [TXN]" + }, + { + "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[LONGEST_LAT_CACHE.REFERENCE] / [instructions]", + "expression-txn": "[LONGEST_LAT_CACHE.REFERENCE] / [TXN]" + }, + { + "name": "metric_L2 code MPI", + "name-txn": 
"metric_L2 code misses per txn", + "expression": "[OCR.L2_CODE_MISS] / [instructions]", + "expression-txn": "[OCR.L2_CODE_MISS] / [TXN]" + }, + { + "name": "metric_L2 Any local request that HITM in another module (per instr)", + "name-txn": "metric_L2 Any local request that HITM in another module per txn", + "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_L2 Any local request that HIT in another module and forwarded(per instr)", + "name-txn": "metric_L2 Any local request that HIT in another module and forwarded per txn", + "expression": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_L2 all L2 prefetches(per instr)", + "name-txn": "metric_L2 all L2 prefetches per txn", + "expression": "[OCR.HWPF_L2.ANY_RESPONSE] / [instructions]", + "expression-txn": "[OCR.HWPF_L2.ANY_RESPONSE] / [TXN]" + }, + { + "name": "metric_data_read_L2_Miss_Latency_using_ORO_events(ns)", + "expression": "( 1000000000 * ([OCR.READS_TO_CORE.OUTSTANDING] / [OCR.READS_TO_CORE.ANY_RESPONSE]) / ([cpu-cycles] / [TSC] * [SYSTEM_TSC_FREQ]) )" + }, + { + "name": "metric_L3 MPI (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L3 misses per txn (includes code+data+rfo w/ prefetches)", + "expression": "[LONGEST_LAT_CACHE.MISS] / [instructions]", + "expression-txn": "[LONGEST_LAT_CACHE.MISS] / [TXN]" + }, + { + "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [instructions]", + "name-txn": "metric_LLC 
misses per txn (includes code+data+rfo w/ prefetches)", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO] + [UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO]) / [TXN]" + }, + { + "name": "metric_LLC total HITM (per instr)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + }, + { + "name": "metric_LLC total HIT clean line forwards (per instr)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + }, + { + "name": "metric_LLC data read MPI (demand+prefetch)", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF] + [UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA]) / [TXN]" + }, + { + "name": "metric_LLC code read MPI (demand+prefetch)", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + }, + { + "name": "metric_Average LLC demand data read miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT] 
/ [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_Average LLC demand RFO miss latency (in ns)", + "expression": "( 1000000000 * ([UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO] / [UNC_CHA_TOR_INSERTS.IA_MISS_RFO]) / ([UNC_CHA_CLOCKTICKS] / ([CHAS_PER_SOCKET] * [SOCKET_COUNT]) ) ) * 1" + }, + { + "name": "metric_core initiated local dram read bandwidth (MB/sec)", + "expression": "([LONGEST_LAT_CACHE.MISS]) * 64 / 1000000", + "origin": "perfspect" + }, + { + "name": "metric_memory bandwidth read (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth write (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_memory bandwidth total (MB/sec)", + "expression": "(([UNC_M_CAS_COUNT_SCH0.RD] + [UNC_M_CAS_COUNT_SCH1.RD] + [UNC_M_CAS_COUNT_SCH0.WR] + [UNC_M_CAS_COUNT_SCH1.WR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_writes (MB/sec)", + "expression": "([UNC_CHA_TOR_INSERTS.IO_PCIRDCUR] * 64 / 1000000) / 1" + }, + { + "name": "metric_IO_bandwidth_disk_or_network_reads (MB/sec)", + "expression": "(([UNC_CHA_TOR_INSERTS.IO_ITOM] + [UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR]) * 64 / 1000000) / 1" + }, + { + "name": "metric_package power (watts)", + "expression": "[power/energy-pkg/]", + "origin": "perfspect" + }, + { + "name": "metric_DRAM power (watts)", + "expression": "[power/energy-ram/]", + "origin": "perfspect" + }, + { + "name": "metric_core c6 residency %", + "expression": "100 * [cstate_core/c6-residency/] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_package c6 residency %", + "expression": "100 * [cstate_pkg/c6-residency/] * [CORES_PER_SOCKET] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_uncore frequency GHz", + "expression": "([UNC_CHA_CLOCKTICKS] / 
([CHAS_PER_SOCKET] * [SOCKET_COUNT]) / 1000000000) / 1" + }, + { + "name": "metric_ITLB (2nd level) MPI", + "name-txn": "metric_ITLB (2nd level) misses per txn", + "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) load MPI", + "name-txn": "metric_DTLB (2nd level) load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 4KB page load MPI", + "name-txn": "metric_DTLB (2nd level) 4KB page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 2MB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) 1GB large page load MPI", + "name-txn": "metric_DTLB (2nd level) 1GB large page load misses per txn", + "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]" + }, + { + "name": "metric_DTLB (2nd level) store MPI", + "name-txn": "metric_DTLB (2nd level) store misses per txn", + "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + }, + { + "name": "metric_TMA_Frontend_Bound(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Fetch_Latency(%)", + "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_LATENCY] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_....ICache_Misses(%)", + "expression": "100 * ( 
[TOPDOWN_FE_BOUND.ICACHE] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....ITLB_Misses(%)", + "expression": "100 * ( [TOPDOWN_FE_BOUND.ITLB_MISS] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_....Branch_Resteer(%)", + "expression": "100*([TOPDOWN_FE_BOUND.BRANCH_RESTEER] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_..Fetch_Bandwidth(%)", + "expression": "100*([TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Bad_Speculation(%)", + "expression": "100 * ( [TOPDOWN_BAD_SPECULATION.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Branch_Mispredicts(%)", + "expression": "100*([TOPDOWN_BAD_SPECULATION.MISPREDICT] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_..Machine_Clears(%)", + "expression": "100*([TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Backend_Bound(%)", + "expression": "100 * ( [TOPDOWN_BE_BOUND.ALL] / ( 6 * [cpu-cycles] ) )" + }, + { + "name": "metric_TMA_..Memory_Bound(%)", + "expression": "100*min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))" + }, + { + "name": "metric_TMA_....L1_Bound(%)", + "expression": "100*([LD_HEAD.L1_BOUND_AT_RET] / [cpu-cycles])" + }, + { + "name": "metric_TMA_....L2_Bound(%)", + "expression": "100*([MEM_BOUND_STALLS_LOAD.L2_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.L2_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))" + }, + { + "name": "metric_TMA_....L3_Bound(%)", + "expression": "100*([MEM_BOUND_STALLS_LOAD.LLC_HIT] / [cpu-cycles] - (max(1*(([MEM_BOUND_STALLS_LOAD.ALL] - [LD_HEAD.L1_MISS_AT_RET]) / [cpu-cycles]), 0) * [MEM_BOUND_STALLS_LOAD.LLC_HIT] / [MEM_BOUND_STALLS_LOAD.ALL]))" + }, + { + "name": "metric_TMA_....Store_Bound(%)", + 
"expression": "100*(([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL])" + }, + { + "name": "metric_TMA_..Core_Bound(%)", + "expression": "100*max(0, 1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles]) - min(1*([TOPDOWN_BE_BOUND.ALL] / (6.0 * [cpu-cycles])), 1*([LD_HEAD.ANY_AT_RET] / [cpu-cycles] + ([TOPDOWN_BE_BOUND.MEM_SCHEDULER] / (6.0 * [cpu-cycles])) * [MEM_SCHEDULER_BLOCK.ST_BUF] / [MEM_SCHEDULER_BLOCK.ALL]))))" + }, + { + "name": "metric_TMA_....Serialization(%)", + "expression": "100*([TOPDOWN_BE_BOUND.SERIALIZATION] / (6.0 * [cpu-cycles]))" + }, + { + "name": "metric_TMA_Retiring(%)", + "expression": "100 * ( [TOPDOWN_RETIRING.ALL] / ( 6 * [cpu-cycles] ) )" } -] \ No newline at end of file +] diff --git a/events/spr_emr_nofixedtma.txt b/events/spr_emr_nofixedtma.txt new file mode 100644 index 0000000..d767656 --- /dev/null +++ b/events/spr_emr_nofixedtma.txt @@ -0,0 +1,138 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# Sapphire Rapids and Emerald Rapids event list for platforms that don't have support for the fixed counter +# TMA events, e.g., some AWS VMs. +# Note that there are no more than 10 events per group. On these same platforms, the cpu-cycles fixed +# counter is not supported so a general purpose counter will be used. 
+ +cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, +cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, +cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, +cpu/event=0x25,umask=0x1f,period=100003,name='L2_LINES_IN.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xd1,umask=0x10,period=100021,name='MEM_LOAD_RETIRED.L2_MISS'/, +cpu/event=0x24,umask=0x24,period=200003,name='L2_RQSTS.CODE_RD_MISS'/, +cpu/event=0x11,umask=0x0e,period=100003,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0xa6,umask=0x40,cmask=0x02,period=1000003,name='EXE_ACTIVITY.BOUND_ON_STORES'/, +cpu/event=0xa6,umask=0x21,cmask=0x05,period=2000003,name='EXE_ACTIVITY.BOUND_ON_LOADS'/, +cpu/event=0xad,umask=0x10,period=1000003,name='INT_MISC.UOP_DROPPING'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x12,umask=0x0e,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x04,period=100003,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, +cpu/event=0x13,umask=0x0e,period=100003,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0xd1,umask=0x02,period=200003,name='MEM_LOAD_RETIRED.L2_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x09,cmask=0x09,period=1000003,name='MEMORY_ACTIVITY.STALLS_L3_MISS'/, +cpu/event=0x80,umask=0x04,period=500009,name='ICACHE_DATA.STALLS'/, +cpu/event=0x83,umask=0x04,period=200003,name='ICACHE_TAG.STALLS'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 1) +cpu/event=0x9c,umask=0x01,name='IDQ_UOPS_NOT_DELIVERED.CORE'/, +cpu/event=0xa4,umask=0x01,name='TOPDOWN.SLOTS_P'/, +cpu/event=0x9c,umask=0x01,cmask=0x06,name='IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE'/, +cpu/event=0xc2,umask=0x02,name='UOPS_RETIRED.SLOTS'/, +cpu/event=0xae,umask=0x01,name='UOPS_ISSUED.ANY'/, +cpu/event=0x87,umask=0x01,name='DECODE.LCP'/, 
+cpu/event=0x61,umask=0x02,name='DSB2MITE_SWITCHES.PENALTY_CYCLES'/, +cpu-cycles, +ref-cycles, +instructions; + +# events for TMA metrics without fixed counter support (group 2) +cpu/event=0xa4,umask=0x02,name='TOPDOWN.BACKEND_BOUND_SLOTS'/, +cpu/event=0xa4,umask=0x08,name='TOPDOWN.BR_MISPREDICT_SLOTS'/, +cpu/event=0xa4,umask=0x10,name='TOPDOWN.MEMORY_BOUND_SLOTS'/, +cpu/event=0xc2,umask=0x01,name='UOPS_RETIRED.HEAVY'/, +cpu/event=0xe5,umask=0x03,name='MEM_UOP_RETIRED.ANY'/, +cpu/event=0xc0,umask=0x10,name='INST_RETIRED.MACRO_FUSED'/, +cpu/event=0xc4,umask=0x00,name='BR_INST_RETIRED.ALL_BRANCHES'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x47,umask=0x03,cmask=0x03,period=1000003,name='MEMORY_ACTIVITY.STALLS_L1D_MISS'/, +cpu/event=0x47,umask=0x05,cmask=0x05,period=1000003,name='MEMORY_ACTIVITY.STALLS_L2_MISS'/, +cpu/event=0xb0,umask=0x09,cmask=0x01,period=1000003,name='ARITH.DIV_ACTIVE'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xd0,umask=0x21,cmask=0x00,period=1000003,name='MEM_INST_RETIRED.LOCK_LOADS'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x04,cmask=0x01,period=2000003,name='IDQ.MITE_CYCLES_ANY'/, +cpu/event=0x79,umask=0x04,cmask=0x06,period=2000003,name='IDQ.MITE_CYCLES_OK'/, +cpu/event=0x79,umask=0x08,cmask=0x01,period=2000003,name='IDQ.DSB_CYCLES_ANY'/, +cpu/event=0x79,umask=0x08,cmask=0x06,period=2000003,name='IDQ.DSB_CYCLES_OK'/, +cpu/event=0xec,umask=0x02,period=2000003,name='CPU_CLK_UNHALTED.DISTRIBUTED'/, +cpu/event=0xb7,umask=0x02,period=2000003,name='EXE.AMX_BUSY'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x79,umask=0x08,cmask=0x00,period=2000003,name='IDQ.DSB_UOPS'/, +cpu/event=0x79,umask=0x04,period=100003,name='IDQ.MITE_UOPS'/, +cpu/event=0x79,umask=0x20,period=100003,name='IDQ.MS_UOPS'/, +cpu/event=0xa8,umask=0x01,cmask=0x00,period=2000003,name='LSD.UOPS'/, +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +#OCR 
+cpu/event=0x2a,umask=0x01,offcore_rsp=0x104004477,name='OCR.READS_TO_CORE.LOCAL_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x730004477,name='OCR.READS_TO_CORE.REMOTE_DRAM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x90002380,name='OCR.HWPF_L3.REMOTE'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x84002380,name='OCR.HWPF_L3.L3_MISS_LOCAL'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x1030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0x2a,umask=0x01,offcore_rsp=0x830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#UPI +upi/event=0x02,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/; + +#CHA (Cache) +cha/event=0x35,umask=0xc80ffe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, +cha/event=0x35,umask=0xc8177e01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE'/, +cha/event=0x36,umask=0xc8177e01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE'/; + +cha/event=0x35,umask=0xC816FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL'/, +cha/event=0x36,umask=0xc816fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL'/, +cha/event=0x35,umask=0xC896FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_LOCAL'/, +cha/event=0x35,umask=0xC8977E01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_REMOTE'/; + +cha/event=0x35,umask=0xccd7fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0xc817fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD'/, +cha/event=0x35,umask=0xc897fe01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF'/, +cha/event=0x36,umask=0xC817fe01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0xc8f3ff04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0xCC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0xCD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/, +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xcf,name='UNC_M_CAS_COUNT.RD'/, 
+imc/event=0x05,umask=0xf0,name='UNC_M_CAS_COUNT.WR'/; + +#power +power/energy-pkg/, +power/energy-ram/; diff --git a/events/srf.txt b/events/srf.txt index b57637d..49b3fe1 100644 --- a/events/srf.txt +++ b/events/srf.txt @@ -5,13 +5,107 @@ # SierraForest event list +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +cpu/event=0x08,umask=0x08,name='DTLB_LOAD_MISSES.WALK_COMPLETED_1G'/, +cpu/event=0x08,umask=0xe,name='DTLB_LOAD_MISSES.WALK_COMPLETED'/, +cpu/event=0x49,umask=0xe,name='DTLB_STORE_MISSES.WALK_COMPLETED'/, +cpu/event=0x12,umask=0x02,name='DTLB_LOAD_MISSES.WALK_COMPLETED_4K'/, +cpu/event=0x12,umask=0x04,name='DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M'/, cpu-cycles, ref-cycles, instructions; -cpu-cycles:k, -ref-cycles:k, -instructions:k; +cpu/event=0x2e,umask=0x41,name='LONGEST_LAT_CACHE.MISS'/, +cpu/event=0x2e,umask=0x4f,name='LONGEST_LAT_CACHE.REFERENCE'/, +cpu/event=0x85,umask=0xe,name='ITLB_MISSES.WALK_COMPLETED'/, +cpu/event=0xd0,umask=0x21,name='MEM_UOPS_RETIRED.LOCK_LOADS'/, +cpu/event=0xd1,umask=0x02,name='MEM_LOAD_UOPS_RETIRED.L2_HIT'/, +cpu/event=0xd1,umask=0x40,name='MEM_LOAD_UOPS_RETIRED.L1_MISS'/, +cpu/event=0xd1,umask=0x1,name='MEM_LOAD_UOPS_RETIRED.L1_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x71,umask=0x00,name='TOPDOWN_FE_BOUND.ALL'/, +cpu/event=0x71,umask=0x20,name='TOPDOWN_FE_BOUND.ICACHE'/, +cpu/event=0x71,umask=0x10,name='TOPDOWN_FE_BOUND.ITLB_MISS'/, +cpu/event=0x71,umask=0x72,name='TOPDOWN_FE_BOUND.FRONTEND_LATENCY'/, +cpu/event=0x71,umask=0x40,name='TOPDOWN_FE_BOUND.BRANCH_RESTEER'/, +cpu/event=0x71,umask=0x8d,name='TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x80,umask=0x02,name='ICACHE.MISSES'/, +cpu/event=0x05,umask=0xf4,name='LD_HEAD.L1_BOUND_AT_RET'/, +cpu/event=0x72,umask=0x00,name='TOPDOWN_RETIRING.ALL'/, +cpu/event=0x73,umask=0x03,name='TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS'/, +cpu/event=0x73,umask=0x04,name='TOPDOWN_BAD_SPECULATION.MISPREDICT'/, 
+cpu/event=0x73,umask=0x00,name='TOPDOWN_BAD_SPECULATION.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x05,umask=0xff,name='LD_HEAD.ANY_AT_RET'/, +cpu/event=0x04,umask=0x07,name='MEM_SCHEDULER_BLOCK.ALL'/, +cpu/event=0x04,umask=0x01,name='MEM_SCHEDULER_BLOCK.ST_BUF'/, +cpu/event=0x74,umask=0x02,name='TOPDOWN_BE_BOUND.MEM_SCHEDULER'/, +cpu/event=0x74,umask=0x10,name='TOPDOWN_BE_BOUND.SERIALIZATION'/, +cpu/event=0x74,umask=0x00,name='TOPDOWN_BE_BOUND.ALL'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0x05,umask=0x81,name='LD_HEAD.L1_MISS_AT_RET'/, +cpu/event=0x34,umask=0x6f,name='MEM_BOUND_STALLS_LOAD.ALL'/, +cpu/event=0x34,umask=0x01,name='MEM_BOUND_STALLS_LOAD.L2_HIT'/, +cpu/event=0x34,umask=0x06,name='MEM_BOUND_STALLS_LOAD.LLC_HIT'/, +cpu-cycles, +ref-cycles, +instructions; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x8000100000004477,name='OCR.READS_TO_CORE.OUTSTANDING'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x100000014477,name='OCR.READS_TO_CORE.ANY_RESPONSE'/; + +cpu/event=0xB7,umask=0x01,offcore_rsp=0x101030004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM'/, +cpu/event=0xB7,umask=0x01,offcore_rsp=0x100830004477,name='OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD'/; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x10244,name='OCR.L2_CODE_MISS'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x10070,name='OCR.HWPF_L2.ANY_RESPONSE'/; + +cpu/event=0xb7,umask=0x01,cmask=0x00,offcore_rsp=0x1010003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HITM'/, +cpu/event=0xb7,umask=0x02,cmask=0x00,offcore_rsp=0x1008003C4477,name='OCR.READS_TO_CORE.L3_HIT.SNOOP_HIT_WITH_FWD'/; + +#CHA (Cache) +cha/event=0x01,umask=0x00,name='UNC_CHA_CLOCKTICKS'/; + +cha/event=0x35,umask=0x00C827FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT'/, +cha/event=0x35,umask=0x00C8A7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF'/, +cha/event=0x35,umask=0x00C80FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD'/, 
+cha/event=0x35,umask=0x00C88FFE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF'/; + +cha/event=0x35,umask=0x00CCD7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA'/, +cha/event=0x35,umask=0x00C807FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO'/, +cha/event=0x35,umask=0x00C887FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF'/, +cha/event=0x35,umask=0x00CCC7FE01,name='UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO'/; + +#CHA (IO Bandwidth) +cha/event=0x35,umask=0x00C8F3FF04,name='UNC_CHA_TOR_INSERTS.IO_PCIRDCUR'/, +cha/event=0x35,umask=0x00CC43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOM'/, +cha/event=0x35,umask=0x00CD43FF04,name='UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR'/; + +cha/event=0x36,umask=0x00C827FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT'/; + +cha/event=0x36,umask=0x00C807FE01,name='UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO'/; + +#IMC (memory read/writes) +imc/event=0x05,umask=0xCF,name='UNC_M_CAS_COUNT_SCH0.RD'/, +imc/event=0x05,umask=0xF0,name='UNC_M_CAS_COUNT_SCH0.WR'/, +imc/event=0x06,umask=0xCF,name='UNC_M_CAS_COUNT_SCH1.RD'/, +imc/event=0x06,umask=0xF0,name='UNC_M_CAS_COUNT_SCH1.WR'/; #C6 cstate_core/c6-residency/; @@ -19,4 +113,4 @@ cstate_pkg/c6-residency/; #power power/energy-pkg/, -power/energy-ram/; \ No newline at end of file +power/energy-ram/; diff --git a/perf-collect.py b/perf-collect.py index e3d95cb..1c28a24 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -38,6 +38,8 @@ def write_metadata( arch, cpuname, cpuid_info, + pmu_driver_version, + fixed_tma_supported, muxinterval, cpu, socket, @@ -68,6 +70,8 @@ def write_metadata( for c in _cpus: modified.write(str(c) + ";") modified.write("\n") + modified.write("PMUDriverVersion," + str(pmu_driver_version) + ",\n") + modified.write("FixedTMASupported," + str(fixed_tma_supported) + ",\n") modified.write("Perf event mux Interval ms," + str(muxinterval) + ",\n") cpumode = "enabled" if cpu else "disabled" socketmode = "enabled" if socket else "disabled" @@ -158,7 +162,8 @@ def supports_psi(): return False -def 
tma_supported(): +# fixed_tma_supported returns true if the fixed-purpose PMU counters for TMA events are supported on the target platform +def fixed_tma_supported(): perf_out = "" try: perf = subprocess.Popen( @@ -180,16 +185,68 @@ def tma_supported(): perf_out.split("\n"), ) } - except Exception: + except (IndexError, ValueError): + logging.debug("Failed to parse perf output in fixed_tma_supported()") + return False + try: + if events["TOPDOWN.SLOTS"] == events["PERF_METRICS.BAD_SPECULATION"]: + return False + except KeyError: + logging.debug("Failed to find required events in fixed_tma_supported()") return False - # This is a perf artifact of no vPMU support - if events["TOPDOWN.SLOTS"] == events["PERF_METRICS.BAD_SPECULATION"]: + if events["TOPDOWN.SLOTS"] == 0 or events["PERF_METRICS.BAD_SPECULATION"] == 0: return False return True +# fixed_event_supported returns true if the fixed-purpose PMU counter for the given event (cpu-cycles or instructions) is supported on the target platform +# it makes this determination by filling all the general purpose counters with the given events, then adding one more +def fixed_event_supported(arch, event): + num_gp_counters = 0 + if arch == "broadwell" or arch == "skylake" or arch == "cascadelake": + num_gp_counters = 4 + elif ( + arch == "icelake" + or arch == "sapphirerapids" + or arch == "emeraldrapids" + or arch == "sierraforest" + ): + num_gp_counters = 8 + else: + crash(f"Unsupported architecture: {arch}") + + perf_out = "" + events = ",".join([event] * (num_gp_counters + 1)) + try: + perf = subprocess.Popen( + shlex.split("perf stat -a -e '{" + events + "}' sleep .1"), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) + perf_out = perf.communicate()[0].decode() + except subprocess.CalledProcessError: + return False + # on some VMs we see "<not counted>" or "<not supported>" in the perf output + if "<not counted>" in perf_out or "<not supported>" in perf_out: + return False + # on some VMs we get a count of 0 + for line in perf_out.split("\n"): + tokens =
line.split() + if len(tokens) == 2 and tokens[0] == "0": + return False + return True + + +def fixed_cycles_supported(arch): + return fixed_event_supported(arch, "cpu-cycles") + + +def fixed_instructions_supported(arch): + return fixed_event_supported(arch, "instructions") + + def ref_cycles_supported(): perf_out = "" try: @@ -228,6 +285,38 @@ def validate_file(fname): crash(str(fname) + " not accessible") +def get_eventfile_path(arch, script_path, supports_tma_fixed_events): + eventfile = None + if arch == "broadwell": + eventfile = "bdx.txt" + elif arch == "skylake" or arch == "cascadelake": + eventfile = "clx_skx.txt" + elif arch == "icelake": + if supports_tma_fixed_events: + eventfile = "icx.txt" + else: + eventfile = "icx_nofixedtma.txt" + elif arch == "sapphirerapids" or arch == "emeraldrapids": + if supports_tma_fixed_events: + eventfile = "spr_emr.txt" + else: + eventfile = "spr_emr_nofixedtma.txt" + elif arch == "sierraforest": + eventfile = "srf.txt" + + if eventfile is None: + return None + + # Convert path of event file to relative path if being packaged by pyInstaller into a binary + if getattr(sys, "frozen", False): + basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) + return os.path.join(basepath, eventfile) + elif __file__: + return script_path + "/events/" + eventfile + else: + crash("Unknown application type") + + if __name__ == "__main__": common.configure_logging(".") if platform.system() != "Linux": @@ -288,17 +377,46 @@ def validate_file(fname): parser.add_argument( "-V", "--version", help="display version info", action="store_true" ) + parser.add_argument( + "-e", "--eventfile", default=None, help="Relative path to eventfile" + ) args = parser.parse_args() if args.version: print(perf_helpers.get_tool_version()) sys.exit() - if os.geteuid() != 0: - crash("Must run PerfSpect as root, please re-run") + is_root = os.geteuid() == 0 + if not is_root: + logging.warning( + "User is not root. 
See README.md for requirements and instructions on how to run as non-root user." + ) + try: + input("Press Enter to continue as non-root user or Ctrl-c to exit...") + except KeyboardInterrupt: + print("\nExiting...") + sys.exit() + + if not is_root: + # check kernel.perf_event_paranoid. It needs to be zero for non-root users. + paranoid = perf_helpers.check_perf_event_paranoid() + if paranoid is None: + crash("kernel.perf_event_paranoid could not be determined") + if paranoid != 0: + crash( + "kernel.perf_event_paranoid is set to " + + str(paranoid) + + ". Run as root or set it to 0" + ) # disable nmi watchdog before collecting perf - nmi_watchdog = perf_helpers.disable_nmi_watchdog() + nmi_watchdog_status = perf_helpers.nmi_watchdog_enabled() + if nmi_watchdog_status is None: + crash("NMI watchdog status could not be determined") + + if is_root and nmi_watchdog_status: + perf_helpers.disable_nmi_watchdog() + interval = 5000 collect_psi = False @@ -319,48 +437,47 @@ def validate_file(fname): if args.muxinterval > 1000: crash("Input argument muxinterval is too large, max is [1s or 1000ms]") - # select architecture default event file if not supplied - have_uncore = True + # check if pmu available + if "cpu-cycles" not in perf_helpers.get_perf_list(): + crash( + "PMU's not available. Run baremetal or in a VM which exposes PMUs (sometimes full socket)" + ) + procinfo = perf_helpers.get_cpuinfo() arch, cpuname = perf_helpers.get_arch_and_name(procinfo) if not arch: crash( f"Unrecognized CPU architecture. 
Supported architectures: {', '.join(SUPPORTED_ARCHITECTURES)}" ) - eventfile = None - if arch == "broadwell": - eventfile = "bdx.txt" - elif arch == "skylake" or arch == "cascadelake": - eventfile = "clx_skx.txt" - elif arch == "icelake": - eventfile = "icx.txt" - elif arch == "sapphirerapids" or arch == "emeraldrapids": - eventfile = "spr_emr.txt" - elif arch == "sierraforest": - eventfile = "srf.txt" - if eventfile is None: - crash(f"failed to match architecture ({arch}) to event file name.") + # Can we use the fixed purpose PMU counters for TMA events? + # The fixed-purpose PMU counters for TMA events are not supported on architectures older than Icelake + # They are also not supported on some VMs, e.g., AWS ICX and SPR VMs + supports_tma_fixed_events = False + if arch == "icelake" or arch == "sapphirerapids" or arch == "emeraldrapids": + supports_tma_fixed_events = fixed_tma_supported() + if not supports_tma_fixed_events: + logging.warning( + "Due to lack of vPMU support, some TMA events will not be collected" + ) - # Convert path of event file to relative path if being packaged by pyInstaller into a binary - if getattr(sys, "frozen", False): - basepath = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__))) - eventfilename = eventfile - eventfile = os.path.join(basepath, eventfile) - elif __file__: - eventfile = script_path + "/events/" + eventfile - eventfilename = eventfile + # Can we use the fixed-purpose PMU counter for the cpu-cycles event? + supports_cycles_fixed_event = fixed_cycles_supported(arch) + + # Can we use the fixed-purpose PMU counter for the instructions event? 
+ supports_instructions_fixed_event = fixed_instructions_supported(arch) + + # select architecture default event file if not supplied + if args.eventfile is not None: + eventfile = args.eventfile else: - crash("Unknown application type") + eventfile = get_eventfile_path(arch, script_path, supports_tma_fixed_events) + if eventfile is None: + crash(f"failed to match architecture ({arch}) to event file name.") - # check if pmu available - if "cpu-cycles" not in perf_helpers.get_perf_list(): - crash( - "PMU's not available. Run baremetal or in a VM which exposes PMUs (sometimes full socket)" - ) + logging.info("Event file: " + eventfile) - # get perf events to collect - include_tma = True + supports_uncore_events = True sys_devs = perf_helpers.get_sys_devices() if ( "uncore_cha" not in sys_devs @@ -369,39 +486,50 @@ def validate_file(fname): and "uncore_qpi" not in sys_devs and "uncore_imc" not in sys_devs ): - logging.info("disabling uncore (possibly in a vm?)") - have_uncore = False + logging.info("uncore devices not found (possibly in a vm?)") + supports_uncore_events = False + + supports_ref_cycles_event = ref_cycles_supported() - if arch == "icelake": - include_tma = tma_supported() - if not include_tma: - logging.warning( - "Due to lack of vPMU support, TMA L1 events will not be collected" - ) - if arch == "sapphirerapids" or arch == "emeraldrapids": - include_tma = tma_supported() - if not include_tma: - logging.warning( - "Due to lack of vPMU support, TMA L1 & L2 events will not be collected" - ) events, collection_events = prep_events.prepare_perf_events( eventfile, - (args.pid is not None or args.cid is not None or not have_uncore), - include_tma, - not have_uncore, - ref_cycles_supported(), + (args.pid is not None or args.cid is not None or not supports_uncore_events), + supports_tma_fixed_events, + supports_uncore_events, + supports_ref_cycles_event, ) # check output file is writable if not perf_helpers.check_file_writeable(args.outcsv): crash("Output file 
%s not writeable " % args.outcsv) + # adjust mux interval mux_intervals = perf_helpers.get_perf_event_mux_interval() if args.muxinterval > 0: - logging.info( - "changing default perf mux interval to " + str(args.muxinterval) + "ms" - ) - perf_helpers.set_perf_event_mux_interval(False, args.muxinterval, mux_intervals) + if is_root: + logging.info( + "changing perf mux interval to " + str(args.muxinterval) + "ms" + ) + perf_helpers.set_perf_event_mux_interval( + False, args.muxinterval, mux_intervals + ) + else: + for device, mux in mux_intervals.items(): + mux_int = -1 + try: + mux_int = int(mux) + except ValueError: + crash("Failed to parse mux interval on " + device) + if mux_int != args.muxinterval: + crash( + "mux interval on " + + device + + " is set to " + + str(mux_int) + + ". Run as root or set it to " + + str(args.muxinterval) + + "." + ) # parse cgroups cgroups = [] @@ -411,10 +539,25 @@ def validate_file(fname): if args.pid is not None or args.cid is not None: logging.info("Not collecting uncore events in this run mode") + pmu_driver_version = perf_helpers.get_pmu_driver_version() + # log some metadata logging.info("Architecture: " + arch) logging.info("Model: " + cpuname) logging.info("Kernel version: " + perf_helpers.get_version()) + logging.info("PMU driver version: " + pmu_driver_version) + logging.info("Uncore events supported: " + str(supports_uncore_events)) + logging.info( + "Fixed counter TMA events supported: " + str(supports_tma_fixed_events) + ) + logging.info( + "Fixed counter cpu-cycles event supported: " + str(supports_cycles_fixed_event) + ) + logging.info( + "Fixed counter instructions event supported: " + + str(supports_instructions_fixed_event) + ) + logging.info("ref-cycles event supported: " + str(supports_ref_cycles_event)) logging.info("Cores per socket: " + str(perf_helpers.get_cpu_count())) logging.info("Socket: " + str(perf_helpers.get_socket_count())) logging.info("Hyperthreading on: " + str(perf_helpers.get_ht_status())) @@ 
-482,6 +625,8 @@ def validate_file(fname): arch, cpuname, cpuid_info, + pmu_driver_version, + supports_tma_fixed_events, args.muxinterval, args.cpu, args.socket, @@ -491,10 +636,11 @@ def validate_file(fname): os.chmod(args.outcsv, 0o666) # nosec # reset nmi_watchdog to what it was before running perfspect - if nmi_watchdog != 0: + if is_root and nmi_watchdog_status is True: perf_helpers.enable_nmi_watchdog() - logging.info("changing perf mux interval back to default") - perf_helpers.set_perf_event_mux_interval(True, 1, mux_intervals) + if is_root: + logging.info("changing perf mux interval back to default") + perf_helpers.set_perf_event_mux_interval(True, 1, mux_intervals) logging.info("perf stat dumped to %s" % args.outcsv) diff --git a/perf-collect.spec b/perf-collect.spec index 5f86876..5a53fc8 100644 --- a/perf-collect.spec +++ b/perf-collect.spec @@ -7,7 +7,7 @@ block_cipher = None a = Analysis( ['perf-collect.py'], pathex=[], - datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr_emr.txt', '.'), ('./events/srf.txt', '.')], + datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/icx_nofixedtma.txt', '.'), ('./events/spr_emr.txt', '.'), ('./events/spr_emr_nofixedtma.txt', '.'), ('./events/srf.txt', '.')], hiddenimports=[], hookspath=[], hooksconfig={}, diff --git a/perf-postprocess.py b/perf-postprocess.py index a2d32a0..08d4dac 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -114,6 +114,13 @@ def get_args(script_path): type=int, help="Generate per-transaction metrics using the provided transactions/sec.", ) + parser.add_argument( + "-m", + "--metricfile", + default=None, + help="Relative path to metrics file in json format", + dest="metric_file", + ) args = parser.parse_args() @@ -349,6 +356,8 @@ def get_metadata_as_dict(meta_data_lines, txns=None): "Model", "kernel version", "PerfSpect 
version", + "PMUDriverVersion", + "FixedTMASupported", ]: if info in line: meta_data["metadata"][info] = line.split(",", 1)[1] @@ -414,16 +423,22 @@ def get_event_groups(event_lines): return groups -def get_metric_file_name(microarchitecture): +def get_metric_file_name(microarchitecture, fixed_tma_supported): metric_file = "" if microarchitecture == "broadwell": metric_file = "metric_bdx.json" elif microarchitecture == "skylake" or microarchitecture == "cascadelake": metric_file = "metric_skx_clx.json" elif microarchitecture == "icelake": - metric_file = "metric_icx.json" + if fixed_tma_supported: + metric_file = "metric_icx.json" + else: + metric_file = "metric_icx_nofixedtma.json" elif microarchitecture == "sapphirerapids" or microarchitecture == "emeraldrapids": - metric_file = "metric_spr_emr.json" + if fixed_tma_supported: + metric_file = "metric_spr_emr.json" + else: + metric_file = "metric_spr_emr_nofixedtma.json" elif microarchitecture == "sierraforest": metric_file = "metric_srf.json" else: @@ -445,9 +460,11 @@ def validate_file(fname): crash(str(fname) + " not accessible") -def get_metrics_formula(architecture, txns=None): +def get_metrics_formula(architecture, fixed_tma_supported, metric_file=None, txns=None): # get the metric file name based on architecture - metric_file = get_metric_file_name(architecture) + if metric_file is None: + metric_file = get_metric_file_name(architecture, fixed_tma_supported) + logging.info("Metric file: " + metric_file) validate_file(metric_file) with open(metric_file, "r") as f_metric: @@ -475,7 +492,7 @@ def get_socket_number(sockets_dict, CPU): def extract_dataframe(perf_data_lines, meta_data, perf_mode): - logging.info("Formatting event data") + logging.info("Parsing event data") # parse event data into dataframe and set header names perf_data_df = pd.DataFrame(perf_data_lines) if "CGROUPS" in meta_data and meta_data["CGROUPS"] == "enabled": @@ -732,7 +749,7 @@ def log_skip_metric(metric, instance, msg): # 
group_start_end_index_dict is both an input and output argument -# if empty, the start and end indexes for each geroup will be added +# if empty, the start and end indexes for each group will be added # if not, the start and end indexes for each group will be read from it def get_groups_to_dataframes( time_slice_df, group_to_event, group_start_end_index_dict, perf_mode @@ -1027,7 +1044,6 @@ def generate_metrics( group_to_df = get_groups_to_dataframes( time_slice_df, group_to_event, group_start_end_index_dict, perf_mode ) - time_metrics_result[time_slice] = evaluate_metrics( verbose, filtered_metrics, metadata, group_to_event, group_to_df, errors ) @@ -1174,7 +1190,7 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): args = get_args(script_path) input_file_path = args.rawfile out_file_path = args.outfile - # read all metadata, perf evernts, and perf data lines + # read all metadata, perf events, and perf data lines # Note: this might not be feasible for very large files meta_data_lines, perf_event_lines, perf_data_lines = get_all_data_lines( input_file_path @@ -1200,7 +1216,12 @@ def generate_raw_events(perf_data_df, out_file_path, perf_mode): perf_data_df = extract_dataframe(perf_data_lines, meta_data, perf_mode) # parse metrics expressions - metrics = get_metrics_formula(meta_data["constants"]["CONST_ARCH"], args.pertxn) + metrics = get_metrics_formula( + meta_data["constants"]["CONST_ARCH"], + meta_data["metadata"]["FixedTMASupported"] == "True", + args.metric_file, + args.pertxn, + ) if args.rawevents: # generate raw events for system, socket and CPU generate_raw_events(perf_data_df, out_file_path, perf_mode) diff --git a/src/perf_helpers.py b/src/perf_helpers.py index c6d4456..69cc2a0 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -112,8 +112,9 @@ def get_imc_cha_upi_count(): return imc_count, cha_count, upi_count -# device ids are not consecutive in some cases -def get_channel_ids(pattern): +# return a sorted list of device ids 
for a given device type pattern, e.g., uncore_cha_, uncore_imc_, etc. +# note: this is necessary because device ids are not always consecutive +def get_device_ids(pattern): sysdevices = os.listdir("/sys/bus/event_source/devices") devices = pattern + "[0-9]*" ids = [] @@ -138,14 +139,33 @@ def get_perf_event_mux_interval(): return mux_interval +# Returns true/false depending on state of the NMI watchdog timer, or None on error. +def nmi_watchdog_enabled(): + try: + proc_output = subprocess.check_output(["cat", "/proc/sys/kernel/nmi_watchdog"]) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + logging.warning(f"Failed to get nmi_watchdog status: {e}") + return None + try: + nmi_watchdog_status = int(proc_output.decode().strip()) + except (ValueError) as e: + logging.warning(f"Failed to interpret nmi_watchdog status: {e}") + return None + return nmi_watchdog_status == 1 + + # disable nmi watchdog and return its initial status # to restore it after collection def disable_nmi_watchdog(): + nmi_watchdog_status = nmi_watchdog_enabled() + if nmi_watchdog_status is None: + logging.error("Failed to get nmi_watchdog status.") + return None try: - proc_output = subprocess.check_output(["cat", "/proc/sys/kernel/nmi_watchdog"]) - nmi_watchdog_status = int(proc_output.decode().strip()) - if nmi_watchdog_status == 1: - proc_output = subprocess.check_output(["sysctl", "kernel.nmi_watchdog=0"]) + if nmi_watchdog_status: + proc_output = subprocess.check_output( + ["sysctl", "kernel.nmi_watchdog=0"], stderr=subprocess.STDOUT + ) new_watchdog_status = int( proc_output.decode().strip().replace("kernel.nmi_watchdog = ", "") ) @@ -158,7 +178,16 @@ def disable_nmi_watchdog(): logging.info("nmi_watchdog already disabled. 
No change needed.") return nmi_watchdog_status except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e: - crash(f"Failed to disable nmi_watchdog: {e}") + logging.warning(f"Failed to disable nmi_watchdog: {e}") + + +def check_perf_event_paranoid(): + try: + return int( + subprocess.check_output(["cat", "/proc/sys/kernel/perf_event_paranoid"]) + ) + except (ValueError, FileNotFoundError, subprocess.CalledProcessError) as e: + logging.warning(f"Failed to check perf_event_paranoid: {e}") # enable nmi watchdog @@ -183,15 +212,19 @@ def set_perf_event_mux_interval(reset, interval_ms, mux_interval): if os.path.isdir(dirpath): muxfile = os.path.join(dirpath, "perf_event_mux_interval_ms") if os.path.isfile(muxfile): - with open(muxfile, "w") as f_mux: - val = 0 - if reset: - val = int(mux_interval[f]) - else: - if int(mux_interval[f]): - val = int(interval_ms) - if val: - f_mux.write(str(val)) + try: + with open(muxfile, "w") as f_mux: + val = 0 + if reset: + val = int(mux_interval[f]) + else: + if int(mux_interval[f]): + val = int(interval_ms) + if val: + f_mux.write(str(val)) + except OSError as e: + logging.warning(f"Failed to write mux interval: {e}") + break # get linux kernel version @@ -399,3 +432,13 @@ def get_cgroups(cid): for c in cgroups: logging.info("attaching to cgroup: " + c) return cgroups + + +def get_pmu_driver_version(): + command = "dmesg | grep -A 1 'Intel PMU driver' | tail -1 | awk '{print $NF}'" + try: + version_number = subprocess.check_output(command, shell=True).decode().strip() + return version_number + except subprocess.CalledProcessError as e: + print(f"Error executing command: {e}") + return None diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index c236645..f839186 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -43,15 +43,13 @@ def expand_unc(line): line = line.strip() name = line.split("/")[0] unc_name = "uncore_" + name - unc_count = 0 - sys_devs = helper.get_sys_devices() 
- if unc_name in sys_devs: - unc_count = int(sys_devs[unc_name]) + ids = helper.get_device_ids(unc_name + "_") + unc_count = len(ids) if unc_count > 1: - line = line.replace(name, unc_name + "_0") + line = line.replace(name, unc_name + "_" + str(ids[0])) if "name=" in line: prettyname = (line.split("'"))[1].strip() - line = line.replace(prettyname, prettyname + ".0") + line = line.replace(prettyname, prettyname + "." + str(ids[0])) return line, unc_count @@ -63,8 +61,8 @@ def is_cpu_event(line): if ( (len(tmp_list) == 1 or tmp_list[0] == "cpu" or tmp_list[0].startswith("cstate")) and "OCR." not in line - and "uops_retired.ms" not in line - and "int_misc.unknown_branch_cycles" not in line + and "uops_retired.ms" not in line.lower() + and "int_misc.unknown_branch_cycles" not in line.lower() and "power/" not in line ): return True @@ -74,7 +72,7 @@ def is_cpu_event(line): # enumerate uncore events across all devices def enumerate_uncore(group, pattern, count): uncore_group = "" - ids = helper.get_channel_ids(pattern) + ids = helper.get_device_ids(pattern) for i in range(count - 1): old = pattern + str(ids[i]) new = pattern + str(ids[i + 1]) @@ -109,34 +107,40 @@ def get_cgroup_events_format(cgroups, events, num_events): return perf_format -def filter_events(event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles): +def filter_events( + event_file, + cpu_only, + supports_tma_fixed_events, + supports_uncore_events, + supports_ref_cycles, +): if not os.path.isfile(event_file): crash("event file not found") collection_events = [] unsupported_events = [] perf_list = helper.get_perf_list() - seperate_cycles = [] - if in_vm: - # since most CSP's hide cycles fixed PMU inside their VM's we put it in its own group - if supports_ref_cycles: - seperate_cycles = [ - "cpu-cycles,", - "cpu-cycles:k,", - "ref-cycles,", - "instructions;", - ] - else: - seperate_cycles = [ - "cpu-cycles,", - "cpu-cycles:k,", - "instructions;", - ] + # seperate_cycles = [] + # if not 
supports_uncore_events: + # # since most CSP's hide cycles fixed PMU inside their VM's we put it in its own group + # if supports_ref_cycles: + # seperate_cycles = [ + # "cpu-cycles,", + # "cpu-cycles:k,", + # "ref-cycles,", + # "instructions;", + # ] + # else: + # seperate_cycles = [ + # "cpu-cycles,", + # "cpu-cycles:k,", + # "instructions;", + # ] def process(line): line = line.strip() if line == "" or line.startswith("#") or (cpu_only and not is_cpu_event(line)): return - if not TMA_supported and ( + if not supports_tma_fixed_events and ( "name='TOPDOWN.SLOTS'" in line or "name='PERF_METRICS." in line ): return @@ -152,13 +156,13 @@ def process(line): with open(event_file, "r") as fin: for line in fin: - if in_vm and "cpu-cycles" in line: - continue + # if in_vm and "cpu-cycles" in line: + # continue if not supports_ref_cycles and "ref-cycles" in line: continue process(line) - for line in seperate_cycles: - process(line) + # for line in seperate_cycles: + # process(line) if len(unsupported_events) > 0: logging.warning( f"Perf unsupported events not counted: {unsupported_events}" @@ -167,7 +171,11 @@ def process(line): def prepare_perf_events( - event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles + event_file, + cpu_only, + supports_tma_fixed_events, + supports_uncore_events, + supports_ref_cycles, ): start_group = "'{" end_group = "}'" @@ -176,7 +184,11 @@ def prepare_perf_events( new_group = True collection_events, unsupported_events = filter_events( - event_file, cpu_only, TMA_supported, in_vm, supports_ref_cycles + event_file, + cpu_only, + supports_tma_fixed_events, + supports_uncore_events, + supports_ref_cycles, ) core_event = [] uncore_event = []