[Devel] [PATCH RH8 2/3] perf/x86/intel: make LBR initialization reusable
Evgenii Shatokhin
eshatokhin at virtuozzo.com
Wed Apr 21 19:59:03 MSK 2021
From: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
This patch introduces a globally visible intel_pmu_lbr_fill() routine,
which gathers information about which LBR MSRs are supported for a specific
CPU family/model.
The routine is intended to be used in KVM code, with guest CPU information
as its input. For this reason, it must not have any side effects that could
affect the host system.
https://jira.sw.ru/browse/PSBM-75679
Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
+++
perf/x86/intel: fix Intel processors define's naming
Some Intel cpu defines were renamed, so update
__intel_pmu_lbr_fill() where we use those defines.
Fixes: 746e059d3153 ("ms/perf/x86/intel: make reusable LBR initialization
code")
https://jira.sw.ru/browse/PSBM-94406
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
Commit e84ff4f253fe in the kernels from VZ7.
The list of CPU models was sync'ed to what intel_pmu_init() now uses.
The defines for CPU model numbers have changed again, this patch reflects
that.
Handling of "Cometlake", "Icelake" and "Tigerlake" CPU models was added too.
Note: if a future RHEL kernel brings support for other models, they
will show up in the switch {} in intel_pmu_init(). __intel_pmu_lbr_fill()
will then need to be updated accordingly.
Done in the scope of https://jira.sw.ru/browse/PSBM-127794.
Signed-off-by: Evgenii Shatokhin <eshatokhin at virtuozzo.com>
---
arch/x86/events/intel/core.c | 34 +------
arch/x86/events/intel/lbr.c | 153 ++++++++++++++++++++++++++----
arch/x86/events/perf_event.h | 16 +---
arch/x86/include/asm/perf_event.h | 2 +
4 files changed, 139 insertions(+), 66 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index e67d708f9140..fe265bba601c 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4611,6 +4611,8 @@ __init int intel_pmu_init(void)
intel_ds_init();
+ intel_pmu_lbr_init();
+
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
/*
@@ -4632,8 +4634,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- intel_pmu_lbr_init_core(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_core2_event_constraints;
x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
pr_cont("Core2 events, ");
@@ -4648,8 +4648,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_nhm(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_nehalem_event_constraints;
x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
@@ -4682,8 +4680,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- intel_pmu_lbr_init_atom(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_gen_event_constraints;
x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
@@ -4701,8 +4697,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_slm(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
x86_pmu.extra_regs = intel_slm_extra_regs;
@@ -4721,8 +4715,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_skl(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
@@ -4733,7 +4725,6 @@ __init int intel_pmu_init(void)
*/
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
- x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
td_attr = glm_events_attrs;
extra_attr = slm_format_attr;
@@ -4748,8 +4739,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_skl(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
/*
@@ -4758,7 +4747,6 @@ __init int intel_pmu_init(void)
*/
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
- x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
@@ -4779,8 +4767,6 @@ __init int intel_pmu_init(void)
sizeof(hw_cache_extra_regs));
hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
- intel_pmu_lbr_init_skl(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.extra_regs = intel_tnt_extra_regs;
/*
@@ -4789,7 +4775,6 @@ __init int intel_pmu_init(void)
*/
x86_pmu.pebs_aliases = NULL;
x86_pmu.pebs_prec_dist = true;
- x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.get_event_constraints = tnt_get_event_constraints;
extra_attr = slm_format_attr;
@@ -4805,8 +4790,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_nhm(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_westmere_event_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
@@ -4837,8 +4820,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_snb(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
@@ -4879,8 +4860,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_snb(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_ivb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
@@ -4917,8 +4896,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_hsw(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_hsw_event_constraints;
x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
@@ -4930,7 +4907,6 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
- x86_pmu.lbr_double_abort = true;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
hsw_format_attr : nhm_format_attr;
td_attr = hsw_events_attrs;
@@ -4959,8 +4935,6 @@ __init int intel_pmu_init(void)
hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
- intel_pmu_lbr_init_hsw(&x86_pmu.lbr);
-
x86_pmu.event_constraints = intel_bdw_event_constraints;
x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
x86_pmu.extra_regs = intel_snbep_extra_regs;
@@ -4988,7 +4962,6 @@ __init int intel_pmu_init(void)
slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs,
knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_knl(&x86_pmu.lbr);
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -5015,7 +4988,6 @@ __init int intel_pmu_init(void)
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_skl(&x86_pmu.lbr);
/* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
event_attr_td_recovery_bubbles.event_str_noht =
@@ -5065,7 +5037,6 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
- intel_pmu_lbr_init_skl(&x86_pmu.lbr);
x86_pmu.event_constraints = intel_icl_event_constraints;
x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
@@ -5083,7 +5054,6 @@ __init int intel_pmu_init(void)
mem_attr = icl_events_attrs;
tsx_attr = icl_tsx_events_attrs;
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
- x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_skl(pmem);
pr_cont("Icelake events, ");
name = "icelake";
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ea971fd767af..eb1a7976507b 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -5,6 +5,7 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>
+#include <asm/intel-family.h>
#include "../perf_event.h"
@@ -1199,7 +1200,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
};
/* core */
-void __init intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr)
{
lbr->nr = 4;
lbr->tos = MSR_LBR_TOS;
@@ -1213,7 +1214,7 @@ void __init intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr)
}
/* nehalem/westmere */
-void __init intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr)
{
lbr->nr = 16;
lbr->tos = MSR_LBR_TOS;
@@ -1233,7 +1234,7 @@ void __init intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr)
}
/* sandy bridge */
-void __init intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr)
{
lbr->nr = 16;
lbr->tos = MSR_LBR_TOS;
@@ -1252,7 +1253,7 @@ void __init intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr)
}
/* haswell */
-void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr)
{
lbr->nr = 16;
lbr->tos = MSR_LBR_TOS;
@@ -1267,7 +1268,7 @@ void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr)
}
/* skylake */
-__init void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr)
{
lbr->nr = 32;
lbr->tos = MSR_LBR_TOS;
@@ -1286,19 +1287,8 @@ __init void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr)
}
/* atom */
-void __init intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr)
{
- /*
- * only models starting at stepping 10 seems
- * to have an operational LBR which can freeze
- * on PMU interrupt
- */
- if (boot_cpu_data.x86_model == 28
- && boot_cpu_data.x86_stepping < 10) {
- pr_cont("LBR disabled due to erratum");
- return;
- }
-
lbr->nr = 8;
lbr->tos = MSR_LBR_TOS;
lbr->from = MSR_LBR_CORE_FROM;
@@ -1311,7 +1301,7 @@ void __init intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr)
}
/* slm */
-void __init intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr)
{
lbr->nr = 8;
lbr->tos = MSR_LBR_TOS;
@@ -1329,7 +1319,7 @@ void __init intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr)
}
/* Knights Landing */
-void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr)
+static void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr)
{
lbr->nr = 8;
lbr->tos = MSR_LBR_TOS;
@@ -1343,3 +1333,128 @@ void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+/*
+ * Describe the LBR facility of the CPU identified by @family/@model in @lbr
+ * by calling the matching intel_pmu_lbr_init_*() helper and, for the models
+ * listed with them, setting the pt_coexist/double_abort flags.
+ *
+ * Only family 6 is handled. For any other family, and for family 6 models
+ * that are not listed below, @lbr is left exactly as the caller passed it.
+ *
+ * The case list is meant to mirror the model switch in intel_pmu_init();
+ * update both together when support for new models is added.
+ */
+static void __intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model)
+{
+ if (family != 0x6) /* the INTEL_FAM6_* model numbers below only apply to family 6 */
+ return;
+
+ switch (model) {
+ case INTEL_FAM6_CORE_YONAH: /* listed, but no LBR setup is done: @lbr stays untouched */
+ break;
+ case INTEL_FAM6_CORE2_MEROM:
+ case INTEL_FAM6_CORE2_MEROM_L:
+ case INTEL_FAM6_CORE2_PENRYN:
+ case INTEL_FAM6_CORE2_DUNNINGTON:
+ intel_pmu_lbr_init_core(lbr);
+ break;
+ case INTEL_FAM6_NEHALEM:
+ case INTEL_FAM6_NEHALEM_EP:
+ case INTEL_FAM6_NEHALEM_EX:
+ intel_pmu_lbr_init_nhm(lbr);
+ break;
+ case INTEL_FAM6_ATOM_BONNELL:
+ case INTEL_FAM6_ATOM_BONNELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL:
+ case INTEL_FAM6_ATOM_SALTWELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+ intel_pmu_lbr_init_atom(lbr); /* no stepping check here; only model is known to this function */
+ break;
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_D:
+ case INTEL_FAM6_ATOM_SILVERMONT_MID:
+ case INTEL_FAM6_ATOM_AIRMONT:
+ case INTEL_FAM6_ATOM_AIRMONT_MID:
+ intel_pmu_lbr_init_slm(lbr);
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_GOLDMONT_D:
+ intel_pmu_lbr_init_skl(lbr);
+ lbr->pt_coexist = true;
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* same setup as Goldmont; kept as its own case group */
+ intel_pmu_lbr_init_skl(lbr);
+ lbr->pt_coexist = true;
+ break;
+ case INTEL_FAM6_ATOM_TREMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT:
+ intel_pmu_lbr_init_skl(lbr);
+ lbr->pt_coexist = true;
+ break;
+ case INTEL_FAM6_WESTMERE:
+ case INTEL_FAM6_WESTMERE_EP:
+ case INTEL_FAM6_WESTMERE_EX:
+ intel_pmu_lbr_init_nhm(lbr);
+ break;
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
+ intel_pmu_lbr_init_snb(lbr);
+ break;
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE_X:
+ intel_pmu_lbr_init_snb(lbr);
+ break;
+ case INTEL_FAM6_HASWELL:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_HASWELL_L:
+ case INTEL_FAM6_HASWELL_G:
+ intel_pmu_lbr_init_hsw(lbr);
+ lbr->double_abort = true; /* set for Haswell only; Broadwell below uses the same init without it */
+ break;
+ case INTEL_FAM6_BROADWELL:
+ case INTEL_FAM6_BROADWELL_D:
+ case INTEL_FAM6_BROADWELL_G:
+ case INTEL_FAM6_BROADWELL_X:
+ intel_pmu_lbr_init_hsw(lbr);
+ break;
+ case INTEL_FAM6_XEON_PHI_KNL:
+ case INTEL_FAM6_XEON_PHI_KNM:
+ intel_pmu_lbr_init_knl(lbr);
+ break;
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_SKYLAKE_L:
+ case INTEL_FAM6_SKYLAKE:
+ case INTEL_FAM6_KABYLAKE_L:
+ case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
+ intel_pmu_lbr_init_skl(lbr);
+ break;
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
+ case INTEL_FAM6_ICELAKE_L:
+ case INTEL_FAM6_ICELAKE:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
+ intel_pmu_lbr_init_skl(lbr);
+ lbr->pt_coexist = true;
+ break;
+ } /* no default: models not listed above leave @lbr untouched */
+}
+
+/*
+ * Boot-time setup of the host LBR description in x86_pmu.lbr.
+ *
+ * The description is always cleared first and is then filled in from the
+ * boot CPU's family/model, unless the CPU is hit by the Bonnell erratum
+ * below, in which case it stays all-zero.
+ */
+void __init intel_pmu_lbr_init(void)
+{
+	struct x86_pmu_lbr *host_lbr = &x86_pmu.lbr;
+
+	memset(host_lbr, 0, sizeof(*host_lbr));
+
+	/*
+	 * Everything except Bonnell below stepping 10 gets its LBR
+	 * description filled in from the boot CPU's family/model.
+	 */
+	if (boot_cpu_data.x86_model != INTEL_FAM6_ATOM_BONNELL ||
+	    boot_cpu_data.x86_stepping >= 10) {
+		__intel_pmu_lbr_fill(host_lbr, boot_cpu_data.x86,
+				     boot_cpu_data.x86_model);
+		return;
+	}
+
+	/*
+	 * Only Bonnell parts starting at stepping 10 seem to have an
+	 * operational LBR which can freeze on PMU interrupt; older
+	 * steppings keep the zeroed description.
+	 */
+	pr_cont("LBR disabled due to erratum");
+}
+
+/**
+ * intel_pmu_lbr_fill - describe the LBR facility of an arbitrary CPU model
+ * @lbr:    caller-owned descriptor to fill in
+ * @family: CPU family to look up
+ * @model:  CPU model to look up
+ *
+ * @lbr is zeroed first, so it stays all-zero when __intel_pmu_lbr_fill()
+ * does not know the given family/model.
+ *
+ * Only the caller's @lbr is cleared here. The host's own x86_pmu.lbr must
+ * not be touched: this routine is intended for callers such as KVM that
+ * pass guest CPU information, and clearing x86_pmu.lbr would wipe the host
+ * LBR setup while leaving @lbr uninitialised for unknown models.
+ *
+ * NOTE(review): intel_pmu_lbr_init_knl() appears to adjust
+ * x86_pmu.intel_cap.lbr_format, which would still be a host-visible side
+ * effect for KNL/KNM lookups - confirm whether that needs to be split out.
+ */
+void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model)
+{
+	memset(lbr, 0, sizeof(*lbr));
+
+	__intel_pmu_lbr_fill(lbr, family, model);
+}
+EXPORT_SYMBOL_GPL(intel_pmu_lbr_fill);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index b48cf9ed405c..e97f7aca54ec 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1058,21 +1058,7 @@ void intel_pmu_lbr_disable_all(void);
void intel_pmu_lbr_read(void);
-void intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr);
-
-void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr);
+void intel_pmu_lbr_init(void);
void intel_pmu_pebs_data_source_nhm(void);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 721f4f2034c5..e663f82e7dde 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -172,6 +172,8 @@ struct x86_pmu_lbr {
#define lbr_double_abort lbr.double_abort
#define lbr_pt_coexist lbr.pt_coexist
+void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model);
+
/*
* Fixed-purpose performance events:
*/
--
2.27.0
More information about the Devel
mailing list