[Devel] [PATCH RHEL7 COMMIT] ms/perf/x86/intel: make reusable LBR initialization code

Konstantin Khorenko khorenko at virtuozzo.com
Fri Dec 8 19:03:25 MSK 2017


The commit is pushed to "branch-rh7-3.10.0-693.11.1.vz7.39.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-693.11.1.vz7.39.2
------>
commit 4c3abd7f596b41567da6b93e065dd72c505b87a8
Author: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
Date:   Fri Dec 8 19:03:25 2017 +0300

    ms/perf/x86/intel: make reusable LBR initialization code
    
    This patch introduces a globally visible intel_pmu_lbr_fill() routine,
    which gathers information about which LBR MSRs are supported for a
    given CPU family/model.
    
    The routine is intended to be used in KVM code, with guest CPU
    information as input. For this reason, it must not have any side
    effects that could affect the host system.
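    
    A minimal sketch (not part of this patch) of how a KVM-side caller
    could use the routine; the vmx_setup_guest_lbr() function and the
    guest_cpuid_family()/guest_cpuid_model() helpers are assumptions for
    illustration only:
    
        /* Hypothetical KVM-side usage: fill an x86_pmu_lbr for the guest
         * CPU model.  Only the local structure is written, so the host
         * x86_pmu state is untouched. */
        static void vmx_setup_guest_lbr(struct kvm_vcpu *vcpu)
        {
                struct x86_pmu_lbr lbr;
    
                intel_pmu_lbr_fill(&lbr, guest_cpuid_family(vcpu),
                                   guest_cpuid_model(vcpu));
                if (!lbr.nr)
                        return; /* no usable LBR on this guest CPU model */
    
                /* lbr.from/lbr.to/lbr.tos describe the MSR ranges that
                 * could be exposed to the guest */
        }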
    
     * LBR information is moved into a separate structure, `struct x86_pmu_lbr';
     * All family-specific tweaks to the gathered information are applied only
       to the global x86_pmu.lbr, preserving the current perf initialization
       behavior.
    
    https://jira.sw.ru/browse/PSBM-75679
    Signed-off-by: Jan Dakinevich <jan.dakinevich at virtuozzo.com>
---
 arch/x86/events/core.c            |   6 +-
 arch/x86/events/intel/core.c      |  48 ++-----
 arch/x86/events/intel/lbr.c       | 277 +++++++++++++++++++++++++-------------
 arch/x86/events/perf_event.h      |  27 +---
 arch/x86/include/asm/perf_event.h |  11 ++
 5 files changed, 214 insertions(+), 155 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 0551f8444bd4..56b784148b2f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -359,7 +359,7 @@ int x86_add_exclusive(unsigned int what)
 	 * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS.
 	 * LBR and BTS are still mutually exclusive.
 	 */
-	if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
+	if (x86_pmu.lbr.pt_coexist && what == x86_lbr_exclusive_pt)
 		return 0;
 
 	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
@@ -382,7 +382,7 @@ int x86_add_exclusive(unsigned int what)
 
 void x86_del_exclusive(unsigned int what)
 {
-	if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
+	if (x86_pmu.lbr.pt_coexist && what == x86_lbr_exclusive_pt)
 		return;
 
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
@@ -486,7 +486,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 			precise++;
 
 			/* Support for IP fixup */
-			if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
+			if (x86_pmu.lbr.nr || x86_pmu.intel_cap.pebs_format >= 2)
 				precise++;
 
 			if (x86_pmu.pebs_prec_dist)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d5a7eacf3d65..33c21e570649 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1988,7 +1988,7 @@ static void intel_pmu_reset(void)
 	}
 
 	/* Reset LBRs and LBR freezing */
-	if (x86_pmu.lbr_nr) {
+	if (x86_pmu.lbr.nr) {
 		update_debugctlmsr(get_debugctlmsr() &
 			~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
 	}
@@ -3050,7 +3050,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
+	if (x86_pmu.extra_regs || x86_pmu.lbr.sel_map) {
 		cpuc->shared_regs = allocate_shared_regs(cpu);
 		if (!cpuc->shared_regs)
 			goto err;
@@ -3116,7 +3116,7 @@ static void intel_pmu_cpu_starting(int cpu)
 		cpuc->shared_regs->refcnt++;
 	}
 
-	if (x86_pmu.lbr_sel_map)
+	if (x86_pmu.lbr.sel_map)
 		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 
 	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
@@ -3176,7 +3176,7 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
 {
 	if (x86_pmu.pebs_active)
 		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
+	if (x86_pmu.lbr.nr)
 		intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
@@ -3560,6 +3560,8 @@ __init int intel_pmu_init(void)
 
 	intel_ds_init();
 
+	intel_pmu_lbr_init();
+
 	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
 
 	/*
@@ -3578,8 +3580,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		intel_pmu_lbr_init_core();
-
 		x86_pmu.event_constraints = intel_core2_event_constraints;
 		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
 		pr_cont("Core2 events, ");
@@ -3593,8 +3593,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_nhm();
-
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
@@ -3623,8 +3621,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
-		intel_pmu_lbr_init_atom();
-
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
@@ -3639,8 +3635,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_slm();
-
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_slm_extra_regs;
@@ -3655,8 +3649,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_skl();
-
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_glm_extra_regs;
@@ -3667,7 +3659,6 @@ __init int intel_pmu_init(void)
 		 */
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
-		x86_pmu.lbr_pt_coexist = true;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		pr_cont("Goldmont events, ");
 		break;
@@ -3680,8 +3671,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_nhm();
-
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
@@ -3710,8 +3699,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_snb();
-
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
@@ -3748,8 +3735,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_snb();
-
 		x86_pmu.event_constraints = intel_ivb_event_constraints;
 		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
 		x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
@@ -3781,8 +3766,6 @@ __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_hsw();
-
 		x86_pmu.event_constraints = intel_hsw_event_constraints;
 		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snbep_extra_regs;
@@ -3795,7 +3778,6 @@ __init int intel_pmu_init(void)
 		x86_pmu.hw_config = hsw_hw_config;
 		x86_pmu.get_event_constraints = hsw_get_event_constraints;
 		x86_pmu.cpu_events = hsw_events_attrs;
-		x86_pmu.lbr_double_abort = true;
 		pr_cont("Haswell events, ");
 		break;
 
@@ -3817,8 +3799,6 @@ __init int intel_pmu_init(void)
 		hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
 									      BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
 
-		intel_pmu_lbr_init_hsw();
-
 		x86_pmu.event_constraints = intel_bdw_event_constraints;
 		x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_snbep_extra_regs;
@@ -3841,7 +3821,6 @@ __init int intel_pmu_init(void)
 		       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs,
 		       knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-		intel_pmu_lbr_init_knl();
 
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3862,7 +3841,6 @@ __init int intel_pmu_init(void)
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-		intel_pmu_lbr_init_skl();
 
 		x86_pmu.event_constraints = intel_skl_event_constraints;
 		x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
@@ -3936,12 +3914,12 @@ __init int intel_pmu_init(void)
 	 * Check all LBT MSR here.
 	 * Disable LBR access if any LBR MSRs can not be accessed.
 	 */
-	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
-		x86_pmu.lbr_nr = 0;
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
-		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
-		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
-			x86_pmu.lbr_nr = 0;
+	if (x86_pmu.lbr.nr && !check_msr(x86_pmu.lbr.tos, 0x3UL))
+		x86_pmu.lbr.nr = 0;
+	for (i = 0; i < x86_pmu.lbr.nr; i++) {
+		if (!(check_msr(x86_pmu.lbr.from + i, 0xffffUL) &&
+		      check_msr(x86_pmu.lbr.to + i, 0xffffUL)))
+			x86_pmu.lbr.nr = 0;
 	}
 
 	/*
@@ -3954,7 +3932,7 @@ __init int intel_pmu_init(void)
 			er->extra_msr_access = check_msr(er->msr, 0x11UL);
 			/* Disable LBR select mapping */
 			if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
-				x86_pmu.lbr_sel_map = NULL;
+				x86_pmu.lbr.sel_map = NULL;
 		}
 	}
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index d6beda099883..0ad3e3cad511 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -4,6 +4,7 @@
 #include <asm/perf_event.h>
 #include <asm/msr.h>
 #include <asm/insn.h>
+#include <asm/intel-family.h>
 
 #include "../perf_event.h"
 
@@ -161,7 +162,7 @@ static void __intel_pmu_lbr_enable(bool pmi)
 	 * did not change.
 	 */
 	if (cpuc->lbr_sel)
-		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
+		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr.sel_mask;
 	if (!pmi && cpuc->lbr_sel)
 		wrmsrl(MSR_LBR_SELECT, lbr_select);
 
@@ -190,17 +191,17 @@ static void intel_pmu_lbr_reset_32(void)
 {
 	int i;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++)
-		wrmsrl(x86_pmu.lbr_from + i, 0);
+	for (i = 0; i < x86_pmu.lbr.nr; i++)
+		wrmsrl(x86_pmu.lbr.from + i, 0);
 }
 
 static void intel_pmu_lbr_reset_64(void)
 {
 	int i;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
-		wrmsrl(x86_pmu.lbr_from + i, 0);
-		wrmsrl(x86_pmu.lbr_to   + i, 0);
+	for (i = 0; i < x86_pmu.lbr.nr; i++) {
+		wrmsrl(x86_pmu.lbr.from + i, 0);
+		wrmsrl(x86_pmu.lbr.to   + i, 0);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 			wrmsrl(MSR_LBR_INFO_0 + i, 0);
 	}
@@ -208,7 +209,7 @@ static void intel_pmu_lbr_reset_64(void)
 
 void intel_pmu_lbr_reset(void)
 {
-	if (!x86_pmu.lbr_nr)
+	if (!x86_pmu.lbr.nr)
 		return;
 
 	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
@@ -224,7 +225,7 @@ static inline u64 intel_pmu_lbr_tos(void)
 {
 	u64 tos;
 
-	rdmsrl(x86_pmu.lbr_tos, tos);
+	rdmsrl(x86_pmu.lbr.tos, tos);
 	return tos;
 }
 
@@ -245,12 +246,12 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 		return;
 	}
 
-	mask = x86_pmu.lbr_nr - 1;
+	mask = x86_pmu.lbr.nr - 1;
 	tos = intel_pmu_lbr_tos();
 	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
-		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
-		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+		wrmsrl(x86_pmu.lbr.from + lbr_idx, task_ctx->lbr_from[i]);
+		wrmsrl(x86_pmu.lbr.to + lbr_idx, task_ctx->lbr_to[i]);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
@@ -268,12 +269,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 		return;
 	}
 
-	mask = x86_pmu.lbr_nr - 1;
+	mask = x86_pmu.lbr.nr - 1;
 	tos = intel_pmu_lbr_tos();
 	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
-		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
-		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
+		rdmsrl(x86_pmu.lbr.from + lbr_idx, task_ctx->lbr_from[i]);
+		rdmsrl(x86_pmu.lbr.to + lbr_idx, task_ctx->lbr_to[i]);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
@@ -318,7 +319,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
-	if (!x86_pmu.lbr_nr)
+	if (!x86_pmu.lbr.nr)
 		return;
 
 	cpuc->br_sel = event->hw.branch_reg.reg;
@@ -357,7 +358,7 @@ void intel_pmu_lbr_del(struct perf_event *event)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
-	if (!x86_pmu.lbr_nr)
+	if (!x86_pmu.lbr.nr)
 		return;
 
 	if (branch_user_callstack(cpuc->br_sel) &&
@@ -389,11 +390,11 @@ void intel_pmu_lbr_disable_all(void)
 
 static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 {
-	unsigned long mask = x86_pmu.lbr_nr - 1;
+	unsigned long mask = x86_pmu.lbr.nr - 1;
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < x86_pmu.lbr.nr; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		union {
 			struct {
@@ -403,7 +404,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 			u64     lbr;
 		} msr_lastbranch;
 
-		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+		rdmsrl(x86_pmu.lbr.from + lbr_idx, msr_lastbranch.lbr);
 
 		cpuc->lbr_entries[i].from	= msr_lastbranch.from;
 		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
@@ -422,12 +423,12 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
 	bool need_info = false;
-	unsigned long mask = x86_pmu.lbr_nr - 1;
+	unsigned long mask = x86_pmu.lbr.nr - 1;
 	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
-	int num = x86_pmu.lbr_nr;
+	int num = x86_pmu.lbr.nr;
 
 	if (cpuc->lbr_sel) {
 		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
@@ -442,8 +443,8 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		u16 cycles = 0;
 		int lbr_flags = lbr_desc[lbr_format];
 
-		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
-		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+		rdmsrl(x86_pmu.lbr.from + lbr_idx, from);
+		rdmsrl(x86_pmu.lbr.to   + lbr_idx, to);
 
 		if (lbr_format == LBR_FORMAT_INFO && need_info) {
 			u64 info;
@@ -485,7 +486,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		 * If the abort just happened outside the window
 		 * the extra entry cannot be removed.
 		 */
-		if (abort && x86_pmu.lbr_double_abort && out > 0)
+		if (abort && x86_pmu.lbr.double_abort && out > 0)
 			out--;
 
 		cpuc->lbr_entries[out].from	 = from;
@@ -594,7 +595,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 		if (!(br_type & (1ULL << i)))
 			continue;
 
-		v = x86_pmu.lbr_sel_map[i];
+		v = x86_pmu.lbr.sel_map[i];
 		if (v == LBR_NOT_SUPP)
 			return -EOPNOTSUPP;
 
@@ -612,7 +613,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 	 * But the 10th bit LBR_CALL_STACK does not operate
 	 * in suppress mode.
 	 */
-	reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
+	reg->config = mask ^ (x86_pmu.lbr.sel_mask & ~LBR_CALL_STACK);
 
 	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
 	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
@@ -629,7 +630,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * no LBR on this PMU
 	 */
-	if (!x86_pmu.lbr_nr)
+	if (!x86_pmu.lbr.nr)
 		return -EOPNOTSUPP;
 
 	/*
@@ -642,7 +643,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	/*
 	 * setup HW LBR filter, if any
 	 */
-	if (x86_pmu.lbr_sel_map)
+	if (x86_pmu.lbr.sel_map)
 		ret = intel_pmu_setup_hw_lbr_filter(event);
 
 	return ret;
@@ -931,12 +932,12 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 };
 
 /* core */
-void __init intel_pmu_lbr_init_core(void)
+static void intel_pmu_lbr_init_core(struct x86_pmu_lbr *lbr)
 {
-	x86_pmu.lbr_nr     = 4;
-	x86_pmu.lbr_tos    = MSR_LBR_TOS;
-	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+	lbr->nr     = 4;
+	lbr->tos    = MSR_LBR_TOS;
+	lbr->from   = MSR_LBR_CORE_FROM;
+	lbr->to     = MSR_LBR_CORE_TO;
 
 	/*
 	 * SW branch filter usage:
@@ -946,15 +947,15 @@ void __init intel_pmu_lbr_init_core(void)
 }
 
 /* nehalem/westmere */
-void __init intel_pmu_lbr_init_nhm(void)
+static void intel_pmu_lbr_init_nhm(struct x86_pmu_lbr *lbr)
 {
-	x86_pmu.lbr_nr     = 16;
-	x86_pmu.lbr_tos    = MSR_LBR_TOS;
-	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
-	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+	lbr->nr     = 16;
+	lbr->tos    = MSR_LBR_TOS;
+	lbr->from   = MSR_LBR_NHM_FROM;
+	lbr->to     = MSR_LBR_NHM_TO;
 
-	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+	lbr->sel_mask = LBR_SEL_MASK;
+	lbr->sel_map  = nhm_lbr_sel_map;
 
 	/*
 	 * SW branch filter usage:
@@ -967,15 +968,15 @@ void __init intel_pmu_lbr_init_nhm(void)
 }
 
 /* sandy bridge */
-void __init intel_pmu_lbr_init_snb(void)
+static void intel_pmu_lbr_init_snb(struct x86_pmu_lbr *lbr)
 {
-	x86_pmu.lbr_nr	 = 16;
-	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
-	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+	lbr->nr   = 16;
+	lbr->tos  = MSR_LBR_TOS;
+	lbr->from = MSR_LBR_NHM_FROM;
+	lbr->to   = MSR_LBR_NHM_TO;
 
-	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+	lbr->sel_mask = LBR_SEL_MASK;
+	lbr->sel_map  = snb_lbr_sel_map;
 
 	/*
 	 * SW branch filter usage:
@@ -987,29 +988,27 @@ void __init intel_pmu_lbr_init_snb(void)
 }
 
 /* haswell */
-void intel_pmu_lbr_init_hsw(void)
+static void intel_pmu_lbr_init_hsw(struct x86_pmu_lbr *lbr)
 {
-	x86_pmu.lbr_nr	 = 16;
-	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
-	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+	lbr->nr   = 16;
+	lbr->tos  = MSR_LBR_TOS;
+	lbr->from = MSR_LBR_NHM_FROM;
+	lbr->to   = MSR_LBR_NHM_TO;
 
-	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
-
-	pr_cont("16-deep LBR, ");
+	lbr->sel_mask = LBR_SEL_MASK;
+	lbr->sel_map  = hsw_lbr_sel_map;
 }
 
 /* skylake */
-__init void intel_pmu_lbr_init_skl(void)
+static void intel_pmu_lbr_init_skl(struct x86_pmu_lbr *lbr)
 {
-	x86_pmu.lbr_nr	 = 32;
-	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
-	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
-	x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
+	lbr->nr   = 32;
+	lbr->tos  = MSR_LBR_TOS;
+	lbr->from = MSR_LBR_NHM_FROM;
+	lbr->to   = MSR_LBR_NHM_TO;
 
-	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+	lbr->sel_mask = LBR_SEL_MASK;
+	lbr->sel_map  = hsw_lbr_sel_map;
 
 	/*
 	 * SW branch filter usage:
@@ -1021,23 +1020,12 @@ __init void intel_pmu_lbr_init_skl(void)
 }
 
 /* atom */
-void __init intel_pmu_lbr_init_atom(void)
+static void intel_pmu_lbr_init_atom(struct x86_pmu_lbr *lbr)
 {
-	/*
-	 * only models starting at stepping 10 seems
-	 * to have an operational LBR which can freeze
-	 * on PMU interrupt
-	 */
-	if (boot_cpu_data.x86_model == 28
-	    && boot_cpu_data.x86_mask < 10) {
-		pr_cont("LBR disabled due to erratum");
-		return;
-	}
-
-	x86_pmu.lbr_nr	   = 8;
-	x86_pmu.lbr_tos    = MSR_LBR_TOS;
-	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+	lbr->nr     = 8;
+	lbr->tos    = MSR_LBR_TOS;
+	lbr->from   = MSR_LBR_CORE_FROM;
+	lbr->to     = MSR_LBR_CORE_TO;
 
 	/*
 	 * SW branch filter usage:
@@ -1047,33 +1035,132 @@ void __init intel_pmu_lbr_init_atom(void)
 }
 
 /* slm */
-void __init intel_pmu_lbr_init_slm(void)
+static void intel_pmu_lbr_init_slm(struct x86_pmu_lbr *lbr)
 {
-	x86_pmu.lbr_nr	   = 8;
-	x86_pmu.lbr_tos    = MSR_LBR_TOS;
-	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
-	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+	lbr->nr     = 8;
+	lbr->tos    = MSR_LBR_TOS;
+	lbr->from   = MSR_LBR_CORE_FROM;
+	lbr->to     = MSR_LBR_CORE_TO;
 
-	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+	lbr->sel_mask = LBR_SEL_MASK;
+	lbr->sel_map  = nhm_lbr_sel_map;
 
 	/*
 	 * SW branch filter usage:
 	 * - compensate for lack of HW filter
 	 */
-	pr_cont("8-deep LBR, ");
 }
 
 /* Knights Landing */
-void intel_pmu_lbr_init_knl(void)
+static void intel_pmu_lbr_init_knl(struct x86_pmu_lbr *lbr)
+{
+	lbr->nr     = 8;
+	lbr->tos    = MSR_LBR_TOS;
+	lbr->from   = MSR_LBR_NHM_FROM;
+	lbr->to     = MSR_LBR_NHM_TO;
+
+	lbr->sel_mask = LBR_SEL_MASK;
+	lbr->sel_map  = snb_lbr_sel_map;
+}
+
+static void __intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model)
 {
-	x86_pmu.lbr_nr	   = 8;
-	x86_pmu.lbr_tos    = MSR_LBR_TOS;
-	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
-	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+	bool apply_tweaks = lbr == &x86_pmu.lbr;
 
-	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
-	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+	memset(lbr, 0, sizeof(struct x86_pmu_lbr));
 
-	pr_cont("8-deep LBR, ");
+	if (family != 0x6)
+		return;
+
+	switch (model) {
+	case INTEL_FAM6_CORE_YONAH:
+		break;
+	case INTEL_FAM6_CORE2_MEROM:
+	case INTEL_FAM6_CORE2_MEROM_L:
+	case INTEL_FAM6_CORE2_PENRYN:
+	case INTEL_FAM6_CORE2_DUNNINGTON:
+		intel_pmu_lbr_init_core(lbr);
+		break;
+	case INTEL_FAM6_NEHALEM:
+	case INTEL_FAM6_NEHALEM_EP:
+	case INTEL_FAM6_NEHALEM_EX:
+	case INTEL_FAM6_WESTMERE:
+	case INTEL_FAM6_WESTMERE_EP:
+	case INTEL_FAM6_WESTMERE_EX:
+		intel_pmu_lbr_init_nhm(lbr);
+		break;
+	case INTEL_FAM6_ATOM_PINEVIEW:
+	case INTEL_FAM6_ATOM_LINCROFT:
+	case INTEL_FAM6_ATOM_PENWELL:
+	case INTEL_FAM6_ATOM_CLOVERVIEW:
+	case INTEL_FAM6_ATOM_CEDARVIEW:
+		/*
+		 * only models starting at stepping 10 seems
+		 * to have an operational LBR which can freeze
+		 * on PMU interrupt
+		 */
+		if (apply_tweaks &&
+		    boot_cpu_data.x86_model == INTEL_FAM6_ATOM_PINEVIEW &&
+		    boot_cpu_data.x86_mask < 10) {
+			pr_cont("LBR disabled due to erratum");
+			return;
+		}
+		intel_pmu_lbr_init_atom(lbr);
+		break;
+	case INTEL_FAM6_ATOM_SILVERMONT1:
+	case INTEL_FAM6_ATOM_SILVERMONT2:
+	case INTEL_FAM6_ATOM_AIRMONT:
+		intel_pmu_lbr_init_slm(lbr);
+		break;
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_DENVERTON:
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		lbr->pt_coexist = true;
+	case INTEL_FAM6_SKYLAKE_MOBILE:
+	case INTEL_FAM6_SKYLAKE_DESKTOP:
+	case INTEL_FAM6_SKYLAKE_X:
+	case INTEL_FAM6_KABYLAKE_MOBILE:
+	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		intel_pmu_lbr_init_skl(lbr);
+		break;
+	case INTEL_FAM6_SANDYBRIDGE:
+	case INTEL_FAM6_SANDYBRIDGE_X:
+	case INTEL_FAM6_IVYBRIDGE:
+	case INTEL_FAM6_IVYBRIDGE_X:
+		intel_pmu_lbr_init_snb(lbr);
+		break;
+	case INTEL_FAM6_HASWELL_CORE:
+	case INTEL_FAM6_HASWELL_X:
+	case INTEL_FAM6_HASWELL_ULT:
+	case INTEL_FAM6_HASWELL_GT3E:
+		lbr->double_abort = true;
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_XEON_D:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_X:
+		/*
+		 * OpenVZ kernel doesn't have Goldmont Plus CPU PMU support
+		 */
+		if (apply_tweaks &&
+		    boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GEMINI_LAKE)
+			return;
+		intel_pmu_lbr_init_hsw(lbr);
+		break;
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
+		intel_pmu_lbr_init_knl(lbr);
+		break;
+	}
+}
+
+void __init intel_pmu_lbr_init(void)
+{
+	__intel_pmu_lbr_fill(&x86_pmu.lbr, boot_cpu_data.x86,
+			     boot_cpu_data.x86_model);
+}
+
+void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model)
+{
+	__intel_pmu_lbr_fill(lbr, family, model);
 }
+EXPORT_SYMBOL_GPL(intel_pmu_lbr_fill);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9772a20bad4e..7104f4674c9c 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -598,12 +598,7 @@ struct x86_pmu {
 	/*
 	 * Intel LBR
 	 */
-	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
-	int		lbr_nr;			   /* hardware stack size */
-	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
-	const int	*lbr_sel_map;		   /* lbr_select mappings */
-	bool		lbr_double_abort;	   /* duplicated lbr aborts */
-	bool		lbr_pt_coexist;		   /* (LBR|BTS) may coexist with PT */
+	struct x86_pmu_lbr lbr;
 
 	/*
 	 * Intel PT/LBR/BTS are exclusive
@@ -673,8 +668,8 @@ extern struct x86_pmu x86_pmu __read_mostly;
 
 static inline bool x86_pmu_has_lbr_callstack(void)
 {
-	return  x86_pmu.lbr_sel_map &&
-		x86_pmu.lbr_sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
+	return  x86_pmu.lbr.sel_map &&
+		x86_pmu.lbr.sel_map[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT] > 0;
 }
 
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -906,21 +901,9 @@ void intel_pmu_lbr_disable_all(void);
 
 void intel_pmu_lbr_read(void);
 
-void intel_pmu_lbr_init_core(void);
+void intel_pmu_lbr_init(void);
 
-void intel_pmu_lbr_init_nhm(void);
-
-void intel_pmu_lbr_init_atom(void);
-
-void intel_pmu_lbr_init_slm(void);
-
-void intel_pmu_lbr_init_snb(void);
-
-void intel_pmu_lbr_init_hsw(void);
-
-void intel_pmu_lbr_init_skl(void);
-
-void intel_pmu_lbr_init_knl(void);
+void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model);
 
 void intel_pmu_pebs_data_source_nhm(void);
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061bba1d..bf9f2086dbbe 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -124,6 +124,15 @@ struct x86_pmu_capability {
 	int		events_mask_len;
 };
 
+struct x86_pmu_lbr {
+	unsigned long	tos, from, to;	/* MSR base regs */
+	int		nr;		/* hardware stack size */
+	u64		sel_mask;	/* LBR_SELECT valid bits */
+	const int	*sel_map;	/* lbr_select mappings */
+	bool		double_abort;	/* duplicated lbr aborts */
+	bool		pt_coexist;	/* (LBR|BTS) may coexist with PT */
+};
+
 /*
  * Fixed-purpose performance events:
  */
@@ -299,4 +308,6 @@ static inline void perf_check_microcode(void) { }
 
 #define arch_perf_out_copy_user copy_from_user_nmi
 
+extern void intel_pmu_lbr_fill(struct x86_pmu_lbr *lbr, u8 family, u8 model);
+
 #endif /* _ASM_X86_PERF_EVENT_H */

