[CRIU] [PATCH 4/9] cpuinfo: x86 -- Rework cpuinfo features fetching
Cyrill Gorcunov
gorcunov at openvz.org
Thu Sep 25 06:03:23 PDT 2014
Instead of parsing procfs, let's use the native cpuid() instruction; it's way faster.
The downside is that the kernel may disable some features via
boot command-line options even if they are present in hardware, but for us
that's fine -- we will be testing the hardware CPU for features anyway.
The X86_FEATURE_ bits are gathered from two sources: the Linux kernel
and the CPU specifications.
At the same time, drop the cpu_set_feature() helper -- it's no longer needed.
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
arch/aarch64/cpu.c | 4 -
arch/arm/cpu.c | 4 -
arch/x86/cpu.c | 189 +++++++++++++++++++++++++++++++++++++++------
arch/x86/include/asm/cpu.h | 189 ++++++++++++++++++++++++++++++++++++++++++++-
include/cpu.h | 1 -
5 files changed, 349 insertions(+), 38 deletions(-)
diff --git a/arch/aarch64/cpu.c b/arch/aarch64/cpu.c
index 6f4b5284a377..fc1b73e6dd5f 100644
--- a/arch/aarch64/cpu.c
+++ b/arch/aarch64/cpu.c
@@ -3,10 +3,6 @@
#include "cpu.h"
-void cpu_set_feature(unsigned int feature)
-{
-}
-
bool cpu_has_feature(unsigned int feature)
{
return false;
diff --git a/arch/arm/cpu.c b/arch/arm/cpu.c
index 6f4b5284a377..fc1b73e6dd5f 100644
--- a/arch/arm/cpu.c
+++ b/arch/arm/cpu.c
@@ -3,10 +3,6 @@
#include "cpu.h"
-void cpu_set_feature(unsigned int feature)
-{
-}
-
bool cpu_has_feature(unsigned int feature)
{
return false;
diff --git a/arch/x86/cpu.c b/arch/x86/cpu.c
index e180d2306e9e..12943e46927c 100644
--- a/arch/x86/cpu.c
+++ b/arch/x86/cpu.c
@@ -22,42 +22,181 @@
#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
-const char * const x86_cap_flags[NCAPINTS_BITS] = {
- [X86_FEATURE_FPU] = "fpu",
- [X86_FEATURE_FXSR] = "fxsr",
- [X86_FEATURE_XSAVE] = "xsave",
-};
+static struct cpuinfo_x86 rt_cpu_info;
-static DECLARE_BITMAP(cpu_features, NCAPINTS_BITS);
-#define cpu_has(bit) test_bit(bit, cpu_features)
+static void set_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ if (likely(feature < NCAPINTS_BITS))
+ set_bit(feature, (unsigned long *)c->x86_capability);
+}
-void cpu_set_feature(unsigned int feature)
+static void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
{
if (likely(feature < NCAPINTS_BITS))
- set_bit(feature, cpu_features);
+ clear_bit(feature, (unsigned long *)c->x86_capability);
}
-bool cpu_has_feature(unsigned int feature)
+static int test_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
{
if (likely(feature < NCAPINTS_BITS))
- return cpu_has(feature);
- return false;
+ return test_bit(feature, (unsigned long *)c->x86_capability);
+ return 0;
}
-static int proc_cpuinfo_match(char *tok)
+bool cpu_has_feature(unsigned int feature)
{
- if (!strcmp(tok, x86_cap_flags[X86_FEATURE_FXSR]))
- cpu_set_feature(X86_FEATURE_FXSR);
- else if (!strcmp(tok, x86_cap_flags[X86_FEATURE_XSAVE]))
- cpu_set_feature(X86_FEATURE_XSAVE);
- else if (!strcmp(tok, x86_cap_flags[X86_FEATURE_FPU]))
- cpu_set_feature(X86_FEATURE_FPU);
+ return test_cpu_cap(&rt_cpu_info, feature);
+}
+
+static int cpu_init_cpuid(struct cpuinfo_x86 *c)
+{
+ /*
+ * See cpu_detect() in the kernel; also consult
+ * the CPUID specs not only in the general SDM
+ * but also in the extended instruction set
+ * reference.
+ */
+
+ /* Get max basic leaf and vendor name; the string is laid out EBX, EDX, ECX */
+ cpuid(0x00000000,
+ (unsigned int *)&c->cpuid_level,
+ (unsigned int *)&c->x86_vendor_id[0],
+ (unsigned int *)&c->x86_vendor_id[8],
+ (unsigned int *)&c->x86_vendor_id[4]);
+
+ if (!strcmp(c->x86_vendor_id, "GenuineIntel")) {
+ c->x86_vendor = X86_VENDOR_INTEL;
+ } else if (!strcmp(c->x86_vendor_id, "AuthenticAMD")) {
+ c->x86_vendor = X86_VENDOR_AMD;
+ } else {
+ pr_err("Unsupported CPU vendor %s\n",
+ c->x86_vendor_id);
+ return -1;
+ }
+
+ c->x86_family = 4;
+
+ /* Intel-defined flags: level 0x00000001 */
+ if (c->cpuid_level >= 0x00000001) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
+ c->x86_family = (eax >> 8) & 0xf;
+ c->x86_model = (eax >> 4) & 0xf;
+ c->x86_mask = eax & 0xf;
+
+ if (c->x86_family == 0xf)
+ c->x86_family += (eax >> 20) & 0xff;
+ if (c->x86_family >= 0x6)
+ c->x86_model += ((eax >> 16) & 0xf) << 4;
+
+ c->x86_capability[0] = edx;
+ c->x86_capability[4] = ecx;
+ }
+
+ /* Additional Intel-defined flags: level 0x00000007, sub-leaf 0 */
+ if (c->cpuid_level >= 0x00000007) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[9] = ebx;
+ c->x86_capability[11] = ecx;
+ }
+
+ /* Extended state features: level 0x0000000d, sub-leaf 1 */
+ if (c->cpuid_level >= 0x0000000d) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid_count(0x0000000d, 1, &eax, &ebx, &ecx, &edx);
+ c->x86_capability[10] = eax;
+ }
+
+ /* AMD-defined flags: level 0x80000001 */
+ c->extended_cpuid_level = cpuid_eax(0x80000000);
+
+ if ((c->extended_cpuid_level & 0xffff0000) == 0x80000000) {
+ if (c->extended_cpuid_level >= 0x80000001) {
+ c->x86_capability[1] = cpuid_edx(0x80000001);
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
+ }
+ }
+
+ /*
+ * We do not care about scattered features for now;
+ * if that changes, see init_scattered_cpuid_features()
+ * in the kernel.
+ */
+
+ if (c->extended_cpuid_level >= 0x80000004) {
+ unsigned int *v;
+ char *p, *q;
+ v = (unsigned int *)c->x86_model_id;
+ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+ c->x86_model_id[48] = 0;
+
+ /*
+ * Intel chips right-justify this string for some dumb reason;
+ * undo that brain damage:
+ */
+ p = q = &c->x86_model_id[0];
+ while (*p == ' ')
+ p++;
+ if (p != q) {
+ while (*p)
+ *q++ = *p++;
+ while (q <= &c->x86_model_id[48])
+ *q++ = '\0'; /* Zero-pad the rest */
+ }
+ }
+
+ /* On x86-64 NOPL is always present */
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ /*
+ * Strictly speaking we need to read MSR_IA32_MISC_ENABLE
+ * here but on ring3 it's impossible.
+ */
+ if (c->x86_family == 15) {
+ clear_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ clear_cpu_cap(c, X86_FEATURE_ERMS);
+ } else if (c->x86_family == 6) {
+ /* On x86-64 rep is fine */
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ }
+
+ /* See filter_cpuid_features in kernel */
+ if ((s32)c->cpuid_level < (s32)0x0000000d)
+ clear_cpu_cap(c, X86_FEATURE_XSAVE);
+ break;
+ case X86_VENDOR_AMD:
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0 * 32 + 31);
+ if (c->x86_family >= 0x10)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ if (c->x86_family == 0xf) {
+ u32 level;
+
+ /* On C+ stepping K8 rep microcode works well for copy/memset */
+ level = cpuid_eax(1);
+ if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ }
+ break;
+ }
+
return 0;
}
int cpu_init(void)
{
- if (parse_cpuinfo_features(proc_cpuinfo_match))
+ if (cpu_init_cpuid(&rt_cpu_info))
return -1;
BUILD_BUG_ON(sizeof(struct xsave_struct) != XSAVE_SIZE);
@@ -67,17 +206,17 @@ int cpu_init(void)
* Make sure that at least FPU is onboard
* and fxsave is supported.
*/
- if (cpu_has(X86_FEATURE_FPU)) {
- if (!cpu_has(X86_FEATURE_FXSR)) {
+ if (cpu_has_feature(X86_FEATURE_FPU)) {
+ if (!cpu_has_feature(X86_FEATURE_FXSR)) {
pr_err("missing support fxsave/restore insns\n");
return -1;
}
}
pr_debug("fpu:%d fxsr:%d xsave:%d\n",
- !!cpu_has(X86_FEATURE_FPU),
- !!cpu_has(X86_FEATURE_FXSR),
- !!cpu_has(X86_FEATURE_XSAVE));
+ !!cpu_has_feature(X86_FEATURE_FPU),
+ !!cpu_has_feature(X86_FEATURE_FXSR),
+ !!cpu_has_feature(X86_FEATURE_XSAVE));
return 0;
}
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 407610aa22e8..db7ab5c6af0b 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -4,19 +4,200 @@
#include "asm/types.h"
/*
- * Adopted from linux kernel.
+ * Adopted from linux kernel and enhanced from
+ * Intel/AMD manuals.
*/
-#define NCAPINTS (10) /* N 32-bit words worth of info */
+#define NCAPINTS (12) /* N 32-bit words worth of info */
#define NCAPINTS_BITS (NCAPINTS * 32)
#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME (0*32+ 1) /* Virtual 8086 Mode Enhancements */
+#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extension */
+#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR (0*32+ 5) /* Model Specific Registers RDMSR and WRMSR Instructions */
+#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extension */
+#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC (0*32+ 9) /* APIC On-Chip */
+#define X86_FEATURE_SEP (0*32+11) /* SYSENTER and SYSEXIT Instructions */
+#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE (0*32+13) /* PTE Global Bit */
+#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 (0*32+17) /* 36-Bit Page Size Extension */
+#define X86_FEATURE_PSN (0*32+18) /* Processor Serial Number */
+#define X86_FEATURE_DS (0*32+21) /* Debug Store */
+#define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_ACPI (0*32+22) /* Thermal Monitor and Software Controlled Clock Facilities */
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM (0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */
+#define X86_FEATURE_SS (0*32+27) /* Self Snoop */
+#define X86_FEATURE_HTT (0*32+28) /* Multi-Threading */
+#define X86_FEATURE_TM (0*32+29) /* Thermal Monitor */
+#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
+
+#define X86_FEATURE_SYSCALL (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
+#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW (1*32+31) /* 3DNow! */
+
+#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */
+#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
+
+#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit DS Area */
+#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */
+#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */
+#define X86_FEATURE_VMX (4*32+ 5) /* Virtual Machine Extensions */
+#define X86_FEATURE_SMX (4*32+ 6) /* Safer Mode Extensions */
+#define X86_FEATURE_EST (4*32+ 7) /* Enhanced Intel SpeedStep technology */
+#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CNXTID (4*32+10) /* L1 Context ID */
+#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_XTPR_UCTL (4*32+14) /* xTPR Update Control */
+#define X86_FEATURE_PDCM (4*32+15) /* Perfmon and Debug Capability */
+#define X86_FEATURE_PCID (4*32+17) /* Process-context identifiers */
+#define X86_FEATURE_DCA (4*32+18) /* Ability to prefetch data from a memory mapped device */
+#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
+#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSCDL (4*32+24) /* Local APIC timer supports one-shot operation using a TSC deadline value */
+#define X86_FEATURE_AES (4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */
+#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */
+
+#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
+
+#define X86_FEATURE_FSGSBASE (9*32+ 0) /* Supports RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */
+#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 Doubleword and Quadword instructions */
+#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_IPT (9*32+25) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA (9*32+29) /* Intel SHA extensions */
+#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 Byte and Word instructions */
+#define X86_FEATURE_AVXVL (9*32+31) /* AVX-512 Vector Length extensions */
+
+#define X86_FEATURE_XSAVEOPT (10*32+0) /* XSAVEOPT */
+#define X86_FEATURE_XSAVEC (10*32+1) /* XSAVEC */
+#define X86_FEATURE_XGETBV1 (10*32+2) /* XGETBV with ECX = 1 */
+#define X86_FEATURE_XSAVES (10*32+3) /* XSAVES/XRSTORS */
+
+/*
+ * Word 11 is our own; the kernel has no such entry.
+ */
+#define X86_FEATURE_PREFETCHWT1 (11*32+0) /* The PREFETCHWT1 instruction */
+
+#define X86_FEATURE_VERSION 1
+
+enum {
+ X86_VENDOR_INTEL = 0,
+ X86_VENDOR_AMD = 1,
+
+ X86_VENDOR_MAX
+};
+
+struct cpuinfo_x86 {
+ u8 x86_family;
+ u8 x86_vendor;
+ u8 x86_model;
+ u8 x86_mask;
+ u32 x86_capability[NCAPINTS];
+ u32 extended_cpuid_level;
+ int cpuid_level;
+ char x86_vendor_id[16];
+ char x86_model_id[64];
+};
+
+static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+static inline void cpuid(unsigned int op,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ native_cpuid(eax, ebx, ecx, edx);
+}
+
+static inline void cpuid_count(unsigned int op, int count,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = count;
+ native_cpuid(eax, ebx, ecx, edx);
+}
+
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ecx;
+}
+
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+ unsigned int eax, ebx, ecx, edx;
-extern const char * const x86_cap_flags[NCAPINTS_BITS];
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return edx;
+}
-extern void cpu_set_feature(unsigned int feature);
extern bool cpu_has_feature(unsigned int feature);
extern int cpu_init(void);
diff --git a/include/cpu.h b/include/cpu.h
index 08220cfa7616..aa3516b97f97 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -3,7 +3,6 @@
#include "asm/cpu.h"
-extern void cpu_set_feature(unsigned int feature);
extern bool cpu_has_feature(unsigned int feature);
extern int cpu_init(void);
--
1.9.3
More information about the CRIU
mailing list