[CRIU] [PATCH 04/18] x86: cpu -- Detect and save fpu status in info
Cyrill Gorcunov
gorcunov at gmail.com
Thu Jul 19 15:47:32 MSK 2018
- extend compel_cpuinfo_t to keep all fpu information
  needed for xsaves mode
- fetch xsaves data in compel_cpuid

All this will allow us to extend criu to support
AVX-512 instructions.
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
compel/arch/x86/src/lib/cpu.c | 206 ++++++++++++++++++++++++-
compel/arch/x86/src/lib/include/uapi/asm/cpu.h | 15 ++
compel/arch/x86/src/lib/include/uapi/asm/fpu.h | 185 +++++++++++++++++++++-
3 files changed, 399 insertions(+), 7 deletions(-)
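
For context, the compel_fpuid() helper added below enumerates the XSAVE state
components via CPUID leaf 0xD: sub-leaf 0 reports the supported user xfeatures
mask (EDX:EAX), the size of the save area for the currently enabled features
(EBX) and the maximum size for all supported features (ECX); sub-leaf 1 reports
the XSAVES (compacted) size; and each sub-leaf i >= 2 reports the size and
standard-format offset of state component i. The following is only a minimal
standalone sketch of that enumeration, using GCC's __get_cpuid_count() from
<cpuid.h> rather than compel's cpuid_count() helper:

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i;
	uint64_t xfeatures_mask;

	/* Sub-leaf 0: user xfeatures mask and xsave area sizes. */
	if (!__get_cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx))
		return 1;
	xfeatures_mask = eax | ((uint64_t)edx << 32);
	printf("xfeatures 0x%llx xsave_size %u xsave_size_max %u\n",
	       (unsigned long long)xfeatures_mask, ebx, ecx);

	/* Sub-leaves >= 2: per-component size (EAX) and offset (EBX);
	 * ECX[0] flags supervisor components, ECX[1] flags 64-byte
	 * alignment in the compacted format. */
	for (i = 2; i < 64; i++) {
		if (!(xfeatures_mask & (1ULL << i)))
			continue;
		__get_cpuid_count(0xd, i, &eax, &ebx, &ecx, &edx);
		printf("xfeature %2u: size %u offset %u%s%s\n", i, eax, ebx,
		       (ecx & 1) ? " (supervisor)" : "",
		       (ecx & 2) ? " (64-byte aligned when compacted)" : "");
	}
	return 0;
}
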
diff --git a/compel/arch/x86/src/lib/cpu.c b/compel/arch/x86/src/lib/cpu.c
index 93e430face6f..4657f9723ba6 100644
--- a/compel/arch/x86/src/lib/cpu.c
+++ b/compel/arch/x86/src/lib/cpu.c
@@ -6,6 +6,7 @@
#include "common/compiler.h"
#include "log.h"
+#include "common/bug.h"
#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
@@ -13,6 +14,40 @@
static compel_cpuinfo_t rt_info;
static bool rt_info_done = false;
+/*
+ * Although we spell it out in here, the Processor Trace
+ * xfeature is completely unused. We use other mechanisms
+ * to save/restore PT state in Linux.
+ */
+
+static const char * const xfeature_names[] = {
+ "x87 floating point registers" ,
+ "SSE registers" ,
+ "AVX registers" ,
+ "MPX bounds registers" ,
+ "MPX CSR" ,
+ "AVX-512 opmask" ,
+ "AVX-512 Hi256" ,
+ "AVX-512 ZMM_Hi256" ,
+ "Processor Trace" ,
+ "Protection Keys User registers",
+ "Hardware Duty Cycling" ,
+};
+
+static short xsave_cpuid_features[] = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+ X86_FEATURE_HDC,
+};
+
void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
{
if (likely(feature < NCAPINTS_BITS))
@@ -32,6 +67,172 @@ int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
return 0;
}
+static int compel_fpuid(compel_cpuinfo_t *c)
+{
+ unsigned int last_good_offset;
+ uint32_t eax, ebx, ecx, edx;
+ size_t i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(xsave_cpuid_features) !=
+ ARRAY_SIZE(xfeature_names));
+
+ if (!compel_test_cpu_cap(c, X86_FEATURE_FPU)) {
+ pr_err("fpu: No FPU detected\n");
+ return -1;
+ }
+
+ if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVE)) {
+ pr_info("fpu: x87 FPU will use %s\n",
+ compel_test_cpu_cap(c, X86_FEATURE_FXSR) ?
+ "FXSAVE" : "FSAVE");
+ return 0;
+ }
+
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ c->xfeatures_mask = eax + ((uint64_t)edx << 32);
+
+ if ((c->xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+ /*
+ * This indicates that something really unexpected happened
+ * with the enumeration.
+ */
+ pr_err("fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx\n",
+ (unsigned long long)c->xfeatures_mask);
+ return -1;
+ }
+
+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!compel_test_cpu_cap(c, xsave_cpuid_features[i]))
+ c->xfeatures_mask &= ~(1UL << i);
+ }
+
+ c->xfeatures_mask &= XCNTXT_MASK;
+ c->xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * xsaves is not enabled in userspace, so
+ * the xsaves size is fetched here mostly for debug purposes.
+ */
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ c->xsave_size = ebx;
+ c->xsave_size_max = ecx;
+
+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ c->xsaves_size = ebx;
+
+ pr_debug("fpu: xfeatures_mask 0x%llx xsave_size %u xsave_size_max %u xsaves_size %u\n",
+ (unsigned long long)c->xfeatures_mask,
+ c->xsave_size, c->xsave_size_max, c->xsaves_size);
+
+ if (c->xsave_size_max > sizeof(struct xsave_struct))
+ pr_warn_once("fpu: max xsave frame exceed xsave_struct (%u %u)\n",
+ c->xsave_size_max, (unsigned)sizeof(struct xsave_struct));
+
+ memset(c->xstate_offsets, 0xff, sizeof(c->xstate_offsets));
+ memset(c->xstate_sizes, 0xff, sizeof(c->xstate_sizes));
+ memset(c->xstate_comp_offsets, 0xff, sizeof(c->xstate_comp_offsets));
+ memset(c->xstate_comp_sizes, 0xff, sizeof(c->xstate_comp_sizes));
+
+ /* start at the beginning of the "extended state" */
+ last_good_offset = offsetof(struct xsave_struct, extended_state_area);
+
+ /*
+ * The FP xstates and SSE xstates are legacy states. They are always
+ * in the fixed offsets in the xsave area in either compacted form
+ * or standard form.
+ */
+ c->xstate_offsets[0] = 0;
+ c->xstate_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
+ c->xstate_offsets[1] = c->xstate_sizes[0];
+ c->xstate_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);
+
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if (!(c->xfeatures_mask & (1UL << i)))
+ continue;
+
+ /*
+ * If an xfeature is supervisor state, the offset
+ * in EBX is invalid. We leave it set to -1.
+ *
+ * SDM says: If state component 'i' is a user state component,
+ * ECX[0] returns 0; if state component 'i' is a supervisor
+ * state component, ECX[0] returns 1.
+ */
+ cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ if (!(ecx & 1))
+ c->xstate_offsets[i] = ebx;
+
+ c->xstate_sizes[i] = eax;
+
+ /*
+ * In our xstate size checks, we assume that the
+ * highest-numbered xstate feature has the
+ * highest offset in the buffer. Ensure it does.
+ */
+ if (last_good_offset > c->xstate_offsets[i])
+ pr_warn_once("fpu: misordered xstate %d %d\n",
+ last_good_offset, c->xstate_offsets[i]);
+
+ last_good_offset = c->xstate_offsets[i];
+ }
+
+ BUILD_BUG_ON(sizeof(c->xstate_offsets) != sizeof(c->xstate_sizes));
+ BUILD_BUG_ON(sizeof(c->xstate_comp_offsets) != sizeof(c->xstate_comp_sizes));
+
+ c->xstate_comp_offsets[0] = 0;
+ c->xstate_comp_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
+ c->xstate_comp_offsets[1] = c->xstate_comp_sizes[0];
+ c->xstate_comp_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);
+
+ if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVES)) {
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if ((c->xfeatures_mask & (1UL << i))) {
+ c->xstate_comp_offsets[i] = c->xstate_offsets[i];
+ c->xstate_comp_sizes[i] = c->xstate_sizes[i];
+ }
+ }
+ } else {
+ c->xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
+ FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+ if ((c->xfeatures_mask & (1UL << i)))
+ c->xstate_comp_sizes[i] = c->xstate_sizes[i];
+ else
+ c->xstate_comp_sizes[i] = 0;
+
+ if (i > FIRST_EXTENDED_XFEATURE) {
+ c->xstate_comp_offsets[i] = c->xstate_comp_offsets[i-1]
+ + c->xstate_comp_sizes[i-1];
+
+ /*
+ * The value returned by ECX[1] indicates the alignment
+ * of state component 'i' when the compacted format
+ * of the extended region of an XSAVE area is used:
+ */
+ cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ if (ecx & 2)
+ c->xstate_comp_offsets[i] = ALIGN(c->xstate_comp_offsets[i], 64);
+ }
+ }
+ }
+
+ if (!pr_quelled(COMPEL_LOG_DEBUG)) {
+ for (i = 0; i < ARRAY_SIZE(c->xstate_offsets); i++) {
+ if (!(c->xfeatures_mask & (1UL << i)))
+ continue;
+ pr_debug("fpu: %-32s xstate_offsets %6d / %-6d xstate_sizes %6d / %-6d\n",
+ xfeature_names[i], c->xstate_offsets[i], c->xstate_comp_offsets[i],
+ c->xstate_sizes[i], c->xstate_comp_sizes[i]);
+ }
+ }
+
+ return 0;
+}
+
int compel_cpuid(compel_cpuinfo_t *c)
{
uint32_t eax, ebx, ecx, edx;
@@ -222,7 +423,10 @@ int compel_cpuid(compel_cpuinfo_t *c)
break;
}
- return 0;
+ pr_debug("x86_family %u x86_vendor_id %s x86_model_id %s\n",
+ c->x86_family, c->x86_vendor_id, c->x86_model_id);
+
+ return compel_fpuid(c);
}
bool compel_cpu_has_feature(unsigned int feature)
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
index 65f0576f2689..6a0c91af490d 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h
@@ -3,6 +3,8 @@
#include <stdint.h>
+#include <compel/asm/fpu.h>
+
/*
* Adopted from linux kernel and enhanced from Intel/AMD manuals.
* Note these bits are not ABI for linux kernel but they _are_
@@ -277,6 +279,7 @@ enum cpuid_leafs {
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HDC (14*32+13) /* HDC base registers present */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
@@ -318,6 +321,7 @@ enum {
};
struct cpuinfo_x86 {
+ /* cpu context */
uint8_t x86_family;
uint8_t x86_vendor;
uint8_t x86_model;
@@ -328,6 +332,17 @@ struct cpuinfo_x86 {
int cpuid_level;
char x86_vendor_id[16];
char x86_model_id[64];
+
+ /* fpu context */
+ uint64_t xfeatures_mask;
+ uint32_t xsave_size_max;
+ uint32_t xsave_size;
+ uint32_t xstate_offsets[XFEATURE_MAX];
+ uint32_t xstate_sizes[XFEATURE_MAX];
+
+ uint32_t xsaves_size;
+ uint32_t xstate_comp_offsets[XFEATURE_MAX];
+ uint32_t xstate_comp_sizes[XFEATURE_MAX];
};
typedef struct cpuinfo_x86 compel_cpuinfo_t;
diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
index dca280bdb61d..b18c9175768f 100644
--- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
+++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h
@@ -19,7 +19,66 @@
#define XSTATE_YMM 0x4
#define FXSAVE_SIZE 512
-#define XSAVE_SIZE 832
+#define XSAVE_SIZE 4096
+
+#define XSAVE_HDR_SIZE 64
+#define XSAVE_HDR_OFFSET FXSAVE_SIZE
+
+#define XSAVE_YMM_SIZE 256
+#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature {
+ XFEATURE_FP,
+ XFEATURE_SSE,
+ /*
+ * Values above here are "legacy states".
+ * Those below are "extended states".
+ */
+ XFEATURE_YMM,
+ XFEATURE_BNDREGS,
+ XFEATURE_BNDCSR,
+ XFEATURE_OPMASK,
+ XFEATURE_ZMM_Hi256,
+ XFEATURE_Hi16_ZMM,
+ XFEATURE_PT,
+ XFEATURE_PKRU,
+ XFEATURE_HDC,
+
+ XFEATURE_MAX,
+};
+
+#define XSTATE_CPUID 0x0000000d
+
+#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
+#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
+#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
+#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
+#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
+#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
+#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
+#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
+#define XFEATURE_MASK_PT (1 << XFEATURE_PT)
+#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_HDC (1 << XFEATURE_HDC)
+
+#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)
+
+#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
+
+/* Supervisor features */
+#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT | XFEATURE_MASK_HDC)
+
+/* All currently supported features */
+#define XCNTXT_MASK \
+ (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
struct fpx_sw_bytes {
uint32_t magic1;
@@ -66,27 +125,141 @@ struct i387_fxsave_struct {
struct xsave_hdr_struct {
uint64_t xstate_bv;
- uint64_t reserved1[2];
- uint64_t reserved2[5];
+ uint64_t xcomp_bv;
+ uint64_t reserved[6];
} __packed;
+/*
+ * xstate_header.xcomp_bv[63] indicates that the extended_state_area
+ * is in compacted format.
+ */
+#define XCOMP_BV_COMPACTED_FORMAT ((uint64_t)1 << 63)
+
+/*
+ * State component 2:
+ *
+ * There are 16x 256-bit AVX registers named YMM0-YMM15.
+ * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
+ * and are stored in 'struct fxregs_state::xmm_space[]' in the
+ * "legacy" area.
+ *
+ * The high 128 bits are stored here.
+ */
struct ymmh_struct {
- uint32_t ymmh_space[64];
+ uint32_t ymmh_space[64];
+} __packed;
+
+/* Intel MPX support: */
+
+struct mpx_bndreg {
+ uint64_t lower_bound;
+ uint64_t upper_bound;
+} __packed;
+
+/*
+ * State component 3 is used for the 4 128-bit bounds registers
+ */
+struct mpx_bndreg_state {
+ struct mpx_bndreg bndreg[4];
+} __packed;
+
+/*
+ * State component 4 is used for the 64-bit user-mode MPX
+ * configuration register BNDCFGU and the 64-bit MPX status
+ * register BNDSTATUS. We call the pair "BNDCSR".
+ */
+struct mpx_bndcsr {
+ uint64_t bndcfgu;
+ uint64_t bndstatus;
} __packed;
+/*
+ * The BNDCSR state is padded out to be 64-bytes in size.
+ */
+struct mpx_bndcsr_state {
+ union {
+ struct mpx_bndcsr bndcsr;
+ uint8_t pad_to_64_bytes[64];
+ };
+} __packed;
+
+/* AVX-512 Components: */
+
+/*
+ * State component 5 is used for the 8 64-bit opmask registers
+ * k0-k7 (opmask state).
+ */
+struct avx_512_opmask_state {
+ uint64_t opmask_reg[8];
+} __packed;
+
+/*
+ * State component 6 is used for the upper 256 bits of the
+ * registers ZMM0-ZMM15. These 16 256-bit values are denoted
+ * ZMM0_H-ZMM15_H (ZMM_Hi256 state).
+ */
+struct avx_512_zmm_uppers_state {
+ uint64_t zmm_upper[16 * 4];
+} __packed;
+
+/*
+ * State component 7 is used for the 16 512-bit registers
+ * ZMM16-ZMM31 (Hi16_ZMM state).
+ */
+struct avx_512_hi16_state {
+ uint64_t hi16_zmm[16 * 8];
+} __packed;
+
+/*
+ * State component 9: 32-bit PKRU register. The state is
+ * 8 bytes long but only 4 bytes is used currently.
+ */
+struct pkru_state {
+ uint32_t pkru;
+ uint32_t pad;
+} __packed;
+
+/*
+ * This is our most modern FPU state format, as saved by the XSAVE
+ * and restored by the XRSTOR instructions.
+ *
+ * It consists of a legacy fxregs portion, an xstate header and
+ * subsequent areas as defined by the xstate header. Not all CPUs
+ * support all the extensions, so the size of the extended area
+ * can vary quite a bit between CPUs.
+ *
+ *
+ * One page should be enough for the whole xsave state.
+ */
+#define EXTENDED_STATE_AREA_SIZE (4096 - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct))
+
/*
* cpu requires it to be 64 byte aligned
*/
struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
- struct ymmh_struct ymmh;
+ union {
+ /*
+ * This ymmh is unneeded, kept only
+ * for backward compatibility.
+ */
+ struct ymmh_struct ymmh;
+ uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
+ };
} __aligned(FP_MIN_ALIGN_BYTES) __packed;
struct xsave_struct_ia32 {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
- struct ymmh_struct ymmh;
+ union {
+ /*
+ * This ymmh is unneeded, kept only
+ * for backward compatibility.
+ */
+ struct ymmh_struct ymmh;
+ uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE];
+ };
} __aligned(FXSAVE_ALIGN_BYTES) __packed;
typedef struct {
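
As a usage illustration of the new compel_cpuinfo_t fields (only a sketch, not
part of the patch: the <compel/asm/cpu.h> include path, the direct extern
declaration of compel_cpuid() and the print_xfeature_layout() helper are
assumptions made for the example), a consumer can look up where an enabled
xfeature lives in both the standard and the compacted XSAVE layouts:

#include <stdio.h>
#include <compel/asm/cpu.h>
#include <compel/asm/fpu.h>

extern int compel_cpuid(compel_cpuinfo_t *c);

/* Print the standard and compacted placement of one xfeature,
 * using the fields this patch adds to compel_cpuinfo_t. */
static void print_xfeature_layout(const compel_cpuinfo_t *c, unsigned int i)
{
	if (i >= XFEATURE_MAX || !(c->xfeatures_mask & (1ULL << i)))
		return;
	printf("xfeature %u: standard offset %u, compacted offset %u, size %u\n",
	       i, c->xstate_offsets[i], c->xstate_comp_offsets[i],
	       c->xstate_sizes[i]);
}

int main(void)
{
	compel_cpuinfo_t c = { 0 };

	if (compel_cpuid(&c))
		return 1;
	print_xfeature_layout(&c, XFEATURE_YMM);
	print_xfeature_layout(&c, XFEATURE_OPMASK);
	return 0;
}
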
--
2.14.4