[CRIU] [PATCH 10/18] x86: cpu -- Add support for extended xsave area
Cyrill Gorcunov
gorcunov at gmail.com
Thu Jul 19 15:47:38 MSK 2018
CPU extensions (such as AVX-512) require a bigger xsave
area to keep the FPU register set, so we allocate a page
per process to hold it all. On checkpoint we parse the
runtime FPU features and dump them into the image; on
restore we do the reverse.
Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
criu/arch/x86/crtools.c | 153 +++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 138 insertions(+), 15 deletions(-)
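
For context (not part of the patch): on the standard, non-compacted xsave
format, the per-feature offsets and sizes that compel_fpu_feature_offset()
and compel_fpu_feature_size() are expected to report come from CPUID leaf
0xD, where sub-leaf N describes extended state component N and sub-leaf 0
gives the total area size (which is why one page per task is enough for the
dump buffer). A minimal standalone sketch, assuming a GCC/Clang toolchain
with <cpuid.h>:

#include <cpuid.h>
#include <stdio.h>

/*
 * Feature numbers follow the kernel's XFEATURE_* enumeration:
 * 2 = YMM, 3 = BNDREGS, 4 = BNDCSR, 5 = opmask,
 * 6 = ZMM_Hi256, 7 = Hi16_ZMM, 9 = PKRU.
 */
static void report(const char *name, unsigned int feature)
{
	unsigned int eax, ebx, ecx, edx;

	/* Sub-leaf N: EAX = component size, EBX = offset in the xsave area. */
	__cpuid_count(0xd, feature, eax, ebx, ecx, edx);
	printf("%-10s %4u bytes at offset %4u\n", name, eax, ebx);
}

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/*
	 * Sub-leaf 0: EBX = bytes needed for the currently enabled
	 * features, ECX = maximum for everything the CPU supports.
	 */
	__cpuid_count(0xd, 0, eax, ebx, ecx, edx);
	printf("xsave area: %u bytes enabled, %u bytes max\n", ebx, ecx);

	report("YMM", 2);
	report("opmask", 5);
	report("ZMM_Hi256", 6);
	report("Hi16_ZMM", 7);
	report("PKRU", 9);
	return 0;
}

The sizes reported there are the same values the assign_xsave() checks
below compare against pb_repeated_size() of the corresponding image fields.
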
diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c
index 0b5a0acd6779..bb25916f3519 100644
--- a/criu/arch/x86/crtools.c
+++ b/criu/arch/x86/crtools.c
@@ -17,6 +17,21 @@ int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpre
#define assign_reg(dst, src, e) do { dst->e = (__typeof__(dst->e))src.e; } while (0)
#define assign_array(dst, src, e) memcpy(dst->e, &src.e, sizeof(src.e))
+#define assign_xsave(feature, xsave, member, area) \
+ do { \
+ if (compel_fpu_has_feature(feature)) { \
+ uint32_t off = compel_fpu_feature_offset(feature); \
+ void *from = &area[off]; \
+ size_t size = pb_repeated_size(xsave, member); \
+ size_t xsize = (size_t)compel_fpu_feature_size(feature); \
+ if (xsize != size) { \
+ pr_err("%s reported %zu bytes (expecting %zu)\n", \
+ # feature, xsize, size); \
+ return -1; \
+ } \
+ memcpy(xsave->member, from, size); \
+ } \
+ } while (0)
if (user_regs_native(regs)) {
assign_reg(gpregs, regs->native, r15);
@@ -89,14 +104,27 @@ int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpre
assign_array(core->thread_info->fpregs, fpregs->i387, xmm_space);
if (compel_cpu_has_feature(X86_FEATURE_OSXSAVE)) {
- BUG_ON(core->thread_info->fpregs->xsave->n_ymmh_space != ARRAY_SIZE(fpregs->ymmh.ymmh_space));
+ UserX86XsaveEntry *xsave = core->thread_info->fpregs->xsave;
+ uint8_t *extended_state_area = (void *)fpregs;
- assign_reg(core->thread_info->fpregs->xsave, fpregs->xsave_hdr, xstate_bv);
- assign_array(core->thread_info->fpregs->xsave, fpregs->ymmh, ymmh_space);
+ /*
+	 * xcomp_bv is designated for the compacted format, but user
+	 * space never uses it, so we can simply ignore it.
+ */
+ assign_reg(xsave, fpregs->xsave_hdr, xstate_bv);
+
+ assign_xsave(XFEATURE_YMM, xsave, ymmh_space, extended_state_area);
+ assign_xsave(XFEATURE_BNDREGS, xsave, bndreg_state, extended_state_area);
+ assign_xsave(XFEATURE_BNDCSR, xsave, bndcsr_state, extended_state_area);
+ assign_xsave(XFEATURE_OPMASK, xsave, opmask_reg, extended_state_area);
+ assign_xsave(XFEATURE_ZMM_Hi256,xsave, zmm_upper, extended_state_area);
+ assign_xsave(XFEATURE_Hi16_ZMM, xsave, hi16_zmm, extended_state_area);
+ assign_xsave(XFEATURE_PKRU, xsave, pkru, extended_state_area);
}
#undef assign_reg
#undef assign_array
+#undef assign_xsave
return 0;
}
@@ -113,6 +141,62 @@ static void alloc_tls(ThreadInfoX86 *ti, void **mempool)
}
}
+static int alloc_xsave_extends(UserX86XsaveEntry *xsave)
+{
+ if (compel_fpu_has_feature(XFEATURE_YMM)) {
+ xsave->n_ymmh_space = 64;
+ xsave->ymmh_space = xzalloc(pb_repeated_size(xsave, ymmh_space));
+ if (!xsave->ymmh_space)
+ goto err;
+ }
+
+ if (compel_fpu_has_feature(XFEATURE_BNDREGS)) {
+ xsave->n_bndreg_state = 4 * 2;
+ xsave->bndreg_state = xzalloc(pb_repeated_size(xsave, bndreg_state));
+ if (!xsave->bndreg_state)
+ goto err;
+ }
+
+ if (compel_fpu_has_feature(XFEATURE_BNDCSR)) {
+ xsave->n_bndcsr_state = 2;
+ xsave->bndcsr_state = xzalloc(pb_repeated_size(xsave, bndcsr_state));
+ if (!xsave->bndcsr_state)
+ goto err;
+ }
+
+ if (compel_fpu_has_feature(XFEATURE_OPMASK)) {
+ xsave->n_opmask_reg = 8;
+ xsave->opmask_reg = xzalloc(pb_repeated_size(xsave, opmask_reg));
+ if (!xsave->opmask_reg)
+ goto err;
+ }
+
+ if (compel_fpu_has_feature(XFEATURE_ZMM_Hi256)) {
+ xsave->n_zmm_upper = 16 * 4;
+ xsave->zmm_upper = xzalloc(pb_repeated_size(xsave, zmm_upper));
+ if (!xsave->zmm_upper)
+ goto err;
+ }
+
+ if (compel_fpu_has_feature(XFEATURE_Hi16_ZMM)) {
+ xsave->n_hi16_zmm = 16 * 8;
+ xsave->hi16_zmm = xzalloc(pb_repeated_size(xsave, hi16_zmm));
+ if (!xsave->hi16_zmm)
+ goto err;
+ }
+
+ if (compel_fpu_has_feature(XFEATURE_PKRU)) {
+ xsave->n_pkru = 2;
+ xsave->pkru = xzalloc(pb_repeated_size(xsave, pkru));
+ if (!xsave->pkru)
+ goto err;
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
int arch_alloc_thread_info(CoreEntry *core)
{
size_t sz;
@@ -165,9 +249,7 @@ int arch_alloc_thread_info(CoreEntry *core)
xsave = fpregs->xsave = xptr_pull(&m, UserX86XsaveEntry);
user_x86_xsave_entry__init(xsave);
- xsave->n_ymmh_space = 64;
- xsave->ymmh_space = xzalloc(pb_repeated_size(xsave, ymmh_space));
- if (!xsave->ymmh_space)
+ if (alloc_xsave_extends(xsave))
goto err;
}
}
@@ -182,8 +264,16 @@ void arch_free_thread_info(CoreEntry *core)
if (!core->thread_info)
return;
- if (core->thread_info->fpregs->xsave)
+ if (core->thread_info->fpregs->xsave) {
xfree(core->thread_info->fpregs->xsave->ymmh_space);
+ xfree(core->thread_info->fpregs->xsave->pkru);
+ xfree(core->thread_info->fpregs->xsave->hi16_zmm);
+ xfree(core->thread_info->fpregs->xsave->zmm_upper);
+ xfree(core->thread_info->fpregs->xsave->opmask_reg);
+ xfree(core->thread_info->fpregs->xsave->bndcsr_state);
+ xfree(core->thread_info->fpregs->xsave->bndreg_state);
+ }
+
xfree(core->thread_info->fpregs->st_space);
xfree(core->thread_info->fpregs->xmm_space);
xfree(core->thread_info);
@@ -284,6 +374,24 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
#define assign_reg(dst, src, e) do { dst.e = (__typeof__(dst.e))src->e; } while (0)
#define assign_array(dst, src, e) memcpy(dst.e, (src)->e, sizeof(dst.e))
+#define assign_xsave(feature, xsave, member, area) \
+ do { \
+ if (compel_fpu_has_feature(feature)) { \
+ uint32_t off = compel_fpu_feature_offset(feature); \
+ void *to = &area[off]; \
+ void *from = xsave->member; \
+ size_t size = pb_repeated_size(xsave, member); \
+ size_t xsize = (size_t)compel_fpu_feature_size(feature); \
+ if (xsize != size) { \
+ pr_err("%s reported %zu bytes (expecting %zu)\n", \
+ # feature, xsize, size); \
+ return -1; \
+ } \
+ xstate_bv |= (1UL << feature); \
+ xstate_size += xsize; \
+ memcpy(to, from, size); \
+ } \
+ } while (0)
assign_reg(x->i387, core->thread_info->fpregs, cwd);
assign_reg(x->i387, core->thread_info->fpregs, swd);
@@ -303,26 +411,40 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
if (compel_cpu_has_feature(X86_FEATURE_OSXSAVE)) {
struct fpx_sw_bytes *fpx_sw = (void *)&x->i387.sw_reserved;
+ size_t xstate_size = XSAVE_YMM_OFFSET;
+ uint32_t xstate_bv = 0;
void *magic2;
- x->xsave_hdr.xstate_bv = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
+ xstate_bv = XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
/*
	 * fpregs->xsave pointer might not be present in the image, so we
- * simply clear out all ymm registers.
+ * simply clear out everything.
*/
- if (core->thread_info->fpregs->xsave)
- assign_array(x->ymmh, core->thread_info->fpregs->xsave, ymmh_space);
+ if (core->thread_info->fpregs->xsave) {
+ UserX86XsaveEntry *xsave = core->thread_info->fpregs->xsave;
+ uint8_t *extended_state_area = (void *)x;
+
+ assign_xsave(XFEATURE_YMM, xsave, ymmh_space, extended_state_area);
+ assign_xsave(XFEATURE_BNDREGS, xsave, bndreg_state, extended_state_area);
+ assign_xsave(XFEATURE_BNDCSR, xsave, bndcsr_state, extended_state_area);
+ assign_xsave(XFEATURE_OPMASK, xsave, opmask_reg, extended_state_area);
+ assign_xsave(XFEATURE_ZMM_Hi256,xsave, zmm_upper, extended_state_area);
+ assign_xsave(XFEATURE_Hi16_ZMM, xsave, hi16_zmm, extended_state_area);
+ assign_xsave(XFEATURE_PKRU, xsave, pkru, extended_state_area);
+ }
+
+ x->xsave_hdr.xstate_bv = xstate_bv;
fpx_sw->magic1 = FP_XSTATE_MAGIC1;
- fpx_sw->xstate_bv = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
- fpx_sw->xstate_size = sizeof(struct xsave_struct);
- fpx_sw->extended_size = sizeof(struct xsave_struct) + FP_XSTATE_MAGIC2_SIZE;
+ fpx_sw->xstate_bv = xstate_bv;
+ fpx_sw->xstate_size = xstate_size;
+ fpx_sw->extended_size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
/*
	 * This should be at the end of the xsave frame.
*/
- magic2 = (void *)x + sizeof(struct xsave_struct);
+ magic2 = (void *)x + xstate_size;
*(u32 *)magic2 = FP_XSTATE_MAGIC2;
}
@@ -330,6 +452,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
#undef assign_reg
#undef assign_array
+#undef assign_xsave
return 0;
}
--
2.14.4
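
A note on the frame layout the restore path rebuilds: FP_XSTATE_MAGIC1 lives
in the fpx_sw_bytes block occupying the last 48 bytes of the 512-byte fxsave
image, and FP_XSTATE_MAGIC2 trails the frame at xstate_size; that pair is how
the kernel (and anything inspecting a signal frame) tells an extended xsave
frame from a legacy fxsave-only one. Below is a minimal sketch of checking
that layout from a signal handler; the magic constants are from
asm/sigcontext.h, and the struct merely mirrors _fpx_sw_bytes for
illustration.

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <ucontext.h>

#define FP_XSTATE_MAGIC1	0x46505853U
#define FP_XSTATE_MAGIC2	0x46505845U

/* Mirrors struct _fpx_sw_bytes from asm/sigcontext.h (48 bytes). */
struct fpx_sw {
	uint32_t magic1;
	uint32_t extended_size;
	uint64_t xstate_bv;
	uint32_t xstate_size;
	uint32_t padding[7];
};

static void handler(int sig, siginfo_t *si, void *ctx)
{
	ucontext_t *uc = ctx;
	/* On x86-64, fpregs points at the 512-byte fxsave image on the frame. */
	uint8_t *fx = (uint8_t *)uc->uc_mcontext.fpregs;
	struct fpx_sw *sw;

	if (!fx)
		return;

	sw = (struct fpx_sw *)(fx + 512 - sizeof(*sw));
	if (sw->magic1 == FP_XSTATE_MAGIC1 &&
	    *(uint32_t *)(fx + sw->xstate_size) == FP_XSTATE_MAGIC2)
		printf("xsave frame: xstate_bv %#llx, %u bytes\n",
		       (unsigned long long)sw->xstate_bv, sw->xstate_size);
	else
		printf("legacy fxsave-only frame\n");
}

int main(void)
{
	struct sigaction sa = {
		.sa_sigaction	= handler,
		.sa_flags	= SA_SIGINFO,
	};

	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}

restore_fpu() above fills exactly these fields: magic1, xstate_bv,
xstate_size, extended_size, and the trailing magic2 word.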