[CRIU] [PATCH 2/4] s390: Add guarded-storage support

Alice Frosi alice at linux.vnet.ibm.com
Thu Sep 28 14:50:10 MSK 2017


Dump and restore tasks with GS control blocks. Guarded storage is a new
s390 feature that improves garbage collection for languages like Java.

There are two control blocks in the CPU:

 - GS control block
 - GS broadcast control block

Both control blocks have to be dumped and restored for all threads.

Signed-off-by: Alice Frosi <alice at linux.vnet.ibm.com>
Reviewed-by: Michael Holzheu <holzheu at linux.vnet.ibm.com>
---
 .../s390/src/lib/include/uapi/asm/infect-types.h   |   6 +
 compel/arch/s390/src/lib/infect.c                  | 102 +++++++-
 criu/arch/s390/crtools.c                           | 267 ++++++++++++++++++---
 images/core-s390.proto                             |   6 +
 4 files changed, 340 insertions(+), 41 deletions(-)

diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
index 84edea5..038c4a0 100644
--- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
+++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h
@@ -38,12 +38,18 @@ struct prfpreg {
 };
 
 #define USER_FPREGS_VXRS	0x000000001
+/* flags bit: gs_cb[] holds a fetched guarded-storage control block */
+#define USER_GS_CB		0x000000002
+/* flags bit: gs_bc[] holds a fetched guarded-storage broadcast control block */
+#define USER_GS_BC		0x000000004
 
 typedef struct {
 	uint32_t	flags;
 	struct prfpreg	prfpreg;
 	uint64_t	vxrs_low[16];
 	vector128_t	vxrs_high[16];
+	uint64_t	gs_cb[4];	/* valid only if flags & USER_GS_CB */
+	uint64_t	gs_bc[4];	/* valid only if flags & USER_GS_BC */
 } user_fpregs_struct_t;
 
 typedef struct {
diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c
index 3c1fff6..897b66f 100644
--- a/compel/arch/s390/src/lib/infect.c
+++ b/compel/arch/s390/src/lib/infect.c
@@ -19,6 +19,8 @@
 #define NT_PRFPREG		2
 #define NT_S390_VXRS_LOW	0x309
 #define NT_S390_VXRS_HIGH	0x30a
+#define NT_S390_GS_CB		0x30b	/* PTRACE_GETREGSET type: GS control block */
+#define NT_S390_GS_BC		0x30c	/* PTRACE_GETREGSET type: GS broadcast CB */
 
 /*
  * Print general purpose and access registers
@@ -40,17 +42,12 @@ static void print_user_regs_struct(const char *msg, int pid,
 }
 
 /*
- * Print floating point and vector registers
+ * Print vector registers
  */
-static void print_user_fpregs_struct(const char *msg, int pid,
-				     user_fpregs_struct_t *fpregs)
+static void print_vxrs(user_fpregs_struct_t *fpregs)
 {
 	int i;
 
-	pr_debug("%s: FP registers for pid=%d\n", msg, pid);
-	pr_debug("       fpc %08x\n", fpregs->prfpreg.fpc);
-	for (i = 0; i < 16; i++)
-		pr_debug("       f%02d %016lx\n", i, fpregs->prfpreg.fprs[i]);
 	if (!(fpregs->flags & USER_FPREGS_VXRS)) {
 		pr_debug("       No VXRS\n");
 		return;
@@ -63,6 +60,53 @@ static void print_user_fpregs_struct(const char *msg, int pid,
 			 fpregs->vxrs_high[i].part2);
 }
 
+/*
+ * Log the guarded-storage control block words, if they were fetched
+ */
+static void print_gs_cb(user_fpregs_struct_t *fpregs)
+{
+	int nr;
+
+	if (fpregs->flags & USER_GS_CB) {
+		for (nr = 0; nr < 4; nr++)
+			pr_debug("  gs_cb%02d %016lx\n", nr, fpregs->gs_cb[nr]);
+	} else {
+		pr_debug("       No GS_CB\n");
+	}
+}
+
+/*
+ * Log the guarded-storage broadcast control block words, if fetched
+ */
+static void print_gs_bc(user_fpregs_struct_t *fpregs)
+{
+	int nr;
+
+	if (fpregs->flags & USER_GS_BC) {
+		for (nr = 0; nr < 4; nr++)
+			pr_debug("  gs_bc%02d %016lx\n", nr, fpregs->gs_bc[nr]);
+	} else {
+		pr_debug("       No GS_BC\n");
+	}
+}
+
+/* Log fpc and the 16 FP registers, then the optional VX and GS state */
+static void print_user_fpregs_struct(const char *msg, int pid,
+				     user_fpregs_struct_t *fpregs)
+{
+	struct prfpreg *fp = &fpregs->prfpreg;
+	int nr;
+
+	pr_debug("%s: FP registers for pid=%d\n", msg, pid);
+	pr_debug("       fpc %08x\n", fp->fpc);
+	for (nr = 0; nr < 16; nr++)
+		pr_debug("       f%02d %016lx\n", nr, fp->fprs[nr]);
+
+	print_vxrs(fpregs);
+	print_gs_cb(fpregs);
+	print_gs_bc(fpregs);
+}
+
 int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe,
 			      user_regs_struct_t *regs,
 			      user_fpregs_struct_t *fpregs)
@@ -148,7 +192,47 @@ int get_vx_regs(pid_t pid, user_fpregs_struct_t *fpregs)
 	return 0;
 }
 
+/*
+ * Fetch GS_CB and GS_BC via ptrace and flag each block that was read.
+ * Returns 0 on success or when a block is unsupported/unset, else -1.
+ */
+int get_gs_cb(pid_t pid, user_fpregs_struct_t *fpregs)
+{
+	struct iovec iov;
 
+	fpregs->flags &= ~(USER_GS_CB | USER_GS_BC);
+	iov.iov_base = &fpregs->gs_cb;
+	iov.iov_len = sizeof(fpregs->gs_cb);
+	if (ptrace(PTRACE_GETREGSET, pid, NT_S390_GS_CB, &iov) < 0) {
+		switch (errno) {
+		case EINVAL:
+		case ENODEV:
+			memset(&fpregs->gs_cb, 0, sizeof(fpregs->gs_cb));
+			memset(&fpregs->gs_bc, 0, sizeof(fpregs->gs_bc));
+			pr_debug("GS_CB not supported\n");
+			return 0;
+		case ENODATA:
+			pr_debug("GS_CB not set\n");
+			break;
+		default:
+			return -1;
+		}
+	} else {
+		fpregs->flags |= USER_GS_CB;
+	}
+	iov.iov_base = &fpregs->gs_bc;
+	iov.iov_len = sizeof(fpregs->gs_bc);
+	if (ptrace(PTRACE_GETREGSET, pid, NT_S390_GS_BC, &iov) < 0) {
+		if (errno == ENODATA) {
+			pr_debug("GS_BC not set\n");
+			return 0;
+		}
+		pr_perror("Couldn't get GS_BC");
+		return -1;
+	}
+	fpregs->flags |= USER_GS_BC;
+	return 0;
+}
 /*
  * Prepare task registers for restart
  */
@@ -172,6 +256,10 @@ int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save,
 		pr_perror("Couldn't get vector registers");
 		return -1;
 	}
+	if (get_gs_cb(pid, &fpregs)) {
+		pr_perror("Couldn't get guarded-storage");
+		return -1;
+	}
 	print_user_fpregs_struct("get_task_regs", pid, &fpregs);
 	/* Check for system call restarting. */
 	if (regs->system_call) {
diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c
index cfab508..dd801fa 100644
--- a/criu/arch/s390/crtools.c
+++ b/criu/arch/s390/crtools.c
@@ -24,10 +24,13 @@
 #include "images/creds.pb-c.h"
 #include "ptrace.h"
 #include "pstree.h"
+#include "image.h"
 
 #define NT_PRFPREG		2
 #define NT_S390_VXRS_LOW	0x309
 #define NT_S390_VXRS_HIGH	0x30a
+#define NT_S390_GS_CB		0x30b	/* setregset() type: GS control block */
+#define NT_S390_GS_BC		0x30c	/* setregset() type: GS broadcast CB */
 
 /*
  * Print general purpose and access registers
@@ -47,24 +50,18 @@ static void print_core_gpregs(const char *msg, UserS390RegsEntry *gpregs)
 }
 
 /*
- * Print floating point and vector registers
+ * Print vector registers
  */
-static void print_core_fp_regs(const char *msg, CoreEntry *core)
+static void print_core_vx_regs(CoreEntry *core)
 {
 	UserS390VxrsHighEntry *vxrs_high;
 	UserS390VxrsLowEntry *vxrs_low;
-	UserS390FpregsEntry *fpregs;
 	int i;
 
 	vxrs_high = CORE_THREAD_ARCH_INFO(core)->vxrs_high;
 	vxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low;
-	fpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
 
-	pr_debug("%s: Floating point registers\n", msg);
-	pr_debug("       fpc %08x\n", fpregs->fpc);
-	for (i = 0; i < 16; i++)
-		pr_debug("       f%02d %016lx\n", i, fpregs->fprs[i]);
-	if (!vxrs_low) {
+	if (vxrs_low == NULL) {
 		pr_debug("       No VXRS\n");
 		return;
 	}
@@ -76,6 +73,60 @@ static void print_core_fp_regs(const char *msg, CoreEntry *core)
 }
 
 /*
+ * Log the GS_CB words held in the core entry, if present
+ */
+static void print_core_gs_cb(CoreEntry *core)
+{
+	UserS390GsCbEntry *entry = CORE_THREAD_ARCH_INFO(core)->gs_cb;
+	int nr;
+
+	if (entry == NULL) {
+		pr_debug("       No GS_CB\n");
+		return;
+	}
+
+	for (nr = 0; nr < 4; nr++)
+		pr_debug("       gs_cb%d %lx\n", nr, entry->regs[nr]);
+}
+
+/*
+ * Log the GS_BC words held in the core entry, if present.
+ * GS_BC shares the UserS390GsCbEntry message type with GS_CB.
+ */
+static void print_core_gs_bc(CoreEntry *core)
+{
+	UserS390GsCbEntry *entry = CORE_THREAD_ARCH_INFO(core)->gs_bc;
+	int nr;
+
+	if (entry == NULL) {
+		pr_debug("       No GS_BC\n");
+		return;
+	}
+
+	for (nr = 0; nr < 4; nr++)
+		pr_debug("       gs_bc%d %lx\n", nr, entry->regs[nr]);
+}
+
+/*
+ * Log the FP control word and the 16 FP registers of the core entry,
+ * followed by the optional VX, GS_CB and GS_BC state
+ */
+static void print_core_fp_regs(const char *msg, CoreEntry *core)
+{
+	UserS390FpregsEntry *fp = CORE_THREAD_ARCH_INFO(core)->fpregs;
+	int nr;
+
+	pr_debug("%s: Floating point registers\n", msg);
+	pr_debug("       fpc %08x\n", fp->fpc);
+	for (nr = 0; nr < 16; nr++)
+		pr_debug("       f%02d %016lx\n", nr, fp->fprs[nr]);
+
+	print_core_vx_regs(core);
+	print_core_gs_cb(core);
+	print_core_gs_bc(core);
+}
+
+/*
  * Allocate VxrsLow registers
  */
 static UserS390VxrsLowEntry *allocate_vxrs_low_regs(void)
@@ -144,14 +195,49 @@ static void free_vxrs_high_regs(UserS390VxrsHighEntry *vxrs_high)
 }
 
 /*
+ * Allocate and initialize a GS control block entry with room for four
+ * registers (used for both GS_CB and GS_BC). Returns NULL on failure.
+ */
+static UserS390GsCbEntry *allocate_gs_cb(void)
+{
+	UserS390GsCbEntry *entry = xmalloc(sizeof(*entry));
+
+	if (entry == NULL)
+		return NULL;
+
+	user_s390_gs_cb_entry__init(entry);
+	entry->n_regs = 4;
+	entry->regs = xzalloc(4 * sizeof(uint64_t));
+	if (entry->regs == NULL) {
+		/* Do not hand out an entry without its register array */
+		xfree(entry);
+		return NULL;
+	}
+
+	return entry;
+}
+
+/*
+ * Free a guarded-storage control block entry; NULL is accepted
+ */
+static void free_gs_cb(UserS390GsCbEntry *gs_cb)
+{
+	if (gs_cb == NULL)
+		return;
+	xfree(gs_cb->regs);
+	xfree(gs_cb);
+}
+/*
  * Copy internal structures into Google Protocol Buffers
  */
 int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
 {
-	UserS390VxrsHighEntry *vxrs_high;
-	UserS390VxrsLowEntry *vxrs_low;
-	UserS390FpregsEntry *fpregs;
-	UserS390RegsEntry *gpregs;
+	UserS390VxrsHighEntry *vxrs_high = NULL;
+	UserS390VxrsLowEntry *vxrs_low = NULL;
+	UserS390FpregsEntry *fpregs = NULL;
+	UserS390RegsEntry *gpregs = NULL;
+	UserS390GsCbEntry *gs_cb = NULL;
+	UserS390GsCbEntry *gs_bc = NULL;
 	CoreEntry *core = arg;
 
 	gpregs = CORE_THREAD_ARCH_INFO(core)->gpregs;
@@ -163,15 +249,29 @@ int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
 		if (!vxrs_low)
 			return -1;
 		vxrs_high = allocate_vxrs_high_regs();
-		if (!vxrs_high) {
-			free_vxrs_low_regs(vxrs_low);
-			return -1;
-		}
+		if (!vxrs_high)
+			goto fail_free_vxrs_low;
 		memcpy(vxrs_low->regs, &f->vxrs_low, sizeof(f->vxrs_low));
 		memcpy(vxrs_high->regs, &f->vxrs_high, sizeof(f->vxrs_high));
 		CORE_THREAD_ARCH_INFO(core)->vxrs_low = vxrs_low;
 		CORE_THREAD_ARCH_INFO(core)->vxrs_high = vxrs_high;
 	}
+	/* Guarded-storage control block */
+	if (f->flags & USER_GS_CB) {
+		gs_cb = allocate_gs_cb();
+		if (!gs_cb)
+			goto fail_free_gs_cb;
+		memcpy(gs_cb->regs, &f->gs_cb, sizeof(f->gs_cb));
+		CORE_THREAD_ARCH_INFO(core)->gs_cb = gs_cb;
+	}
+	/* Guarded-storage broadcast control block */
+	if (f->flags & USER_GS_BC) {
+		gs_bc = allocate_gs_cb();
+		if (!gs_bc)
+			goto fail_free_gs_bc;
+		memcpy(gs_bc->regs, &f->gs_bc, sizeof(f->gs_bc));
+		CORE_THREAD_ARCH_INFO(core)->gs_bc = gs_bc;
+	}
 	/* General purpose registers */
 	memcpy(gpregs->gprs, u->prstatus.gprs, sizeof(u->prstatus.gprs));
 	gpregs->psw_mask = u->prstatus.psw.mask;
@@ -184,6 +284,13 @@ int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f)
 	fpregs->fpc = f->prfpreg.fpc;
 	memcpy(fpregs->fprs, f->prfpreg.fprs, sizeof(f->prfpreg.fprs));
 	return 0;
+fail_free_gs_cb:
+	free_gs_cb(gs_cb);
+fail_free_gs_bc:
+	free_gs_cb(gs_bc);
+fail_free_vxrs_low:
+	free_vxrs_low_regs(vxrs_low);
+	return -1;
 }
 
 /*
@@ -225,7 +332,6 @@ int restore_fpu(struct rt_sigframe *f, CoreEntry *core)
 		memcpy(&dst_ext->vxrs_high, vxrs_high->regs,
 		       sizeof(dst_ext->vxrs_high));
 	}
-	print_core_fp_regs("restore_fp_regs", core);
 	return 0;
 }
 
@@ -342,6 +448,8 @@ void arch_free_thread_info(CoreEntry *core)
 	free_fp_regs(CORE_THREAD_ARCH_INFO(core)->fpregs);
 	free_vxrs_low_regs(CORE_THREAD_ARCH_INFO(core)->vxrs_low);
 	free_vxrs_high_regs(CORE_THREAD_ARCH_INFO(core)->vxrs_high);
+	free_gs_cb(CORE_THREAD_ARCH_INFO(core)->gs_cb);
+	free_gs_cb(CORE_THREAD_ARCH_INFO(core)->gs_bc);
 	xfree(CORE_THREAD_ARCH_INFO(core));
 	CORE_THREAD_ARCH_INFO(core) = NULL;
 }
@@ -390,6 +498,52 @@ static int set_vx_regs(pid_t pid, user_fpregs_struct_t *fpregs)
 }
 
 /*
+ * Write GS_CB and GS_BC to the task for each block flagged in fpregs
+ */
+static int set_gs_cb(pid_t pid, user_fpregs_struct_t *fpregs)
+{
+	struct iovec vec;
+
+	if (fpregs->flags & USER_GS_CB) {
+		vec.iov_len = sizeof(fpregs->gs_cb);
+		vec.iov_base = &fpregs->gs_cb;
+		if (setregset(pid, NT_S390_GS_CB, "S390_GS_CB", &vec) != 0)
+			return -1;
+	}
+	if (fpregs->flags & USER_GS_BC) {
+		vec.iov_len = sizeof(fpregs->gs_bc);
+		vec.iov_base = &fpregs->gs_bc;
+		return setregset(pid, NT_S390_GS_BC, "S390_GS_BC", &vec);
+	}
+	return 0;
+}
+
+/*
+ * Set the registers not present in the sigreturn frame: GS_CB and GS_BC
+ */
+static int set_task_regs_nosigrt(pid_t pid, CoreEntry *core)
+{
+	UserS390GsCbEntry *entry;
+	user_fpregs_struct_t regs;
+
+	memset(&regs, 0, sizeof(regs));
+	/* Guarded-storage control block (optional) */
+	entry = CORE_THREAD_ARCH_INFO(core)->gs_cb;
+	if (entry) {
+		memcpy(&regs.gs_cb, entry->regs, sizeof(regs.gs_cb));
+		regs.flags |= USER_GS_CB;
+	}
+
+	/* Guarded-storage broadcast control block (optional) */
+	entry = CORE_THREAD_ARCH_INFO(core)->gs_bc;
+	if (entry) {
+		memcpy(&regs.gs_bc, entry->regs, sizeof(regs.gs_bc));
+		regs.flags |= USER_GS_BC;
+	}
+	return set_gs_cb(pid, &regs);
+}
+
+/*
  * Restore registers for pid from core
  */
 static int set_task_regs(pid_t pid, CoreEntry *core)
@@ -410,34 +564,39 @@ static int set_task_regs(pid_t pid, CoreEntry *core)
 		return -1;
 	/* Vector registers (optional) */
 	cvxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low;
-	if (!cvxrs_low)
-		return 0;
-	cvxrs_high = CORE_THREAD_ARCH_INFO(core)->vxrs_high;
-	if (!cvxrs_high)
-		return -1;
-	fpregs.flags |= USER_FPREGS_VXRS;
-	memcpy(&fpregs.vxrs_low, cvxrs_low->regs, sizeof(fpregs.vxrs_low));
-	memcpy(&fpregs.vxrs_high, cvxrs_high->regs, sizeof(fpregs.vxrs_high));
-
-	return set_vx_regs(pid, &fpregs);
+	if (cvxrs_low != NULL) {
+		cvxrs_high = CORE_THREAD_ARCH_INFO(core)->vxrs_high;
+		if (!cvxrs_high)
+			return -1;
+		fpregs.flags |= USER_FPREGS_VXRS;
+		memcpy(&fpregs.vxrs_low, cvxrs_low->regs,
+				sizeof(fpregs.vxrs_low));
+		memcpy(&fpregs.vxrs_high, cvxrs_high->regs,
+				sizeof(fpregs.vxrs_high));
+		if (set_vx_regs(pid, &fpregs) < 0)
+			return -1;
+	}
+	return set_task_regs_nosigrt(pid, core);
 }
 
 /*
- * Restore vector and floating point registers for all threads
+ * Restore registers for all threads:
+ * - Floating point registers
+ * - Vector registers
+ * - Guarded-storage control block
+ * - Guarded-storage broadcast control block
  */
-int arch_set_thread_regs(struct pstree_item *item)
+int arch_set_thread_regs(struct pstree_item *item, bool with_threads)
 {
 	int i;
 
 	for_each_pstree_item(item) {
-		if (item->pid->state == TASK_DEAD ||
-		    item->pid->state == TASK_ZOMBIE ||
-		    item->pid->state == TASK_HELPER)
-			continue;
 		for (i = 0; i < item->nr_threads; i++) {
 			if (item->threads[i]->state == TASK_DEAD ||
 			    item->threads[i]->state == TASK_ZOMBIE)
 				continue;
+			if (!with_threads && i > 0)
+				continue;
 			if (set_task_regs(item->threads[i]->real,
 					  item->core[i])) {
 				pr_perror("Not set registers for task %d",
@@ -448,3 +607,43 @@ int arch_set_thread_regs(struct pstree_item *item)
 	}
 	return 0;
 }
+
+/* Read the core image of pid into *pcore; 0 on success, -1 otherwise */
+static int open_core(int pid, CoreEntry **pcore)
+{
+	struct cr_img *image = open_image(CR_FD_CORE, O_RSTR, pid);
+	int rc;
+
+	if (image == NULL) {
+		pr_err("Can't open core data for %d\n", pid);
+		return -1;
+	}
+	rc = pb_read_one(image, pcore, PB_CORE);
+	close_image(image);
+
+	return rc > 0 ? 0 : -1;
+}
+
+/*
+ * Restore the registers not present in the sigreturn signal frame
+ * (GS_CB and GS_BC) for one thread, using the core image of its
+ * virtual pid. The core entry is released before returning.
+ * Returns 0 on success and -1 on error.
+ */
+int arch_set_thread_regs_nosigrt(struct pid *pid)
+{
+	CoreEntry *core = NULL;	/* filled in by open_core() */
+	int ret;
+
+	if (open_core(pid->ns[0].virt, &core) < 0) {
+		pr_perror("Cannot open core for virt pid %d", pid->ns[0].virt);
+		return -1;
+	}
+	ret = set_task_regs_nosigrt(pid->real, core);
+	if (ret < 0)
+		pr_perror("Set register for pid %d", pid->real);
+	else
+		print_core_fp_regs("restore_fp_regs", core);
+	core_entry__free_unpacked(core, NULL);
+	return ret < 0 ? -1 : 0;
+}
diff --git a/images/core-s390.proto b/images/core-s390.proto
index 78d3e14..3b587fb 100644
--- a/images/core-s390.proto
+++ b/images/core-s390.proto
@@ -30,10 +30,16 @@ message user_s390_fpregs_entry {
 	repeated uint64			fprs		= 2;
 }
 
+message user_s390_gs_cb_entry {
+	repeated uint64			regs		= 1;	// GS_CB or GS_BC words; the dump code stores 4
+}
+
 message thread_info_s390 {
 	required uint64				clear_tid_addr	= 1[(criu).hex = true];
 	required user_s390_regs_entry		gpregs		= 2[(criu).hex = true];
 	required user_s390_fpregs_entry		fpregs		= 3[(criu).hex = true];
 	optional user_s390_vxrs_low_entry	vxrs_low	= 4[(criu).hex = true];
 	optional user_s390_vxrs_high_entry	vxrs_high	= 5[(criu).hex = true];
+	optional user_s390_gs_cb_entry		gs_cb		= 6[(criu).hex = true]; // guarded-storage control block
+	optional user_s390_gs_cb_entry		gs_bc		= 7[(criu).hex = true]; // guarded-storage broadcast control block
 }
-- 
2.9.3



More information about the CRIU mailing list