[Devel] Re: [PATCH 05/10] Introduce function to dump process

Louis Rilling Louis.Rilling at kerlabs.com
Mon Oct 20 04:02:26 PDT 2008


Hi,

On Sat, Oct 18, 2008 at 03:11:33AM +0400, Andrey Mirkin wrote:
> Functions to dump task struct, fpu state and registers are added.
> All IDs are saved from the POV of process (container) namespace.

Just a couple of minor comments, in case work on this series continues.

[...]

> diff --git a/checkpoint/cpt_process.c b/checkpoint/cpt_process.c
> new file mode 100644
> index 0000000..58f608d
> --- /dev/null
> +++ b/checkpoint/cpt_process.c
> @@ -0,0 +1,236 @@
> +/*
> + *  Copyright (C) 2008 Parallels, Inc.
> + *
> + *  Author: Andrey Mirkin <major at openvz.org>
> + *
> + *  This program is free software; you can redistribute it and/or
> + *  modify it under the terms of the GNU General Public License as
> + *  published by the Free Software Foundation, version 2 of the
> + *  License.
> + *
> + */
> +
> +#include <linux/sched.h>
> +#include <linux/fs.h>
> +#include <linux/file.h>
> +#include <linux/version.h>
> +#include <linux/nsproxy.h>
> +
> +#include "checkpoint.h"
> +#include "cpt_image.h"
> +
> +static unsigned int encode_task_flags(unsigned int task_flags)
> +{
> +	unsigned int flags = 0;
> +
> +	if (task_flags & PF_EXITING)
> +		flags |= (1 << CPT_PF_EXITING);
> +	if (task_flags & PF_FORKNOEXEC)
> +		flags |= (1 << CPT_PF_FORKNOEXEC);
> +	if (task_flags & PF_SUPERPRIV)
> +		flags |= (1 << CPT_PF_SUPERPRIV);
> +	if (task_flags & PF_DUMPCORE)
> +		flags |= (1 << CPT_PF_DUMPCORE);
> +	if (task_flags & PF_SIGNALED)
> +		flags |= (1 << CPT_PF_SIGNALED);
> +	if (task_flags & PF_USED_MATH)
> +		flags |= (1 << CPT_PF_USED_MATH);
> +	
> +	return flags;
> +		
> +}
> +
> +int cpt_dump_task_struct(struct task_struct *tsk, struct cpt_context *ctx)
> +{
> +	struct cpt_task_image *t;
> +	int i;
> +	int err;
> +
> +	t = kzalloc(sizeof(*t), GFP_KERNEL);
> +	if (!t)
> +		return -ENOMEM;
> +
> +	t->cpt_len = sizeof(*t);
> +	t->cpt_type = CPT_OBJ_TASK;
> +	t->cpt_hdrlen = sizeof(*t);
> +	t->cpt_content = CPT_CONTENT_ARRAY;
> +
> +	t->cpt_state = tsk->state;
> +	t->cpt_flags = encode_task_flags(tsk->flags);
> +	t->cpt_exit_code = tsk->exit_code;
> +	t->cpt_exit_signal = tsk->exit_signal;
> +	t->cpt_pdeath_signal = tsk->pdeath_signal;
> +	t->cpt_pid = task_pid_nr_ns(tsk, ctx->nsproxy->pid_ns);
> +	t->cpt_tgid = task_tgid_nr_ns(tsk, ctx->nsproxy->pid_ns);
> +	t->cpt_ppid = tsk->parent ?
> +		task_pid_nr_ns(tsk->parent, ctx->nsproxy->pid_ns) : 0;
> +	t->cpt_rppid = tsk->real_parent ?
> +		task_pid_nr_ns(tsk->real_parent, ctx->nsproxy->pid_ns) : 0;
> +	t->cpt_pgrp = task_pgrp_nr_ns(tsk, ctx->nsproxy->pid_ns);
> +	t->cpt_session = task_session_nr_ns(tsk, ctx->nsproxy->pid_ns);
> +	t->cpt_old_pgrp = 0;
> +	if (tsk->signal->tty_old_pgrp)
> +		t->cpt_old_pgrp = pid_vnr(tsk->signal->tty_old_pgrp);
> +	t->cpt_leader = tsk->group_leader ? task_pid_vnr(tsk->group_leader) : 0;

Why pid_vnr()/task_pid_vnr() here, but task_*_nr_ns() above? According to the
patch description ("All IDs are saved from the POV of process (container)
namespace"), I'd expect something like pid_nr_ns(tsk->signal->tty_old_pgrp,
tsk->nsproxy->pid_ns), and the same for tsk->group_leader.

If I understand correctly, pid_vnr() translates the ID relative to current's
pid namespace, so it is correct only if ctx->nsproxy->pid_ns ==
tsk->nsproxy->pid_ns == current->nsproxy->pid_ns, and I expect current to live
in a different pid_ns.

Comments?

> +	t->cpt_utime = tsk->utime;
> +	t->cpt_stime = tsk->stime;
> +	t->cpt_utimescaled = tsk->utimescaled;
> +	t->cpt_stimescaled = tsk->stimescaled;
> +	t->cpt_gtime = tsk->gtime;
> +	t->cpt_prev_utime = tsk->prev_utime;
> +	t->cpt_prev_stime = tsk->prev_stime;
> +	t->cpt_nvcsw = tsk->nvcsw;
> +	t->cpt_nivcsw = tsk->nivcsw;
> +	t->cpt_start_time = cpt_timespec_export(&tsk->start_time);
> +	t->cpt_real_start_time = cpt_timespec_export(&tsk->real_start_time);
> +	t->cpt_min_flt = tsk->min_flt;
> +	t->cpt_maj_flt = tsk->maj_flt;
> +	memcpy(t->cpt_comm, tsk->comm, TASK_COMM_LEN);
> +	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
> +		t->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b) << 32) +
> +			tsk->thread.tls_array[i].a;
> +	}
> +	/* TODO: encode thread flags and status like task flags */
> +	t->cpt_thrflags = task_thread_info(tsk)->flags & ~(1<<TIF_FREEZE);
> +	t->cpt_thrstatus = task_thread_info(tsk)->status;
> +	t->cpt_user = tsk->user->uid;
> +	t->cpt_uid = tsk->uid;
> +	t->cpt_euid = tsk->euid;
> +	t->cpt_suid = tsk->suid;
> +	t->cpt_fsuid = tsk->fsuid;
> +	t->cpt_gid = tsk->gid;
> +	t->cpt_egid = tsk->egid;
> +	t->cpt_sgid = tsk->sgid;
> +	t->cpt_fsgid = tsk->fsgid;
> +
> +	err = ctx->write(t, sizeof(*t), ctx);
> +
> +	kfree(t);
> +	return err;
> +}
> +
> +static int cpt_dump_fpustate(struct task_struct *tsk, struct cpt_context *ctx)
> +{
> +	struct cpt_obj_bits hdr;
> +	int err;
> +	int content;
> +	unsigned long size;
> +
> +	content = CPT_CONTENT_X86_FPUSTATE;
> +	size = sizeof(struct i387_fxsave_struct);
> +#ifndef CONFIG_X86_64
> +	if (!cpu_has_fxsr) {
> +		size = sizeof(struct i387_fsave_struct);
> +		content = CPT_CONTENT_X86_FPUSTATE_OLD;
> +	}
> +#endif
> +
> +	hdr.cpt_len = sizeof(hdr) + size;
> +	hdr.cpt_type = CPT_OBJ_BITS;
> +	hdr.cpt_hdrlen = sizeof(hdr);
> +	hdr.cpt_content = content;
> +	hdr.cpt_size = size;
> +	err = ctx->write(&hdr, sizeof(hdr), ctx);
> +	if (!err)
> +		ctx->write(tsk->thread.xstate, size, ctx);

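The return value of this second ctx->write() call is discarded. It should be
assigned to err so that a failure while writing the FPU state is reported to
the caller, right?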

> +	return err;
> +}
> +
> +static u32 encode_segment(u32 segreg)
> +{
> +	segreg &= 0xFFFF;
> +
> +	if (segreg == 0)
> +		return CPT_SEG_ZERO;
> +	if ((segreg & 3) != 3) {
> +		eprintk("Invalid RPL of a segment reg %x\n", segreg);
> +		return CPT_SEG_ZERO;
> +	}
> +
> +	/* LDT descriptor, it is just an index to LDT array */
> +	if (segreg & 4)
> +		return CPT_SEG_LDT + (segreg >> 3);
> +
> +	/* TLS descriptor. */
> +	if ((segreg >> 3) >= GDT_ENTRY_TLS_MIN &&
> +			(segreg >> 3) <= GDT_ENTRY_TLS_MAX)
> +		return CPT_SEG_TLS1 + ((segreg>>3) - GDT_ENTRY_TLS_MIN);
> +
> +	/* One of standard desriptors */
> +#ifdef CONFIG_X86_64
> +	if (segreg == __USER32_DS)
> +		return CPT_SEG_USER32_DS;
> +	if (segreg == __USER32_CS)
> +		return CPT_SEG_USER32_CS;
> +	if (segreg == __USER_DS)
> +		return CPT_SEG_USER64_DS;
> +	if (segreg == __USER_CS)
> +		return CPT_SEG_USER64_CS;
> +#else
> +	if (segreg == __USER_DS)
> +		return CPT_SEG_USER32_DS;
> +	if (segreg == __USER_CS)
> +		return CPT_SEG_USER32_CS;
> +#endif
> +	eprintk("Invalid segment reg %x\n", segreg);
> +	return CPT_SEG_ZERO;
> +}
> +
> +static int cpt_dump_registers(struct task_struct *tsk, struct cpt_context *ctx)
> +{
> +	struct cpt_x86_regs ri;
> +	struct pt_regs *pt_regs;
> +
> +	ri.cpt_len = sizeof(ri);
> +	ri.cpt_type = CPT_OBJ_X86_REGS;
> +	ri.cpt_hdrlen = sizeof(ri);
> +	ri.cpt_content = CPT_CONTENT_VOID;
> +
> +	ri.cpt_debugreg[0] = tsk->thread.debugreg0;
> +	ri.cpt_debugreg[1] = tsk->thread.debugreg1;
> +	ri.cpt_debugreg[2] = tsk->thread.debugreg2;
> +	ri.cpt_debugreg[3] = tsk->thread.debugreg3;
> +	ri.cpt_debugreg[4] = 0;
> +	ri.cpt_debugreg[5] = 0;
> +	ri.cpt_debugreg[6] = tsk->thread.debugreg6;
> +	ri.cpt_debugreg[7] = tsk->thread.debugreg7;
> +
> +	pt_regs = task_pt_regs(tsk);
> +
> +	ri.cpt_fs = encode_segment(pt_regs->fs);
> +	ri.cpt_gs = encode_segment(tsk->thread.gs);
> +
> +	ri.cpt_bx = pt_regs->bx;
> +	ri.cpt_cx = pt_regs->cx;
> +	ri.cpt_dx = pt_regs->dx;
> +	ri.cpt_si = pt_regs->si;
> +	ri.cpt_di = pt_regs->di;
> +	ri.cpt_bp = pt_regs->bp;
> +	ri.cpt_ax = pt_regs->ax;
> +	ri.cpt_ds = encode_segment(pt_regs->ds);
> +	ri.cpt_es = encode_segment(pt_regs->es);
> +	ri.cpt_orig_ax = pt_regs->orig_ax;
> +	ri.cpt_ip = pt_regs->ip;
> +	ri.cpt_cs = encode_segment(pt_regs->cs);
> +	ri.cpt_flags = pt_regs->flags;
> +	ri.cpt_sp = pt_regs->sp;
> +	ri.cpt_ss = encode_segment(pt_regs->ss);
> +	
> +	return ctx->write(&ri, sizeof(ri), ctx);
> +}
> +
> +int cpt_dump_task(struct task_struct *tsk, struct cpt_context *ctx)
> +{
> +	int err;
> +
> +	err = cpt_dump_task_struct(tsk, ctx);
> +
> +	/* Dump task mm */
> +
> +	if (!err)
> +		cpt_dump_fpustate(tsk, ctx);

Error checking is missing here: the return value of cpt_dump_fpustate() is
ignored and should be assigned to err.

> +	if (!err)
> +		cpt_dump_registers(tsk, ctx);

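Error checking is missing here as well: the return value of
cpt_dump_registers() is ignored and should be assigned to err.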

> +
> +	return err;
> +}
> -- 
> 1.5.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

Louis

-- 
Dr Louis Rilling			Kerlabs
Skype: louis.rilling			Batiment Germanium
Phone: (+33|0) 6 80 89 08 23		80 avenue des Buttes de Coesmes
http://www.kerlabs.com/			35700 Rennes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.openvz.org/pipermail/devel/attachments/20081020/22bdab78/attachment-0001.sig>
-------------- next part --------------
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers


More information about the Devel mailing list