[Devel] Re: [RFC][cr][PATCH 2/6] Checkpoint file-locks

Matt Helsley matthltc at us.ibm.com
Tue May 4 20:12:56 PDT 2010


On Tue, May 04, 2010 at 06:44:39PM -0700, sukadev at linux.vnet.ibm.com wrote:
> From: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
> 
> While checkpointing each file-descriptor, find all the locks on the
> file and save information about the lock in the checkpoint-image.
> A follow-on patch will use this informaiton to restore the file-locks.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
> ---
>  fs/checkpoint.c                |   98 ++++++++++++++++++++++++++++++++++-----
>  include/linux/checkpoint_hdr.h |   10 ++++
>  2 files changed, 95 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/checkpoint.c b/fs/checkpoint.c
> index e036a7a..180302e 100644
> --- a/fs/checkpoint.c
> +++ b/fs/checkpoint.c
> @@ -26,6 +26,7 @@
>  #include <linux/checkpoint.h>
>  #include <linux/eventpoll.h>
>  #include <linux/eventfd.h>
> +#include <linux/smp_lock.h>
>  #include <net/sock.h>
> 
>  /**************************************************************************
> @@ -249,8 +250,86 @@ static int checkpoint_file(struct ckpt_ctx *ctx, void *ptr)
>  	return ret;
>  }
> 
> +static int checkpoint_one_file_lock(struct ckpt_ctx *ctx, struct file *file,
> +		int fd, struct file_lock *lock)

fd is only used for debugging info. Probably ought to remove it.

> +{
> +	int rc;
> +	struct ckpt_hdr_file_lock *h;
> +
> +	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE_LOCK);
> +	if (!h)
> +		return -ENOMEM;
> +
> +	h->fl_start = lock->fl_start;
> +	h->fl_end = lock->fl_end;
> +	h->fl_type = lock->fl_type;
> +	h->fl_flags = lock->fl_flags;
> +
> +	rc = ckpt_write_obj(ctx, &h->h);
> +
> +	ckpt_hdr_put(ctx, h);
> +
> +	ckpt_debug("Lock [%lld, %lld, %d, 0x%x] fd %d, rc %d\n", lock->fl_start,
> +			lock->fl_end, lock->fl_type, lock->fl_flags, fd, rc);
> +
> +	return rc;
> +}
> +
> +int
> +checkpoint_file_locks(struct ckpt_ctx *ctx, struct files_struct *files,
> +		struct file *file, int fd)
> +{
> +	int rc;
> +	struct inode *inode;
> +	struct file_lock **lockpp;
> +	struct file_lock *lockp;
> +	struct file_lock last_lock;
> +
> +	lock_kernel();

Eep. What are the current standards as far as adding "new" uses of the BKL?
Arnd/anti-BKL-ninjas might be good folks to Cc on the next round if this
is still here.

Regardless, I'd think some good comments about the kernel-locking are in
order so that when this BKL use is being removed those involved can easily
know what it's meant to protect.

> +	inode = file->f_path.dentry->d_inode;
> +	for_each_lock(inode, lockpp) {
> +		lockp = *lockpp;
> +		ckpt_debug("Found lock [%lld, %lld, %d, 0x%x]\n",
> +				lockp->fl_start, lockp->fl_end, 
> +				lockp->fl_type, lockp->fl_flags);
> +
> +		if (lockp->fl_owner != files)
> +			continue;
> +

Maybe a stupid idea:
		rc = -EBADF;
> +		if (IS_POSIX(lockp)) {
> +			rc = checkpoint_one_file_lock(ctx, file, fd, lockp);

Remove the other contents of this block.

> +			if (rc < 0) {
> +				ckpt_err(ctx,  rc, "%(T)fd %d, checkpoint "
> +						"lock failed\n", fd);
> +				goto out;
> +			}
> +		} else {

		if (rc < 0) {

> +			ckpt_err(ctx, rc, "%(T)fd %d has unsupported file "
> +					"lock type, flags 0x%x\n", fd,
> +					lockp->fl_flags);
> +			goto out;
> +		}
> +	}

I say maybe stupid because it may not follow accepted patterns and coalesces
two ckpt_err() reports into something more complex or ambiguous.

> +
> +	/*
> +	 * Checkpoint a dummy file-lock to mark the end of file-locks
> +	 * for this fd.
> +	 */
> +	memset(&last_lock, 0, sizeof(struct file_lock));
> +	last_lock.fl_start = -1;
> +	last_lock.fl_flags = FL_POSIX;
> +	rc = checkpoint_one_file_lock(ctx, file, fd, &last_lock);

Seems like you could just pull allocation of h out of checkpoint_one_file_lock,
pass it as a parameter to that function, then outside the loop
fill out the "dummy" here, and call:

	rc = ckpt_write_obj(ctx, &h->h);

directly instead of inventing a last_lock struct.

> +	if (rc < 0)
> +		ckpt_err(ctx,  rc, "%(T)fd %d, checkpoint last-lock failed\n",
> +				fd);
> +out:
> +	unlock_kernel();
> +	return rc;
> +}
> +
>  /**
> - * ckpt_write_file_desc - dump the state of a given file descriptor
> + * checkpoint_file_desc - dump the state of a given file descriptor
>   * @ctx: checkpoint context
>   * @files: files_struct pointer
>   * @fd: file descriptor
> @@ -282,18 +361,6 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
>  	}
>  	rcu_read_unlock();
> 
> -	ret = find_locks_with_owner(file, files);
> -	/*
> -	 * find_locks_with_owner() returns an error when there
> -	 * are no locks found, so we *want* it to return an error
> -	 * code.  Its success means we have to fail the checkpoint.
> -	 */
> -	if (!ret) {
> -		ret = -EBADF;
> -		ckpt_err(ctx, ret, "%(T)fd %d has file lock or lease\n", fd);
> -		goto out;
> -	}
> -
>  	/* sanity check (although this shouldn't happen) */
>  	ret = -EBADF;
>  	if (!file) {
> @@ -328,6 +395,11 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
>  	h->fd_close_on_exec = coe;
> 
>  	ret = ckpt_write_obj(ctx, &h->h);
> +	if (ret < 0)
> +		goto out;
> +
> +	ret = checkpoint_file_locks(ctx, files, file, fd);
> +
>  out:
>  	ckpt_hdr_put(ctx, h);
>  	if (file)
> diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
> index 790214f..d2a0fcd 100644
> --- a/include/linux/checkpoint_hdr.h
> +++ b/include/linux/checkpoint_hdr.h
> @@ -144,6 +144,8 @@ enum {
>  #define CKPT_HDR_TTY_LDISC CKPT_HDR_TTY_LDISC
>  	CKPT_HDR_EPOLL_ITEMS,  /* must be after file-table */
>  #define CKPT_HDR_EPOLL_ITEMS CKPT_HDR_EPOLL_ITEMS
> +	CKPT_HDR_FILE_LOCK,
> +#define CKPT_HDR_FILE_LOCK CKPT_HDR_FILE_LOCK
> 
>  	CKPT_HDR_MM = 401,
>  #define CKPT_HDR_MM CKPT_HDR_MM
> @@ -576,6 +578,14 @@ struct ckpt_hdr_file_generic {
>  	struct ckpt_hdr_file common;
>  } __attribute__((aligned(8)));
> 
> +struct ckpt_hdr_file_lock {
> +       struct ckpt_hdr h;
> +       loff_t fl_start;
> +       loff_t fl_end;

fl_start and fl_end are file positions, aren't they?

loff_t is, unfortunately, a notoriously ambiguous type -- it doesn't
meet our standards for a biarch-safe checkpoint image. I think a u64,
like ckpt_hdr_file's f_pos field, is called for.

Cheers,
	-Matt Helsley
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list