[CRIU] [PATCH] phaul: Go library for live migration
Andrei Vagin
avagin at virtuozzo.com
Wed Aug 2 09:47:17 MSK 2017
On Mon, Mar 13, 2017 at 11:34:31AM -0700, Andrei Vagin wrote:
> Applied. Thanks. Could you add the test to scripts/travis/travis-tests?
ping
https://github.com/xemul/criu/issues/367
>
> On Thu, Feb 16, 2017 at 03:06:11PM +0300, Pavel Emelyanov wrote:
> > The API is as simple as
> >
> > srv := MakePhaulServer(config)
> > cln := MakePhaulClient(local, remote, config)
> > cln.Migrate()
> >
> > * config is the PhaulConfig struct that contains pid to migrate,
> > memory transfer channel (file descriptor) that phaul can use
> > to send/receive memory and path to existing directory where
> > phaul can put intermediate files and images.
> >
> > * local is PhaulLocal interface with (for now) the single method
> > - DumpCopyRestore(): method that phaul calls when it's time
> > to do engine-specific dump, images copy and restore on
> > the destination side.
> >
> > A few words about the latter -- we've learned that different
> > engines have their own way to call CRIU to dump a container,
> > so phaul, instead of dumping one on its own, lets the caller
> > do it. To keep up with pre-dump stuff, the client should
> > not forget to do three things:
> >
> > - set the TrackMem option to true
> > - set the ParentImg to the passed value
> > - set the Ps (page server) channel with 'config.Memfd'
> >
> > The criu object is passed here as well, so that caller can
> > call Dump() on it (once we have keep_open support in libcriu
> > this will help to avoid additional criu execve).
> >
> > The method also should handle the PostDump notification and
> > do images-copy and restore in it. Not sure how to wrap this
> > into phaul better.
> >
> > * remote is PhaulRemote interface whose method should be called
> > on the dst side on the PhaulServer object using whatever RPC
> > the caller finds acceptable.
> >
> > As a demonstration the src/test/main.go example is attached. To
> > see how it goes 'make' it, then start the 'piggie $outfile'
> > proggie and run 'test $pid' command. The piggie will be, well,
> > live migrated locally :) i.e. will appear as a process with
> > different pid (it lives in a pid namespace).
> >
> > Changes since v2:
> >
> > * Reworked the API onto local/remote/config scheme
> > * Added ability to configure directory for images
> > * Re-used server side Criu object for final restore
> >
> > Changes since v1:
> >
> > * Supported keep_open-s for pre-dumps
> > * Added code comments about interface
> > * Simplified the example code
> >
> > Further plans for this are
> >
> > - move py p.haul to use this compiled library
> > - add post-copy (lazy pages) support (with Mike's help)
> > - add image-cache and image-proxy (with Rodrigo's help)
> > - add API/framework for FS migration
> >
> > Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
> > ---
> > phaul/.gitignore | 1 +
> > phaul/Makefile | 11 +++
> > phaul/piggie.c | 57 ++++++++++++++
> > phaul/src/phaul/api.go | 52 +++++++++++++
> > phaul/src/phaul/client.go | 130 ++++++++++++++++++++++++++++++++
> > phaul/src/phaul/images.go | 41 ++++++++++
> > phaul/src/phaul/server.go | 73 ++++++++++++++++++
> > phaul/src/phaul/stats.go | 31 ++++++++
> > phaul/src/test/main.go | 188 ++++++++++++++++++++++++++++++++++++++++++++++
> > 9 files changed, 584 insertions(+)
> > create mode 100644 phaul/.gitignore
> > create mode 100644 phaul/Makefile
> > create mode 100644 phaul/piggie.c
> > create mode 100644 phaul/src/phaul/api.go
> > create mode 100644 phaul/src/phaul/client.go
> > create mode 100644 phaul/src/phaul/images.go
> > create mode 100644 phaul/src/phaul/server.go
> > create mode 100644 phaul/src/phaul/stats.go
> > create mode 100644 phaul/src/test/main.go
> >
> > diff --git a/phaul/.gitignore b/phaul/.gitignore
> > new file mode 100644
> > index 0000000..d8d94a2
> > --- /dev/null
> > +++ b/phaul/.gitignore
> > @@ -0,0 +1 @@
> > +src/stats/stats.pb.go
> > diff --git a/phaul/Makefile b/phaul/Makefile
> > new file mode 100644
> > index 0000000..7e83ba2
> > --- /dev/null
> > +++ b/phaul/Makefile
> > @@ -0,0 +1,11 @@
> > +all: test piggie
> > +
> > +test: stats
> > + GOPATH=$(shell pwd):$(shell pwd)/../lib/go/:/usr/share/gocode go build -o test test
> > +
> > +stats:
> > + mkdir -p src/stats/
> > + protoc --go_out=src/stats/ --proto_path=../images/ ../images/stats.proto
> > +
> > +piggie: piggie.c
> > + gcc piggie.c -o piggie
> > diff --git a/phaul/piggie.c b/phaul/piggie.c
> > new file mode 100644
> > index 0000000..1dc0801
> > --- /dev/null
> > +++ b/phaul/piggie.c
> > @@ -0,0 +1,57 @@
> > +#define _GNU_SOURCE
> > +#include <stdio.h>
> > +#include <signal.h>
> > +#include <unistd.h>
> > +#include <sys/mman.h>
> > +#include <fcntl.h>
> > +#include <sched.h>
> > +
> > +#define STKS (4*4096)
> > +
> > +#ifndef CLONE_NEWPID
> > +#define CLONE_NEWPID 0x20000000
> > +#endif
> > +
> > +static int do_test(void *logf)
> > +{
> > + int fd, i = 0;
> > +
> > + setsid();
> > +
> > + close(0);
> > + close(1);
> > + close(2);
> > +
> > + fd = open("/dev/null", O_RDONLY);
> > + if (fd != 0) {
> > + dup2(fd, 0);
> > + close(fd);
> > + }
> > +
> > + fd = open(logf, O_WRONLY | O_TRUNC | O_CREAT, 0600);
> > + dup2(fd, 1);
> > + dup2(fd, 2);
> > + if (fd != 1 && fd != 2)
> > + close(fd);
> > +
> > + while (1) {
> > + sleep(1);
> > + printf("%d\n", i++);
> > + fflush(stdout);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +int main(int argc, char **argv)
> > +{
> > + int pid;
> > + void *stk;
> > +
> > + stk = mmap(NULL, STKS, PROT_READ | PROT_WRITE,
> > + MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0);
> > + pid = clone(do_test, stk + STKS, SIGCHLD | CLONE_NEWPID, argv[1]);
> > + printf("Child forked, pid %d\n", pid);
> > +
> > + return 0;
> > +}
> > diff --git a/phaul/src/phaul/api.go b/phaul/src/phaul/api.go
> > new file mode 100644
> > index 0000000..6093a12
> > --- /dev/null
> > +++ b/phaul/src/phaul/api.go
> > @@ -0,0 +1,52 @@
> > +package phaul
> > +
> > +import (
> > + "criu"
> > +)
> > +
> > +/*
> > + * Configuration passed around
> > + *
> > + * Pid is what we migrate
> > + * Memfd is the file descriptor via which criu can
> > + * transfer memory pages.
> > + * Wdir is the directory where phaul can put images
> > + * and other stuff
> > + */
> > +type PhaulConfig struct {
> > + Pid int
> > + Memfd int
> > + Wdir string
> > +}
> > +
> > +/*
> > + * Rpc between PhaulClient and PhaulServer. When client
> > + * calls anything on this one, the corresponding method
> > + * should be called on PhaulServer object.
> > + */
> > +type PhaulRemote interface {
> > + StartIter() error
> > + StopIter() error
> > +}
> > +
> > +/*
> > + * Interface to local classes. PhaulClient calls them when
> > + * it needs something on the source node.
> > + *
> > + * Methods:
> > + *
> > + * - DumpCopyRestore() is called on client side when the
> > + * pre-iterations are over and it's time to do full dump,
> > + * copy images and restore them on the server side.
> > + * All the time this method is executed victim tree is
> > + * frozen on client. Returning nil kills the tree, error
> > + * unfreezes it and resumes. The criu argument is the
> > + * pointer on created criu.Criu object on which client
> > + * may call Dump(). The requirement on opts passed are:
> > + * set Ps.Fd to comm.Memfd
> > + * set ParentImg to last_client_images_path
> > + * set TrackMem to true
> > + */
> > +type PhaulLocal interface {
> > + DumpCopyRestore(criu *criu.Criu, c PhaulConfig, last_client_images_path string) error
> > +}
> > diff --git a/phaul/src/phaul/client.go b/phaul/src/phaul/client.go
> > new file mode 100644
> > index 0000000..06fb821
> > --- /dev/null
> > +++ b/phaul/src/phaul/client.go
> > @@ -0,0 +1,130 @@
> > +package phaul
> > +
> > +import (
> > + "criu"
> > + "fmt"
> > + "github.com/golang/protobuf/proto"
> > + "rpc"
> > + "stats"
> > +)
> > +
> > +const minPagesWritten uint64 = 64
> > +const maxIters int = 8
> > +const maxGrowDelta int64 = 32
> > +
> > +type PhaulClient struct {
> > + local PhaulLocal
> > + remote PhaulRemote
> > + cfg PhaulConfig
> > +}
> > +
> > +/*
> > + * Main entry point. Caller should create the client object by
> > + * passing here local, remote and comm. See comment in corresponding
> > + * interfaces/structs for explanation.
> > + *
> > + * Then call client.Migrate() and enjoy :)
> > + */
> > +func MakePhaulClient(l PhaulLocal, r PhaulRemote, c PhaulConfig) (*PhaulClient, error) {
> > + return &PhaulClient{local: l, remote: r, cfg: c}, nil
> > +}
> > +
> > +func isLastIter(iter int, stats *stats.DumpStatsEntry, prev_stats *stats.DumpStatsEntry) bool {
> > + if iter >= maxIters {
> > + fmt.Printf("`- max iters reached\n")
> > + return true
> > + }
> > +
> > + pagesWritten := stats.GetPagesWritten()
> > + if pagesWritten < minPagesWritten {
> > + fmt.Printf("`- tiny pre-dump (%d) reached\n", int(pagesWritten))
> > + return true
> > + }
> > +
> > + pages_delta := int64(pagesWritten) - int64(prev_stats.GetPagesWritten())
> > + if pages_delta >= maxGrowDelta {
> > + fmt.Printf("`- grow iter (%d) reached\n", int(pages_delta))
> > + return true
> > + }
> > +
> > + return false
> > +}
> > +
> > +func (pc *PhaulClient) Migrate() error {
> > + criu := criu.MakeCriu()
> > + psi := rpc.CriuPageServerInfo{
> > + Fd: proto.Int32(int32(pc.cfg.Memfd)),
> > + }
> > + opts := rpc.CriuOpts{
> > + Pid: proto.Int32(int32(pc.cfg.Pid)),
> > + LogLevel: proto.Int32(4),
> > + LogFile: proto.String("pre-dump.log"),
> > + Ps: &psi,
> > + }
> > +
> > + err := criu.Prepare()
> > + if err != nil {
> > + return err
> > + }
> > +
> > + defer criu.Cleanup()
> > +
> > + imgs, err := preparePhaulImages(pc.cfg.Wdir)
> > + if err != nil {
> > + return err
> > + }
> > + prev_stats := &stats.DumpStatsEntry{}
> > + iter := 0
> > +
> > + for {
> > + err = pc.remote.StartIter()
> > + if err != nil {
> > + return err
> > + }
> > +
> > + prev_p := imgs.lastImagesDir()
> > + img_dir, err := imgs.openNextDir()
> > + if err != nil {
> > + return err
> > + }
> > +
> > + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd()))
> > + if prev_p != "" {
> > + opts.ParentImg = proto.String(prev_p)
> > + }
> > +
> > + err = criu.PreDump(opts, nil)
> > + img_dir.Close()
> > + if err != nil {
> > + return err
> > + }
> > +
> > + err = pc.remote.StopIter()
> > + if err != nil {
> > + return err
> > + }
> > +
> > + st, err := criuGetDumpStats(img_dir)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + if isLastIter(iter, st, prev_stats) {
> > + break
> > + }
> > +
> > + prev_stats = st
> > + }
> > +
> > + err = pc.remote.StartIter()
> > + if err == nil {
> > + prev_p := imgs.lastImagesDir()
> > + err = pc.local.DumpCopyRestore(criu, pc.cfg, prev_p)
> > + err2 := pc.remote.StopIter()
> > + if err == nil {
> > + err = err2
> > + }
> > + }
> > +
> > + return err
> > +}
> > diff --git a/phaul/src/phaul/images.go b/phaul/src/phaul/images.go
> > new file mode 100644
> > index 0000000..5a433ca
> > --- /dev/null
> > +++ b/phaul/src/phaul/images.go
> > @@ -0,0 +1,41 @@
> > +package phaul
> > +
> > +import (
> > + "fmt"
> > + "os"
> > + "path/filepath"
> > +)
> > +
> > +type images struct {
> > + cursor int
> > + dir string
> > +}
> > +
> > +func preparePhaulImages(wdir string) (*images, error) {
> > + return &images{dir: wdir}, nil
> > +}
> > +
> > +func (i *images) getPath(idx int) string {
> > + return fmt.Sprintf(i.dir+"/%d", idx)
> > +}
> > +
> > +func (i *images) openNextDir() (*os.File, error) {
> > + ipath := i.getPath(i.cursor)
> > + err := os.Mkdir(ipath, 0700)
> > + if err != nil {
> > + return nil, err
> > + }
> > +
> > + i.cursor++
> > + return os.Open(ipath)
> > +}
> > +
> > +func (i *images) lastImagesDir() string {
> > + var ret string
> > + if i.cursor == 0 {
> > + ret = ""
> > + } else {
> > + ret, _ = filepath.Abs(i.getPath(i.cursor - 1))
> > + }
> > + return ret
> > +}
> > diff --git a/phaul/src/phaul/server.go b/phaul/src/phaul/server.go
> > new file mode 100644
> > index 0000000..8992ee9
> > --- /dev/null
> > +++ b/phaul/src/phaul/server.go
> > @@ -0,0 +1,73 @@
> > +package phaul
> > +
> > +import (
> > + "criu"
> > + "fmt"
> > + "github.com/golang/protobuf/proto"
> > + "rpc"
> > +)
> > +
> > +type PhaulServer struct {
> > + cfg PhaulConfig
> > + imgs *images
> > + cr *criu.Criu
> > +}
> > +
> > +/*
> > + * Main entry point. Make the server with comm and call PhaulRemote
> > + * methods on it upon client requests.
> > + */
> > +func MakePhaulServer(c PhaulConfig) (*PhaulServer, error) {
> > + img, err := preparePhaulImages(c.Wdir)
> > + if err != nil {
> > + return nil, err
> > + }
> > +
> > + cr := criu.MakeCriu()
> > +
> > + return &PhaulServer{imgs: img, cfg: c, cr: cr}, nil
> > +}
> > +
> > +/*
> > + * PhaulRemote methods
> > + */
> > +func (s *PhaulServer) StartIter() error {
> > + fmt.Printf("S: start iter\n")
> > + psi := rpc.CriuPageServerInfo{
> > + Fd: proto.Int32(int32(s.cfg.Memfd)),
> > + }
> > + opts := rpc.CriuOpts{
> > + LogLevel: proto.Int32(4),
> > + LogFile: proto.String("ps.log"),
> > + Ps: &psi,
> > + }
> > +
> > + prev_p := s.imgs.lastImagesDir()
> > + img_dir, err := s.imgs.openNextDir()
> > + if err != nil {
> > + return err
> > + }
> > + defer img_dir.Close()
> > +
> > + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd()))
> > + if prev_p != "" {
> > + opts.ParentImg = proto.String(prev_p)
> > + }
> > +
> > + return s.cr.StartPageServer(opts)
> > +}
> > +
> > +func (s *PhaulServer) StopIter() error {
> > + return nil
> > +}
> > +
> > +/*
> > + * Server-local methods
> > + */
> > +func (s *PhaulServer) LastImagesDir() string {
> > + return s.imgs.lastImagesDir()
> > +}
> > +
> > +func (s *PhaulServer) GetCriu() *criu.Criu {
> > + return s.cr
> > +}
> > diff --git a/phaul/src/phaul/stats.go b/phaul/src/phaul/stats.go
> > new file mode 100644
> > index 0000000..77eb346
> > --- /dev/null
> > +++ b/phaul/src/phaul/stats.go
> > @@ -0,0 +1,31 @@
> > +package phaul
> > +
> > +import (
> > + "github.com/golang/protobuf/proto"
> > + "os"
> > + "stats"
> > +)
> > +
> > +/* FIXME: report stats from CriuResp */
> > +func criuGetDumpStats(img_dir *os.File) (*stats.DumpStatsEntry, error) {
> > + stf, err := os.Open(img_dir.Name() + "/stats-dump")
> > + if err != nil {
> > + return nil, err
> > + }
> > + defer stf.Close()
> > +
> > + buf := make([]byte, 2*4096)
> > + sz, err := stf.Read(buf)
> > + if err != nil {
> > + return nil, err
> > + }
> > +
> > + st := &stats.StatsEntry{}
> > + // Skip 2 magic values and entry size
> > + err = proto.Unmarshal(buf[12:sz], st)
> > + if err != nil {
> > + return nil, err
> > + }
> > +
> > + return st.GetDump(), nil
> > +}
> > diff --git a/phaul/src/test/main.go b/phaul/src/test/main.go
> > new file mode 100644
> > index 0000000..0e853df
> > --- /dev/null
> > +++ b/phaul/src/test/main.go
> > @@ -0,0 +1,188 @@
> > +package main
> > +
> > +import (
> > + "criu"
> > + "fmt"
> > + "github.com/golang/protobuf/proto"
> > + "os"
> > + "phaul"
> > + "rpc"
> > + "strconv"
> > + "strings"
> > + "syscall"
> > +)
> > +
> > +type testLocal struct {
> > + criu.CriuNoNotify
> > + r *testRemote
> > +}
> > +
> > +type testRemote struct {
> > + srv *phaul.PhaulServer
> > +}
> > +
> > +/* Dir where test will put dump images */
> > +const images_dir = "test_images"
> > +
> > +func prepareImages() error {
> > + err := os.Mkdir(images_dir, 0700)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + /* Work dir for PhaulClient */
> > + err = os.Mkdir(images_dir+"/local", 0700)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + /* Work dir for PhaulServer */
> > + err = os.Mkdir(images_dir+"/remote", 0700)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + /* Work dir for DumpCopyRestore */
> > + err = os.Mkdir(images_dir+"/test", 0700)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + return nil
> > +}
> > +
> > +func mergeImages(dump_dir, last_pre_dump_dir string) error {
> > + idir, err := os.Open(dump_dir)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + defer idir.Close()
> > +
> > + imgs, err := idir.Readdirnames(0)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + for _, fname := range imgs {
> > + if !strings.HasSuffix(fname, ".img") {
> > + continue
> > + }
> > +
> > + fmt.Printf("\t%s -> %s/\n", fname, last_pre_dump_dir)
> > + err = syscall.Link(dump_dir+"/"+fname, last_pre_dump_dir+"/"+fname)
> > + if err != nil {
> > + return err
> > + }
> > + }
> > +
> > + return nil
> > +}
> > +
> > +func (r *testRemote) doRestore() error {
> > + last_srv_images_dir := r.srv.LastImagesDir()
> > + /*
> > + * In images_dir we have images from dump, in the
> > + * last_srv_images_dir -- where server-side images
> > + * (from page server, with pages and pagemaps) are.
> > + * Need to put former into latter and restore from
> > + * them.
> > + */
> > + err := mergeImages(images_dir+"/test", last_srv_images_dir)
> > + if err != nil {
> > + return err
> > + }
> > +
> > + img_dir, err := os.Open(last_srv_images_dir)
> > + if err != nil {
> > + return err
> > + }
> > + defer img_dir.Close()
> > +
> > + opts := rpc.CriuOpts{
> > + LogLevel: proto.Int32(4),
> > + LogFile: proto.String("restore.log"),
> > + ImagesDirFd: proto.Int32(int32(img_dir.Fd())),
> > + }
> > +
> > + cr := r.srv.GetCriu()
> > + fmt.Printf("Do restore\n")
> > + return cr.Restore(opts, nil)
> > +}
> > +
> > +func (l *testLocal) PostDump() error {
> > + return l.r.doRestore()
> > +}
> > +
> > +func (l *testLocal) DumpCopyRestore(cr *criu.Criu, cfg phaul.PhaulConfig, last_cln_images_dir string) error {
> > + fmt.Printf("Final stage\n")
> > +
> > + img_dir, err := os.Open(images_dir + "/test")
> > + if err != nil {
> > + return err
> > + }
> > + defer img_dir.Close()
> > +
> > + psi := rpc.CriuPageServerInfo{
> > + Fd: proto.Int32(int32(cfg.Memfd)),
> > + }
> > +
> > + opts := rpc.CriuOpts{
> > + Pid: proto.Int32(int32(cfg.Pid)),
> > + LogLevel: proto.Int32(4),
> > + LogFile: proto.String("dump.log"),
> > + ImagesDirFd: proto.Int32(int32(img_dir.Fd())),
> > + TrackMem: proto.Bool(true),
> > + ParentImg: proto.String(last_cln_images_dir),
> > + Ps: &psi,
> > + }
> > +
> > + fmt.Printf("Do dump\n")
> > + return cr.Dump(opts, l)
> > +}
> > +
> > +func main() {
> > + pid, _ := strconv.Atoi(os.Args[1])
> > + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0)
> > + if err != nil {
> > + fmt.Printf("Can't make socketpair\n")
> > + return
> > + }
> > +
> > + err = prepareImages()
> > + if err != nil {
> > + fmt.Printf("Can't prepare dirs for images\n")
> > + return
> > + }
> > +
> > + fmt.Printf("Make server part (socket %d)\n", fds[1])
> > + srv, err := phaul.MakePhaulServer(phaul.PhaulConfig{
> > + Pid: pid,
> > + Memfd: fds[1],
> > + Wdir: images_dir + "/remote"})
> > + if err != nil {
> > + return
> > + }
> > +
> > + r := &testRemote{srv}
> > +
> > + fmt.Printf("Make client part (socket %d)\n", fds[0])
> > + cln, err := phaul.MakePhaulClient(&testLocal{r: r}, srv,
> > + phaul.PhaulConfig{
> > + Pid: pid,
> > + Memfd: fds[0],
> > + Wdir: images_dir + "/local"})
> > + if err != nil {
> > + return
> > + }
> > +
> > + fmt.Printf("Migrate\n")
> > + err = cln.Migrate()
> > + if err != nil {
> > + fmt.Printf("Failed: ")
> > + fmt.Print(err)
> > + return
> > + }
> > +
> > + fmt.Printf("SUCCESS!\n")
> > +}
> > --
> > 2.5.0
> > _______________________________________________
> > CRIU mailing list
> > CRIU at openvz.org
> > https://lists.openvz.org/mailman/listinfo/criu
> _______________________________________________
> CRIU mailing list
> CRIU at openvz.org
> https://lists.openvz.org/mailman/listinfo/criu
More information about the CRIU
mailing list