[CRIU] [PATCH 2/2] phaul: Go library for live migration
Pavel Emelyanov
xemul at virtuozzo.com
Mon Dec 19 02:18:39 PST 2016
The API is as simple as
srv := MakePhaulServer(comm)
cln := MakePhaulClient(victim, rpc, comm)
cln.Migrate()
* comm is the PhaulComm struct that contains the channels (file
descriptors) that phaul can use to send/receive memory and
(in plans) filesystem contents over the wire
* victim is PhaulVictim interface with (for now) two methods:
- GetRootPid(): pid of the root of the tree to migrate
- DumpCopyRestore(): method that phaul calls when it's time
to do engine-specific dump, images copy and restore on
the destination side.
A few words about the latter -- we've learned that different
engines have their own ways of calling CRIU to dump a container,
so phaul, instead of dumping it on its own, lets the caller
do it. To keep up with the pre-dump stuff, the client should
not forget to do three things:
- set the TrackMem option to true
- set the ParentImg to the passed value
- set the Ps (page server) channel with 'comm'
The criu object is passed here as well, so that caller can
call Dump() on it (once we have keep_open support in libcriu
this will help to avoid additional criu execve).
* rpc is an interface whose methods should be called on the dst
side on the PhaulServer object using whatever RPC the caller
finds acceptable.
As a demonstration the src/test/main.go example is attached. To
see how it goes 'make' it, then start the 'piggie $outfile'
proggie and run 'test $pid' command. The piggie will be, well,
live migrated locally :) i.e. will appear as a process with
different pid (it lives in a pid namespace).
Plans for this source are
- tune and fix
- support keep_open for criu
- add post-copy (lazy pages) support
- add API/framework for FS migration
- move py p.haul to use this compiled library
Signed-off-by: Pavel Emelyanov <xemul at virtuozzo.com>
---
phaul/.gitignore | 1 +
phaul/Makefile | 11 ++++
phaul/piggie.c | 57 ++++++++++++++++++
phaul/src/phaul/client.go | 116 +++++++++++++++++++++++++++++++++++++
phaul/src/phaul/comm.go | 10 ++++
phaul/src/phaul/images.go | 48 ++++++++++++++++
phaul/src/phaul/server.go | 59 +++++++++++++++++++
phaul/src/phaul/stats.go | 30 ++++++++++
phaul/src/phaul/victim.go | 10 ++++
phaul/src/test/main.go | 143 ++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 485 insertions(+)
create mode 100644 phaul/.gitignore
create mode 100644 phaul/Makefile
create mode 100644 phaul/piggie.c
create mode 100644 phaul/src/phaul/client.go
create mode 100644 phaul/src/phaul/comm.go
create mode 100644 phaul/src/phaul/images.go
create mode 100644 phaul/src/phaul/server.go
create mode 100644 phaul/src/phaul/stats.go
create mode 100644 phaul/src/phaul/victim.go
create mode 100644 phaul/src/test/main.go
diff --git a/phaul/.gitignore b/phaul/.gitignore
new file mode 100644
index 0000000..d8d94a2
--- /dev/null
+++ b/phaul/.gitignore
@@ -0,0 +1 @@
+src/stats/stats.pb.go
diff --git a/phaul/Makefile b/phaul/Makefile
new file mode 100644
index 0000000..7e83ba2
--- /dev/null
+++ b/phaul/Makefile
@@ -0,0 +1,11 @@
+# 'all' and 'stats' do not produce a file of the same name; declare them
+# phony so a stray file called 'all' or 'stats' cannot make them look
+# up to date.
+.PHONY: all stats
+
+all: test piggie
+
+# Build the demo binary. GOPATH covers this directory, ../lib/go/ and
+# /usr/share/gocode.
+test: stats
+	GOPATH=$(shell pwd):$(shell pwd)/../lib/go/:/usr/share/gocode go build -o test test
+
+# Generate the Go code for ../images/stats.proto into src/stats/.
+stats:
+	mkdir -p src/stats/
+	protoc --go_out=src/stats/ --proto_path=../images/ ../images/stats.proto
+
+piggie: piggie.c
+	gcc piggie.c -o piggie
diff --git a/phaul/piggie.c b/phaul/piggie.c
new file mode 100644
index 0000000..1dc0801
--- /dev/null
+++ b/phaul/piggie.c
@@ -0,0 +1,57 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <sched.h>
+
+#define STKS (4*4096)
+
+#ifndef CLONE_NEWPID
+#define CLONE_NEWPID 0x20000000
+#endif
+
+/*
+ * Body of the cloned child: start a new session, point stdin at
+ * /dev/null and stdout/stderr at the log file, then print an
+ * incrementing counter once a second, forever.
+ *
+ * @logf: path of the log file, passed through clone()'s void * argument.
+ *
+ * Returns 1 if the standard descriptors cannot be set up; otherwise it
+ * never returns.
+ */
+static int do_test(void *logf)
+{
+	int fd, i = 0;
+
+	setsid();
+
+	close(0);
+	close(1);
+	close(2);
+
+	fd = open("/dev/null", O_RDONLY);
+	if (fd < 0)
+		return 1;
+	if (fd != 0) {
+		dup2(fd, 0);
+		close(fd);
+	}
+
+	fd = open(logf, O_WRONLY | O_TRUNC | O_CREAT, 0600);
+	if (fd < 0)
+		return 1;
+	/* Without a usable stdout the counter below would go nowhere. */
+	if (dup2(fd, 1) < 0 || dup2(fd, 2) < 0)
+		return 1;
+	if (fd != 1 && fd != 2)
+		close(fd);
+
+	while (1) {
+		sleep(1);
+		printf("%d\n", i++);
+		fflush(stdout);
+	}
+
+	return 0;
+}
+
+/*
+ * Start do_test() in a new pid namespace on a freshly mapped stack and
+ * report the child's pid. argv[1] is the log file the child writes to.
+ *
+ * Returns 0 once the child is started, 1 on usage or setup errors.
+ */
+int main(int argc, char **argv)
+{
+	int pid;
+	void *stk;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <logfile>\n", argv[0]);
+		return 1;
+	}
+
+	/* Anonymous mappings have no backing file; -1 is the portable fd. */
+	stk = mmap(NULL, STKS, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, -1, 0);
+	if (stk == MAP_FAILED) {
+		perror("mmap");
+		return 1;
+	}
+
+	/* clone() is given the highest address of the mapped area. */
+	pid = clone(do_test, (char *)stk + STKS, SIGCHLD | CLONE_NEWPID, argv[1]);
+	if (pid < 0) {
+		perror("clone");
+		return 1;
+	}
+	printf("Child forked, pid %d\n", pid);
+
+	return 0;
+}
diff --git a/phaul/src/phaul/client.go b/phaul/src/phaul/client.go
new file mode 100644
index 0000000..aeb1a23
--- /dev/null
+++ b/phaul/src/phaul/client.go
@@ -0,0 +1,116 @@
+package phaul
+
+import (
+ "criu"
+ "fmt"
+ "github.com/golang/protobuf/proto"
+ "rpc"
+ "stats"
+)
+
+// Thresholds consulted by isLastIter to decide when pre-dumping stops.
+const (
+	// minPagesWritten: a pre-dump that wrote fewer pages than this ends
+	// the iterations.
+	minPagesWritten uint64 = 64
+	// maxIters: an iteration number at or above this ends the iterations.
+	maxIters int = 8
+	// maxGrowDelta: a growth in written pages, compared with the previous
+	// pre-dump, at or above this ends the iterations.
+	maxGrowDelta int64 = 32
+)
+
+// PhaulClient is the source-side migration driver. Build one with
+// MakePhaulClient and run it with Migrate.
+type PhaulClient struct {
+ // victim supplies the root pid and performs the final dump/copy/restore.
+ victim PhaulVictim
+ // phrpc is called at the start and end of every iteration.
+ phrpc PhaulRpc
+ // comm carries the descriptor passed to CRIU as the page-server fd.
+ comm PhaulComm
+}
+
+// MakePhaulClient bundles the victim, the RPC handle and the
+// communication channels into a client ready to Migrate. The returned
+// error is always nil.
+func MakePhaulClient(v PhaulVictim, rpc PhaulRpc, comm PhaulComm) (*PhaulClient, error) {
+	cln := &PhaulClient{
+		victim: v,
+		phrpc:  rpc,
+		comm:   comm,
+	}
+	return cln, nil
+}
+
+// isLastIter reports whether pre-dumping should stop after iteration
+// iter. It stops when iter has reached maxIters, when the latest
+// pre-dump wrote fewer than minPagesWritten pages, or when the number of
+// written pages grew by at least maxGrowDelta over the previous pre-dump.
+// The reason for stopping is printed to stdout.
+func isLastIter(iter int, stats *stats.DumpStatsEntry, prev_stats *stats.DumpStatsEntry) bool {
+	if iter >= maxIters {
+		fmt.Printf("`- max iters reached\n")
+		return true
+	}
+
+	written := stats.GetPagesWritten()
+	if written < minPagesWritten {
+		fmt.Printf("`- tiny pre-dump (%d) reached\n", int(written))
+		return true
+	}
+
+	if grown := int64(written) - int64(prev_stats.GetPagesWritten()); grown >= maxGrowDelta {
+		fmt.Printf("`- grow iter (%d) reached\n", int(grown))
+		return true
+	}
+
+	return false
+}
+
+// Migrate live-migrates the victim's process tree.
+//
+// It runs iterative pre-dumps. Each round calls StartIter on the RPC
+// handle, pre-dumps into a fresh images directory (with the previous
+// one as parent, pages going to comm.Memfd), calls StopIter and reads
+// the dump statistics. Once isLastIter says so, one more iteration is
+// started and the victim's DumpCopyRestore does the final stage.
+//
+// The first error encountered is returned; in the final stage an error
+// from DumpCopyRestore takes precedence over one from StopIter.
+func (pc *PhaulClient) Migrate() error {
+	criu := criu.MakeCriu()
+	psi := rpc.CriuPageServerInfo{
+		Fd: proto.Int32(int32(pc.comm.Memfd)),
+	}
+	opts := rpc.CriuOpts{
+		Pid:      proto.Int32(int32(pc.victim.GetRootPid())),
+		LogLevel: proto.Int32(4),
+		LogFile:  proto.String("pre-dump.log"),
+		Ps:       &psi,
+	}
+
+	imgs, err := preparePhaulImages("c")
+	if err != nil {
+		return err
+	}
+	prev_stats := &stats.DumpStatsEntry{}
+	iter := 0
+
+	for {
+		err = pc.phrpc.StartIter()
+		if err != nil {
+			return err
+		}
+
+		prev_p := imgs.lastImagesDir()
+		img_dir, err := imgs.openNextDir()
+		if err != nil {
+			return err
+		}
+
+		opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd()))
+		if prev_p != "" {
+			opts.ParentImg = proto.String(prev_p)
+		}
+
+		err = criu.PreDump(opts, nil)
+		img_dir.Close()
+		if err != nil {
+			return err
+		}
+
+		err = pc.phrpc.StopIter()
+		if err != nil {
+			return err
+		}
+
+		// Only the directory name is used here, so the closed handle is fine.
+		st, err := criuGetDumpStats(img_dir)
+		if err != nil {
+			return err
+		}
+
+		if isLastIter(iter, st, prev_stats) {
+			break
+		}
+
+		prev_stats = st
+		// Count the completed round; without this the maxIters cap in
+		// isLastIter could never trigger.
+		iter++
+	}
+
+	if err = pc.phrpc.StartIter(); err != nil {
+		return err
+	}
+
+	err = pc.victim.DumpCopyRestore(criu, pc.comm, imgs.lastImagesDir())
+	// StopIter is called even when the final stage failed.
+	if stopErr := pc.phrpc.StopIter(); err == nil {
+		err = stopErr
+	}
+
+	return err
+}
diff --git a/phaul/src/phaul/comm.go b/phaul/src/phaul/comm.go
new file mode 100644
index 0000000..fed4575
--- /dev/null
+++ b/phaul/src/phaul/comm.go
@@ -0,0 +1,10 @@
+package phaul
+
+// PhaulComm holds the channels (file descriptors) phaul uses to move
+// data between the two sides of a migration.
+type PhaulComm struct {
+ // Memfd is the descriptor handed to CRIU as the page-server fd.
+ Memfd int
+}
+
+// PhaulRpc is the set of calls the client makes around every iteration.
+// PhaulServer provides both methods.
+type PhaulRpc interface {
+ // StartIter is called before each pre-dump and before the final stage.
+ StartIter() error
+ // StopIter is called after each pre-dump and after the final stage.
+ StopIter() error
+}
diff --git a/phaul/src/phaul/images.go b/phaul/src/phaul/images.go
new file mode 100644
index 0000000..6546f40
--- /dev/null
+++ b/phaul/src/phaul/images.go
@@ -0,0 +1,48 @@
+package phaul
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+)
+
+// images tracks the numbered per-iteration image directories kept under
+// one top-level directory.
+type images struct {
+ // cursor is the index of the next directory to create.
+ cursor int
+ // sfx is the suffix appended to images_dir to name the top-level directory.
+ sfx string
+}
+
+// images_dir is the name prefix of the top-level images directory; the
+// full name is images_dir + "_" + suffix.
+const images_dir = "phaul_img"
+
+// preparePhaulImages creates the top-level images directory
+// images_dir + "_" + typ and returns a tracker whose cursor is at zero.
+// It fails if the directory cannot be created.
+func preparePhaulImages(typ string) (*images, error) {
+	top := images_dir + "_" + typ
+	if err := os.Mkdir(top, 0700); err != nil {
+		return nil, err
+	}
+
+	return &images{sfx: typ}, nil
+}
+
+// getPath returns the relative path of the idx-th image directory,
+// i.e. "<images_dir>_<sfx>/<idx>".
+func (i *images) getPath(idx int) string {
+	// The suffix is passed as an argument rather than spliced into the
+	// format string, so a '%' in it cannot be misread as a verb.
+	return fmt.Sprintf("%s_%s/%d", images_dir, i.sfx, idx)
+}
+
+// openNextDir creates the directory for the current cursor position,
+// advances the cursor and returns the directory opened for reading.
+// The caller closes the returned file.
+func (i *images) openNextDir() (*os.File, error) {
+	next := i.getPath(i.cursor)
+	if err := os.Mkdir(next, 0700); err != nil {
+		return nil, err
+	}
+
+	// The cursor moves on once the directory exists, even if the open
+	// below fails.
+	i.cursor++
+	return os.Open(next)
+}
+
+func (i *images) lastImagesDir() string {
+ var ret string
+ if i.cursor == 0 {
+ ret = ""
+ } else {
+ ret, _ = filepath.Abs(i.getPath(i.cursor - 1))
+ }
+ return ret
+}
diff --git a/phaul/src/phaul/server.go b/phaul/src/phaul/server.go
new file mode 100644
index 0000000..504669d
--- /dev/null
+++ b/phaul/src/phaul/server.go
@@ -0,0 +1,59 @@
+package phaul
+
+import (
+ "criu"
+ "fmt"
+ "github.com/golang/protobuf/proto"
+ "rpc"
+)
+
+// PhaulServer is the destination side of a migration: it starts a CRIU
+// page server for every iteration the client announces.
+type PhaulServer struct {
+ // comm carries the descriptor passed to CRIU as the page-server fd.
+ comm PhaulComm
+ // imgs tracks the per-iteration image directories on this side.
+ imgs *images
+ // cr is the CRIU handle used to start the page server.
+ cr *criu.Criu
+}
+
+// MakePhaulServer creates the server-side images directory (suffix "s")
+// and a CRIU handle, and returns a server using comm. It fails if the
+// images directory cannot be created.
+func MakePhaulServer(comm PhaulComm) (*PhaulServer, error) {
+	imgs, err := preparePhaulImages("s")
+	if err != nil {
+		return nil, err
+	}
+
+	srv := &PhaulServer{
+		comm: comm,
+		imgs: imgs,
+		cr:   criu.MakeCriu(),
+	}
+	return srv, nil
+}
+
+// StartIter opens the next image directory and starts a CRIU page
+// server on comm.Memfd that stores into it, with the previous directory
+// (if any) as parent. The directory handle is closed when the call
+// returns.
+func (s *PhaulServer) StartIter() error {
+	fmt.Printf("S: start iter\n")
+
+	parent := s.imgs.lastImagesDir()
+	dir, err := s.imgs.openNextDir()
+	if err != nil {
+		return err
+	}
+	defer dir.Close()
+
+	opts := rpc.CriuOpts{
+		LogLevel: proto.Int32(4),
+		LogFile:  proto.String("ps.log"),
+		Ps: &rpc.CriuPageServerInfo{
+			Fd: proto.Int32(int32(s.comm.Memfd)),
+		},
+		ImagesDirFd: proto.Int32(int32(dir.Fd())),
+	}
+	if parent != "" {
+		opts.ParentImg = proto.String(parent)
+	}
+
+	return s.cr.StartPageServer(opts)
+}
+
+// LastImagesDir returns the absolute path of the most recent image
+// directory on the server side, or "" if no iteration has started yet.
+//
+// It uses a pointer receiver like the other PhaulServer methods.
+func (s *PhaulServer) LastImagesDir() string {
+	return s.imgs.lastImagesDir()
+}
+
+// StopIter ends an iteration on the server side. It does nothing and
+// always returns nil.
+func (s *PhaulServer) StopIter() error {
+ return nil
+}
diff --git a/phaul/src/phaul/stats.go b/phaul/src/phaul/stats.go
new file mode 100644
index 0000000..f5ea455
--- /dev/null
+++ b/phaul/src/phaul/stats.go
@@ -0,0 +1,30 @@
+package phaul
+
+import (
+ "fmt"
+ "github.com/golang/protobuf/proto"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "stats"
+)
+
+func criuGetDumpStats(img_dir *os.File) (*stats.DumpStatsEntry, error) {
+ stf, err := os.Open(img_dir.Name() + "/stats-dump")
+ if err != nil {
+ return nil, err
+ }
+ defer stf.Close()
+
+ buf := make([]byte, 2*4096)
+ sz, err := stf.Read(buf)
+ if err != nil {
+ return nil, err
+ }
+
+ st := &stats.StatsEntry{}
+ // Skip 2 magic values and entry size
+ err = proto.Unmarshal(buf[12:sz], st)
+ if err != nil {
+ return nil, err
+ }
+
+ return st.GetDump(), nil
+}
diff --git a/phaul/src/phaul/victim.go b/phaul/src/phaul/victim.go
new file mode 100644
index 0000000..0b04bda
--- /dev/null
+++ b/phaul/src/phaul/victim.go
@@ -0,0 +1,10 @@
+package phaul
+
+import (
+ "criu"
+)
+
+// PhaulVictim is what the caller implements to describe the thing being
+// migrated.
+type PhaulVictim interface {
+ // GetRootPid returns the pid of the root of the process tree to migrate.
+ GetRootPid() int
+ // DumpCopyRestore is called when it is time for the engine-specific
+ // dump, images copy and restore on the destination side.
+ // last_images_path is the last pre-dump directory, to be used as the
+ // parent images path.
+ DumpCopyRestore(criu *criu.Criu, comm PhaulComm, last_images_path string) error
+}
diff --git a/phaul/src/test/main.go b/phaul/src/test/main.go
new file mode 100644
index 0000000..45786a4
--- /dev/null
+++ b/phaul/src/test/main.go
@@ -0,0 +1,143 @@
+package main
+
+import (
+ "criu"
+ "fmt"
+ "github.com/golang/protobuf/proto"
+ "os"
+ "phaul"
+ "rpc"
+ "strconv"
+ "strings"
+ "syscall"
+)
+
+// testVictim is the demo PhaulVictim: one process tree migrated locally.
+type testVictim struct {
+ // pid is the root pid of the tree to migrate.
+ pid int
+ // srv is the local server, asked for its last images directory.
+ srv *phaul.PhaulServer
+}
+
+// GetRootPid returns the pid given on the command line.
+func (v testVictim) GetRootPid() int {
+ return v.pid
+}
+
+// testNfy is the notify object passed to the final Dump; it defines
+// PostDump itself.
+// NOTE(review): criu.CriuNoNotify presumably supplies no-op versions of
+// the remaining callbacks -- confirm against the criu package.
+type testNfy struct {
+ criu.CriuNoNotify
+ // last_images_dir is the directory PostDump links images into and
+ // restores from.
+ last_images_dir string
+}
+
+// images_dir is the directory that receives the final dump's images.
+const images_dir = "test_images"
+
+// mergeImages hard-links every "*.img" file found in dump_dir into
+// last_pre_dump_dir under the same name, printing each one as it goes.
+// It stops at the first error.
+func mergeImages(dump_dir, last_pre_dump_dir string) error {
+	src, err := os.Open(dump_dir)
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	names, err := src.Readdirnames(0)
+	if err != nil {
+		return err
+	}
+
+	for _, name := range names {
+		if strings.HasSuffix(name, ".img") {
+			fmt.Printf("%s -> %s/\n", name, last_pre_dump_dir)
+			from := dump_dir + "/" + name
+			to := last_pre_dump_dir + "/" + name
+			if err = syscall.Link(from, to); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// PostDump links the final dump's images into the last images
+// directory and then restores from that directory with a fresh CRIU
+// handle.
+func (nfy testNfy) PostDump() error {
+	if err := mergeImages(images_dir, nfy.last_images_dir); err != nil {
+		return err
+	}
+
+	cr := criu.MakeCriu()
+	dir, err := os.Open(nfy.last_images_dir)
+	if err != nil {
+		return err
+	}
+	defer dir.Close()
+
+	opts := rpc.CriuOpts{
+		LogLevel:    proto.Int32(4),
+		LogFile:     proto.String("restore.log"),
+		ImagesDirFd: proto.Int32(int32(dir.Fd())),
+	}
+
+	fmt.Printf("Do restore\n")
+	return cr.Restore(opts, nil)
+}
+
+// DumpCopyRestore runs the final stage for the demo: it creates
+// images_dir and dumps the victim into it, with memory tracking on,
+// last_images_dir as parent and comm.Memfd as the page-server fd. The
+// restore is done by testNfy.PostDump.
+func (v testVictim) DumpCopyRestore(cr *criu.Criu, comm phaul.PhaulComm, last_images_dir string) error {
+	fmt.Printf("Final stage\n")
+
+	if err := os.Mkdir(images_dir, 0700); err != nil {
+		return err
+	}
+
+	dir, err := os.Open(images_dir)
+	if err != nil {
+		return err
+	}
+	defer dir.Close()
+
+	opts := rpc.CriuOpts{
+		Pid:      proto.Int32(int32(v.pid)),
+		LogLevel: proto.Int32(4),
+		LogFile:  proto.String("dump.log"),
+		Ps: &rpc.CriuPageServerInfo{
+			Fd: proto.Int32(int32(comm.Memfd)),
+		},
+		TrackMem:    proto.Bool(true),
+		ImagesDirFd: proto.Int32(int32(dir.Fd())),
+		ParentImg:   proto.String(last_images_dir),
+	}
+
+	fmt.Printf("Do dump\n")
+	nfy := testNfy{last_images_dir: v.srv.LastImagesDir()}
+	return cr.Dump(opts, nfy)
+}
+
+// main migrates the process tree rooted at the pid given as the first
+// argument. Server and client live in this one process and talk over a
+// socketpair. Every failure is reported and exits with status 1.
+func main() {
+	// Validate the argument up front instead of panicking on os.Args[1].
+	if len(os.Args) < 2 {
+		fmt.Printf("Usage: %s <pid>\n", os.Args[0])
+		os.Exit(1)
+	}
+	pid, err := strconv.Atoi(os.Args[1])
+	if err != nil {
+		fmt.Printf("Bad pid %q: %v\n", os.Args[1], err)
+		os.Exit(1)
+	}
+
+	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0)
+	if err != nil {
+		fmt.Printf("Can't make socketpair: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Make server part (socket %d)\n", fds[1])
+	srv, err := phaul.MakePhaulServer(phaul.PhaulComm{Memfd: fds[1]})
+	if err != nil {
+		fmt.Printf("Can't make server: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Make client part (socket %d)\n", fds[0])
+	cln, err := phaul.MakePhaulClient(testVictim{pid: pid, srv: srv},
+		srv, phaul.PhaulComm{Memfd: fds[0]})
+	if err != nil {
+		fmt.Printf("Can't make client: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Migrate\n")
+	if err = cln.Migrate(); err != nil {
+		fmt.Printf("Failed: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("SUCCESS!\n")
+}
--
2.5.0
More information about the CRIU
mailing list