[CRIU] [PATCH 1/2] sk-inet: Add initial support for raw sockets
Andrey Vagin
avagin at virtuozzo.com
Wed Mar 15 15:33:18 PDT 2017
On Wed, Feb 22, 2017 at 04:53:27PM +0300, Cyrill Gorcunov wrote:
> For raw sockets we need the DIAG module extension, so if
> we fail while collecting sockets, don't exit with an
> error but warn the user; if we really meet a raw socket
> we will exit later, at the socket lookup stage.
>
> Strictly speaking we can use procfs parsing instead, but
> this is going to be way more complex than the well-known diag
> approach, and taking into account that raw sockets are
> not that widely used, let's support them only when the diag
> module is present in the system.
>
> In this patch the initial raw sockets support added is
> complete enough to handle the SO_IP_SET request from the
> ipset tool (needed by modern containers). But the
> code might need extensions/fixes in the future.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> criu/cr-check.c | 11 ++++++++++
> criu/include/sk-inet.h | 12 +++++++++++
> criu/sk-inet.c | 58 ++++++++++++++++++++++++++++++++++++--------------
> criu/sockets.c | 35 ++++++++++++++++++++++++++++++
> images/sk-inet.proto | 2 ++
> 5 files changed, 102 insertions(+), 16 deletions(-)
>
> diff --git a/criu/cr-check.c b/criu/cr-check.c
> index c8261255fdeb..3e487a774403 100644
> --- a/criu/cr-check.c
> +++ b/criu/cr-check.c
> @@ -49,6 +49,7 @@
> #include "cr_options.h"
> #include "libnetlink.h"
> #include "net.h"
> +#include "inet_diag.h"
> #include "linux/userfaultfd.h"
> #include "restorer.h"
>
> @@ -1090,6 +1091,14 @@ static int check_sk_netns(void)
> return 0;
> }
>
> +static int check_net_diag_raw(void)
> +{
> + check_sock_diag();
> + return !socket_test_collect_bit(AF_INET, IPPROTO_RAW) &&
> + !socket_test_collect_bit(AF_INET6, IPPROTO_RAW);
> +}
> +
> +
> static int check_compat_cr(void)
> {
> if (kdat_compat_sigreturn_test())
> @@ -1202,6 +1211,7 @@ int cr_check(void)
> ret |= check_userns();
> ret |= check_loginuid();
> ret |= check_sk_netns();
> + ret |= check_net_diag_raw();
> }
>
> /*
> @@ -1254,6 +1264,7 @@ static struct feature_list feature_list[] = {
> { "lazy_pages", check_uffd },
> { "compat_cr", check_compat_cr },
> { "sk_ns", check_sk_netns },
> + { "net_diag_raw", check_net_diag_raw },
> { NULL, NULL },
> };
>
> diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
> index bf6fb1d77ddf..5d996581c18d 100644
> --- a/criu/include/sk-inet.h
> +++ b/criu/include/sk-inet.h
> @@ -16,6 +16,18 @@
> #define TCP_REPAIR_OPTIONS 22
> #endif
>
> +#ifndef IP_HDRINCL
> +# define IP_HDRINCL 3
> +#endif
> +
> +#ifndef IP_NODEFRAG
> +# define IP_NODEFRAG 22
> +#endif
> +
> +#ifndef IPV6_HDRINCL
> +# define IPV6_HDRINCL 36
> +#endif
> +
> struct inet_sk_desc {
> struct socket_desc sd;
> unsigned int type;
> diff --git a/criu/sk-inet.c b/criu/sk-inet.c
> index ee6ce60e2b12..4efe6eb4cc11 100644
> --- a/criu/sk-inet.c
> +++ b/criu/sk-inet.c
> @@ -101,7 +101,7 @@ static void show_one_inet_img(const char *act, const InetSkEntry *e)
> e->state, src_addr);
> }
>
> -static int can_dump_ipproto(int ino, int proto)
> +static int can_dump_ipproto(int ino, int proto, int type)
> {
> /* Make sure it's a proto we support */
> switch (proto) {
> @@ -111,8 +111,12 @@ static int can_dump_ipproto(int ino, int proto)
> case IPPROTO_UDPLITE:
> break;
> default:
> - pr_err("Unsupported proto %d for socket %x\n", proto, ino);
> - return 0;
> + /* Raw sockets may have any protocol inside */
> + if (type != SOCK_RAW) {
Maybe we can check the type outside of this function?
> + pr_err("Unsupported proto %d (type %d) for socket %x\n",
> + proto, type, ino);
> + return 0;
> + }
> }
>
> return 1;
> @@ -142,9 +146,9 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
> return 1;
> }
>
> - if (sk->type != SOCK_STREAM) {
> + if (sk->type != SOCK_STREAM && sk->type != SOCK_RAW) {
> pr_err("Can't dump %d inet socket %x. "
> - "Only can stream and dgram.\n",
> + "Only can stream, dgram and raw.\n",
> sk->type, sk->sd.ino);
> return 0;
> }
> @@ -288,12 +292,24 @@ err:
> return NULL;
> }
>
> -static int dump_ip_opts(int sk, IpOptsEntry *ioe)
> +
> +static int dump_ip_opts(int family, int type, int sk, IpOptsEntry *ioe)
> {
> int ret = 0;
>
> - ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> - ioe->has_freebind = ioe->freebind;
> + if (type == SOCK_RAW) {
> + if (family == AF_INET6) {
> + ret |= dump_opt(sk, SOL_IPV6, IPV6_HDRINCL, &ioe->hdrincl);
> + } else {
> + ret |= dump_opt(sk, SOL_IP, IP_HDRINCL, &ioe->hdrincl);
> + ret |= dump_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
> + ioe->has_nodefrag = ioe->nodefrag;
> + }
> + ioe->has_hdrincl = ioe->hdrincl;
> + } else {
> + ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
is IP_FREEBIND not suitable for raw sockets?
> + ioe->has_freebind = ioe->freebind;
> + }
>
> return ret;
> }
> @@ -323,14 +339,18 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
> InetSkEntry ie = INET_SK_ENTRY__INIT;
> IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT;
> SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
> - int ret = -1, err = -1, proto;
> + int ret = -1, err = -1, proto, type;
>
> ret = do_dump_opt(lfd, SOL_SOCKET, SO_PROTOCOL,
> &proto, sizeof(proto));
> if (ret)
> goto err;
> + ret = do_dump_opt(lfd, SOL_SOCKET, SO_TYPE,
> + &type, sizeof(type));
> + if (ret)
> + goto err;
>
> - if (!can_dump_ipproto(p->stat.st_ino, proto))
> + if (!can_dump_ipproto(p->stat.st_ino, proto, type))
Something like this:
if (type != SOCK_RAW && !can_dump_ipproto(p->stat.st_ino, proto, type)) {
> goto err;
>
> sk = (struct inet_sk_desc *)lookup_socket(p->stat.st_ino, family, proto);
> @@ -410,7 +430,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
> memcpy(ie.src_addr, sk->src_addr, pb_repeated_size(&ie, src_addr));
> memcpy(ie.dst_addr, sk->dst_addr, pb_repeated_size(&ie, dst_addr));
>
> - if (dump_ip_opts(lfd, &ipopts))
> + if (dump_ip_opts(family, sk->type, lfd, &ipopts))
> goto err;
>
> if (dump_socket_opts(lfd, &skopts))
> @@ -424,7 +444,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>
> switch (proto) {
> case IPPROTO_TCP:
> - err = dump_one_tcp(lfd, sk);
> + err = (sk->type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0;
> break;
> default:
> err = 0;
> @@ -606,12 +626,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
> return 0;
> }
>
> -int restore_ip_opts(int sk, IpOptsEntry *ioe)
> +int restore_ip_opts(int family, int sk, IpOptsEntry *ioe)
> {
> int ret = 0;
>
> if (ioe->has_freebind)
> ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> + if (ioe->has_nodefrag)
> + ret |= restore_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
> + if (ioe->has_hdrincl)
> + ret |= restore_opt(sk, family == AF_INET6 ? SOL_IPV6 : SOL_IP,
> + family == AF_INET6 ? IPV6_HDRINCL : IP_HDRINCL,
> + &ioe->hdrincl);
>
> return ret;
> }
> @@ -635,7 +661,7 @@ static int open_inet_sk(struct file_desc *d, int *new_fd)
> return -1;
> }
>
> - if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
> + if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM) && (ie->type != SOCK_RAW)) {
> pr_err("Unsupported socket type: %d\n", ie->type);
> return -1;
> }
> @@ -713,7 +739,7 @@ done:
> if (rst_file_params(sk, ie->fown, ie->flags))
> goto err;
>
> - if (ie->ip_opts && restore_ip_opts(sk, ie->ip_opts))
> + if (ie->ip_opts && restore_ip_opts(ie->family, sk, ie->ip_opts))
> goto err;
>
> if (restore_socket_opts(sk, ie->opts))
> @@ -780,7 +806,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
> * sockets could not be bound to them in this moment
> * without setting IP_FREEBIND.
> */
> - if (ii->ie->family == AF_INET6) {
> + if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
> int yes = 1;
>
> if (restore_opt(sk, SOL_IP, IP_FREEBIND, &yes))
> diff --git a/criu/sockets.c b/criu/sockets.c
> index 420eee8252e2..06938f27ce05 100644
> --- a/criu/sockets.c
> +++ b/criu/sockets.c
> @@ -61,9 +61,11 @@ enum socket_cl_bits
> INET_TCP_CL_BIT,
> INET_UDP_CL_BIT,
> INET_UDPLITE_CL_BIT,
> + INET_RAW_CL_BIT,
> INET6_TCP_CL_BIT,
> INET6_UDP_CL_BIT,
> INET6_UDPLITE_CL_BIT,
> + INET6_RAW_CL_BIT,
> UNIX_CL_BIT,
> PACKET_CL_BIT,
> _MAX_CL_BIT,
> @@ -89,6 +91,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
> return INET_UDP_CL_BIT;
> if (proto == IPPROTO_UDPLITE)
> return INET_UDPLITE_CL_BIT;
> + if (proto == IPPROTO_RAW)
> + return INET_RAW_CL_BIT;
> }
> if (family == AF_INET6) {
> if (proto == IPPROTO_TCP)
> @@ -97,6 +101,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
> return INET6_UDP_CL_BIT;
> if (proto == IPPROTO_UDPLITE)
> return INET6_UDPLITE_CL_BIT;
> + if (proto == IPPROTO_RAW)
> + return INET6_RAW_CL_BIT;
> }
>
> pr_err("Unknown pair family %d proto %d\n", family, proto);
> @@ -598,6 +604,9 @@ static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg)
> case IPPROTO_TCP:
> type = SOCK_STREAM;
> break;
> + case IPPROTO_RAW:
> + type = SOCK_RAW;
> + break;
> case IPPROTO_UDP:
> case IPPROTO_UDPLITE:
> type = SOCK_DGRAM;
> @@ -620,6 +629,14 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
>
> if (tmp == 0)
> set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
> + else if (tmp == -ENOENT &&
> + ((req->r.n.sdiag_family == AF_INET ||
> + req->r.n.sdiag_family == AF_INET6) &&
> + req->r.n.sdiag_protocol == IPPROTO_RAW)) {
> + pr_warn("No support for DIAG module on family %s with protocol IPPROTO_RAW, may fail later\n",
> + req->r.n.sdiag_family == AF_INET ? "IPv4" : "IPv6");
> + tmp = 0;
Can you handle this error in collect_sockets() like we do for netlink
and packet sockets?
> + }
>
> return tmp;
> }
> @@ -677,6 +694,15 @@ int collect_sockets(struct ns_id *ns)
> if (tmp)
> err = tmp;
>
> + /* Collect IPv4 RAW sockets */
> + req.r.i.sdiag_family = AF_INET;
> + req.r.i.sdiag_protocol = IPPROTO_RAW;
> + req.r.i.idiag_ext = 0;
> + req.r.i.idiag_states = -1; /* All */
> + tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, ns, &req.r.i);
> + if (tmp)
> + err = tmp;
> +
> /* Collect IPv6 TCP sockets */
> req.r.i.sdiag_family = AF_INET6;
> req.r.i.sdiag_protocol = IPPROTO_TCP;
> @@ -708,6 +734,15 @@ int collect_sockets(struct ns_id *ns)
> if (tmp)
> err = tmp;
>
> + /* Collect IPv6 RAW sockets */
> + req.r.i.sdiag_family = AF_INET6;
> + req.r.i.sdiag_protocol = IPPROTO_RAW;
> + req.r.i.idiag_ext = 0;
> + req.r.i.idiag_states = -1; /* All */
> + tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, ns, &req.r.i);
> + if (tmp)
> + err = tmp;
> +
> req.r.p.sdiag_family = AF_PACKET;
> req.r.p.sdiag_protocol = 0;
> req.r.p.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
> diff --git a/images/sk-inet.proto b/images/sk-inet.proto
> index 09c5a47d2464..173c74a40df7 100644
> --- a/images/sk-inet.proto
> +++ b/images/sk-inet.proto
> @@ -6,6 +6,8 @@ import "sk-opts.proto";
>
> message ip_opts_entry {
> optional bool freebind = 1;
> + optional bool hdrincl = 2;
> + optional bool nodefrag = 3;
> }
>
> message inet_sk_entry {
> --
> 2.7.4
>
More information about the CRIU
mailing list