[CRIU] [PATCH 1/2] sk-inet: Add initial support for raw sockets
Pavel Emelyanov
xemul at virtuozzo.com
Wed Nov 16 09:03:50 PST 2016
On 11/16/2016 01:59 PM, Cyrill Gorcunov wrote:
> For raw sockets we need DIAG module extension,
What's the status of respective kernel patches?
> so in case
> we fail while collecting sockets, don't exit with an
> error but warn the user, and if we really meet a raw socket
> we will exit later at the socket lookup stage.
>
> Strictly speaking we can use procfs parsing instead, but
> this is going to be way more complex than the well-known diag
> approach, and taking into account that raw sockets are
> not that widely used, let's support them only when the diag
> module is present in the system.
>
> In this patch the initial raw sockets support is added,
> complete enough to handle the SO_IP_SET request from the
> ipset tool (needed by modern containers). But the
> code might need extensions/fixes in the future.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> criu/include/sk-inet.h | 12 +++++++++++
> criu/sk-inet.c | 58 ++++++++++++++++++++++++++++++++++++--------------
> criu/sockets.c | 35 ++++++++++++++++++++++++++++++
> images/sk-inet.proto | 2 ++
> 4 files changed, 91 insertions(+), 16 deletions(-)
>
> diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h
> index a06a8ac161da..58c05887e04d 100644
> --- a/criu/include/sk-inet.h
> +++ b/criu/include/sk-inet.h
> @@ -16,6 +16,18 @@
> #define TCP_REPAIR_OPTIONS 22
> #endif
>
> +#ifndef IP_HDRINCL
> +# define IP_HDRINCL 3
> +#endif
> +
> +#ifndef IP_NODEFRAG
> +# define IP_NODEFRAG 22
> +#endif
> +
> +#ifndef IPV6_HDRINCL
> +# define IPV6_HDRINCL 36
> +#endif
> +
> struct inet_sk_desc {
> struct socket_desc sd;
> unsigned int type;
> diff --git a/criu/sk-inet.c b/criu/sk-inet.c
> index 924cf8c09d60..42134d4d5b3b 100644
> --- a/criu/sk-inet.c
> +++ b/criu/sk-inet.c
> @@ -94,7 +94,7 @@ static void show_one_inet_img(const char *act, const InetSkEntry *e)
> e->state, src_addr);
> }
>
> -static int can_dump_ipproto(int ino, int proto)
> +static int can_dump_ipproto(int ino, int proto, int type)
> {
> /* Make sure it's a proto we support */
> switch (proto) {
> @@ -104,8 +104,12 @@ static int can_dump_ipproto(int ino, int proto)
> case IPPROTO_UDPLITE:
> break;
> default:
> - pr_err("Unsupported proto %d for socket %x\n", proto, ino);
> - return 0;
> + /* Raw sockets may have any protocol inside */
> + if (type != SOCK_RAW) {
> + pr_err("Unsupported proto %d (type %d) for socket %x\n",
> + proto, type, ino);
> + return 0;
> + }
> }
>
> return 1;
> @@ -135,9 +139,9 @@ static int can_dump_inet_sk(const struct inet_sk_desc *sk)
> return 1;
> }
>
> - if (sk->type != SOCK_STREAM) {
> + if (sk->type != SOCK_STREAM && sk->type != SOCK_RAW) {
> pr_err("Can't dump %d inet socket %x. "
> - "Only can stream and dgram.\n",
> + "Only can stream, dgram and raw.\n",
> sk->type, sk->sd.ino);
> return 0;
> }
> @@ -240,12 +244,24 @@ err:
> return NULL;
> }
>
> -static int dump_ip_opts(int sk, IpOptsEntry *ioe)
> +
> +static int dump_ip_opts(int family, int type, int sk, IpOptsEntry *ioe)
> {
> int ret = 0;
>
> - ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> - ioe->has_freebind = ioe->freebind;
> + if (type == SOCK_RAW) {
> + if (family == AF_INET6) {
> + ret |= dump_opt(sk, SOL_IPV6, IPV6_HDRINCL, &ioe->hdrincl);
> + } else {
> + ret |= dump_opt(sk, SOL_IP, IP_HDRINCL, &ioe->hdrincl);
> + ret |= dump_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
> + ioe->has_nodefrag = ioe->nodefrag;
> + }
> + ioe->has_hdrincl = ioe->hdrincl;
> + } else {
> + ret |= dump_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> + ioe->has_freebind = ioe->freebind;
> + }
>
> return ret;
> }
> @@ -275,14 +291,18 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
> InetSkEntry ie = INET_SK_ENTRY__INIT;
> IpOptsEntry ipopts = IP_OPTS_ENTRY__INIT;
> SkOptsEntry skopts = SK_OPTS_ENTRY__INIT;
> - int ret = -1, err = -1, proto;
> + int ret = -1, err = -1, proto, type;
>
> ret = do_dump_opt(lfd, SOL_SOCKET, SO_PROTOCOL,
> &proto, sizeof(proto));
> if (ret)
> goto err;
> + ret = do_dump_opt(lfd, SOL_SOCKET, SO_TYPE,
> + &type, sizeof(type));
> + if (ret)
> + goto err;
>
> - if (!can_dump_ipproto(p->stat.st_ino, proto))
> + if (!can_dump_ipproto(p->stat.st_ino, proto, type))
> goto err;
>
> sk = (struct inet_sk_desc *)lookup_socket(p->stat.st_ino, family, proto);
> @@ -359,7 +379,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
> memcpy(ie.src_addr, sk->src_addr, pb_repeated_size(&ie, src_addr));
> memcpy(ie.dst_addr, sk->dst_addr, pb_repeated_size(&ie, dst_addr));
>
> - if (dump_ip_opts(lfd, &ipopts))
> + if (dump_ip_opts(family, sk->type, lfd, &ipopts))
> goto err;
>
> if (dump_socket_opts(lfd, &skopts))
> @@ -376,7 +396,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa
>
> switch (proto) {
> case IPPROTO_TCP:
> - err = dump_one_tcp(lfd, sk);
> + err = (sk->type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0;
> break;
> default:
> err = 0;
> @@ -540,12 +560,18 @@ static int post_open_inet_sk(struct file_desc *d, int sk)
> return 0;
> }
>
> -int restore_ip_opts(int sk, IpOptsEntry *ioe)
> +int restore_ip_opts(int family, int sk, IpOptsEntry *ioe)
> {
> int ret = 0;
>
> if (ioe->has_freebind)
> ret |= restore_opt(sk, SOL_IP, IP_FREEBIND, &ioe->freebind);
> + if (ioe->has_nodefrag)
> + ret |= restore_opt(sk, SOL_IP, IP_NODEFRAG, &ioe->nodefrag);
> + if (ioe->has_hdrincl)
> + ret |= restore_opt(sk, family == AF_INET6 ? SOL_IPV6 : SOL_IP,
> + family == AF_INET6 ? IPV6_HDRINCL : IP_HDRINCL,
> + &ioe->hdrincl);
>
> return ret;
> }
> @@ -565,7 +591,7 @@ static int open_inet_sk(struct file_desc *d)
> return -1;
> }
>
> - if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM)) {
> + if ((ie->type != SOCK_STREAM) && (ie->type != SOCK_DGRAM) && (ie->type != SOCK_RAW)) {
> pr_err("Unsupported socket type: %d\n", ie->type);
> return -1;
> }
> @@ -641,7 +667,7 @@ done:
> if (rst_file_params(sk, ie->fown, ie->flags))
> goto err;
>
> - if (ie->ip_opts && restore_ip_opts(sk, ie->ip_opts))
> + if (ie->ip_opts && restore_ip_opts(ie->family, sk, ie->ip_opts))
> goto err;
>
> if (restore_socket_opts(sk, ie->opts))
> @@ -713,7 +739,7 @@ int inet_bind(int sk, struct inet_sk_info *ii)
> * sockets could not be bound to them in this moment
> * without setting IP_FREEBIND.
> */
> - if (ii->ie->family == AF_INET6) {
> + if (ii->ie->family == AF_INET6 && ii->ie->proto != IPPROTO_RAW) {
> int yes = 1;
>
> if (restore_opt(sk, SOL_IP, IP_FREEBIND, &yes))
> diff --git a/criu/sockets.c b/criu/sockets.c
> index b5c03fdf2984..57fa4c16a646 100644
> --- a/criu/sockets.c
> +++ b/criu/sockets.c
> @@ -57,9 +57,11 @@ enum socket_cl_bits
> INET_TCP_CL_BIT,
> INET_UDP_CL_BIT,
> INET_UDPLITE_CL_BIT,
> + INET_RAW_CL_BIT,
> INET6_TCP_CL_BIT,
> INET6_UDP_CL_BIT,
> INET6_UDPLITE_CL_BIT,
> + INET6_RAW_CL_BIT,
> UNIX_CL_BIT,
> PACKET_CL_BIT,
> _MAX_CL_BIT,
> @@ -85,6 +87,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
> return INET_UDP_CL_BIT;
> if (proto == IPPROTO_UDPLITE)
> return INET_UDPLITE_CL_BIT;
> + if (proto == IPPROTO_RAW)
> + return INET_RAW_CL_BIT;
> }
> if (family == AF_INET6) {
> if (proto == IPPROTO_TCP)
> @@ -93,6 +97,8 @@ enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsigned int proto)
> return INET6_UDP_CL_BIT;
> if (proto == IPPROTO_UDPLITE)
> return INET6_UDPLITE_CL_BIT;
> + if (proto == IPPROTO_RAW)
> + return INET6_RAW_CL_BIT;
> }
>
> pr_err("Unknown pair family %d proto %d\n", family, proto);
> @@ -593,6 +599,9 @@ static int inet_receive_one(struct nlmsghdr *h, void *arg)
> case IPPROTO_TCP:
> type = SOCK_STREAM;
> break;
> + case IPPROTO_RAW:
> + type = SOCK_RAW;
> + break;
> case IPPROTO_UDP:
> case IPPROTO_UDPLITE:
> type = SOCK_DGRAM;
> @@ -614,6 +623,14 @@ static int do_collect_req(int nl, struct sock_diag_req *req, int size,
>
> if (tmp == 0)
> set_collect_bit(req->r.n.sdiag_family, req->r.n.sdiag_protocol);
> + else if (tmp == -ENOENT &&
> + ((req->r.n.sdiag_family == AF_INET ||
> + req->r.n.sdiag_family == AF_INET6) &&
> + req->r.n.sdiag_protocol == IPPROTO_RAW)) {
> + pr_warn("No support for DIAG module on family %s with protocol IPPROTO_RAW, may fail later\n",
> + req->r.n.sdiag_family == AF_INET ? "IPv4" : "IPv6");
> + tmp = 0;
> + }
>
> return tmp;
> }
> @@ -668,6 +685,15 @@ int collect_sockets(struct ns_id *ns)
> if (tmp)
> err = tmp;
>
> + /* Collect IPv4 RAW sockets */
> + req.r.i.sdiag_family = AF_INET;
> + req.r.i.sdiag_protocol = IPPROTO_RAW;
> + req.r.i.idiag_ext = 0;
> + req.r.i.idiag_states = -1; /* All */
> + tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
> + if (tmp)
> + err = tmp;
> +
> /* Collect IPv6 TCP sockets */
> req.r.i.sdiag_family = AF_INET6;
> req.r.i.sdiag_protocol = IPPROTO_TCP;
> @@ -696,6 +722,15 @@ int collect_sockets(struct ns_id *ns)
> if (tmp)
> err = tmp;
>
> + /* Collect IPv6 RAW sockets */
> + req.r.i.sdiag_family = AF_INET6;
> + req.r.i.sdiag_protocol = IPPROTO_RAW;
> + req.r.i.idiag_ext = 0;
> + req.r.i.idiag_states = -1; /* All */
> + tmp = do_collect_req(nl, &req, sizeof(req), inet_receive_one, &req.r.i);
> + if (tmp)
> + err = tmp;
> +
> req.r.p.sdiag_family = AF_PACKET;
> req.r.p.sdiag_protocol = 0;
> req.r.p.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MCLIST |
> diff --git a/images/sk-inet.proto b/images/sk-inet.proto
> index 01dda875a247..6c5b8df585e7 100644
> --- a/images/sk-inet.proto
> +++ b/images/sk-inet.proto
> @@ -6,6 +6,8 @@ import "sk-opts.proto";
>
> message ip_opts_entry {
> optional bool freebind = 1;
> + optional bool hdrincl = 2;
> + optional bool nodefrag = 3;
> }
>
> message inet_sk_entry {
>
More information about the CRIU
mailing list