[Devel] [PATCH RHEL COMMIT] fence-watchdog: Add fence-watchdog driver

Konstantin Khorenko khorenko at virtuozzo.com
Thu Sep 30 17:43:57 MSK 2021


The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 5216fd4e1597fe5990502fe8d717210e3aebf363
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Thu Sep 30 17:43:57 2021 +0300

    fence-watchdog: Add fence-watchdog driver
    
    We need to prevent the system from running without a special userspace
    daemon for the purposes of an HA cluster. So add this watchdog module,
    which will fence the node if that daemon does not update the timer
    value in the file /sys/kernel/watchdog_timer.
    The module is needed for pstorage: we need to protect the network
    from the broken node, so we put a check into net_rx_action.
    
    Signed-off-by: Dmitry Guryanov <dguryanov at parallels.com>
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    Rebase: ktkhai@
    
    Putting fence_wdog_jiffies64 in same cacheline with jiffies will
    be in a separate patch: "fence-watchdog: link fence_wdog_jiffies64 and
    jiffies in one cacheline"
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    
    (cherry-picked from vz8 commit aef6d38b398b ("fence-watchdog:
    Add fence-watchdog driver"))
    
    Updated use of timekeeping API since 32-bit timespec is no longer
    available.
    
    Applied minor formatting fixes.
    
    Added "CONFIG_FENCE_WATCHDOG=y" to
    redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
    
    Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
 include/linux/fence-watchdog.h                     |  15 +
 kernel/Kconfig.openvz                              |   4 +
 kernel/Makefile                                    |   1 +
 kernel/fence-watchdog.c                            | 313 +++++++++++++++++++++
 net/core/dev.c                                     |  13 +
 .../custom-overrides/generic/CONFIG_FENCE_WATCHDOG |   1 +
 6 files changed, 347 insertions(+)

diff --git a/include/linux/fence-watchdog.h b/include/linux/fence-watchdog.h
new file mode 100644
index 000000000000..26b542a4080f
--- /dev/null
+++ b/include/linux/fence-watchdog.h
@@ -0,0 +1,15 @@
+/*
+ *  include/linux/fence-watchdog.h
+ *
+ *  Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *  Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+#ifndef _LINUX_FENCE_WATCHDOG_H_
+#define _LINUX_FENCE_WATCHDOG_H_
+
+int fence_wdog_check_timer(void);	/* 1 if expired and action != netfilter */
+bool fence_wdog_tmo_match(void);	/* true once the deadline has passed */
+
+#endif
diff --git a/kernel/Kconfig.openvz b/kernel/Kconfig.openvz
index 6c3fbed8ae60..9489342596ab 100644
--- a/kernel/Kconfig.openvz
+++ b/kernel/Kconfig.openvz
@@ -60,4 +60,8 @@ config VZ_EVENT
 	  networking code does. By now just the notifications of
 	  the VE essensial status changes are being sent.
 
+config FENCE_WATCHDOG
+	bool "Fencing watchdog for HA cluster support"
+	depends on X86_64
+	default n
 endmenu
diff --git a/kernel/Makefile b/kernel/Makefile
index bf938a777629..6f59a21caa5b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_FENCE_WATCHDOG) += fence-watchdog.o
 
 obj-$(CONFIG_HAS_IOMEM) += iomem.o
 obj-$(CONFIG_RSEQ) += rseq.o
diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
new file mode 100644
index 000000000000..e7fe7d2f3804
--- /dev/null
+++ b/kernel/fence-watchdog.c
@@ -0,0 +1,313 @@
+/*
+ *  kernel/fence-watchdog.c
+ *
+ *  Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ *  Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+/*
+ * Provide userspace with an interface that forbids the kernel to keep
+ * working without a userspace daemon.
+ *
+ * The daemon should write the number of seconds before fencing to the
+ * file /sys/kernel/watchdog_timer, and must renew it before that
+ * time elapses.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/jiffies.h>
+#include <linux/reboot.h>
+#include <linux/fence-watchdog.h>
+#include <linux/device.h>
+#include <linux/kmsg_dump.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+
+#define MAX_U64			(~(u64)0)
+#define MAX_JIFFIES_DELTA	(10 * 365UL * 24UL * 3600UL * HZ)
+#define ACTION_NAME_LEN		16
+
+enum {
+	FENCE_WDOG_CRASH = 0,
+	FENCE_WDOG_REBOOT = 1,
+	FENCE_WDOG_POWEROFF = 2,
+	FENCE_WDOG_NETFILTER = 3,
+};
+
+const char *action_names[] = {"crash", "reboot", "halt", "netfilter", NULL};
+
+unsigned long volatile fence_wdog_jiffies64 = MAX_U64;
+static int fence_wdog_action = FENCE_WDOG_CRASH;
+
+enum {
+	NOT_FENCED = 0,
+	FENCED = 1,
+	FENCED_TIMEOUT = 2,
+};
+
+static atomic_t fence_stage = ATOMIC_INIT(NOT_FENCED);
+static char fence_wdog_log_path[PATH_MAX] = "/fence_wdog.log";
+
+#define SECS_PER_MIN	60
+#define PREFIX_LEN	39
+
+static int print_prefix(char *msg) {
+	struct timespec64 ts;
+	struct tm tm;
+
+	ktime_get_real_ts64(&ts);
+	time64_to_tm(ts.tv_sec - sys_tz.tz_minuteswest * SECS_PER_MIN, 0, &tm);
+
+	return snprintf(msg, PREFIX_LEN, "[%02d:%02d:%02d/%04ld-%02d-%02d] fence-watchdog: ",
+			tm.tm_hour, tm.tm_min, tm.tm_sec,
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
+}
+
+#define MSG_LEN (PREFIX_LEN + 10)
+
+void fence_wdog_log(void)
+{
+	char msg[MSG_LEN];
+	struct file *file;
+	int ret, len;
+
+	ret = print_prefix(msg);
+	if (ret < 0)
+		return;
+
+	len = strlen(msg);
+
+	ret = snprintf(msg + len, MSG_LEN - len, "%s\n", action_names[fence_wdog_action]);
+	if (ret != strlen(action_names[fence_wdog_action]) + 1) {
+		printk(KERN_EMERG "fence-watchdog: Failed to sprintf msg\n");
+		return;
+	}
+
+	file = filp_open(fence_wdog_log_path,
+			 O_CREAT | O_WRONLY | O_APPEND | O_NOFOLLOW | O_LARGEFILE,
+			 0600);
+	if (IS_ERR(file)) {
+		printk(KERN_EMERG "fence-watchdog: Failed to open log path\n");
+		return;
+	}
+
+	if (!S_ISREG(file_inode(file)->i_mode)) {
+		printk(KERN_EMERG "fence-watchdog: Wrong type of log file\n");
+		goto close;
+	}
+
+	ret = kernel_write(file, msg, strlen(msg), &file->f_pos);
+	if (ret < 0) {
+		printk(KERN_EMERG "fence-watchdog: Failed to write msg, ret=%d\n", ret);
+		goto close;
+	}
+
+	ret = vfs_fsync(file, 0);
+	if (ret < 0)
+		printk(KERN_EMERG "fence-watchdog: Failed to fsync log file ret=%d\n", ret);
+
+close:
+	ret = filp_close(file, NULL);
+	if (ret < 0)
+		printk(KERN_EMERG "fence-watchdog: Failed to close log file ret=%d\n", ret);
+
+	return;
+}
+
+static void do_halt_or_reboot(struct work_struct *dummy)
+{
+	printk(KERN_EMERG "fence-watchdog: %s\n",
+	       action_names[fence_wdog_action]);
+
+	fence_wdog_log();
+
+	switch (fence_wdog_action) {
+	case FENCE_WDOG_REBOOT:
+		emergency_restart();
+		break;
+	case FENCE_WDOG_POWEROFF:
+		kernel_halt();
+		break;
+	}
+}
+
+static DECLARE_WORK(halt_or_reboot_work, do_halt_or_reboot);
+
+void fence_wdog_do_fence(void)
+{
+	if (fence_wdog_action == FENCE_WDOG_CRASH ||
+			atomic_read(&fence_stage) == FENCED_TIMEOUT)
+		panic("fence-watchdog: %s\n",
+		      action_names[fence_wdog_action]);
+	else
+		schedule_work(&halt_or_reboot_work);
+}
+
+#define FENCE_WDOG_TIMEOUT 30
+
+inline int fence_wdog_check_timer(void)
+{
+	if (unlikely(get_jiffies_64() > fence_wdog_jiffies64 &&
+			fence_wdog_action != FENCE_WDOG_NETFILTER)) {
+		if (atomic_cmpxchg(&fence_stage, NOT_FENCED, FENCED) == NOT_FENCED
+		    || (get_jiffies_64() > fence_wdog_jiffies64
+		    + FENCE_WDOG_TIMEOUT * HZ
+		    && atomic_cmpxchg(&fence_stage, FENCED, FENCED_TIMEOUT) == FENCED))
+			fence_wdog_do_fence();
+
+		return 1;
+	}
+
+	return 0;
+}
+
+bool fence_wdog_tmo_match(void)
+{
+	return get_jiffies_64() > fence_wdog_jiffies64;
+}
+EXPORT_SYMBOL(fence_wdog_tmo_match);
+
+static ssize_t fence_wdog_timer_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	ssize_t ret;
+	u64 jiffies_delta = fence_wdog_jiffies64 - get_jiffies_64();
+	struct timespec64 t;
+
+	if (jiffies_delta > MAX_JIFFIES_DELTA) {
+		ret =  sprintf(buf, "inf\n");
+	} else {
+		jiffies_to_timespec64(jiffies_delta, &t);
+		ret =  sprintf(buf, "%lld\n", t.tv_sec);
+	}
+
+	return ret;
+}
+
+static ssize_t fence_wdog_timer_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long long val;
+	unsigned long jiffies_delta;
+	struct timespec64 t;
+
+	if (kstrtoull(buf, 10, &val))
+		return -EINVAL;
+
+	if (val == 0) {
+		fence_wdog_jiffies64 = MAX_U64;
+		return count;
+	}
+
+	t.tv_sec = val;
+	t.tv_nsec = 0;
+
+	jiffies_delta = timespec64_to_jiffies(&t);
+	if (jiffies_delta > MAX_JIFFIES_DELTA)
+		return -EINVAL;
+
+	fence_wdog_jiffies64 = get_jiffies_64() + jiffies_delta;
+
+	return count;
+}
+
+static ssize_t fence_wdog_action_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", action_names[fence_wdog_action]);
+}
+
+static ssize_t fence_wdog_action_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char str_action[ACTION_NAME_LEN];
+	int i = 0;
+
+	if (sscanf(buf, "%15s", str_action) != 1)
+		return -EINVAL;
+
+	for (i = 0; action_names[i]; i++) {
+		if ((!strncasecmp(str_action, action_names[i], ACTION_NAME_LEN))) {
+			fence_wdog_action = i;
+			return count;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t fence_wdog_available_actions_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	int i, ret = 0;
+
+	for (i = 0; action_names[i] != NULL; i++)
+		ret += sprintf(&buf[ret], "%s ", action_names[i]);
+
+	ret += sprintf(&buf[ret], "\n");
+	return ret;
+}
+
+static ssize_t fence_wdog_log_path_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", fence_wdog_log_path);
+}
+
+#define STORE_FORMAT_LEN 16
+
+static ssize_t fence_wdog_log_path_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char format[STORE_FORMAT_LEN];
+	int ret;
+
+	ret = snprintf(format, STORE_FORMAT_LEN, "%%%ds", PATH_MAX - 1);
+	if (ret < 0)
+		return ret;
+
+	if (sscanf(buf, format, fence_wdog_log_path) != 1)
+		return -EINVAL;
+	/* A store must return the bytes consumed; 0 makes writers retry. */
+	return count;
+}
+
+static struct kobj_attribute fence_wdog_timer_attr =
+	__ATTR(watchdog_timer, 0644,
+		fence_wdog_timer_show, fence_wdog_timer_store);
+
+static struct kobj_attribute fence_wdog_action_attr =
+	__ATTR(watchdog_action, 0644,
+		fence_wdog_action_show, fence_wdog_action_store);
+
+static struct kobj_attribute fence_wdog_available_actions_attr =
+	__ATTR(watchdog_available_actions, 0444,	/* no store: read-only */
+		fence_wdog_available_actions_show, NULL);
+
+static struct kobj_attribute fence_wdog_log_path_attr =
+	__ATTR(watchdog_log_path, 0644,
+		fence_wdog_log_path_show, fence_wdog_log_path_store);
+
+static struct attribute *fence_wdog_attrs[] = {
+	&fence_wdog_timer_attr.attr,
+	&fence_wdog_action_attr.attr,
+	&fence_wdog_available_actions_attr.attr,
+	&fence_wdog_log_path_attr.attr,
+	NULL,
+};
+
+static struct attribute_group fence_wdog_attr_group = {
+	.attrs = fence_wdog_attrs,
+};
+
+static int __init fence_wdog_init(void)
+{
+	/* Do not ignore a failure to add the attributes under /sys/kernel. */
+	return sysfs_update_group(kernel_kobj, &fence_wdog_attr_group);
+}
+
+module_init(fence_wdog_init)
diff --git a/net/core/dev.c b/net/core/dev.c
index 3500c9544d27..21b0e5ff5eaf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
 #include <linux/ve.h>
+#include <linux/fence-watchdog.h>
 
 #include "net-sysfs.h"
 
@@ -3669,6 +3670,14 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
 	struct sk_buff *skb = first;
 	int rc = NETDEV_TX_OK;
 
+#ifdef CONFIG_FENCE_WATCHDOG
+	if (unlikely(fence_wdog_check_timer())) {
+		kfree_skb(skb);
+		*ret = rc;
+		return NULL;
+	}
+#endif
+
 	while (skb) {
 		struct sk_buff *next = skb->next;
 
@@ -7189,6 +7198,10 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
 
+#ifdef CONFIG_FENCE_WATCHDOG
+	fence_wdog_check_timer();
+#endif
+
 	for (;;) {
 		struct napi_struct *n;
 
diff --git a/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG b/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
new file mode 100644
index 000000000000..434aac2b336a
--- /dev/null
+++ b/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
@@ -0,0 +1 @@
+CONFIG_FENCE_WATCHDOG=y


More information about the Devel mailing list