[Devel] [PATCH vz9 01/16] fence-watchdog: Add fence-watchdog driver
Nikita Yushchenko
nikita.yushchenko at virtuozzo.com
Wed Sep 29 10:00:02 MSK 2021
From: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
For HA cluster purposes, the system must not be allowed to keep
running without a special userspace daemon. Add this watchdog module,
which fences the node if that daemon stops updating the timer
value in the file /sys/kernel/watchdog_timer.
The module is needed for pstorage: the network has to be protected
from a broken node, so the check is placed in net_rx_action.
Signed-off-by: Dmitry Guryanov <dguryanov at parallels.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Rebase: ktkhai@
Putting fence_wdog_jiffies64 in same cacheline with jiffies will
be in a separate patch: "fence-watchdog: link fence_wdog_jiffies64 and
jiffies in one cacheline"
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
(cherry-picked from vz8 commit aef6d38b398b ("fence-watchdog:
Add fence-watchdog driver"))
Updated use of timekeeping API since 32-bit timespec is no longer
available.
Applied minor formatting fixes.
Added "CONFIG_FENCE_WATCHDOG=y" to
redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
Signed-off-by: Nikita Yushchenko <nikita.yushchenko at virtuozzo.com>
---
include/linux/fence-watchdog.h | 15 +
kernel/Kconfig.openvz | 4 +
kernel/Makefile | 1 +
kernel/fence-watchdog.c | 313 ++++++++++++++++++
net/core/dev.c | 13 +
.../generic/CONFIG_FENCE_WATCHDOG | 1 +
6 files changed, 347 insertions(+)
create mode 100644 include/linux/fence-watchdog.h
create mode 100644 kernel/fence-watchdog.c
create mode 100644 redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
diff --git a/include/linux/fence-watchdog.h b/include/linux/fence-watchdog.h
new file mode 100644
index 000000000000..26b542a4080f
--- /dev/null
+++ b/include/linux/fence-watchdog.h
@@ -0,0 +1,15 @@
+/*
+ * include/linux/fence-watchdog.h
+ *
+ * Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ * Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+#ifndef _LINUX_FENCE_WATCHDOG_H_
+#define _LINUX_FENCE_WATCHDOG_H_
+
+/*
+ * Returns 1 if the fence watchdog deadline has passed and the configured
+ * action is not "netfilter"; as a side effect this may trigger the fence
+ * action. Returns 0 otherwise.
+ */
+inline int fence_wdog_check_timer(void);
+/* Returns true once the current jiffies value is past the watchdog deadline. */
+bool fence_wdog_tmo_match(void);
+
+#endif
diff --git a/kernel/Kconfig.openvz b/kernel/Kconfig.openvz
index 6c3fbed8ae60..9489342596ab 100644
--- a/kernel/Kconfig.openvz
+++ b/kernel/Kconfig.openvz
@@ -60,4 +60,8 @@ config VZ_EVENT
networking code does. By now just the notifications of
the VE essensial status changes are being sent.
+config FENCE_WATCHDOG
+ bool "Fencing watchdog for HA cluster support"
+ depends on X86_64
+ default n
endmenu
diff --git a/kernel/Makefile b/kernel/Makefile
index bf938a777629..6f59a21caa5b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_FENCE_WATCHDOG) += fence-watchdog.o
obj-$(CONFIG_HAS_IOMEM) += iomem.o
obj-$(CONFIG_RSEQ) += rseq.o
diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
new file mode 100644
index 000000000000..e7fe7d2f3804
--- /dev/null
+++ b/kernel/fence-watchdog.c
@@ -0,0 +1,313 @@
+/*
+ * kernel/fence-watchdog.c
+ *
+ * Copyright (c) 2010-2015 Parallels IP Holdings GmbH
+ * Copyright (c) 2017-2021 Virtuozzo International GmbH. All rights reserved.
+ *
+ */
+
+/*
+ * Provide userspace with an interface that forbids the kernel to keep
+ * working without a userspace daemon.
+ *
+ * The daemon should write the number of seconds before fencing to the
+ * file /sys/kernel/watchdog_timer, and must renew it before that
+ * time elapses.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/jiffies.h>
+#include <linux/reboot.h>
+#include <linux/fence-watchdog.h>
+#include <linux/device.h>
+#include <linux/kmsg_dump.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+
+/* All-ones 64-bit value; stored in fence_wdog_jiffies64 when the timer is disarmed. */
+#define MAX_U64 (~(u64)0)
+/* Ten 365-day years expressed in jiffies: the largest timeout accepted or shown. */
+#define MAX_JIFFIES_DELTA (10 * 365UL * 24UL * 3600UL * HZ)
+/* Size of the buffer used to parse an action name written through sysfs. */
+#define ACTION_NAME_LEN 16
+
+/* Fence actions; each value is also the index of its name in action_names[]. */
+enum {
+ FENCE_WDOG_CRASH = 0,
+ FENCE_WDOG_REBOOT = 1,
+ FENCE_WDOG_POWEROFF = 2,
+ FENCE_WDOG_NETFILTER = 3,
+};
+
+/* NULL-terminated table of action names, in the order of the enum above. */
+const char *action_names[] = {"crash", "reboot", "halt", "netfilter", NULL};
+
+/* Deadline in jiffies after which the node is fenced; MAX_U64 means disarmed. */
+unsigned long volatile fence_wdog_jiffies64 = MAX_U64;
+/* Currently selected fence action (one of the FENCE_WDOG_* values). */
+static int fence_wdog_action = FENCE_WDOG_CRASH;
+
+/* Progress of fencing, advanced with atomic_cmpxchg() in fence_wdog_check_timer(). */
+enum {
+ NOT_FENCED = 0,
+ FENCED = 1,
+ FENCED_TIMEOUT = 2,
+};
+
+static atomic_t fence_stage = ATOMIC_INIT(NOT_FENCED);
+/* File that fence_wdog_log() appends to; changeable via the watchdog_log_path attribute. */
+static char fence_wdog_log_path[PATH_MAX] = "/fence_wdog.log";
+
+#define SECS_PER_MIN 60
+/* Size limit passed to snprintf() when print_prefix() formats the log prefix. */
+#define PREFIX_LEN 39
+
+/*
+ * Write the log-line prefix "[HH:MM:SS/YYYY-MM-DD] fence-watchdog: " into
+ * msg. The timestamp is the current wall-clock time shifted by the
+ * sys_tz.tz_minuteswest offset. Output is limited to PREFIX_LEN bytes,
+ * terminating NUL included.
+ *
+ * Returns whatever snprintf() returns.
+ */
+static int print_prefix(char *msg)
+{
+	struct timespec64 now;
+	struct tm when;
+	time64_t local_secs;
+
+	ktime_get_real_ts64(&now);
+	local_secs = now.tv_sec - sys_tz.tz_minuteswest * SECS_PER_MIN;
+	time64_to_tm(local_secs, 0, &when);
+
+	return snprintf(msg, PREFIX_LEN,
+			"[%02d:%02d:%02d/%04ld-%02d-%02d] fence-watchdog: ",
+			when.tm_hour, when.tm_min, when.tm_sec,
+			when.tm_year + 1900, when.tm_mon + 1, when.tm_mday);
+}
+
+/* Prefix plus room for the longest action name ("netfilter") and a newline. */
+#define MSG_LEN (PREFIX_LEN + 10)
+
+/*
+ * Append one line, "<timestamp prefix><action name>\n", to the file named
+ * by fence_wdog_log_path and fsync it.
+ *
+ * The file is created with mode 0600 if it does not exist, is opened with
+ * O_NOFOLLOW, and must be a regular file. Every failure is reported with a
+ * KERN_EMERG printk and otherwise ignored; nothing is returned to the caller.
+ */
+void fence_wdog_log(void)
+{
+ char msg[MSG_LEN];
+ struct file *file;
+ int ret, len;
+
+ ret = print_prefix(msg);
+ if (ret < 0)
+ return;
+
+ len = strlen(msg);
+
+ /* Append the action name; a short count here means the name was truncated. */
+ ret = snprintf(msg + len, MSG_LEN - len, "%s\n", action_names[fence_wdog_action]);
+ if (ret != strlen(action_names[fence_wdog_action]) + 1) {
+ printk(KERN_EMERG "fence-watchdog: Failed to sprintf msg\n");
+ return;
+ }
+
+ file = filp_open(fence_wdog_log_path,
+ O_CREAT | O_WRONLY | O_APPEND | O_NOFOLLOW | O_LARGEFILE,
+ 0600);
+ if (IS_ERR(file)) {
+ printk(KERN_EMERG "fence-watchdog: Failed to open log path\n");
+ return;
+ }
+
+ /* Refuse to write to anything but a regular file. */
+ if (!S_ISREG(file_inode(file)->i_mode)) {
+ printk(KERN_EMERG "fence-watchdog: Wrong type of log file\n");
+ goto close;
+ }
+
+ ret = kernel_write(file, msg, strlen(msg), &file->f_pos);
+ if (ret < 0) {
+ printk(KERN_EMERG "fence-watchdog: Failed to write msg, ret=%d\n", ret);
+ goto close;
+ }
+
+ /* Flush the record to storage before the fence action is carried out. */
+ ret = vfs_fsync(file, 0);
+ if (ret < 0)
+ printk(KERN_EMERG "fence-watchdog: Failed to fsync log file ret=%d\n", ret);
+
+close:
+ ret = filp_close(file, NULL);
+ if (ret < 0)
+ printk(KERN_EMERG "fence-watchdog: Failed to close log file ret=%d\n", ret);
+
+ return;
+}
+
+/*
+ * Work handler that carries out the non-panic fence actions: announce the
+ * action on the console, record it in the log file, then restart or halt
+ * depending on fence_wdog_action. Any other action value results in no
+ * further step here.
+ */
+static void do_halt_or_reboot(struct work_struct *dummy)
+{
+	int action;
+
+	printk(KERN_EMERG "fence-watchdog: %s\n",
+	       action_names[fence_wdog_action]);
+
+	fence_wdog_log();
+
+	/* Sample the action once, after logging, and dispatch on that value. */
+	action = fence_wdog_action;
+	if (action == FENCE_WDOG_REBOOT)
+		emergency_restart();
+	else if (action == FENCE_WDOG_POWEROFF)
+		kernel_halt();
+}
+
+static DECLARE_WORK(halt_or_reboot_work, do_halt_or_reboot);
+
+/*
+ * Carry out the fence action.
+ *
+ * Unless the action is "crash" or fencing has already reached the
+ * FENCED_TIMEOUT stage, the work is deferred to halt_or_reboot_work.
+ * In the two remaining cases the kernel panics immediately.
+ */
+void fence_wdog_do_fence(void)
+{
+	if (fence_wdog_action != FENCE_WDOG_CRASH &&
+	    atomic_read(&fence_stage) != FENCED_TIMEOUT) {
+		schedule_work(&halt_or_reboot_work);
+		return;
+	}
+
+	panic("fence-watchdog: %s\n", action_names[fence_wdog_action]);
+}
+
+/* Seconds past the deadline after which fencing is attempted a second time. */
+#define FENCE_WDOG_TIMEOUT 30
+
+/*
+ * Check whether the watchdog deadline has passed and, if so, start fencing.
+ *
+ * Nothing happens while the deadline is in the future or while the action
+ * is "netfilter". Otherwise fence_wdog_do_fence() is called by the caller
+ * that wins the NOT_FENCED -> FENCED transition, and once more by the
+ * caller that wins the FENCED -> FENCED_TIMEOUT transition; the latter is
+ * only attempted when more than FENCE_WDOG_TIMEOUT seconds have passed
+ * since the deadline.
+ *
+ * Returns 1 when the deadline has passed (and the action is not
+ * "netfilter"), 0 otherwise.
+ */
+inline int fence_wdog_check_timer(void)
+{
+ if (unlikely(get_jiffies_64() > fence_wdog_jiffies64 &&
+ fence_wdog_action != FENCE_WDOG_NETFILTER)) {
+ /* First stage: fence once; second stage: fence again after the grace period. */
+ if (atomic_cmpxchg(&fence_stage, NOT_FENCED, FENCED) == NOT_FENCED
+ || (get_jiffies_64() > fence_wdog_jiffies64
+ + FENCE_WDOG_TIMEOUT * HZ
+ && atomic_cmpxchg(&fence_stage, FENCED, FENCED_TIMEOUT) == FENCED))
+ fence_wdog_do_fence();
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Report whether the watchdog deadline has passed, without taking any
+ * fence action and regardless of the configured action.
+ */
+bool fence_wdog_tmo_match(void)
+{
+ return get_jiffies_64() > fence_wdog_jiffies64;
+}
+EXPORT_SYMBOL(fence_wdog_tmo_match);
+
+/*
+ * sysfs show handler for watchdog_timer.
+ *
+ * Prints the whole seconds left until the deadline, or "inf" when the
+ * difference exceeds MAX_JIFFIES_DELTA. The difference is computed as an
+ * unsigned 64-bit value, so both the disarmed state and a deadline that
+ * already lies in the past end up in the "inf" branch.
+ */
+static ssize_t fence_wdog_timer_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	u64 remaining = fence_wdog_jiffies64 - get_jiffies_64();
+	struct timespec64 left;
+
+	if (remaining > MAX_JIFFIES_DELTA)
+		return sprintf(buf, "inf\n");
+
+	jiffies_to_timespec64(remaining, &left);
+	return sprintf(buf, "%lld\n", left.tv_sec);
+}
+
+/*
+ * sysfs store handler for watchdog_timer.
+ *
+ * Accepts a decimal number of seconds. Zero disarms the watchdog; any other
+ * value moves the deadline that many seconds past the current jiffies.
+ * Returns count on success, -EINVAL for unparsable input or a timeout that
+ * converts to more than MAX_JIFFIES_DELTA jiffies.
+ */
+static ssize_t fence_wdog_timer_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long long secs;
+	unsigned long delta;
+	struct timespec64 timeout;
+
+	if (kstrtoull(buf, 10, &secs))
+		return -EINVAL;
+
+	if (!secs) {
+		/* Disarm: park the deadline at the largest possible value. */
+		fence_wdog_jiffies64 = MAX_U64;
+		return count;
+	}
+
+	timeout.tv_sec = secs;
+	timeout.tv_nsec = 0;
+	delta = timespec64_to_jiffies(&timeout);
+
+	if (delta > MAX_JIFFIES_DELTA)
+		return -EINVAL;
+
+	fence_wdog_jiffies64 = get_jiffies_64() + delta;
+
+	return count;
+}
+
+/* sysfs show handler for watchdog_action: prints the name of the current action. */
+static ssize_t fence_wdog_action_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", action_names[fence_wdog_action]);
+}
+
+/*
+ * sysfs store handler for watchdog_action.
+ *
+ * Reads one whitespace-delimited word (at most 15 characters) from buf and
+ * selects the action whose name matches it, ignoring case. Returns count on
+ * success and -EINVAL when no word could be read or no action matches.
+ */
+static ssize_t fence_wdog_action_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char requested[ACTION_NAME_LEN];
+	int idx;
+
+	if (sscanf(buf, "%15s", requested) != 1)
+		return -EINVAL;
+
+	for (idx = 0; action_names[idx] != NULL; idx++) {
+		if (strncasecmp(requested, action_names[idx], ACTION_NAME_LEN) != 0)
+			continue;
+
+		/* The table index doubles as the FENCE_WDOG_* value. */
+		fence_wdog_action = idx;
+		return count;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * sysfs show handler for watchdog_available_actions: prints every entry of
+ * action_names[], each followed by a space, and finishes the line with a
+ * newline. Returns the total number of characters produced.
+ */
+static ssize_t fence_wdog_available_actions_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	const char **name;
+	int len = 0;
+
+	for (name = action_names; *name; name++)
+		len += sprintf(buf + len, "%s ", *name);
+
+	len += sprintf(buf + len, "\n");
+
+	return len;
+}
+
+/*
+ * sysfs show handler for watchdog_log_path: prints the configured log file
+ * path followed by a newline.
+ *
+ * scnprintf() is used so that the returned length is the number of bytes
+ * actually placed in buf, even if a maximum-length path had to be truncated
+ * to fit the PAGE_SIZE buffer (snprintf() would report the untruncated
+ * length instead).
+ */
+static ssize_t fence_wdog_log_path_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%s\n", fence_wdog_log_path);
+}
+
+/* Large enough for the "%<width>s" scan format built in the store handler. */
+#define STORE_FORMAT_LEN 16
+
+/*
+ * sysfs store handler for watchdog_log_path.
+ *
+ * Reads one whitespace-delimited word of at most PATH_MAX - 1 characters
+ * from buf into fence_wdog_log_path. Returns count on success, -EINVAL when
+ * no word could be read, or a negative value if the scan format could not
+ * be built.
+ */
+static ssize_t fence_wdog_log_path_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	char format[STORE_FORMAT_LEN];
+	int ret;
+
+	/* Build "%<PATH_MAX - 1>s" so sscanf() cannot overrun the path buffer. */
+	ret = snprintf(format, STORE_FORMAT_LEN, "%%%ds", PATH_MAX - 1);
+	if (ret < 0)
+		return ret;
+
+	if (sscanf(buf, format, fence_wdog_log_path) != 1)
+		return -EINVAL;
+
+	/*
+	 * Report the whole input as consumed. Returning 0 would tell the
+	 * writer that nothing was written, and a typical write() loop would
+	 * then retry the same data forever.
+	 */
+	return count;
+}
+
+/* Remaining time until fencing, in seconds; writable to arm or disarm. */
+static struct kobj_attribute fence_wdog_timer_attr =
+	__ATTR(watchdog_timer, 0644,
+	       fence_wdog_timer_show, fence_wdog_timer_store);
+
+/* Name of the action taken when the deadline passes. */
+static struct kobj_attribute fence_wdog_action_attr =
+	__ATTR(watchdog_action, 0644,
+	       fence_wdog_action_show, fence_wdog_action_store);
+
+/* List of accepted action names; there is no store handler, so read-only. */
+static struct kobj_attribute fence_wdog_available_actions_attr =
+	__ATTR(watchdog_available_actions, 0444,
+	       fence_wdog_available_actions_show, NULL);
+
+/* Path of the file that receives a record before halt or reboot. */
+static struct kobj_attribute fence_wdog_log_path_attr =
+	__ATTR(watchdog_log_path, 0644,
+	       fence_wdog_log_path_show, fence_wdog_log_path_store);
+
+/* NULL-terminated list of attributes registered by fence_wdog_init(). */
+static struct attribute *fence_wdog_attrs[] = {
+	&fence_wdog_timer_attr.attr,
+	&fence_wdog_action_attr.attr,
+	&fence_wdog_available_actions_attr.attr,
+	&fence_wdog_log_path_attr.attr,
+	NULL,
+};
+
+/* Unnamed group: the files are placed directly in the target kobject's directory. */
+static struct attribute_group fence_wdog_attr_group = {
+	.attrs = fence_wdog_attrs,
+};
+
+/*
+ * Register the watchdog attribute group on kernel_kobj at init time.
+ *
+ * Returns 0 on success, or the error reported by sysfs_update_group();
+ * the failure is also logged so that a missing control interface does not
+ * go unnoticed.
+ */
+static int __init fence_wdog_init(void)
+{
+	int err;
+
+	err = sysfs_update_group(kernel_kobj, &fence_wdog_attr_group);
+	if (err)
+		printk(KERN_ERR "fence-watchdog: Failed to register sysfs attributes, err=%d\n", err);
+
+	return err;
+}
+
+module_init(fence_wdog_init)
diff --git a/net/core/dev.c b/net/core/dev.c
index 3500c9544d27..21b0e5ff5eaf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,6 +151,7 @@
#include <linux/prandom.h>
#include <linux/once_lite.h>
#include <linux/ve.h>
+#include <linux/fence-watchdog.h>
#include "net-sysfs.h"
@@ -3669,6 +3670,14 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
struct sk_buff *skb = first;
int rc = NETDEV_TX_OK;
+#ifdef CONFIG_FENCE_WATCHDOG
+ if (unlikely(fence_wdog_check_timer())) {
+ kfree_skb(skb);
+ *ret = rc;
+ return NULL;
+ }
+#endif
+
while (skb) {
struct sk_buff *next = skb->next;
@@ -7189,6 +7198,10 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
+#ifdef CONFIG_FENCE_WATCHDOG
+ fence_wdog_check_timer();
+#endif
+
for (;;) {
struct napi_struct *n;
diff --git a/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG b/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
new file mode 100644
index 000000000000..434aac2b336a
--- /dev/null
+++ b/redhat/configs/custom-overrides/generic/CONFIG_FENCE_WATCHDOG
@@ -0,0 +1 @@
+CONFIG_FENCE_WATCHDOG=y
--
2.30.2
More information about the Devel
mailing list