[Devel] [PATCH RHEL7 COMMIT] fence-watchdog: Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Nov 18 03:59:10 PST 2015
The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.10
------>
commit d6bdf858dd38883cf7190d0a1543095c21e78f35
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date: Wed Nov 18 15:59:10 2015 +0400
fence-watchdog: Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3
Patchset description:
Port: fence watchdog feature
/dev/watchdog "softdog" is really close but it has several problems:
- no shutdown action
- no network blocking action
- can not get timeout and timeleft
- set timeout through ioctl only
- set action in module load parameters only
- our userspace (shaman) is using our interface
so it is easier to port our watchdog
Pavel Tikhomirov (9):
Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3
Port: diff-fence-watchdog-add-support-of-crash
Port: diff-fence-watchdog-write-log-message-when-timer-elapses
Port: diff-fence-watchdog-rename-action-to-halt
Port: diff-fence-watchdog-schedule-work-in-case-of-halt-action
Port: diff-fence-watchdog-add-netfilter-action
Port: diff-fence-watchdog-add-wdog_tmo-match
Port: diff-fence-watchdog-arptables-add-ability-to-filter-by
-watchdog-timeout-v5
Port: diff-fence-watchdog-netfilter-add-aliases-for-module-wdog_tmo
https://jira.sw.ru/browse/PSBM-40736
================================================
This patch description:
use DECLARE_VVAR to put fence_wdog_jiffies64 near jiffies in the
same cacheline
Author: Dmitry Guryanov
Email: dguryanov at parallels.com
Subject: add fencing watchdog for HA cluster
Date: Thu, 10 Jan 2013 15:25:06 +0400
* [fence-watchdog] kernel fencing watchdog is implemented.
The userspace daemon should write number of seconds before
fencing
to the file /sys/kernel/watchdog_timer, and must renew it,
before
the time elapses. (PSBM-13631)
We need to forbid system to work without a special userspace
daemon for purposes of HA cluster. So add this watchdog module,
which will fence the node, if that daemon won't update timer
value in the file /sys/kernel/watchdog_timer.
The module is needed for pstorage, so we need to protect network
from the broken node, so we can put check to net_rx_action.
https://jira.sw.ru/browse/PSBM-13631
Signed-off-by: Dmitry Guryanov <dguryanov at parallels.com>
Acked-by: Pavel Emelyanov <xemul at parallels.com>
Changes in v2:
* remove locking and add dependency on x86_64
* put fence_wdog_jiffies64 near jiffies, so they'll be in
the same cache line
* use timespec_to_jiffies and jiffies_to_timespec
Changes in v3:
* Add missing define for MAX_U64
Changes in v4:
* remove unneeded extern definition
* move __section_fence_wdog macro to fence_watchdog.c
* remove __jiffy_data attribute, which is ignored.
Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Acked-by: Andrew Vagin <avagin at virtuozzo.com>
---
arch/x86/include/asm/vvar.h | 1 +
include/linux/fence-watchdog.h | 6 +++
kernel/Kconfig.openvz | 5 +++
kernel/Makefile | 1 +
kernel/fence-watchdog.c | 99 ++++++++++++++++++++++++++++++++++++++++++
net/core/dev.c | 4 ++
6 files changed, 116 insertions(+)
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index de656ac..c8ea4ce 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -45,6 +45,7 @@
DECLARE_VVAR(0, volatile unsigned long, jiffies)
DECLARE_VVAR(16, int, vgetcpu_mode)
+DECLARE_VVAR(64, volatile unsigned long, fence_wdog_jiffies64)
DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
#undef DECLARE_VVAR
diff --git a/include/linux/fence-watchdog.h b/include/linux/fence-watchdog.h
new file mode 100644
index 0000000..9cb41e9
--- /dev/null
+++ b/include/linux/fence-watchdog.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_FENCE_WATCHDOG_H_
+#define _LINUX_FENCE_WATCHDOG_H_
+/* Fence the node once the watchdog deadline has passed (kernel/fence-watchdog.c). */
+void fence_wdog_check_timer(void);
+
+#endif
diff --git a/kernel/Kconfig.openvz b/kernel/Kconfig.openvz
index 4cfecd6..81d3b01 100644
--- a/kernel/Kconfig.openvz
+++ b/kernel/Kconfig.openvz
@@ -94,6 +94,11 @@ config VZ_EVENT
networking code does. By now just the notifications of
the VE essensial status changes are being sent.
+
+config FENCE_WATCHDOG
+ bool "Fencing watchdog for HA cluster support"
+ depends on X86_64
+ default n
endmenu
diff --git a/kernel/Makefile b/kernel/Makefile
index fcedc78..73a73ed 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,7 @@ obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
+obj-$(CONFIG_FENCE_WATCHDOG) += fence-watchdog.o
$(obj)/configs.o: $(obj)/config_data.h
diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
new file mode 100644
index 0000000..e25140f
--- /dev/null
+++ b/kernel/fence-watchdog.c
@@ -0,0 +1,99 @@
+/*
+ * Provide userspace with an interface to forbid kernel to work
+ * without an userspace daemon.
+ *
+ * The daemon should write the number of seconds before fencing to the
+ * file /sys/kernel/watchdog_timer, and must renew it before the
+ * time elapses.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/jiffies.h>
+#include <linux/reboot.h>
+#include <linux/fence-watchdog.h>
+
+#define MAX_U64 (~(u64)0) /* all bits set; used as the "disarmed" deadline */
+#define MAX_JIFFIES_DELTA (10 * 365UL * 24UL * 3600UL * HZ) /* 10 * 365 days */
+/* Absolute fencing deadline in jiffies; starts out disarmed. */
+DEFINE_VVAR(volatile unsigned long, fence_wdog_jiffies64) = MAX_U64;
+/* Fence this node: lockdep off, local IRQs on, then emergency_restart(). */
+void fence_wdog_do_fence(void)
+{
+ lockdep_off();
+ local_irq_enable();
+ emergency_restart();
+}
+/* Fence once past the deadline; plain '>' keeps MAX_U64 (disarmed) inert. */
+void fence_wdog_check_timer(void)
+{
+	if (get_jiffies_64() > fence_wdog_jiffies64)
+		fence_wdog_do_fence();
+}
+
+/*
+ * sysfs show: seconds left before fencing, or "inf" past MAX_JIFFIES_DELTA.
+ */
+static ssize_t fence_wdog_timer_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	u64 remaining = fence_wdog_jiffies64 - get_jiffies_64();
+	struct timespec left;
+
+	/* Unsigned wrap: a MAX_U64 or already-passed deadline also reads "inf". */
+	if (remaining > MAX_JIFFIES_DELTA)
+		return sprintf(buf, "inf\n");
+
+	jiffies_to_timespec(remaining, &left);
+	return sprintf(buf, "%ld\n", left.tv_sec);
+}
+
+/* sysfs store: arm the watchdog for N seconds from now; writing 0 disarms it. */
+static ssize_t fence_wdog_timer_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long long val;
+	unsigned long jiffies_delta;
+	struct timespec t;
+
+	/* kstrtoull() is the current name of the obsolete strict_strtoull(). */
+	if (kstrtoull(buf, 10, &val))
+		return -EINVAL;
+
+	if (val == 0) {
+		fence_wdog_jiffies64 = MAX_U64;
+		return count;
+	}
+
+	t.tv_sec = val;
+	t.tv_nsec = 0;
+	jiffies_delta = timespec_to_jiffies(&t);
+	if (jiffies_delta > MAX_JIFFIES_DELTA)
+		return -EINVAL;
+
+	fence_wdog_jiffies64 = get_jiffies_64() + jiffies_delta;
+	return count;
+}
+/* sysfs attribute "watchdog_timer" (mode 0644) backed by the show/store handlers. */
+static struct kobj_attribute fence_wdog_timer_attr =
+ __ATTR(watchdog_timer, 0644,
+ fence_wdog_timer_show, fence_wdog_timer_store);
+
+static struct attribute *fence_wdog_attrs[] = {
+ &fence_wdog_timer_attr.attr,
+ NULL,
+};
+/* Group handed to sysfs_update_group() in fence_wdog_init(); no .name is set. */
+static struct attribute_group fence_wdog_attr_group = {
+ .attrs = fence_wdog_attrs,
+};
+
+/* Register watchdog_timer on kernel_kobj and return the sysfs result. */
+static int __init fence_wdog_init(void)
+{
+	return sysfs_update_group(kernel_kobj, &fence_wdog_attr_group);
+}
+
+module_init(fence_wdog_init)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6cceccf..5002d76 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
+#include <linux/fence-watchdog.h>
#include "net-sysfs.h"
@@ -4351,6 +4352,9 @@ static void net_rx_action(struct softirq_action *h)
out:
net_rps_action_and_irq_enable(sd);
+#ifdef CONFIG_FENCE_WATCHDOG
+ fence_wdog_check_timer();
+#endif
return;
softnet_break:
More information about the Devel
mailing list