[Devel] [PATCH RH7 1/9] Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3

Pavel Tikhomirov ptikhomirov at virtuozzo.com
Tue Oct 13 07:52:51 PDT 2015


use DECLARE_VVAR to put fence_wdog_jiffies64 near jiffies in the
same cacheline

Author: Dmitry Guryanov
Email: dguryanov at parallels.com
Subject: add fencing watchdog for HA cluster
Date: Thu, 10 Jan 2013 15:25:06 +0400

* [fence-watchdog] kernel fencing watchdog is implemented.
	The userspace daemon should write the number of seconds before
	fencing to the file /sys/kernel/watchdog_timer, and must renew
	it before the time elapses. (PSBM-13631)

For the purposes of an HA cluster, we need to forbid the system from
working without a special userspace daemon. So add this watchdog
module, which will fence the node if that daemon does not update the
timer value in the file /sys/kernel/watchdog_timer.
The module is needed for pstorage: we need to protect the network
from the broken node, so the check is put into net_rx_action.

https://jira.sw.ru/browse/PSBM-13631

Signed-off-by: Dmitry Guryanov <dguryanov at parallels.com>
Acked-by: Pavel Emelyanov <xemul at parallels.com>

Changes in v2:
        * remove locking and add dependency on x86_64
        * put fence_wdog_jiffies64 near jiffies, so they'll be in
          the same cache line
        * use timespec_to_jiffies and jiffies_to_timespec

Changes in v3:
        * Add missing define for MAX_U64

Changes in v4:
	* remove unneeded extern definition
	* move __section_fence_wdog macro to fence_watchdog.c
	* remove __jiffy_data attribute, which is ignored.

Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
---
 arch/x86/include/asm/vvar.h    |  1 +
 include/linux/fence-watchdog.h |  6 +++
 kernel/Kconfig.openvz          |  5 +++
 kernel/Makefile                |  1 +
 kernel/fence-watchdog.c        | 99 ++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c                 |  4 ++
 6 files changed, 116 insertions(+)
 create mode 100644 include/linux/fence-watchdog.h
 create mode 100644 kernel/fence-watchdog.c

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index de656ac..1ae9a5c 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -45,6 +45,7 @@
 
 DECLARE_VVAR(0, volatile unsigned long, jiffies)
 DECLARE_VVAR(16, int, vgetcpu_mode)
+DECLARE_VVAR(64, volatile unsigned long, fence_wdog_jiffies64)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
diff --git a/include/linux/fence-watchdog.h b/include/linux/fence-watchdog.h
new file mode 100644
index 0000000..9cb41e9
--- /dev/null
+++ b/include/linux/fence-watchdog.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_FENCE_WATCHDOG_H_
+#define _LINUX_FENCE_WATCHDOG_H_
+
+inline void fence_wdog_check_timer(void);
+
+#endif
diff --git a/kernel/Kconfig.openvz b/kernel/Kconfig.openvz
index 2465d82..d9b9164 100644
--- a/kernel/Kconfig.openvz
+++ b/kernel/Kconfig.openvz
@@ -94,6 +94,11 @@ config VZ_EVENT
  	  networking code does. By now just the notifications of
  	  the VE essensial status changes are being sent.
 
+
+config FENCE_WATCHDOG
+	bool "Fencing watchdog for HA cluster support"
+	depends on X86_64
+	default n
 endmenu
 
 
diff --git a/kernel/Makefile b/kernel/Makefile
index fcedc78..73a73ed 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,7 @@ obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
+obj-$(CONFIG_FENCE_WATCHDOG) += fence-watchdog.o
 
 $(obj)/configs.o: $(obj)/config_data.h
 
diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
new file mode 100644
index 0000000..e25140f
--- /dev/null
+++ b/kernel/fence-watchdog.c
@@ -0,0 +1,99 @@
+/*
+ * Provide userspace with an interface to forbid the kernel to work
+ * without a userspace daemon.
+ *
+ * The daemon should write the number of seconds before fencing to the
+ * file /sys/kernel/watchdog_timer, and must renew it before the
+ * time elapses.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/jiffies.h>
+#include <linux/reboot.h>
+#include <linux/fence-watchdog.h>
+
+#define MAX_U64			(~(u64)0)
+#define MAX_JIFFIES_DELTA	(10 * 365UL * 24UL * 3600UL * HZ)
+
+DEFINE_VVAR(volatile unsigned long, fence_wdog_jiffies64) = MAX_U64;
+
+void fence_wdog_do_fence(void)
+{
+	lockdep_off();
+	local_irq_enable();
+	emergency_restart();
+}
+
+inline void fence_wdog_check_timer(void)
+{
+	if (get_jiffies_64() > fence_wdog_jiffies64)
+		fence_wdog_do_fence();
+}
+
+static ssize_t fence_wdog_timer_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	ssize_t ret;
+	u64 jiffies_delta = fence_wdog_jiffies64 - get_jiffies_64();
+	struct timespec t;
+
+	if (jiffies_delta > MAX_JIFFIES_DELTA) {
+		ret =  sprintf(buf, "inf\n");
+	} else {
+		jiffies_to_timespec(jiffies_delta, &t);
+		ret =  sprintf(buf, "%ld\n", t.tv_sec);
+	}
+
+	return ret;
+}
+
+static ssize_t fence_wdog_timer_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long long val;
+	unsigned long jiffies_delta;
+	struct timespec t;
+
+	if (strict_strtoull(buf, 10, &val))
+		return -EINVAL;
+
+	if (val == 0) {
+		fence_wdog_jiffies64 = MAX_U64;
+		return count;
+	}
+
+	t.tv_sec = val;
+	t.tv_nsec = 0;
+
+	jiffies_delta = timespec_to_jiffies(&t);
+	if (jiffies_delta > MAX_JIFFIES_DELTA)
+		return -EINVAL;
+
+	fence_wdog_jiffies64 = get_jiffies_64() + jiffies_delta;
+
+	return count;
+}
+
+static struct kobj_attribute fence_wdog_timer_attr =
+	__ATTR(watchdog_timer, 0644,
+		fence_wdog_timer_show, fence_wdog_timer_store);
+
+static struct attribute *fence_wdog_attrs[] = {
+	&fence_wdog_timer_attr.attr,
+	NULL,
+};
+
+static struct attribute_group fence_wdog_attr_group = {
+	.attrs = fence_wdog_attrs,
+};
+
+static int __init fence_wdog_init(void)
+{
+	sysfs_update_group(kernel_kobj, &fence_wdog_attr_group);
+	return 0;
+}
+
+module_init(fence_wdog_init)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6cceccf..5002d76 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
 #include <linux/hashtable.h>
+#include <linux/fence-watchdog.h>
 
 #include "net-sysfs.h"
 
@@ -4351,6 +4352,9 @@ static void net_rx_action(struct softirq_action *h)
 out:
 	net_rps_action_and_irq_enable(sd);
 
+#ifdef CONFIG_FENCE_WATCHDOG
+	fence_wdog_check_timer();
+#endif
 	return;
 
 softnet_break:
-- 
1.9.3




More information about the Devel mailing list