[Devel] [PATCH RHEL7 COMMIT] fence-watchdog: Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3

Konstantin Khorenko khorenko at virtuozzo.com
Wed Nov 18 03:59:10 PST 2015


The commit is pushed to "branch-rh7-3.10.0-229.7.2.vz7.9.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.9.10
------>
commit d6bdf858dd38883cf7190d0a1543095c21e78f35
Author: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
Date:   Wed Nov 18 15:59:10 2015 +0400

    fence-watchdog: Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3
    
    Patchset description:
    
    Port: fence watchdog feature
    
    /dev/watchdog "softdog" is really close but it has several problems:
    - no shutdown action
    - no network blocking action
    - can not get timeout and timeleft
    - set timeout through ioctl only
    - set action in module load parameters only
    - our userspace(shaman) is using our interface
    
    so it is easier to port our watchdog
    
    Pavel Tikhomirov (9):
      Port: diff-fence-watchdog-introduce-fencing-watchdog-for-HA-cluster-3
      Port: diff-fence-watchdog-add-support-of-crash
      Port: diff-fence-watchdog-write-log-message-when-timer-elapses
      Port: diff-fence-watchdog-rename-action-to-halt
      Port: diff-fence-watchdog-schedule-work-in-case-of-halt-action
      Port: diff-fence-watchdog-add-netfilter-action
      Port: diff-fence-watchdog-add-wdog_tmo-match
      Port: diff-fence-watchdog-arptables-add-ability-to-filter-by
        -watchdog-timeout-v5
      Port: diff-fence-watchdog-netfilter-add-aliases-for-module-wdog_tmo
    
    https://jira.sw.ru/browse/PSBM-40736
    
    ================================================
    This patch description:
    
    use DECLARE_VVAR to put fence_wdog_jiffies64 near jiffies in the
    same cacheline
    
    Author: Dmitry Guryanov
    Email: dguryanov at parallels.com
    Subject: add fencing watchdog for HA cluster
    Date: Thu, 10 Jan 2013 15:25:06 +0400
    
    * [fence-watchdog] A kernel fencing watchdog is implemented.
    	The userspace daemon should write the number of seconds before
    	fencing to the file /sys/kernel/watchdog_timer, and must renew
    	it before the time elapses. (PSBM-13631)
    
    For HA cluster purposes the system must not keep running without
    a special userspace daemon. So add this watchdog module, which
    fences the node if that daemon does not update the timer value
    in the file /sys/kernel/watchdog_timer.
    The module is needed for pstorage, where the network must be
    protected from a broken node, so the check is placed in
    net_rx_action.
    
    https://jira.sw.ru/browse/PSBM-13631
    
    Signed-off-by: Dmitry Guryanov <dguryanov at parallels.com>
    
    Acked-by: Pavel Emelyanov <xemul at parallels.com>
    
    Changes in v2:
            * remove locking and add dependency on x86_64
            * put fence_wdog_jiffies64 near jiffies, so they'll be in
              the same cache line
            * use timespec_to_jiffies and jiffies_to_timespec
    
    Changes in v3:
            * Add missing define for MAX_U64
    
    Changes in v4:
    	* remove unneeded extern definition
    	* move __section_fence_wdog macro to fence_watchdog.c
    	* remove __jiffy_data attribute, which is ignored.
    
    Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>
    Acked-by: Andrew Vagin <avagin at virtuozzo.com>
---
 arch/x86/include/asm/vvar.h    |  1 +
 include/linux/fence-watchdog.h |  6 +++
 kernel/Kconfig.openvz          |  5 +++
 kernel/Makefile                |  1 +
 kernel/fence-watchdog.c        | 99 ++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c                 |  4 ++
 6 files changed, 116 insertions(+)

diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index de656ac..c8ea4ce 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -45,6 +45,7 @@
 
 DECLARE_VVAR(0, volatile unsigned long, jiffies)
 DECLARE_VVAR(16, int, vgetcpu_mode)
+DECLARE_VVAR(24, volatile unsigned long, fence_wdog_jiffies64) /* shares the 64-byte cache line of jiffies */
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
diff --git a/include/linux/fence-watchdog.h b/include/linux/fence-watchdog.h
new file mode 100644
index 0000000..9cb41e9
--- /dev/null
+++ b/include/linux/fence-watchdog.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_FENCE_WATCHDOG_H_
+#define _LINUX_FENCE_WATCHDOG_H_
+
+/* Fences the node if the watchdog deadline has passed (kernel/fence-watchdog.c). */
+void fence_wdog_check_timer(void);
+#endif
diff --git a/kernel/Kconfig.openvz b/kernel/Kconfig.openvz
index 4cfecd6..81d3b01 100644
--- a/kernel/Kconfig.openvz
+++ b/kernel/Kconfig.openvz
@@ -94,6 +94,11 @@ config VZ_EVENT
  	  networking code does. By now just the notifications of
  	  the VE essensial status changes are being sent.
 
+
+config FENCE_WATCHDOG
+	bool "Fencing watchdog for HA cluster support"
+	depends on X86_64
+	default n
 endmenu
 
 
diff --git a/kernel/Makefile b/kernel/Makefile
index fcedc78..73a73ed 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,7 @@ obj-$(CONFIG_PADATA) += padata.o
 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
+obj-$(CONFIG_FENCE_WATCHDOG) += fence-watchdog.o
 
 $(obj)/configs.o: $(obj)/config_data.h
 
diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
new file mode 100644
index 0000000..e25140f
--- /dev/null
+++ b/kernel/fence-watchdog.c
@@ -0,0 +1,99 @@
+/*
+ * Provide userspace with an interface to forbid the kernel to work
+ * without a userspace daemon.
+ *
+ * The daemon should write the number of seconds before fencing to
+ * the file /sys/kernel/watchdog_timer, and must renew it before the
+ * time elapses.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/jiffies.h>
+#include <linux/reboot.h>
+#include <linux/fence-watchdog.h>
+
+#define MAX_U64			(~(u64)0)	/* all-ones u64 */
+#define MAX_JIFFIES_DELTA	(10 * 365UL * 24UL * 3600UL * HZ)	/* ten 365-day years, in jiffies */
+
+DEFINE_VVAR(volatile unsigned long, fence_wdog_jiffies64) = MAX_U64;	/* fencing deadline in jiffies; MAX_U64 until armed */
+
+void fence_wdog_do_fence(void)
+{
+	lockdep_off();		/* NOTE(review): presumably silences lockdep on the restart path - confirm */
+	local_irq_enable();	/* interrupts are re-enabled before restarting */
+	emergency_restart();	/* fencing action: restart the machine */
+}
+
+void fence_wdog_check_timer(void)	/* out of line: called from net/core/dev.c */
+{
+	if (get_jiffies_64() > fence_wdog_jiffies64)	/* armed deadline has passed */
+		fence_wdog_do_fence();
+}
+
+static ssize_t fence_wdog_timer_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	/*
+	 * Seconds left until fencing; "inf" once the u64 difference
+	 * exceeds MAX_JIFFIES_DELTA (disarmed or already past due).
+	 */
+	u64 left = fence_wdog_jiffies64 - get_jiffies_64();
+	struct timespec ts;
+
+	if (left > MAX_JIFFIES_DELTA)
+		return sprintf(buf, "inf\n");
+
+	jiffies_to_timespec(left, &ts);
+	return sprintf(buf, "%ld\n", ts.tv_sec);
+}
+
+static ssize_t fence_wdog_timer_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	/* Arm the watchdog <buf> seconds from now; writing 0 disarms it. */
+	unsigned long long val;
+	unsigned long jiffies_delta;
+	struct timespec t = { .tv_nsec = 0 };
+
+	if (kstrtoull(buf, 10, &val))
+		return -EINVAL;
+	if (val == 0) {
+		fence_wdog_jiffies64 = MAX_U64;
+		return count;
+	}
+
+	/* Range-check before narrowing val into the signed tv_sec. */
+	if (val > MAX_JIFFIES_DELTA / HZ)
+		return -EINVAL;
+	t.tv_sec = val;
+
+	jiffies_delta = timespec_to_jiffies(&t);
+	if (jiffies_delta > MAX_JIFFIES_DELTA)
+		return -EINVAL;
+	fence_wdog_jiffies64 = get_jiffies_64() + jiffies_delta;
+	return count;
+}
+
+static struct kobj_attribute fence_wdog_timer_attr =
+	__ATTR(watchdog_timer, 0644,	/* file name and mode */
+		fence_wdog_timer_show, fence_wdog_timer_store);
+
+static struct attribute *fence_wdog_attrs[] = {
+	&fence_wdog_timer_attr.attr,
+	NULL,	/* list terminator */
+};
+
+static struct attribute_group fence_wdog_attr_group = {
+	.attrs = fence_wdog_attrs,	/* registered by fence_wdog_init() */
+};
+
+static int __init fence_wdog_init(void)
+{
+	/* Add watchdog_timer to kernel_kobj; pass any sysfs error to the caller. */
+	return sysfs_update_group(kernel_kobj, &fence_wdog_attr_group);
+}
+
+module_init(fence_wdog_init)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6cceccf..5002d76 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
 #include <linux/hashtable.h>
+#include <linux/fence-watchdog.h>
 
 #include "net-sysfs.h"
 
@@ -4351,6 +4352,9 @@ static void net_rx_action(struct softirq_action *h)
 out:
 	net_rps_action_and_irq_enable(sd);
 
+#ifdef CONFIG_FENCE_WATCHDOG
+	fence_wdog_check_timer();
+#endif
 	return;
 
 softnet_break:


More information about the Devel mailing list