[Devel] [PATCH RH7 v3 1/2] fence-watchdog: log wdog actions directly to special file

Kirill Tkhai ktkhai at virtuozzo.com
Thu Jul 20 18:00:28 MSK 2017


On 20.07.2017 17:58, Pavel Tikhomirov wrote:
> If watchdog was triggered, that means that userspace is unreliable
> and we can't expect kernel messages are logged by userspace logger,
> thus we need ourselfs write info about watchdog triggered to special
> file configured through sysfs /sys/kernel/watchdog_log_path, so that
> users can see that watchdog was triggered but not some other event.
> 
> Note: we schedule do_halt_or_reboot to workqueue, which consists of
> pool of kernel threads. Manager thread adds more worker threads when
> all existing workers sleep, restricted by number of max_active. For
> our system_wq, max_active == 256. So we can not fear that waiting for
> fsync in our work will deadlock with writeback work.
> 
> Print timestamp in format "HH:MM:SS/YYYY-MM-DD" local time.
> 
> v2: use kernel_write helper
> v3: add timestamp to message, control string operation len
> 
> https://jira.sw.ru/browse/PSBM-54747
> Signed-off-by: Pavel Tikhomirov <ptikhomirov at virtuozzo.com>

Reviewed-by: Kirill Tkhai <ktkhai at virtuozzo.com>

> ---
>  kernel/fence-watchdog.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 101 insertions(+)
> 
> diff --git a/kernel/fence-watchdog.c b/kernel/fence-watchdog.c
> index f5c2347f..c35b3ad 100644
> --- a/kernel/fence-watchdog.c
> +++ b/kernel/fence-watchdog.c
> @@ -23,6 +23,8 @@
>  #include <linux/fence-watchdog.h>
>  #include <linux/device.h>
>  #include <linux/kmsg_dump.h>
> +#include <linux/fs.h>
> +#include <linux/string.h>
>  
>  #define MAX_U64			(~(u64)0)
>  #define MAX_JIFFIES_DELTA	(10 * 365UL * 24UL * 3600UL * HZ)
> @@ -41,11 +43,81 @@ const char *action_names[] = {"crash", "reboot", "halt", "netfilter", NULL};
>  DEFINE_VVAR(volatile unsigned long, fence_wdog_jiffies64) = MAX_U64;
>  static int fence_wdog_action = FENCE_WDOG_CRASH;
>  static atomic_t not_fenced = ATOMIC_INIT(-1);
> +static char fence_wdog_log_path[PATH_MAX] = "/fence_wdog.log";
> +
> +#define SECS_PER_MIN	60
> +#define PREFIX_LEN	39
> +
> +static int print_prefix(char *msg) {
> +	struct timeval tv;
> +	struct tm tm;
> +
> +	do_gettimeofday(&tv);
> +	time_to_tm(tv.tv_sec - sys_tz.tz_minuteswest * SECS_PER_MIN, 0, &tm);
> +
> +	return snprintf(msg, PREFIX_LEN, "[%02d:%02d:%02d/%04ld-%02d-%02d] fence-watchdog: ",
> +			tm.tm_hour, tm.tm_min, tm.tm_sec,
> +			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
> +}
> +
> +#define MSG_LEN (PREFIX_LEN + 10)
> +
> +void fence_wdog_log(void)
> +{
> +	char msg[MSG_LEN];
> +	struct file *file;
> +	int ret, len;
> +
> +	ret = print_prefix(msg);
> +	if (ret < 0)
> +		return;
> +
> +	len = strlen(msg);
> +
> +	ret = snprintf(msg + len, MSG_LEN - len, "%s\n", action_names[fence_wdog_action]);
> +	if (ret != strlen(action_names[fence_wdog_action]) + 1) {
> +		printk(KERN_EMERG"fence-watchdog: Failed to sprintf msg\n");
> +		return;
> +	}
> +
> +	file = filp_open(fence_wdog_log_path,
> +			 O_CREAT | O_WRONLY | O_APPEND | O_NOFOLLOW | O_LARGEFILE,
> +			 0600);
> +	if (IS_ERR(file)) {
> +		printk(KERN_EMERG"fence-watchdog: Failed to open log path\n");
> +		return;
> +	}
> +
> +	if (!S_ISREG(file_inode(file)->i_mode)) {
> +		printk(KERN_EMERG"fence-watchdog: Wrong type of log file\n");
> +		goto close;
> +	}
> +
> +	ret = kernel_write(file, msg, strlen(msg), file->f_pos);
> +	if (ret < 0) {
> +		printk(KERN_EMERG"fence-watchdog: Failed to write msg, ret=%d\n", ret);
> +		goto close;
> +	}
> +
> +	ret = vfs_fsync(file, 0);
> +	if (ret < 0)
> +		printk(KERN_EMERG"fence-watchdog: Failed to fsync log file ret=%d\n", ret);
> +
> +close:
> +	ret = filp_close(file, NULL);
> +	if (ret < 0)
> +		printk(KERN_EMERG"fence-watchdog: Failed to close log file ret=%d\n", ret);
> +
> +	return;
> +}
>  
>  static void do_halt_or_reboot(struct work_struct *dummy)
>  {
>  	printk(KERN_EMERG"fence-watchdog: %s\n",
>  	       action_names[fence_wdog_action]);
> +
> +	fence_wdog_log();
> +
>  	switch (fence_wdog_action) {
>  	case FENCE_WDOG_REBOOT:
>  		emergency_restart();
> @@ -166,6 +238,30 @@ static ssize_t fence_wdog_available_actions_show(struct kobject *kobj,
>  	return ret;
>  }
>  
> +static ssize_t fence_wdog_log_path_show(struct kobject *kobj,
> +		struct kobj_attribute *attr, char *buf)
> +{
> +	return snprintf(buf, PAGE_SIZE, "%s\n", fence_wdog_log_path);
> +}
> +
> +#define STORE_FORMAT_LEN 16
> +
> +static ssize_t fence_wdog_log_path_store(struct kobject *kobj,
> +		struct kobj_attribute *attr, const char *buf, size_t count)
> +{
> +	char format[STORE_FORMAT_LEN];
> +	int ret;
> +
> +	ret = snprintf(format, STORE_FORMAT_LEN, "%%%ds", PATH_MAX - 1);
> +	if (ret < 0)
> +		return ret;
> +
> +
> +	if (sscanf(buf, format, fence_wdog_log_path) != 1)
> +		return -EINVAL;
> +	return 0;
> +}
> +
>  static struct kobj_attribute fence_wdog_timer_attr =
>  	__ATTR(watchdog_timer, 0644,
>  		fence_wdog_timer_show, fence_wdog_timer_store);
> @@ -178,10 +274,15 @@ static struct kobj_attribute fence_wdog_available_actions_attr =
>  	__ATTR(watchdog_available_actions, 0644,
>  		fence_wdog_available_actions_show, NULL);
>  
> +static struct kobj_attribute fence_wdog_log_path_attr =
> +	__ATTR(watchdog_log_path, 0644,
> +		fence_wdog_log_path_show, fence_wdog_log_path_store);
> +
>  static struct attribute *fence_wdog_attrs[] = {
>  	&fence_wdog_timer_attr.attr,
>  	&fence_wdog_action_attr.attr,
>  	&fence_wdog_available_actions_attr.attr,
> +	&fence_wdog_log_path_attr.attr,
>  	NULL,
>  };
>  
> 


More information about the Devel mailing list