[Devel] [PATCH RHEL COMMIT] ve/printk: Virtualize log_wait queue

Fri Oct 1 18:23:59 MSK 2021

The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit d04759114253d359ae2eda81a0664b912e95ec7c
Author: Konstantin Khorenko <khorenko at virtuozzo.com>
Date:   Thu Sep 30 21:40:37 2021 +0300

    ve/printk: Virtualize log_wait queue
    
    https://jira.sw.ru/browse/PSBM-17899
    
    Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
    Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
    
    +++
    ve/printk: Fix printk virtualization
    
    ve_printk() corrupts host's dmesg:
            # dmesg|wc -l
            599
            # vzctl create 101
            # vzctl set 101 --netif_add eth0 --save
            # vzctl start 101
            # vzctl exec 101 'tcpdump -w tcpdump.out -U -n -i eth0 esp'
            # dmesg|wc -l
            2
    
    Add missing parts of printk virtualization to fix this.
    
    https://jira.sw.ru/browse/PSBM-17899
    https://jira.sw.ru/browse/PSBM-105442
    
    Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
    
    Rebasing to vz9: part of vz8 commit:
     d63aeb311a64 ("ve/printk: printk virtualization")
    
    https://jira.sw.ru/browse/PSBM-133985
    
    Note: we don't wake up the wait queue if vprintk_emit_log() has been
    called in scheduler context. This is a rare case, and it has always
    behaved this way.
    
    Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
 fs/proc/kmsg.c         |  4 ++--
 kernel/printk/printk.c | 60 +++++++++++++++++++++++++++++++++-----------------
 2 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index b38ad552887f..386ec85dea0f 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -18,7 +18,7 @@
 #include <linux/uaccess.h>
 #include <asm/io.h>
 
-extern wait_queue_head_t log_wait;
+extern void log_poll_wait(struct file *file, poll_table *wait);
 
 static int kmsg_open(struct inode * inode, struct file * file)
 {
@@ -42,7 +42,7 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
 
 static __poll_t kmsg_poll(struct file *file, poll_table *wait)
 {
-	poll_wait(file, &log_wait, wait);
+	log_poll_wait(file, wait);
 	if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
 		return EPOLLIN | EPOLLRDNORM;
 	return 0;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 04244a3edeab..7f1bec17866d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -360,7 +360,6 @@ enum log_flags {
 static DEFINE_RAW_SPINLOCK(syslog_lock);
 
 #ifdef CONFIG_PRINTK
-DECLARE_WAIT_QUEUE_HEAD(log_wait);
 
 struct latched_seq {
 	seqcount_latch_t	latch;
@@ -419,6 +418,8 @@ static struct log_state {
 	u64 exclusive_console_stop_seq;
 	unsigned long console_dropped;
 
+	wait_queue_head_t wait;
+
 	/*
 	 * The next printk record to read after the last 'clear' command. There are
 	 * two copies (updated with seqcount_latch) so that reads can locklessly
@@ -429,6 +430,7 @@ static struct log_state {
 } init_log_state = {
 	.buf = __log_buf,
 	.buf_len = __LOG_BUF_LEN,
+	.wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_log_state.wait),
 	.clear_seq = {
 		.latch	= SEQCNT_LATCH_ZERO(init_log_state.clear_seq.latch),
 		.val[0]	= 0,
@@ -460,6 +462,11 @@ static inline struct log_state *ve_log_state(void)
 	return log;
 }
 
+void log_poll_wait(struct file *filp, poll_table *p)
+{
+	poll_wait(filp, &ve_log_state()->wait, p);
+}
+
 /*
  * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
  * per_cpu_areas are initialised. This variable is set to true when
@@ -782,7 +789,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
 		}
 
 		printk_safe_exit_irq();
-		ret = wait_event_interruptible(log_wait,
+		ret = wait_event_interruptible(log->wait,
 				prb_read_valid(log->prb, atomic64_read(&user->seq), r));
 		if (ret)
 			goto out;
@@ -874,7 +881,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
 	if (!user)
 		return EPOLLERR|EPOLLNVAL;
 
-	poll_wait(file, &log_wait, wait);
+	poll_wait(file, &log->wait, wait);
 
 	printk_safe_enter_irq();
 	if (prb_read_valid_info(log->prb, atomic64_read(&user->seq), &info, NULL)) {
@@ -1705,7 +1712,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
 		if (!access_ok(buf, len))
 			return -EFAULT;
 
-		error = wait_event_interruptible(log_wait,
+		error = wait_event_interruptible(log->wait,
 				prb_read_valid(log->prb, read_syslog_seq_irq(), NULL));
 		if (error)
 			return error;
@@ -2220,20 +2227,29 @@ asmlinkage int vprintk_emit_log(struct log_state *log,
 
 	/* If called from the scheduler, we can not call up(). */
 	if (!in_sched) {
-		/*
-		 * Disable preemption to avoid being preempted while holding
-		 * console_sem which would prevent anyone from printing to
-		 * console
-		 */
-		preempt_disable();
-		/*
-		 * Try to acquire and then immediately release the console
-		 * semaphore.  The release will print out buffers and wake up
-		 * /dev/kmsg and syslog() users.
-		 */
-		if (console_trylock_spinning())
-			console_unlock();
-		preempt_enable();
+		if (log == &init_log_state) {
+			/*
+			 * Disable preemption to avoid being preempted while holding
+			 * console_sem which would prevent anyone from printing to
+			 * console
+			 */
+			preempt_disable();
+			/*
+			 * Try to acquire and then immediately release the console
+			 * semaphore.  The release will print out buffers and wake up
+			 * /dev/kmsg and syslog() users.
+			 */
+			if (console_trylock_spinning())
+				console_unlock();
+			preempt_enable();
+		} else {
+			/*
+			 * For (in_sched) case we need to wake up via
+			 * irq_work_queue(), so ... let's just wake up only in
+			 * (!in_sched) case for now.
+			 */
+			wake_up_interruptible(&log->wait);
+		}
 	}
 
 	wake_up_klogd();
@@ -3230,6 +3246,7 @@ static DEFINE_PER_CPU(int, printk_pending);
 
 static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
+	struct log_state *log = &init_log_state;
 	int pending = __this_cpu_xchg(printk_pending, 0);
 
 	if (pending & PRINTK_PENDING_OUTPUT) {
@@ -3239,7 +3256,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
 	}
 
 	if (pending & PRINTK_PENDING_WAKEUP)
-		wake_up_interruptible(&log_wait);
+		wake_up_interruptible(&log->wait);
 }
 
 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
@@ -3247,11 +3264,13 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
 
 void wake_up_klogd(void)
 {
+	struct log_state *log = &init_log_state;
+
 	if (!printk_percpu_data_ready())
 		return;
 
 	preempt_disable();
-	if (waitqueue_active(&log_wait)) {
+	if (waitqueue_active(&log->wait)) {
 		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
 		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
 	}
@@ -3696,6 +3715,7 @@ int ve_log_init(struct ve_struct *ve)
 		return ret;
 	}
 
+	init_waitqueue_head(&log->wait);
 	seqcount_latch_init(&log->clear_seq.latch);
 
 	ve->log_state = log;