[Devel] [PATCH rh9 05/11] ve/printk: Virtualize log_wait queue
Konstantin Khorenko
khorenko at virtuozzo.com
Wed Sep 29 22:24:41 MSK 2021
From: Vladimir Davydov <vdavydov at parallels.com>
https://jira.sw.ru/browse/PSBM-17899
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
+++
ve/printk: Fix printk virtualization
ve_printk() corrupts host's dmesg:
# dmesg|wc -l
599
# vzctl create 101
# vzctl set 101 --netif_add eth0 --save
# vzctl start 101
# vzctl exec 101 'tcpdump -w tcpdump.out -U -n -i eth0 esp'
# dmesg|wc -l
2
Add the missing parts of printk virtualization to fix this.
https://jira.sw.ru/browse/PSBM-17899
https://jira.sw.ru/browse/PSBM-105442
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Rebasing to vz9: part of vz8 commit:
d63aeb311a64 ("ve/printk: printk virtualization")
https://jira.sw.ru/browse/PSBM-133985
Note: we don't wake up the wait queue if vprintk_emit_log() has been called
in scheduler context. This is a rare case, and it has always behaved this way.
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
fs/proc/kmsg.c | 4 +--
kernel/printk/printk.c | 59 ++++++++++++++++++++++++++++--------------
2 files changed, 42 insertions(+), 21 deletions(-)
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index b38ad552887f..386ec85dea0f 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -18,7 +18,7 @@
#include <linux/uaccess.h>
#include <asm/io.h>
-extern wait_queue_head_t log_wait;
+extern void log_poll_wait(struct file *file, poll_table *wait);
static int kmsg_open(struct inode * inode, struct file * file)
{
@@ -42,7 +42,7 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
static __poll_t kmsg_poll(struct file *file, poll_table *wait)
{
- poll_wait(file, &log_wait, wait);
+ log_poll_wait(file, wait);
if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
return EPOLLIN | EPOLLRDNORM;
return 0;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 1da9021c279b..7f1bec17866d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -418,6 +418,8 @@ static struct log_state {
u64 exclusive_console_stop_seq;
unsigned long console_dropped;
+ wait_queue_head_t wait;
+
/*
* The next printk record to read after the last 'clear' command. There are
* two copies (updated with seqcount_latch) so that reads can locklessly
@@ -428,6 +430,7 @@ static struct log_state {
} init_log_state = {
.buf = __log_buf,
.buf_len = __LOG_BUF_LEN,
+ .wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_log_state.wait),
.clear_seq = {
.latch = SEQCNT_LATCH_ZERO(init_log_state.clear_seq.latch),
.val[0] = 0,
@@ -459,6 +462,11 @@ static inline struct log_state *ve_log_state(void)
return log;
}
+void log_poll_wait(struct file *filp, poll_table *p)
+{
+ poll_wait(filp, &ve_log_state()->wait, p);
+}
+
/*
* We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
* per_cpu_areas are initialised. This variable is set to true when
@@ -781,7 +789,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
}
printk_safe_exit_irq();
- ret = wait_event_interruptible(log_wait,
+ ret = wait_event_interruptible(log->wait,
prb_read_valid(log->prb, atomic64_read(&user->seq), r));
if (ret)
goto out;
@@ -873,7 +881,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
if (!user)
return EPOLLERR|EPOLLNVAL;
- poll_wait(file, &log_wait, wait);
+ poll_wait(file, &log->wait, wait);
printk_safe_enter_irq();
if (prb_read_valid_info(log->prb, atomic64_read(&user->seq), &info, NULL)) {
@@ -1704,7 +1712,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
if (!access_ok(buf, len))
return -EFAULT;
- error = wait_event_interruptible(log_wait,
+ error = wait_event_interruptible(log->wait,
prb_read_valid(log->prb, read_syslog_seq_irq(), NULL));
if (error)
return error;
@@ -2219,20 +2227,29 @@ asmlinkage int vprintk_emit_log(struct log_state *log,
/* If called from the scheduler, we can not call up(). */
if (!in_sched) {
- /*
- * Disable preemption to avoid being preempted while holding
- * console_sem which would prevent anyone from printing to
- * console
- */
- preempt_disable();
- /*
- * Try to acquire and then immediately release the console
- * semaphore. The release will print out buffers and wake up
- * /dev/kmsg and syslog() users.
- */
- if (console_trylock_spinning())
- console_unlock();
- preempt_enable();
+ if (log == &init_log_state) {
+ /*
+ * Disable preemption to avoid being preempted while holding
+ * console_sem which would prevent anyone from printing to
+ * console
+ */
+ preempt_disable();
+ /*
+ * Try to acquire and then immediately release the console
+ * semaphore. The release will print out buffers and wake up
+ * /dev/kmsg and syslog() users.
+ */
+ if (console_trylock_spinning())
+ console_unlock();
+ preempt_enable();
+ } else {
+ /*
+ * For (in_sched) case we need to wake up via
+ * irq_work_queue(), so ... let's just wake up only in
+ * (!in_sched) case for now.
+ */
+ wake_up_interruptible(&log->wait);
+ }
}
wake_up_klogd();
@@ -3229,6 +3246,7 @@ static DEFINE_PER_CPU(int, printk_pending);
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
+ struct log_state *log = &init_log_state;
int pending = __this_cpu_xchg(printk_pending, 0);
if (pending & PRINTK_PENDING_OUTPUT) {
@@ -3238,7 +3256,7 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)
}
if (pending & PRINTK_PENDING_WAKEUP)
- wake_up_interruptible(&log_wait);
+ wake_up_interruptible(&log->wait);
}
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
@@ -3246,11 +3264,13 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
void wake_up_klogd(void)
{
+ struct log_state *log = &init_log_state;
+
if (!printk_percpu_data_ready())
return;
preempt_disable();
- if (waitqueue_active(&log_wait)) {
+ if (waitqueue_active(&log->wait)) {
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
}
@@ -3695,6 +3715,7 @@ int ve_log_init(struct ve_struct *ve)
return ret;
}
+ init_waitqueue_head(&log->wait);
seqcount_latch_init(&log->clear_seq.latch);
ve->log_state = log;
--
2.28.0
More information about the Devel
mailing list