[Devel] [PATCH RHEL COMMIT] ve/printk: Virtualize "prb" and "clear_seq"
Konstantin Khorenko
khorenko at virtuozzo.com
Fri Oct 1 18:22:38 MSK 2021
The commit is pushed to "branch-rh9-5.14.vz9.1.x-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git
after ark-5.14
------>
commit 248c2b31353de1b28eed02cb9f3eab59eabd24dd
Author: Vladimir Davydov <vdavydov.dev at gmail.com>
Date: Wed Sep 29 21:18:47 2021 +0300
ve/printk: Virtualize "prb" and "clear_seq"
https://jira.sw.ru/browse/PSBM-17899
Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
+++
ve/printk: Fix printk virtualization
ve_printk() corrupts the host's dmesg:
# dmesg|wc -l
599
# vzctl create 101
# vzctl set 101 --netif_add eth0 --save
# vzctl start 101
# vzctl exec 101 'tcpdump -w tcpdump.out -U -n -i eth0 esp'
# dmesg|wc -l
2
Add the missing parts of printk virtualization to fix this.
https://jira.sw.ru/browse/PSBM-17899
https://jira.sw.ru/browse/PSBM-105442
Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
Rebasing to vz9: part of vz8 commit:
d63aeb311a64 ("ve/printk: printk virtualization")
https://jira.sw.ru/browse/PSBM-133985
Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
---
kernel/printk/printk.c | 176 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 123 insertions(+), 53 deletions(-)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 77e6787c752e..a1dedbc88426 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -378,17 +378,6 @@ struct latched_seq {
u64 val[2];
};
-/*
- * The next printk record to read after the last 'clear' command. There are
- * two copies (updated with seqcount_latch) so that reads can locklessly
- * access a valid value. Writers are synchronized by @syslog_lock.
- */
-static struct latched_seq clear_seq = {
- .latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
- .val[0] = 0,
- .val[1] = 0,
-};
-
#ifdef CONFIG_PRINTK_CALLER
#define PREFIX_MAX 48
#else
@@ -425,13 +414,26 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
static struct printk_ringbuffer printk_rb_dynamic;
-static struct printk_ringbuffer *prb = &printk_rb_static;
static struct log_state {
char *buf;
u32 buf_len;
+
+ /*
+ * The next printk record to read after the last 'clear' command. There are
+ * two copies (updated with seqcount_latch) so that reads can locklessly
+ * access a valid value. Writers are synchronized by @syslog_lock.
+ */
+ struct latched_seq clear_seq;
+ struct printk_ringbuffer *prb;
} init_log_state = {
.buf = __log_buf,
.buf_len = __LOG_BUF_LEN,
+ .clear_seq = {
+ .latch = SEQCNT_LATCH_ZERO(init_log_state.clear_seq.latch),
+ .val[0] = 0,
+ .val[1] = 0,
+ },
+ .prb = &printk_rb_static,
};
/* kdump relies on some log_* symbols, let's make it happy */
@@ -443,6 +445,8 @@ static void ____ ## name ## _definition(void) \
: : "g" (offsetof(typeof(inst), memb))); \
} \
extern typeof(inst.memb) name;
+DEFINE_STRUCT_MEMBER_ALIAS(clear_seq, init_log_state, clear_seq);
+DEFINE_STRUCT_MEMBER_ALIAS(prb, init_log_state, prb);
#undef DEFINE_STRUCT_MEMBER_ALIAS
static inline struct log_state *ve_log_state(void)
@@ -755,6 +759,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
static ssize_t devkmsg_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ struct log_state *log = ve_log_state();
struct devkmsg_user *user = file->private_data;
struct printk_record *r = &user->record;
size_t len;
@@ -768,7 +773,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
return ret;
printk_safe_enter_irq();
- if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
+ if (!prb_read_valid(log->prb, atomic64_read(&user->seq), r)) {
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
printk_safe_exit_irq();
@@ -777,7 +782,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
printk_safe_exit_irq();
ret = wait_event_interruptible(log_wait,
- prb_read_valid(prb, atomic64_read(&user->seq), r));
+ prb_read_valid(log->prb, atomic64_read(&user->seq), r));
if (ret)
goto out;
printk_safe_enter_irq();
@@ -824,6 +829,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
*/
static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
+ struct log_state *log = ve_log_state();
struct devkmsg_user *user = file->private_data;
loff_t ret = 0;
@@ -836,7 +842,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case SEEK_SET:
/* the first record */
- atomic64_set(&user->seq, prb_first_valid_seq(prb));
+ atomic64_set(&user->seq, prb_first_valid_seq(log->prb));
break;
case SEEK_DATA:
/*
@@ -844,11 +850,11 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
* changes no global state, and does not clear anything.
*/
- atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
+ atomic64_set(&user->seq, latched_seq_read_nolock(&log->clear_seq));
break;
case SEEK_END:
/* after the last record */
- atomic64_set(&user->seq, prb_next_seq(prb));
+ atomic64_set(&user->seq, prb_next_seq(log->prb));
break;
default:
ret = -EINVAL;
@@ -859,6 +865,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
{
+ struct log_state *log = ve_log_state();
struct devkmsg_user *user = file->private_data;
struct printk_info info;
__poll_t ret = 0;
@@ -869,7 +876,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
poll_wait(file, &log_wait, wait);
printk_safe_enter_irq();
- if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
+ if (prb_read_valid_info(log->prb, atomic64_read(&user->seq), &info, NULL)) {
/* return error when data has vanished underneath us */
if (info.seq != atomic64_read(&user->seq))
ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
@@ -883,6 +890,7 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
static int devkmsg_open(struct inode *inode, struct file *file)
{
+ struct log_state *log = ve_log_state();
struct devkmsg_user *user;
int err;
@@ -910,7 +918,7 @@ static int devkmsg_open(struct inode *inode, struct file *file)
&user->text_buf[0], sizeof(user->text_buf));
printk_safe_enter_irq();
- atomic64_set(&user->seq, prb_first_valid_seq(prb));
+ atomic64_set(&user->seq, prb_first_valid_seq(log->prb));
printk_safe_exit_irq();
file->private_data = user;
@@ -1193,7 +1201,7 @@ void __init setup_log_buf(int early)
* boot CPU and interrupts are disabled. So no new messages will
* appear during the transition to the dynamic buffer.
*/
- prb = &printk_rb_dynamic;
+ log->prb = &printk_rb_dynamic;
printk_safe_exit_irqrestore(flags);
@@ -1480,7 +1488,8 @@ static size_t get_record_print_text_size(struct printk_info *info,
* @max_seq is simply an upper bound and does not need to exist. If the caller
* does not require an upper bound, -1 can be used for @max_seq.
*/
-static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
+static u64 find_first_fitting_seq(struct log_state *log,
+ u64 start_seq, u64 max_seq, size_t size,
bool syslog, bool time)
{
struct printk_info info;
@@ -1489,7 +1498,7 @@ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
u64 seq;
/* Determine the size of the records up to @max_seq. */
- prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
+ prb_for_each_info(start_seq, log->prb, seq, &info, &line_count) {
if (info.seq >= max_seq)
break;
len += get_record_print_text_size(&info, line_count, syslog, time);
@@ -1508,7 +1517,7 @@ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
* might appear and get lost in the meantime. This is a best effort
* that prevents an infinite loop that could occur with a retry.
*/
- prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
+ prb_for_each_info(start_seq, log->prb, seq, &info, &line_count) {
if (len <= size || info.seq >= max_seq)
break;
len -= get_record_print_text_size(&info, line_count, syslog, time);
@@ -1517,7 +1526,8 @@ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
return seq;
}
-static int syslog_print(char __user *buf, int size)
+static int syslog_print(struct log_state *log,
+ char __user *buf, int size)
{
struct printk_info info;
struct printk_record r;
@@ -1536,7 +1546,7 @@ static int syslog_print(char __user *buf, int size)
printk_safe_enter_irq();
raw_spin_lock(&syslog_lock);
- if (!prb_read_valid(prb, syslog_seq, &r)) {
+ if (!prb_read_valid(log->prb, syslog_seq, &r)) {
raw_spin_unlock(&syslog_lock);
printk_safe_exit_irq();
break;
@@ -1588,7 +1598,8 @@ static int syslog_print(char __user *buf, int size)
return len;
}
-static int syslog_print_all(char __user *buf, int size, bool clear)
+static int syslog_print_all(struct log_state *log,
+ char __user *buf, int size, bool clear)
{
struct printk_info info;
struct printk_record r;
@@ -1607,13 +1618,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
* Find first record that fits, including all following records,
* into the user-provided buffer for this dump.
*/
- seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
- size, true, time);
+ seq = find_first_fitting_seq(log,
+ latched_seq_read_nolock(&log->clear_seq),
+ -1, size, true, time);
prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
len = 0;
- prb_for_each_record(seq, prb, seq, &r) {
+ prb_for_each_record(seq, log->prb, seq, &r) {
int textlen;
textlen = record_print_text(&r, true, time);
@@ -1636,7 +1648,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
if (clear) {
raw_spin_lock(&syslog_lock);
- latched_seq_write(&clear_seq, seq);
+ latched_seq_write(&log->clear_seq, seq);
raw_spin_unlock(&syslog_lock);
}
printk_safe_exit_irq();
@@ -1645,11 +1657,11 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
return len;
}
-static void syslog_clear(void)
+static void syslog_clear(struct log_state *log)
{
printk_safe_enter_irq();
raw_spin_lock(&syslog_lock);
- latched_seq_write(&clear_seq, prb_next_seq(prb));
+ latched_seq_write(&log->clear_seq, prb_next_seq(log->prb));
raw_spin_unlock(&syslog_lock);
printk_safe_exit_irq();
}
@@ -1692,7 +1704,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
return -EFAULT;
error = wait_event_interruptible(log_wait,
- prb_read_valid(prb, read_syslog_seq_irq(), NULL));
+ prb_read_valid(log->prb, read_syslog_seq_irq(), NULL));
if (error)
return error;
error = syslog_print(buf, len);
@@ -1709,11 +1721,11 @@ int do_syslog(int type, char __user *buf, int len, int source)
return 0;
if (!access_ok(buf, len))
return -EFAULT;
- error = syslog_print_all(buf, len, clear);
+ error = syslog_print_all(log, buf, len, clear);
break;
/* Clear ring buffer */
case SYSLOG_ACTION_CLEAR:
- syslog_clear();
+ syslog_clear(log);
break;
/* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_OFF:
@@ -1742,7 +1754,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
case SYSLOG_ACTION_SIZE_UNREAD:
printk_safe_enter_irq();
raw_spin_lock(&syslog_lock);
- if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
+ if (!prb_read_valid_info(log->prb, log->syslog_seq, &info, NULL)) {
/* No unread messages. */
raw_spin_unlock(&syslog_lock);
printk_safe_exit_irq();
@@ -1759,13 +1771,13 @@ int do_syslog(int type, char __user *buf, int len, int source)
* for pending data, not the size; return the count of
* records, not the length.
*/
- error = prb_next_seq(prb) - syslog_seq;
+ error = prb_next_seq(log->prb) - syslog_seq;
} else {
bool time = syslog_partial ? syslog_time : printk_time;
unsigned int line_count;
u64 seq;
- prb_for_each_info(syslog_seq, prb, seq, &info,
+ prb_for_each_info(log->syslog_seq, log->prb, seq, &info,
&line_count) {
error += get_record_print_text_size(&info, line_count,
true, time);
@@ -2117,7 +2129,7 @@ int vprintk_store_log(struct log_state *log, int facility, int level,
if (lflags & LOG_CONT) {
prb_rec_init_wr(&r, reserve_size);
- if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
+ if (prb_reserve_in_last(&e, log->prb, &r, caller_id, LOG_LINE_MAX)) {
text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
facility, &lflags, fmt, args);
r.info->text_len += text_len;
@@ -2139,12 +2151,12 @@ int vprintk_store_log(struct log_state *log, int facility, int level,
* structure when they fail.
*/
prb_rec_init_wr(&r, reserve_size);
- if (!prb_reserve(&e, prb, &r)) {
+ if (!prb_reserve(&e, log->prb, &r)) {
/* truncate the message if it is too long for empty buffer */
truncate_msg(log, &reserve_size, &trunc_msg_len);
prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
- if (!prb_reserve(&e, prb, &r))
+ if (!prb_reserve(&e, log->prb, &r))
return 0;
}
@@ -2600,6 +2612,7 @@ static inline int can_use_console(void)
*/
void console_unlock(void)
{
+ struct log_state *log = &init_log_state;
static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[CONSOLE_LOG_MAX];
unsigned long flags;
@@ -2649,7 +2662,7 @@ void console_unlock(void)
printk_safe_enter_irqsave(flags);
skip:
- if (!prb_read_valid(prb, console_seq, &r))
+ if (!prb_read_valid(log->prb, console_seq, &r))
break;
if (console_seq != r.info->seq) {
@@ -2725,7 +2738,7 @@ void console_unlock(void)
* there's a new owner and the console_unlock() from them will do the
* flush, no worries.
*/
- retry = prb_read_valid(prb, console_seq, NULL);
+ retry = prb_read_valid(log->prb, console_seq, NULL);
printk_safe_exit_irqrestore(flags);
if (retry && console_trylock())
@@ -2779,6 +2792,8 @@ void console_unblank(void)
*/
void console_flush_on_panic(enum con_flush_mode mode)
{
+ struct log_state *log = &init_log_state;
+
/*
* If someone else is holding the console lock, trylock will fail
* and may_schedule may be set. Ignore and proceed to unlock so
@@ -2793,7 +2808,7 @@ void console_flush_on_panic(enum con_flush_mode mode)
unsigned long flags;
printk_safe_enter_irqsave(flags);
- console_seq = prb_first_valid_seq(prb);
+ console_seq = prb_first_valid_seq(log->prb);
printk_safe_exit_irqrestore(flags);
}
console_unlock();
@@ -3443,7 +3458,8 @@ void kmsg_dump(enum kmsg_dump_reason reason)
bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
char *line, size_t size, size_t *len)
{
- u64 min_seq = latched_seq_read_nolock(&clear_seq);
+ struct log_state *log = &init_log_state;
+ u64 min_seq = latched_seq_read_nolock(&log->clear_seq);
struct printk_info info;
unsigned int line_count;
struct printk_record r;
@@ -3459,11 +3475,11 @@ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
/* Read text or count text lines? */
if (line) {
- if (!prb_read_valid(prb, iter->cur_seq, &r))
+ if (!prb_read_valid(log->prb, iter->cur_seq, &r))
goto out;
l = record_print_text(&r, syslog, printk_time);
} else {
- if (!prb_read_valid_info(prb, iter->cur_seq,
+ if (!prb_read_valid_info(log->prb, iter->cur_seq,
&info, &line_count)) {
goto out;
}
@@ -3504,7 +3520,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
char *buf, size_t size, size_t *len_out)
{
- u64 min_seq = latched_seq_read_nolock(&clear_seq);
+ struct log_state *log = &init_log_state;
+ u64 min_seq = latched_seq_read_nolock(&log->clear_seq);
struct printk_info info;
struct printk_record r;
unsigned long flags;
@@ -3521,7 +3538,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
iter->cur_seq = min_seq;
printk_safe_enter_irqsave(flags);
- if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
+ if (prb_read_valid_info(log->prb, iter->cur_seq, &info, NULL)) {
if (info.seq != iter->cur_seq) {
/* messages are gone, move to first available one */
iter->cur_seq = info.seq;
@@ -3540,7 +3557,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
* because this function (by way of record_print_text()) will
* not write more than size-1 bytes of text into @buf.
*/
- seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
+ seq = find_first_fitting_seq(log, iter->cur_seq, iter->next_seq,
size - 1, syslog, time);
/*
@@ -3552,7 +3569,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
prb_rec_init_rd(&r, &info, buf, size);
len = 0;
- prb_for_each_record(seq, prb, seq, &r) {
+ prb_for_each_record(seq, log->prb, seq, &r) {
if (r.info->seq >= iter->next_seq)
break;
@@ -3582,11 +3599,12 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
*/
void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
{
+ struct log_state *log = &init_log_state;
unsigned long flags;
printk_safe_enter_irqsave(flags);
- iter->cur_seq = latched_seq_read_nolock(&clear_seq);
- iter->next_seq = prb_next_seq(prb);
+ iter->cur_seq = latched_seq_read_nolock(&log->clear_seq);
+ iter->next_seq = prb_next_seq(log->prb);
printk_safe_exit_irqrestore(flags);
}
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
@@ -3595,12 +3613,21 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
int setup_log_buf_ve(struct log_state *log, struct ve_struct *ve, int early)
{
unsigned long local_log_buf_len;
+ struct printk_ringbuffer *local_prb;
int ret = -EINVAL;
+ struct printk_info *new_infos;
+ unsigned int new_descs_count;
+ struct prb_desc *new_descs;
+ size_t new_descs_size;
+ size_t new_infos_size;
char *new_log_buf;
local_log_buf_len = VE_LOG_BUF_LEN;
+ BUILD_BUG_ON((VE_LOG_BUF_LEN >> PRB_AVGBITS) == 0);
+ new_descs_count = local_log_buf_len >> PRB_AVGBITS;
+
ret = -ENOMEM;
new_log_buf = kmalloc(local_log_buf_len, GFP_KERNEL);
if (unlikely(!new_log_buf)) {
@@ -3609,10 +3636,47 @@ int setup_log_buf_ve(struct log_state *log, struct ve_struct *ve, int early)
goto out;
}
+ new_descs_size = new_descs_count * sizeof(struct prb_desc);
+ new_descs = kmalloc(new_descs_size, GFP_KERNEL);
+ if (unlikely(!new_descs)) {
+ pr_err("log_buf_len: %zu desc bytes not available\n",
+ new_descs_size);
+ goto err_free_log_buf;
+ }
+
+ new_infos_size = new_descs_count * sizeof(struct printk_info);
+ new_infos = kmalloc(new_infos_size, GFP_KERNEL);
+ if (unlikely(!new_infos)) {
+ pr_err("log_buf_len: %zu info bytes not available\n",
+ new_infos_size);
+ goto err_free_descs;
+ }
+
+ local_prb = kmalloc(sizeof(struct printk_ringbuffer), GFP_KERNEL);
+ if (unlikely(!local_prb)) {
+ pr_err("log_buf_len: %lu info bytes not available\n",
+ sizeof(struct printk_ringbuffer));
+ goto err_free_infos;
+ }
+
+ prb_init(local_prb,
+ new_log_buf, ilog2(local_log_buf_len),
+ new_descs, ilog2(new_descs_count),
+ new_infos);
+
log->buf_len = local_log_buf_len;
log->buf = new_log_buf;
+ log->prb = local_prb;
+
return 0;
+
+err_free_infos:
+ kfree(new_infos);
+err_free_descs:
+ kfree(new_descs);
+err_free_log_buf:
+ kfree(new_log_buf);
out:
return ret;
}
@@ -3632,6 +3696,8 @@ int ve_log_init(struct ve_struct *ve)
return ret;
}
+ seqcount_latch_init(&log->clear_seq.latch);
+
ve->log_state = log;
return 0;
}
@@ -3639,7 +3705,11 @@ int ve_log_init(struct ve_struct *ve)
void ve_log_destroy(struct ve_struct *ve)
{
struct log_state *log = ve->log_state;
+ struct printk_ringbuffer *rb = log->prb;
+ kfree(rb->desc_ring.infos);
+ kfree(rb->desc_ring.descs);
+ kfree(log->prb);
kfree(log->buf);
kfree(log);
}
More information about the Devel
mailing list