[Devel] [PATCH rh9 01/11] ve/printk: Introduce struct "log_state" and virtualize log_buf/log_buf_len
Konstantin Khorenko
khorenko at virtuozzo.com
Thu Sep 30 13:50:11 MSK 2021
On 30.09.2021 13:43, Kirill Tkhai wrote:
> On 29.09.2021 22:24, Konstantin Khorenko wrote:
>> From: Vladimir Davydov <vdavydov at parallels.com>
>>
>> https://jira.sw.ru/browse/PSBM-17899
>>
>> Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
>> Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
>>
>> +++
>> ve/printk: Fix printk virtualization
>>
>> ve_printk() corrupts host's dmesg:
>> # dmesg|wc -l
>> 599
>> # vzctl create 101
>> # vzctl set 101 --netif_add eth0 --save
>> # vzctl start 101
>> # vzctl exec 101 'tcpdump -w tcpdump.out -U -n -i eth0 esp'
>> # dmesg|wc -l
>> 2
>>
>> Add missing parts of prinkt virtualization to fix this.
>>
>> https://jira.sw.ru/browse/PSBM-17899
>> https://jira.sw.ru/browse/PSBM-105442
>>
>> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
>>
>> Rebasing to vz9: part of vz8 commit:
>> d63aeb311a64 ("ve/printk: printk virtualization")
>>
>> https://jira.sw.ru/browse/PSBM-133985
>>
>> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
>> ---
>> include/linux/printk.h | 13 ++++
>> include/linux/ve.h | 3 +
>> kernel/printk/printk.c | 165 ++++++++++++++++++++++++++++++++++-------
>> kernel/ve/ve.c | 8 ++
>> 4 files changed, 164 insertions(+), 25 deletions(-)
>>
>> diff --git a/include/linux/printk.h b/include/linux/printk.h
>> index e834d78f0478..f178e2e5d7f5 100644
>> --- a/include/linux/printk.h
>> +++ b/include/linux/printk.h
>> @@ -176,6 +176,10 @@ int vprintk(const char *fmt, va_list args);
>> asmlinkage __printf(1, 2) __cold
>> int printk(const char *fmt, ...);
>>
>> +struct ve_struct;
>> +int ve_log_init(struct ve_struct *ve);
>> +void ve_log_destroy(struct ve_struct *ve);
>> +
>> /*
>> * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
>> */
>> @@ -222,6 +226,15 @@ int printk(const char *s, ...)
>> {
>> return 0;
>> }
>> +static inline
>> +int ve_log_init(struct ve_struct *ve)
>> +{
>> + return 0;
>> +}
>> +static inline
>> +void ve_log_destroy(struct ve_struct *ve)
>> +{
>> +}
>> static inline __printf(1, 2) __cold
>> int printk_deferred(const char *s, ...)
>> {
>> diff --git a/include/linux/ve.h b/include/linux/ve.h
>> index 248cdeb0a2e4..552fa577e2f9 100644
>> --- a/include/linux/ve.h
>> +++ b/include/linux/ve.h
>> @@ -50,6 +50,9 @@ struct ve_struct {
>> /* see vzcalluser.h for VE_FEATURE_XXX definitions */
>> __u64 features;
>>
>> + void *log_state;
>> +#define VE_LOG_BUF_LEN 4096
>> +
>> struct kstat_lat_pcpu_struct sched_lat_ve;
>>
>> struct kmapset_key sysfs_perms_key;
>> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
>> index 142a58d124d9..77e6787c752e 100644
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -44,6 +44,7 @@
>> #include <linux/irq_work.h>
>> #include <linux/ctype.h>
>> #include <linux/uio.h>
>> +#include <linux/ve.h>
>> #include <linux/sched/clock.h>
>> #include <linux/sched/debug.h>
>> #include <linux/sched/task_stack.h>
>> @@ -408,8 +409,6 @@ static struct latched_seq clear_seq = {
>> #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
>> #define LOG_BUF_LEN_MAX (u32)(1 << 31)
>> static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
>> -static char *log_buf = __log_buf;
>> -static u32 log_buf_len = __LOG_BUF_LEN;
>>
>> /*
>> * Define the average message size. This only affects the number of
>> @@ -427,6 +426,34 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
>> static struct printk_ringbuffer printk_rb_dynamic;
>>
>> static struct printk_ringbuffer *prb = &printk_rb_static;
>> +static struct log_state {
>> + char *buf;
>> + u32 buf_len;
>> +} init_log_state = {
>> + .buf = __log_buf,
>> + .buf_len = __LOG_BUF_LEN,
>> +};
>
> Maybe, we move this hunk up to previous hunk "-"?
>
> -static char *log_buf = __log_buf;
> -static u32 log_buf_len = __LOG_BUF_LEN;
> +static struct log_state {
> + char *buf;
> ...
> + .buf = __log_buf,
> + .buf_len = __LOG_BUF_LEN,
> ...
>
> So, it clearer to understand we just reassigned __log_buf etc here?
i'd leave it here because if you check the final code, there is
.prb = &printk_rb_static,
in the init_log_state initialization,
so init_log_state should be at least after
_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
PRB_AVGBITS, &__log_buf[0]);
>
>> +
>> +/* kdump relies on some log_* symbols, let's make it happy */
>> +#define DEFINE_STRUCT_MEMBER_ALIAS(name, inst, memb) \
>> +static void ____ ## name ## _definition(void) __attribute__((used)); \
>> +static void ____ ## name ## _definition(void) \
>> +{ \
>> + asm (".globl " #name "\n\t.set " #name ", " #inst "+%c0" \
>> + : : "g" (offsetof(typeof(inst), memb))); \
>> +} \
>> +extern typeof(inst.memb) name;
>> +#undef DEFINE_STRUCT_MEMBER_ALIAS
>
> Maybe we should move this define to patch where it's used?
>
>> +
>> +static inline struct log_state *ve_log_state(void)
>> +{
>> + struct log_state *log = &init_log_state;
>> +#ifdef CONFIG_VE
>> + if (get_exec_env()->log_state)
>> + log = get_exec_env()->log_state;
>> +#endif
>> + return log;
>> +}
>>
>> /*
>> * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
>> @@ -468,13 +495,13 @@ static u64 latched_seq_read_nolock(struct latched_seq *ls)
>> /* Return log buffer address */
>> char *log_buf_addr_get(void)
>> {
>> - return log_buf;
>> + return init_log_state.buf;
>> }
>>
>> /* Return log buffer size */
>> u32 log_buf_len_get(void)
>> {
>> - return log_buf_len;
>> + return init_log_state.buf_len;
>> }
>>
>> /*
>> @@ -485,13 +512,14 @@ u32 log_buf_len_get(void)
>> #define MAX_LOG_TAKE_PART 4
>> static const char trunc_msg[] = "<truncated>";
>>
>> -static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
>> +static void truncate_msg(struct log_state *log,
>> + u16 *text_len, u16 *trunc_msg_len)
>> {
>> /*
>> * The message should not take the whole buffer. Otherwise, it might
>> * get removed too soon.
>> */
>> - u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
>> + u32 max_text_len = log->buf_len / MAX_LOG_TAKE_PART;
>>
>> if (*text_len > max_text_len)
>> *text_len = max_text_len;
>> @@ -642,14 +670,20 @@ struct devkmsg_user {
>> struct printk_record record;
>> };
>>
>> -static __printf(3, 4) __cold
>> -int devkmsg_emit(int facility, int level, const char *fmt, ...)
>> +asmlinkage int vprintk_emit_log(struct log_state *log,
>> + int facility, int level,
>> + const struct dev_printk_info *dev_info,
>> + const char *fmt, va_list args);
>> +
>> +static __printf(4, 5) __cold
>> +int devkmsg_emit_log(struct log_state *log, int facility, int level,
>> + const char *fmt, ...)
>> {
>> va_list args;
>> int r;
>>
>> va_start(args, fmt);
>> - r = vprintk_emit(facility, level, NULL, fmt, args);
>> + r = vprintk_emit_log(log, facility, level, NULL, fmt, args);
>> va_end(args);
>>
>> return r;
>> @@ -657,6 +691,7 @@ int devkmsg_emit(int facility, int level, const char *fmt, ...)
>>
>> static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
>> {
>> + struct log_state *log = ve_log_state();
>> char *buf, *line;
>> int level = default_message_loglevel;
>> int facility = 1; /* LOG_USER */
>> @@ -712,7 +747,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
>> }
>> }
>>
>> - devkmsg_emit(facility, level, "%s", line);
>> + devkmsg_emit_log(log, facility, level, "%s", line);
>> kfree(buf);
>> return ret;
>> }
>> @@ -987,11 +1022,11 @@ static void __init log_buf_len_update(u64 size)
>>
>> if (size)
>> size = roundup_pow_of_two(size);
>> - if (size > log_buf_len)
>> + if (size > init_log_state.buf_len)
>> new_log_buf_len = (unsigned long)size;
>> }
>>
>> -/* save requested log_buf_len since it's too early to process it */
>> +/* save requested log->buf_len since it's too early to process it */
>> static int __init log_buf_len_setup(char *str)
>> {
>> u64 size;
>> @@ -1077,6 +1112,7 @@ static char setup_text_buf[LOG_LINE_MAX] __initdata;
>>
>> void __init setup_log_buf(int early)
>> {
>> + struct log_state *log = &init_log_state;
>> struct printk_info *new_infos;
>> unsigned int new_descs_count;
>> struct prb_desc *new_descs;
>> @@ -1097,7 +1133,7 @@ void __init setup_log_buf(int early)
>> if (!early)
>> set_percpu_data_ready();
>>
>> - if (log_buf != __log_buf)
>> + if (log->buf != __log_buf)
>> return;
>>
>> if (!early && !new_log_buf_len)
>> @@ -1144,8 +1180,8 @@ void __init setup_log_buf(int early)
>>
>> printk_safe_enter_irqsave(flags);
>>
>> - log_buf_len = new_log_buf_len;
>> - log_buf = new_log_buf;
>> + log->buf_len = new_log_buf_len;
>> + log->buf = new_log_buf;
>> new_log_buf_len = 0;
>>
>> free = __LOG_BUF_LEN;
>> @@ -1166,7 +1202,7 @@ void __init setup_log_buf(int early)
>> prb_next_seq(&printk_rb_static) - seq);
>> }
>>
>> - pr_info("log_buf_len: %u bytes\n", log_buf_len);
>> + pr_info("log_buf_len: %u bytes\n", log->buf_len);
>> pr_info("early log buf free: %u(%u%%)\n",
>> free, (free * 100) / __LOG_BUF_LEN);
>> return;
>> @@ -1632,6 +1668,7 @@ static u64 read_syslog_seq_irq(void)
>>
>> int do_syslog(int type, char __user *buf, int len, int source)
>> {
>> + struct log_state *log = ve_log_state();
>> struct printk_info info;
>> bool clear = false;
>> static int saved_console_loglevel = LOGLEVEL_DEFAULT;
>> @@ -1741,7 +1778,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
>> break;
>> /* Size of the log buffer */
>> case SYSLOG_ACTION_SIZE_BUFFER:
>> - error = log_buf_len;
>> + error = log->buf_len;
>> break;
>> default:
>> error = -EINVAL;
>> @@ -2031,8 +2068,8 @@ static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lfl
>> return text_len;
>> }
>>
>> -__printf(4, 0)
>> -int vprintk_store(int facility, int level,
>> +__printf(5, 0)
>> +int vprintk_store_log(struct log_state *log, int facility, int level,
>> const struct dev_printk_info *dev_info,
>> const char *fmt, va_list args)
>> {
>> @@ -2104,7 +2141,7 @@ int vprintk_store(int facility, int level,
>> prb_rec_init_wr(&r, reserve_size);
>> if (!prb_reserve(&e, prb, &r)) {
>> /* truncate the message if it is too long for empty buffer */
>> - truncate_msg(&reserve_size, &trunc_msg_len);
>> + truncate_msg(log, &reserve_size, &trunc_msg_len);
>>
>> prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
>> if (!prb_reserve(&e, prb, &r))
>> @@ -2133,9 +2170,18 @@ int vprintk_store(int facility, int level,
>> return (text_len + trunc_msg_len);
>> }
>>
>> -asmlinkage int vprintk_emit(int facility, int level,
>> - const struct dev_printk_info *dev_info,
>> - const char *fmt, va_list args)
>> +int vprintk_store(int facility, int level,
>> + const struct dev_printk_info *dev_info,
>> + const char *fmt, va_list args)
>> +{
>> + return vprintk_store_log(&init_log_state, facility, level,
>> + dev_info, fmt, args);
>> +}
>> +
>> +asmlinkage int vprintk_emit_log(struct log_state *log,
>> + int facility, int level,
>> + const struct dev_printk_info *dev_info,
>> + const char *fmt, va_list args)
>> {
>> int printed_len;
>> bool in_sched = false;
>> @@ -2154,7 +2200,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>> printk_delay();
>>
>> printk_safe_enter_irqsave(flags);
>> - printed_len = vprintk_store(facility, level, dev_info, fmt, args);
>> + printed_len = vprintk_store_log(log, facility, level, dev_info, fmt, args);
>> printk_safe_exit_irqrestore(flags);
>>
>> /* If called from the scheduler, we can not call up(). */
>> @@ -2178,8 +2224,23 @@ asmlinkage int vprintk_emit(int facility, int level,
>> wake_up_klogd();
>> return printed_len;
>> }
>> +
>> +asmlinkage int vprintk_emit(int facility, int level,
>> + const struct dev_printk_info *dev_info,
>> + const char *fmt, va_list args)
>> +{
>> + return vprintk_emit_log(&init_log_state,
>> + facility, level, dev_info,
>> + fmt, args);
>> +}
>> EXPORT_SYMBOL(vprintk_emit);
>>
>> +static int __vprintk(const char *fmt, va_list args)
>> +{
>> + return vprintk_emit_log(ve_log_state(), 0, LOGLEVEL_DEFAULT,
>> + NULL, fmt, args);
>> +}
>> +
>> int vprintk_default(const char *fmt, va_list args)
>> {
>> return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
>> @@ -3530,7 +3591,61 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
>> }
>> EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
>>
>> -#endif
>> +#ifdef CONFIG_VE
>> +int setup_log_buf_ve(struct log_state *log, struct ve_struct *ve, int early)
>> +{
>> + unsigned long local_log_buf_len;
>> + int ret = -EINVAL;
>> +
>> + char *new_log_buf;
>> +
>> + local_log_buf_len = VE_LOG_BUF_LEN;
>> +
>> + ret = -ENOMEM;
>> + new_log_buf = kmalloc(local_log_buf_len, GFP_KERNEL);
>> + if (unlikely(!new_log_buf)) {
>> + pr_err("log_buf_len: %lu text bytes not available\n",
>> + local_log_buf_len);
>> + goto out;
>> + }
>> +
>> + log->buf_len = local_log_buf_len;
>> + log->buf = new_log_buf;
>> +
>> + return 0;
>> +out:
>> + return ret;
>> +}
>> +
>> +int ve_log_init(struct ve_struct *ve)
>> +{
>> + struct log_state *log;
>> + int ret;
>> +
>> + log = kzalloc(sizeof(*log), GFP_KERNEL);
>> + if (!log)
>> + return -ENOMEM;
>> +
>> + ret = setup_log_buf_ve(log, ve, 0);
>> + if (!ret) {
>> + kfree(log);
>> + return ret;
>> + }
>> +
>> + ve->log_state = log;
>> + return 0;
>> +}
>> +
>> +void ve_log_destroy(struct ve_struct *ve)
>> +{
>> + struct log_state *log = ve->log_state;
>> +
>> + kfree(log->buf);
>> + kfree(log);
>> +}
>> +#endif /* CONFIG_VE */
>> +
>> +#endif /* CONFIG_PRINTK */
>>
>> #ifdef CONFIG_SMP
>> static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
>> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
>> index 8f192ee41832..e3a07d4c9fe4 100644
>> --- a/kernel/ve/ve.c
>> +++ b/kernel/ve/ve.c
>> @@ -590,12 +590,19 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
>>
>> atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
>> ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
>> +
>> + err = ve_log_init(ve);
>> + if (err)
>> + goto err_log;
>> +
>> do_init:
>> init_rwsem(&ve->op_sem);
>> INIT_LIST_HEAD(&ve->ve_list);
>> kmapset_init_key(&ve->sysfs_perms_key);
>> return &ve->css;
>>
>> +err_log:
>> + free_percpu(ve->sched_lat_ve.cur);
>> err_lat:
>> kmem_cache_free(ve_cachep, ve);
>> err_ve:
>> @@ -637,6 +644,7 @@ static void ve_destroy(struct cgroup_subsys_state *css)
>> struct ve_struct *ve = css_to_ve(css);
>>
>> kmapset_unlink(&ve->sysfs_perms_key, &sysfs_ve_perms_set);
>> + ve_log_destroy(ve);
>> free_percpu(ve->sched_lat_ve.cur);
>> kmem_cache_free(ve_cachep, ve);
>> }
>>
>
> .
>
More information about the Devel
mailing list