[Devel] [PATCH rh9 01/11] ve/printk: Introduce struct "log_state" and virtualize log_buf/log_buf_len

Kirill Tkhai ktkhai at virtuozzo.com
Thu Sep 30 13:43:05 MSK 2021


On 29.09.2021 22:24, Konstantin Khorenko wrote:
> From: Vladimir Davydov <vdavydov at parallels.com>
> 
> https://jira.sw.ru/browse/PSBM-17899
> 
> Signed-off-by: Vladimir Davydov <vdavydov at parallels.com>
> Signed-off-by: Stanislav Kinsburskiy <skinsbursky at virtuozzo.com>
> 
> +++
> ve/printk: Fix printk virtualization
> 
> ve_printk() corrupts host's dmesg:
> 	# dmesg|wc -l
> 	599
> 	# vzctl create 101
> 	# vzctl set 101 --netif_add eth0 --save
> 	# vzctl start 101
> 	# vzctl exec 101 'tcpdump -w tcpdump.out -U -n -i eth0 esp'
> 	# dmesg|wc -l
> 	2
> 
> Add missing parts of prinkt virtualization to fix this.
> 
> https://jira.sw.ru/browse/PSBM-17899
> https://jira.sw.ru/browse/PSBM-105442
> 
> Signed-off-by: Andrey Ryabinin <aryabinin at virtuozzo.com>
> 
> Rebasing to vz9: part of vz8 commit:
>  d63aeb311a64 ("ve/printk: printk virtualization")
> 
> https://jira.sw.ru/browse/PSBM-133985
> 
> Signed-off-by: Konstantin Khorenko <khorenko at virtuozzo.com>
> ---
>  include/linux/printk.h |  13 ++++
>  include/linux/ve.h     |   3 +
>  kernel/printk/printk.c | 165 ++++++++++++++++++++++++++++++++++-------
>  kernel/ve/ve.c         |   8 ++
>  4 files changed, 164 insertions(+), 25 deletions(-)
> 
> diff --git a/include/linux/printk.h b/include/linux/printk.h
> index e834d78f0478..f178e2e5d7f5 100644
> --- a/include/linux/printk.h
> +++ b/include/linux/printk.h
> @@ -176,6 +176,10 @@ int vprintk(const char *fmt, va_list args);
>  asmlinkage __printf(1, 2) __cold
>  int printk(const char *fmt, ...);
>  
> +struct ve_struct;
> +int ve_log_init(struct ve_struct *ve);
> +void ve_log_destroy(struct ve_struct *ve);
> +
>  /*
>   * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
>   */
> @@ -222,6 +226,15 @@ int printk(const char *s, ...)
>  {
>  	return 0;
>  }
> +static inline
> +int ve_log_init(struct ve_struct *ve)
> +{
> +	return 0;
> +}
> +static inline
> +void ve_log_destroy(struct ve_struct *ve)
> +{
> +}
>  static inline __printf(1, 2) __cold
>  int printk_deferred(const char *s, ...)
>  {
> diff --git a/include/linux/ve.h b/include/linux/ve.h
> index 248cdeb0a2e4..552fa577e2f9 100644
> --- a/include/linux/ve.h
> +++ b/include/linux/ve.h
> @@ -50,6 +50,9 @@ struct ve_struct {
>  	/* see vzcalluser.h for VE_FEATURE_XXX definitions */
>  	__u64			features;
>  
> +	void			*log_state;
> +#define VE_LOG_BUF_LEN		4096
> +
>  	struct kstat_lat_pcpu_struct    sched_lat_ve;
>  
>  	struct kmapset_key	sysfs_perms_key;
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 142a58d124d9..77e6787c752e 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -44,6 +44,7 @@
>  #include <linux/irq_work.h>
>  #include <linux/ctype.h>
>  #include <linux/uio.h>
> +#include <linux/ve.h>
>  #include <linux/sched/clock.h>
>  #include <linux/sched/debug.h>
>  #include <linux/sched/task_stack.h>
> @@ -408,8 +409,6 @@ static struct latched_seq clear_seq = {
>  #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
>  #define LOG_BUF_LEN_MAX (u32)(1 << 31)
>  static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
> -static char *log_buf = __log_buf;
> -static u32 log_buf_len = __LOG_BUF_LEN;
>  
>  /*
>   * Define the average message size. This only affects the number of
> @@ -427,6 +426,34 @@ _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
>  static struct printk_ringbuffer printk_rb_dynamic;
>  
>  static struct printk_ringbuffer *prb = &printk_rb_static;
> +static struct log_state {
> +	char *buf;
> +	u32 buf_len;
> +} init_log_state = {
> +	.buf = __log_buf,
> +	.buf_len = __LOG_BUF_LEN,
> +};

Maybe, we move this hunk up to previous hunk "-"?

-static char *log_buf = __log_buf;
-static u32 log_buf_len = __LOG_BUF_LEN;
+static struct log_state {
+	char *buf;
...
+	.buf = __log_buf,
+	.buf_len = __LOG_BUF_LEN,
...

So, it clearer to understand we just reassigned __log_buf etc here?

> +
> +/* kdump relies on some log_* symbols, let's make it happy */
> +#define DEFINE_STRUCT_MEMBER_ALIAS(name, inst, memb)			\
> +static void ____ ## name ## _definition(void) __attribute__((used));	\
> +static void ____ ## name ## _definition(void)				\
> +{									\
> +	asm (".globl " #name "\n\t.set " #name ", " #inst "+%c0"	\
> +	     : : "g" (offsetof(typeof(inst), memb)));			\
> +}									\
> +extern typeof(inst.memb) name;
> +#undef DEFINE_STRUCT_MEMBER_ALIAS

Maybe we should move this define to patch where it's used?

> +
> +static inline struct log_state *ve_log_state(void)
> +{
> +	struct log_state *log = &init_log_state;
> +#ifdef CONFIG_VE
> +	if (get_exec_env()->log_state)
> +		log = get_exec_env()->log_state;
> +#endif
> +	return log;
> +}
>  
>  /*
>   * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
> @@ -468,13 +495,13 @@ static u64 latched_seq_read_nolock(struct latched_seq *ls)
>  /* Return log buffer address */
>  char *log_buf_addr_get(void)
>  {
> -	return log_buf;
> +	return init_log_state.buf;
>  }
>  
>  /* Return log buffer size */
>  u32 log_buf_len_get(void)
>  {
> -	return log_buf_len;
> +	return init_log_state.buf_len;
>  }
>  
>  /*
> @@ -485,13 +512,14 @@ u32 log_buf_len_get(void)
>  #define MAX_LOG_TAKE_PART 4
>  static const char trunc_msg[] = "<truncated>";
>  
> -static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
> +static void truncate_msg(struct log_state *log,
> +			 u16 *text_len, u16 *trunc_msg_len)
>  {
>  	/*
>  	 * The message should not take the whole buffer. Otherwise, it might
>  	 * get removed too soon.
>  	 */
> -	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
> +	u32 max_text_len = log->buf_len / MAX_LOG_TAKE_PART;
>  
>  	if (*text_len > max_text_len)
>  		*text_len = max_text_len;
> @@ -642,14 +670,20 @@ struct devkmsg_user {
>  	struct printk_record record;
>  };
>  
> -static __printf(3, 4) __cold
> -int devkmsg_emit(int facility, int level, const char *fmt, ...)
> +asmlinkage int vprintk_emit_log(struct log_state *log,
> +				int facility, int level,
> +				const struct dev_printk_info *dev_info,
> +				const char *fmt, va_list args);
> +
> +static __printf(4, 5) __cold
> +int devkmsg_emit_log(struct log_state *log, int facility, int level,
> +		     const char *fmt, ...)
>  {
>  	va_list args;
>  	int r;
>  
>  	va_start(args, fmt);
> -	r = vprintk_emit(facility, level, NULL, fmt, args);
> +	r = vprintk_emit_log(log, facility, level, NULL, fmt, args);
>  	va_end(args);
>  
>  	return r;
> @@ -657,6 +691,7 @@ int devkmsg_emit(int facility, int level, const char *fmt, ...)
>  
>  static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
>  {
> +	struct log_state *log = ve_log_state();
>  	char *buf, *line;
>  	int level = default_message_loglevel;
>  	int facility = 1;	/* LOG_USER */
> @@ -712,7 +747,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
>  		}
>  	}
>  
> -	devkmsg_emit(facility, level, "%s", line);
> +	devkmsg_emit_log(log, facility, level, "%s", line);
>  	kfree(buf);
>  	return ret;
>  }
> @@ -987,11 +1022,11 @@ static void __init log_buf_len_update(u64 size)
>  
>  	if (size)
>  		size = roundup_pow_of_two(size);
> -	if (size > log_buf_len)
> +	if (size > init_log_state.buf_len)
>  		new_log_buf_len = (unsigned long)size;
>  }
>  
> -/* save requested log_buf_len since it's too early to process it */
> +/* save requested log->buf_len since it's too early to process it */
>  static int __init log_buf_len_setup(char *str)
>  {
>  	u64 size;
> @@ -1077,6 +1112,7 @@ static char setup_text_buf[LOG_LINE_MAX] __initdata;
>  
>  void __init setup_log_buf(int early)
>  {
> +	struct log_state *log = &init_log_state;
>  	struct printk_info *new_infos;
>  	unsigned int new_descs_count;
>  	struct prb_desc *new_descs;
> @@ -1097,7 +1133,7 @@ void __init setup_log_buf(int early)
>  	if (!early)
>  		set_percpu_data_ready();
>  
> -	if (log_buf != __log_buf)
> +	if (log->buf != __log_buf)
>  		return;
>  
>  	if (!early && !new_log_buf_len)
> @@ -1144,8 +1180,8 @@ void __init setup_log_buf(int early)
>  
>  	printk_safe_enter_irqsave(flags);
>  
> -	log_buf_len = new_log_buf_len;
> -	log_buf = new_log_buf;
> +	log->buf_len = new_log_buf_len;
> +	log->buf = new_log_buf;
>  	new_log_buf_len = 0;
>  
>  	free = __LOG_BUF_LEN;
> @@ -1166,7 +1202,7 @@ void __init setup_log_buf(int early)
>  		       prb_next_seq(&printk_rb_static) - seq);
>  	}
>  
> -	pr_info("log_buf_len: %u bytes\n", log_buf_len);
> +	pr_info("log_buf_len: %u bytes\n", log->buf_len);
>  	pr_info("early log buf free: %u(%u%%)\n",
>  		free, (free * 100) / __LOG_BUF_LEN);
>  	return;
> @@ -1632,6 +1668,7 @@ static u64 read_syslog_seq_irq(void)
>  
>  int do_syslog(int type, char __user *buf, int len, int source)
>  {
> +	struct log_state *log = ve_log_state();
>  	struct printk_info info;
>  	bool clear = false;
>  	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
> @@ -1741,7 +1778,7 @@ int do_syslog(int type, char __user *buf, int len, int source)
>  		break;
>  	/* Size of the log buffer */
>  	case SYSLOG_ACTION_SIZE_BUFFER:
> -		error = log_buf_len;
> +		error = log->buf_len;
>  		break;
>  	default:
>  		error = -EINVAL;
> @@ -2031,8 +2068,8 @@ static u16 printk_sprint(char *text, u16 size, int facility, enum log_flags *lfl
>  	return text_len;
>  }
>  
> -__printf(4, 0)
> -int vprintk_store(int facility, int level,
> +__printf(5, 0)
> +int vprintk_store_log(struct log_state *log, int facility, int level,
>  		  const struct dev_printk_info *dev_info,
>  		  const char *fmt, va_list args)
>  {
> @@ -2104,7 +2141,7 @@ int vprintk_store(int facility, int level,
>  	prb_rec_init_wr(&r, reserve_size);
>  	if (!prb_reserve(&e, prb, &r)) {
>  		/* truncate the message if it is too long for empty buffer */
> -		truncate_msg(&reserve_size, &trunc_msg_len);
> +		truncate_msg(log, &reserve_size, &trunc_msg_len);
>  
>  		prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
>  		if (!prb_reserve(&e, prb, &r))
> @@ -2133,9 +2170,18 @@ int vprintk_store(int facility, int level,
>  	return (text_len + trunc_msg_len);
>  }
>  
> -asmlinkage int vprintk_emit(int facility, int level,
> -			    const struct dev_printk_info *dev_info,
> -			    const char *fmt, va_list args)
> +int vprintk_store(int facility, int level,
> +		  const struct dev_printk_info *dev_info,
> +		  const char *fmt, va_list args)
> +{
> +	return vprintk_store_log(&init_log_state, facility, level,
> +				 dev_info, fmt, args);
> +}
> +
> +asmlinkage int vprintk_emit_log(struct log_state *log,
> +				int facility, int level,
> +				const struct dev_printk_info *dev_info,
> +				const char *fmt, va_list args)
>  {
>  	int printed_len;
>  	bool in_sched = false;
> @@ -2154,7 +2200,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	printk_delay();
>  
>  	printk_safe_enter_irqsave(flags);
> -	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
> +	printed_len = vprintk_store_log(log, facility, level, dev_info, fmt, args);
>  	printk_safe_exit_irqrestore(flags);
>  
>  	/* If called from the scheduler, we can not call up(). */
> @@ -2178,8 +2224,23 @@ asmlinkage int vprintk_emit(int facility, int level,
>  	wake_up_klogd();
>  	return printed_len;
>  }
> +
> +asmlinkage int vprintk_emit(int facility, int level,
> +			    const struct dev_printk_info *dev_info,
> +			    const char *fmt, va_list args)
> +{
> +	return vprintk_emit_log(&init_log_state,
> +				facility, level, dev_info,
> +				fmt, args);
> +}
>  EXPORT_SYMBOL(vprintk_emit);
>  
> +static int __vprintk(const char *fmt, va_list args)
> +{
> +	return vprintk_emit_log(ve_log_state(), 0, LOGLEVEL_DEFAULT,
> +			        NULL, fmt, args);
> +}
> +
>  int vprintk_default(const char *fmt, va_list args)
>  {
>  	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
> @@ -3530,7 +3591,61 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
>  }
>  EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
>  
> -#endif
> +#ifdef CONFIG_VE
> +int setup_log_buf_ve(struct log_state *log, struct ve_struct *ve, int early)
> +{
> +	unsigned long local_log_buf_len;
> +	int ret = -EINVAL;
> +
> +	char *new_log_buf;
> +
> +	local_log_buf_len = VE_LOG_BUF_LEN;
> +
> +	ret = -ENOMEM;
> +	new_log_buf = kmalloc(local_log_buf_len, GFP_KERNEL);
> +	if (unlikely(!new_log_buf)) {
> +		pr_err("log_buf_len: %lu text bytes not available\n",
> +		       local_log_buf_len);
> +		goto out;
> +	}
> +
> +	log->buf_len = local_log_buf_len;
> +	log->buf = new_log_buf;
> +
> +	return 0;
> +out:
> +	return ret;
> +}
> +
> +int ve_log_init(struct ve_struct *ve)
> +{
> +	struct log_state *log;
> +	int ret;
> +
> +	log = kzalloc(sizeof(*log), GFP_KERNEL);
> +	if (!log)
> +		return -ENOMEM;
> +
> +	ret = setup_log_buf_ve(log, ve, 0);
> +	if (!ret) {
> +		kfree(log);
> +		return ret;
> +	}
> +
> +	ve->log_state = log;
> +	return 0;
> +}
> +
> +void ve_log_destroy(struct ve_struct *ve)
> +{
> +	struct log_state *log = ve->log_state;
> +
> +	kfree(log->buf);
> +	kfree(log);
> +}
> +#endif /* CONFIG_VE */
> +
> +#endif /* CONFIG_PRINTK */
>  
>  #ifdef CONFIG_SMP
>  static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
> index 8f192ee41832..e3a07d4c9fe4 100644
> --- a/kernel/ve/ve.c
> +++ b/kernel/ve/ve.c
> @@ -590,12 +590,19 @@ static struct cgroup_subsys_state *ve_create(struct cgroup_subsys_state *parent_
>  
>  	atomic_set(&ve->netns_avail_nr, NETNS_MAX_NR_DEFAULT);
>  	ve->netns_max_nr = NETNS_MAX_NR_DEFAULT;
> +
> +	err = ve_log_init(ve);
> +	if (err)
> +		goto err_log;
> +
>  do_init:
>  	init_rwsem(&ve->op_sem);
>  	INIT_LIST_HEAD(&ve->ve_list);
>  	kmapset_init_key(&ve->sysfs_perms_key);
>  	return &ve->css;
>  
> +err_log:
> +	free_percpu(ve->sched_lat_ve.cur);
>  err_lat:
>  	kmem_cache_free(ve_cachep, ve);
>  err_ve:
> @@ -637,6 +644,7 @@ static void ve_destroy(struct cgroup_subsys_state *css)
>  	struct ve_struct *ve = css_to_ve(css);
>  
>  	kmapset_unlink(&ve->sysfs_perms_key, &sysfs_ve_perms_set);
> +	ve_log_destroy(ve);
>  	free_percpu(ve->sched_lat_ve.cur);
>  	kmem_cache_free(ve_cachep, ve);
>  }
> 



More information about the Devel mailing list