[Devel] [PATCH 1/1] RFC: Containerized syslog (Take II)
Jean-Marc Pigeon
jmp at safe.ca
Tue Feb 16 07:24:17 PST 2010
Containerized syslog is now part of nsproxy.
A new flag, CLONE_SYSLOG, allows unsharing
the syslog area.
The main purpose of containerized syslog is to prevent
a full container from leaking or compromising
the host's syslog data.
---
include/linux/init_task.h | 2 +
include/linux/nsproxy.h | 2 +
include/linux/sched.h | 1 +
include/linux/syslog.h | 9 ++--
include/linux/user_namespace.h | 1 -
kernel/fork.c | 2 +-
kernel/nsproxy.c | 18 +++++++-
kernel/printk.c | 14 +++---
kernel/syslog.c | 84 ++++++++++++++++++++++++++++++++--------
kernel/user.c | 3 -
kernel/user_namespace.c | 5 --
11 files changed, 101 insertions(+), 40 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index abec69b..30b479e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -11,6 +11,7 @@
#include <linux/user_namespace.h>
#include <linux/securebits.h>
#include <net/net_namespace.h>
+#include <linux/syslog.h>
extern struct files_struct init_files;
extern struct fs_struct init_fs;
@@ -37,6 +38,7 @@ extern struct nsproxy init_nsproxy;
.count = ATOMIC_INIT(1), \
.uts_ns = &init_uts_ns, \
.mnt_ns = NULL, \
+ .syslog_ns = &init_kernel_syslog_ns, \
INIT_NET_NS(net_ns) \
INIT_IPC_NS(ipc_ns) \
}
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 7b370c7..852fed3 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -3,6 +3,7 @@
#include <linux/spinlock.h>
#include <linux/sched.h>
+#include <linux/syslog.h>
struct mnt_namespace;
struct uts_namespace;
@@ -29,6 +30,7 @@ struct nsproxy {
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
struct net *net_ns;
+ struct syslog_ns *syslog_ns;
};
extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c..659cc81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -9,6 +9,7 @@
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_SYSLOG 0x00001000 /* set if we need private syslog (/proc/kmsg) */
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
diff --git a/include/linux/syslog.h b/include/linux/syslog.h
index 98c6898..cdbebee 100644
--- a/include/linux/syslog.h
+++ b/include/linux/syslog.h
@@ -3,6 +3,7 @@
#include <linux/spinlock_types.h>
struct syslog_ns {
+ struct kref kref; /*syslog_ns reference count & control */
wait_queue_head_t wait;
spinlock_t logbuf_lock; /* access conflict locker */
unsigned log_start; /* Index into log_buf: next char to be read by syslog() */
@@ -22,8 +23,8 @@ extern struct syslog_ns init_kernel_syslog_ns;
* Syslog API
*
*/
-extern struct syslog_ns *syslog_malloc(unsigned container_buf_len);
-extern struct syslog_ns *syslog_realloc(struct syslog_ns *syslog_ns, unsigned container_buf_len);
-extern struct syslog_ns *syslog_free(struct syslog_ns *syslog);
-extern struct syslog_ns *syslog_get_current(void);
+extern struct syslog_ns *realloc_syslog_ns(struct syslog_ns *syslog_ns, unsigned container_buf_len);
+extern struct syslog_ns *copy_syslog_ns(unsigned long flags,struct syslog_ns *current_syslog_ns);
+extern struct syslog_ns *release_syslog_ns(struct syslog_ns *current_syslog_ns);
+extern struct syslog_ns *get_current_syslog_ns(void);
#endif /* _LINUX_SYSLOG_H */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 3d0c73e..cc4f453 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -14,7 +14,6 @@ struct user_namespace {
struct hlist_head uidhash_table[UIDHASH_SZ];
struct user_struct *creator;
struct work_struct destroyer;
- struct syslog_ns *syslog;
};
extern struct user_namespace init_user_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd98..38c8d8c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1647,7 +1647,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
err = -EINVAL;
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
- CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET))
+ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|CLONE_SYSLOG))
goto bad_unshare_out;
/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 09b4ff9..ff968db 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -44,6 +44,8 @@ static inline struct nsproxy *create_nsproxy(void)
static struct nsproxy *create_new_namespaces(unsigned long flags,
struct task_struct *tsk, struct fs_struct *new_fs)
{
+#define CONTAINER_BUF_LEN 4096 /*should be enough for container syslog */
+
struct nsproxy *new_nsp;
int err;
@@ -80,9 +82,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
err = PTR_ERR(new_nsp->net_ns);
goto out_net;
}
-
+ new_nsp->syslog_ns = copy_syslog_ns(flags, tsk->nsproxy->syslog_ns);
+ if (IS_ERR(new_nsp->syslog_ns)) {
+ err = PTR_ERR(new_nsp->syslog_ns);
+ goto out_syslog;
+ }
+
return new_nsp;
+out_syslog:
+ if (new_nsp->net_ns)
+ put_net(new_nsp->net_ns);
out_net:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
@@ -116,7 +126,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
get_nsproxy(old_ns);
if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWPID | CLONE_NEWNET)))
+ CLONE_NEWPID | CLONE_NEWNET | CLONE_SYSLOG)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -151,6 +161,8 @@ out:
void free_nsproxy(struct nsproxy *ns)
{
+ if (ns->syslog_ns)
+ ns->syslog_ns=release_syslog_ns(ns->syslog_ns);
if (ns->mnt_ns)
put_mnt_ns(ns->mnt_ns);
if (ns->uts_ns)
@@ -173,7 +185,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWNET)))
+ CLONE_NEWNET | CLONE_SYSLOG )))
return 0;
if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/printk.c b/kernel/printk.c
index fd0a05c..3c7f213 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -148,7 +148,7 @@ static int saved_console_loglevel = -1;
*/
void log_buf_kexec_setup(void)
{
- struct syslog_ns *syslog_ns = syslog_get_current();
+ struct syslog_ns *syslog_ns = get_current_syslog_ns();
VMCOREINFO_SYMBOL(sys_log_buf);
VMCOREINFO_SYMBOL(sys_log_end);
@@ -163,7 +163,7 @@ static int __init log_buf_len_setup(char *str)
if (size) {
size = roundup_pow_of_two(size);
- (void) syslog_realloc(&init_kernel_syslog_ns,size);
+ (void) realloc_syslog_ns(&init_kernel_syslog_ns,size);
}
return 1;
}
@@ -244,7 +244,7 @@ int do_syslog(int type, char __user *buf, int len)
int do_clear = 0;
char c;
int error = 0;
- struct syslog_ns *syslog_ns = syslog_get_current();
+ struct syslog_ns *syslog_ns = get_current_syslog_ns();
error = security_syslog(type);
if (error)
@@ -638,7 +638,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
{
int printed_len = 0;
int current_log_level = default_message_loglevel;
- struct syslog_ns *syslog_ns = syslog_get_current();
+ struct syslog_ns *syslog_ns = get_current_syslog_ns();
unsigned long flags;
int this_cpu;
char *p;
@@ -1012,7 +1012,7 @@ void release_console_sem(void)
unsigned long flags;
unsigned _con_start, _log_end;
unsigned wake_klogd = 0;
- struct syslog_ns *syslog_ns = syslog_get_current();
+ struct syslog_ns *syslog_ns = get_current_syslog_ns();
for ( ; ; ) {
spin_lock_irqsave(&sys_log_lock, flags);
@@ -1252,7 +1252,7 @@ void register_console(struct console *newcon)
* for us.
*/
- struct syslog_ns *syslog_ns = syslog_get_current();
+ struct syslog_ns *syslog_ns = get_current_syslog_ns();
spin_lock_irqsave(&sys_log_lock, flags);
sys_log_con_start = sys_log_start;
@@ -1462,7 +1462,7 @@ void kmsg_dump(enum kmsg_dump_reason reason)
const char *s1, *s2;
unsigned long l1, l2;
unsigned long flags;
- struct syslog_ns *syslog_ns = syslog_get_current();
+ struct syslog_ns *syslog_ns = get_current_syslog_ns();
/* Theoretically, the log could move on after we do this, but
there's not a lot we can do about that. The new messages
diff --git a/kernel/syslog.c b/kernel/syslog.c
index 69d30a9..0088a85 100644
--- a/kernel/syslog.c
+++ b/kernel/syslog.c
@@ -22,35 +22,66 @@
*
*/
+#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
#include <linux/cred.h>
+#include <linux/kref.h>
#include <linux/user_namespace.h>
#include <linux/syslog.h>
+#ifdef CONFIG_PRINTK
/*
* Static memory definition, used to assign a syslog
* to the kernel itself
*
*/
-
-#ifdef CONFIG_PRINTK
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
static char __log_buf[__LOG_BUF_LEN];
struct syslog_ns init_kernel_syslog_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
.wait = __WAIT_QUEUE_HEAD_INITIALIZER(init_kernel_syslog_ns.wait),
.buf_len = __LOG_BUF_LEN,
.buf = __log_buf
};
+EXPORT_SYMBOL_GPL(init_kernel_syslog_ns);
#endif
+/*
+ * Procedure to free all ressources tied to syslog
+ *
+ */
+struct syslog_ns *syslog_free(struct syslog_ns *syslog)
+
+{
+ if (syslog != (struct syslog_ns *)0) {
+ (void) kfree(syslog->buf);
+ (void) kfree(syslog);
+ syslog = (struct syslog_ns *)0;
+ }
+ return syslog;
+}
/*
+ * Procedure to interface kref _put with syslog_free
+ *
+ */
+static void syslog_out(struct kref *kref)
+
+{
+ struct syslog_ns *sl;
+
+ sl=container_of(kref, struct syslog_ns, kref);
+ sl=syslog_free(sl);
+}
+/*
* Procedure to assign memory for syslog area
*
*/
-struct syslog_ns * syslog_malloc(unsigned container_buf_len)
+static struct syslog_ns * malloc_syslog_ns(unsigned container_buf_len)
{
struct syslog_ns *ns;
@@ -61,6 +92,8 @@ struct syslog_ns * syslog_malloc(unsigned container_buf_len)
if (!ns)
return ERR_PTR(-ENOMEM);
+ (void) kref_init(&(ns->kref));
+
ns->buf_len = container_buf_len;
ns->buf = kzalloc(container_buf_len, GFP_KERNEL);
if (!ns->buf) {
@@ -77,7 +110,7 @@ struct syslog_ns * syslog_malloc(unsigned container_buf_len)
* If syslog_ns is NULL, assign a brand new syslog_ns
*
*/
-struct syslog_ns * syslog_realloc(struct syslog_ns *syslog_ns, unsigned container_buf_len)
+struct syslog_ns * realloc_syslog_ns(struct syslog_ns *syslog_ns, unsigned container_buf_len)
{
if ((syslog_ns == &init_kernel_syslog_ns ) && (container_buf_len > syslog_ns->buf_len)) {
@@ -102,7 +135,7 @@ struct syslog_ns * syslog_realloc(struct syslog_ns *syslog_ns, unsigned containe
(void) free_bootmem((unsigned long)old_buf, old_buf_len);
}
if (!syslog_ns)
- return syslog_malloc(container_buf_len);
+ return malloc_syslog_ns(container_buf_len);
if (syslog_ns->buf_len > container_buf_len) {
(void) printk(KERN_WARNING "log_buf_len: Not allowed to decrease syslog buffer\n");
return ERR_PTR(-EINVAL);
@@ -126,32 +159,51 @@ struct syslog_ns * syslog_realloc(struct syslog_ns *syslog_ns, unsigned containe
(void) printk(KERN_NOTICE "log_buf_len: %u\n", syslog_ns->buf_len);
return syslog_ns;
}
+
/*
- * Procedure to free all ressources tied to syslog
+ * Procedure to use current syslog unless a CLONE_SYSLOG is set
+ * such a new syslog area is defined and used
*
*/
-struct syslog_ns *syslog_free(struct syslog_ns *syslog)
+struct syslog_ns *copy_syslog_ns(unsigned long flags,struct syslog_ns *current_syslog_ns)
{
- if (syslog != (struct syslog_ns *)0) {
- (void) kfree(syslog->buf);
- (void) kfree(syslog);
- syslog = (struct syslog_ns *)0;
- }
- return syslog;
+#define CONTAINER_BUF_LEN 4096 /*should be enough for container syslog */
+
+ BUG_ON(!current_syslog_ns);
+ if ((flags & CLONE_SYSLOG) == 0) /*incrementing usage reference count */
+ (void) kref_get(&(current_syslog_ns->kref));
+ else
+ current_syslog_ns=malloc_syslog_ns(CONTAINER_BUF_LEN);
+ return current_syslog_ns;
+
+}
+
+/*
+ * Procedure to decrement syslog usage count and free memory
+ * if syslog usage count reach zero.
+ *
+ */
+struct syslog_ns *release_syslog_ns(struct syslog_ns *current_syslog_ns)
+
+{
+ if (kref_put(&(current_syslog_ns->kref), syslog_out)==0)
+ current_syslog_ns=(struct syslog_ns *)0;
+ return current_syslog_ns;
}
/*
- * Procedure to get the current syslog area linked to a container (by CLONE_USER)
+ * Procedure to get the current syslog area linked to a container (by CLONE_SYSLOG)
* if trouble, pin down the problem before it propagate.
*
*/
-struct syslog_ns *syslog_get_current(void)
+struct syslog_ns *get_current_syslog_ns(void)
{
+
struct syslog_ns *ns;
- ns = current_user_ns()->syslog;
+ ns = current->nsproxy->syslog_ns;
BUG_ON(!ns);
return ns;
}
diff --git a/kernel/user.c b/kernel/user.c
index cb2d4ba..d9bea1f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -23,9 +23,6 @@ struct user_namespace init_user_ns = {
.kref = {
.refcount = ATOMIC_INIT(2),
},
-#ifdef CONFIG_PRINTK
- .syslog = &init_kernel_syslog_ns,
-#endif
.creator = &root_user
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9d8014f..db72d1b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -38,11 +38,6 @@ int create_user_ns(struct cred *new)
INIT_HLIST_HEAD(ns->uidhash_table + n);
- ns->syslog = syslog_malloc(CONTAINER_BUF_LEN);
- if (!ns->syslog) {
- kfree(ns);
- return -ENOMEM;
- }
/* Alloc new root user. */
root_user = alloc_uid(ns, 0);
if (!root_user) {
--
1.6.6
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list