[PATCH] Send quota messages via netlink

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] Send quota messages via netlink
@ 2007-08-28 14:13 Jan Kara
  2007-08-29  4:13 ` Andrew Morton
  2007-08-29  4:51 ` Andrew Morton
  0 siblings, 2 replies; 33+ messages in thread
From: Jan Kara @ 2007-08-28 14:13 UTC (permalink / raw)
  To: linux-kernel; +Cc: Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 723 bytes --]

  Hello,

  I'm sending rediffed patch implementing sending of quota messages via netlink
interface (some rationale in patch description). I've already posted it to
LKML some time ago and there were no objections, so I guess it's fine to put
it to -mm. Andrew, would you be so kind? Thanks.
  A userspace daemon reading the messages from the kernel and sending them to
dbus and/or the user's console is also written (it's part of quota-tools). The
only remaining problem is that a few changes to libnl are needed for
the userspace daemon. They were basically acked by the maintainer, but it
seems he has not merged the patches yet. So this will take a bit more time.

									Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

[-- Attachment #2: quota-2.6.23-rc4-1-quota_messages.diff --]
[-- Type: text/x-patch, Size: 12212 bytes --]

Implement sending of quota messages via netlink interface. The advantage is
that in userspace we can better decide what to do with the message - for
example display a dialogue in your X session or just write the message to the
console. As a bonus, we can get rid of problems with console locking deep
inside filesystem code once we remove the old printing mechanism.

Signed-off-by: Jan Kara <jack@suse.cz>

diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-rc4/fs/dquot.c linux-2.6.23-rc4-1-quota_messages/fs/dquot.c
--- linux-2.6.23-rc4/fs/dquot.c	2007-08-28 14:08:51.000000000 +0200
+++ linux-2.6.23-rc4-1-quota_messages/fs/dquot.c	2007-08-28 14:16:26.000000000 +0200
@@ -79,6 +79,10 @@
 #include <linux/capability.h>
 #include <linux/quotaops.h>
 #include <linux/writeback.h> /* for inode_lock, oddly enough.. */
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#endif
 
 #include <asm/uaccess.h>
 
@@ -823,6 +827,7 @@ static inline void dquot_decr_space(stru
 	clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 }
 
+#ifdef CONFIG_PRINT_QUOTA_WARNING
 static int flag_print_warnings = 1;
 
 static inline int need_print_warning(struct dquot *dquot)
@@ -839,22 +844,15 @@ static inline int need_print_warning(str
 	return 0;
 }
 
-/* Values of warnings */
-#define NOWARN 0
-#define IHARDWARN 1
-#define ISOFTLONGWARN 2
-#define ISOFTWARN 3
-#define BHARDWARN 4
-#define BSOFTLONGWARN 5
-#define BSOFTWARN 6
-
 /* Print warning to user which exceeded quota */
 static void print_warning(struct dquot *dquot, const char warntype)
 {
 	char *msg = NULL;
 	struct tty_struct *tty;
-	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
-	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);
+	int flag = (warntype == QUOTA_NL_BHARDWARN ||
+		warntype == QUOTA_NL_BSOFTLONGWARN) ? DQ_BLKS_B :
+		((warntype == QUOTA_NL_IHARDWARN ||
+		warntype == QUOTA_NL_ISOFTLONGWARN) ? DQ_INODES_B : 0);
 
 	if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
 		return;
@@ -864,28 +862,28 @@ static void print_warning(struct dquot *
 	if (!tty)
 		goto out_lock;
 	tty_write_message(tty, dquot->dq_sb->s_id);
-	if (warntype == ISOFTWARN || warntype == BSOFTWARN)
+	if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN)
 		tty_write_message(tty, ": warning, ");
 	else
 		tty_write_message(tty, ": write failed, ");
 	tty_write_message(tty, quotatypes[dquot->dq_type]);
 	switch (warntype) {
-		case IHARDWARN:
+		case QUOTA_NL_IHARDWARN:
 			msg = " file limit reached.\r\n";
 			break;
-		case ISOFTLONGWARN:
+		case QUOTA_NL_ISOFTLONGWARN:
 			msg = " file quota exceeded too long.\r\n";
 			break;
-		case ISOFTWARN:
+		case QUOTA_NL_ISOFTWARN:
 			msg = " file quota exceeded.\r\n";
 			break;
-		case BHARDWARN:
+		case QUOTA_NL_BHARDWARN:
 			msg = " block limit reached.\r\n";
 			break;
-		case BSOFTLONGWARN:
+		case QUOTA_NL_BSOFTLONGWARN:
 			msg = " block quota exceeded too long.\r\n";
 			break;
-		case BSOFTWARN:
+		case QUOTA_NL_BSOFTWARN:
 			msg = " block quota exceeded.\r\n";
 			break;
 	}
@@ -893,14 +891,89 @@ static void print_warning(struct dquot *
 out_lock:
 	mutex_unlock(&tty_mutex);
 }
+#endif
+
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+
+/* Size of quota netlink message - actually an upperbound for buffer size */
+#define QUOTA_NL_MSG_SIZE 32
+
+/* Netlink family structure for quota */
+static struct genl_family quota_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = "VFS_DQUOT",
+	.version = 1,
+	.maxattr = QUOTA_NL_A_MAX,
+};
+
+/* Send warning to userspace about user which exceeded quota */
+static void send_warning(const struct dquot *dquot, const char warntype)
+{
+	static unsigned long seq;
+	struct sk_buff *skb;
+	void *msg_head;
+	int ret;
+
+	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
+	if (!skb) {
+		printk(KERN_ERR
+		  "VFS: Not enough memory to send quota warning.\n");
+		return;
+	}
+	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
+	if (!msg_head) {
+		printk(KERN_ERR
+		  "VFS: Cannot store netlink header in quota warning.\n");
+		goto err_out;
+	}
+	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
+		MAJOR(dquot->dq_sb->s_dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
+		MINOR(dquot->dq_sb->s_dev));
+	if (ret)
+		goto attr_err_out;
+	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
+	if (ret)
+		goto attr_err_out;
+	genlmsg_end(skb, msg_head);
+
+	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
+	if (ret < 0 && ret != -ESRCH)
+		printk(KERN_ERR
+			"VFS: Failed to send notification message: %d\n", ret);
+	return;
+attr_err_out:
+	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
+err_out:
+	kfree_skb(skb);
+}
+#endif
 
 static inline void flush_warnings(struct dquot **dquots, char *warntype)
 {
 	int i;
 
 	for (i = 0; i < MAXQUOTAS; i++)
-		if (dquots[i] != NODQUOT && warntype[i] != NOWARN)
+		if (dquots[i] != NODQUOT && warntype[i] != QUOTA_NL_NOWARN) {
+#ifdef CONFIG_PRINT_QUOTA_WARNING
 			print_warning(dquots[i], warntype[i]);
+#endif
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+			send_warning(dquots[i], warntype[i]);
+#endif
+		}
 }
 
 static inline char ignore_hardlimit(struct dquot *dquot)
@@ -914,14 +987,14 @@ static inline char ignore_hardlimit(stru
 /* needs dq_data_lock */
 static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
 {
-	*warntype = NOWARN;
+	*warntype = QUOTA_NL_NOWARN;
 	if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
 	   (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_ihardlimit &&
             !ignore_hardlimit(dquot)) {
-		*warntype = IHARDWARN;
+		*warntype = QUOTA_NL_IHARDWARN;
 		return NO_QUOTA;
 	}
 
@@ -929,14 +1002,14 @@ static int check_idq(struct dquot *dquot
 	   (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit &&
 	    dquot->dq_dqb.dqb_itime && get_seconds() >= dquot->dq_dqb.dqb_itime &&
             !ignore_hardlimit(dquot)) {
-		*warntype = ISOFTLONGWARN;
+		*warntype = QUOTA_NL_ISOFTLONGWARN;
 		return NO_QUOTA;
 	}
 
 	if (dquot->dq_dqb.dqb_isoftlimit &&
 	   (dquot->dq_dqb.dqb_curinodes + inodes) > dquot->dq_dqb.dqb_isoftlimit &&
 	    dquot->dq_dqb.dqb_itime == 0) {
-		*warntype = ISOFTWARN;
+		*warntype = QUOTA_NL_ISOFTWARN;
 		dquot->dq_dqb.dqb_itime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
 	}
 
@@ -946,7 +1019,7 @@ static int check_idq(struct dquot *dquot
 /* needs dq_data_lock */
 static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
 {
-	*warntype = 0;
+	*warntype = QUOTA_NL_NOWARN;
 	if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
@@ -954,7 +1027,7 @@ static int check_bdq(struct dquot *dquot
 	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
-			*warntype = BHARDWARN;
+			*warntype = QUOTA_NL_BHARDWARN;
 		return NO_QUOTA;
 	}
 
@@ -963,7 +1036,7 @@ static int check_bdq(struct dquot *dquot
 	    dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
             !ignore_hardlimit(dquot)) {
 		if (!prealloc)
-			*warntype = BSOFTLONGWARN;
+			*warntype = QUOTA_NL_BSOFTLONGWARN;
 		return NO_QUOTA;
 	}
 
@@ -971,7 +1044,7 @@ static int check_bdq(struct dquot *dquot
 	   toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
 	    dquot->dq_dqb.dqb_btime == 0) {
 		if (!prealloc) {
-			*warntype = BSOFTWARN;
+			*warntype = QUOTA_NL_BSOFTWARN;
 			dquot->dq_dqb.dqb_btime = get_seconds() + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
 		}
 		else
@@ -1066,7 +1139,7 @@ out_add:
 		return QUOTA_OK;
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		warntype[cnt] = NOWARN;
+		warntype[cnt] = QUOTA_NL_NOWARN;
 
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {	/* Now we can do reliable test... */
@@ -1112,7 +1185,7 @@ int dquot_alloc_inode(const struct inode
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		warntype[cnt] = NOWARN;
+		warntype[cnt] = QUOTA_NL_NOWARN;
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {
 		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1234,7 +1307,7 @@ int dquot_transfer(struct inode *inode, 
 	/* Clear the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
-		warntype[cnt] = NOWARN;
+		warntype[cnt] = QUOTA_NL_NOWARN;
 	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	/* Now recheck reliably when holding dqptr_sem */
@@ -1808,6 +1881,7 @@ static ctl_table fs_dqstats_table[] = {
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
 	},
+#ifdef CONFIG_PRINT_QUOTA_WARNING
 	{
 		.ctl_name	= FS_DQ_WARNINGS,
 		.procname	= "warnings",
@@ -1816,6 +1890,7 @@ static ctl_table fs_dqstats_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
 	{ .ctl_name = 0 },
 };
 
@@ -1877,6 +1952,11 @@ static int __init dquot_init(void)
 
 	register_shrinker(&dqcache_shrinker);
 
+#ifdef CONFIG_QUOTA_NETLINK_INTERFACE
+	if (genl_register_family(&quota_genl_family) != 0)
+		printk(KERN_ERR "VFS: Failed to create quota netlink interface.\n");
+#endif
+
 	return 0;
 }
 module_init(dquot_init);
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-rc4/fs/Kconfig linux-2.6.23-rc4-1-quota_messages/fs/Kconfig
--- linux-2.6.23-rc4/fs/Kconfig	2007-08-28 14:08:50.000000000 +0200
+++ linux-2.6.23-rc4-1-quota_messages/fs/Kconfig	2007-08-28 14:16:26.000000000 +0200
@@ -537,6 +537,24 @@ config QUOTA
 	  with the quota tools. Probably the quota support is only useful for
 	  multi user systems. If unsure, say N.
 
+config QUOTA_NETLINK_INTERFACE
+	bool "Report quota messages through netlink interface"
+	depends on QUOTA && NET
+	help
+	  If you say Y here, quota warnings (about exceeding softlimit, reaching
+	  hardlimit, etc.) will be reported through netlink interface. If unsure,
+	  say Y.
+
+config PRINT_QUOTA_WARNING
+	bool "Print quota warnings to console (OBSOLETE)"
+	depends on QUOTA
+	default y
+	help
+	  If you say Y here, quota warnings (about exceeding softlimit, reaching
+	  hardlimit, etc.) will be printed to the process' controlling terminal.
+	  Note that this behavior is currently deprecated and may go away in
+	  future. Please use notification via netlink socket instead.
+
 config QFMT_V1
 	tristate "Old quota format support"
 	depends on QUOTA
diff -rupX /home/jack/.kerndiffexclude linux-2.6.23-rc4/include/linux/quota.h linux-2.6.23-rc4-1-quota_messages/include/linux/quota.h
--- linux-2.6.23-rc4/include/linux/quota.h	2007-07-16 17:47:29.000000000 +0200
+++ linux-2.6.23-rc4-1-quota_messages/include/linux/quota.h	2007-08-28 14:24:02.000000000 +0200
@@ -128,6 +128,37 @@ struct if_dqinfo {
 	__u32 dqi_valid;
 };
 
+/*
+ * Definitions for quota netlink interface
+ */
+#define QUOTA_NL_NOWARN 0
+#define QUOTA_NL_IHARDWARN 1		/* Inode hardlimit reached */
+#define QUOTA_NL_ISOFTLONGWARN 2 	/* Inode grace time expired */
+#define QUOTA_NL_ISOFTWARN 3		/* Inode softlimit reached */
+#define QUOTA_NL_BHARDWARN 4		/* Block hardlimit reached */
+#define QUOTA_NL_BSOFTLONGWARN 5	/* Block grace time expired */
+#define QUOTA_NL_BSOFTWARN 6		/* Block softlimit reached */
+
+enum {
+	QUOTA_NL_C_UNSPEC,
+	QUOTA_NL_C_WARNING,
+	__QUOTA_NL_C_MAX,
+};
+#define QUOTA_NL_C_MAX (__QUOTA_NL_C_MAX - 1)
+
+enum {
+	QUOTA_NL_A_UNSPEC,
+	QUOTA_NL_A_QTYPE,
+	QUOTA_NL_A_EXCESS_ID,
+	QUOTA_NL_A_WARNING,
+	QUOTA_NL_A_DEV_MAJOR,
+	QUOTA_NL_A_DEV_MINOR,
+	QUOTA_NL_A_CAUSED_ID,
+	__QUOTA_NL_A_MAX,
+};
+#define QUOTA_NL_A_MAX (__QUOTA_NL_A_MAX - 1)
+
+
 #ifdef __KERNEL__
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-28 14:13 [PATCH] Send quota messages via netlink Jan Kara
@ 2007-08-29  4:13 ` Andrew Morton
  2007-08-29  4:54   ` David Miller
                     ` (3 more replies)
  2007-08-29  4:51 ` Andrew Morton
  1 sibling, 4 replies; 33+ messages in thread
From: Andrew Morton @ 2007-08-29  4:13 UTC (permalink / raw)
  To: Jan Kara
  Cc: linux-kernel, Balbir Singh, Serge E. Hallyn, Eric W. Biederman,
	containers

On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:

>   Hello,
> 
>   I'm sending rediffed patch implementing sending of quota messages via netlink
> interface (some rationale in patch description). I've already posted it to
> LKML some time ago and there were no objections, so I guess it's fine to put
> it to -mm. Andrew, would you be so kind? Thanks.
>   Userspace deamon reading the messages from the kernel and sending them to
> dbus and/or user console is also written (it's part of quota-tools). The
> only remaining problem is there are a few changes needed to libnl needed for
> the userspace daemon. They were basically acked by the maintainer but it
> seems he has not merged the patches yet. So this will take a bit more time.
> 

So it's a new kernel->userspace interface.

But we have no description of the interface :(

> +/* Send warning to userspace about user which exceeded quota */
> +static void send_warning(const struct dquot *dquot, const char warntype)
> +{
> +	static unsigned long seq;
> +	struct sk_buff *skb;
> +	void *msg_head;
> +	int ret;
> +
> +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
> +	if (!skb) {
> +		printk(KERN_ERR
> +		  "VFS: Not enough memory to send quota warning.\n");
> +		return;
> +	}
> +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
> +	if (!msg_head) {
> +		printk(KERN_ERR
> +		  "VFS: Cannot store netlink header in quota warning.\n");
> +		goto err_out;
> +	}
> +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
> +	if (ret)
> +		goto attr_err_out;
> +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
> +	if (ret)
> +		goto attr_err_out;
> +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
> +	if (ret)
> +		goto attr_err_out;
> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
> +		MAJOR(dquot->dq_sb->s_dev));
> +	if (ret)
> +		goto attr_err_out;
> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
> +		MINOR(dquot->dq_sb->s_dev));
> +	if (ret)
> +		goto attr_err_out;
> +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
> +	if (ret)
> +		goto attr_err_out;
> +	genlmsg_end(skb, msg_head);
> +
> +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
> +	if (ret < 0 && ret != -ESRCH)
> +		printk(KERN_ERR
> +			"VFS: Failed to send notification message: %d\n", ret);
> +	return;
> +attr_err_out:
> +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
> +err_out:
> +	kfree_skb(skb);
> +}
> +#endif

This is it.  Normally netlink payloads are represented as a struct.  How
come this one is built-by-hand?

It doesn't appear to be versioned.  Should it be?

Does it have (or need) reserved-set-to-zero space for expansion?  Again,
hard to tell..

I guess it's OK to send a major and minor out of the kernel like this.
What's it for?  To represent a filesystem?  I wonder if there's a more
modern and useful way of describing the fs.  Path to mountpoint or
something?

I suspect the namespace virtualisation guys would be interested in a new
interface which is sending current->user->uid up to userspace.  uids are
per-namespace now.  What are the implications?  (cc's added)

Is it worth adding a comment explaining why GFP_NOFS is used here?



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  4:13 ` Andrew Morton
@ 2007-08-29  4:54   ` David Miller
  2007-08-29  5:41   ` Eric W. Biederman
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 33+ messages in thread
From: David Miller @ 2007-08-29  4:54 UTC (permalink / raw)
  To: akpm; +Cc: jack, linux-kernel, balbir, serue, ebiederm, containers

From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 28 Aug 2007 21:13:35 -0700

> This is it.  Normally netlink payloads are represented as a struct.  How
> come this one is built-by-hand?

He is using attributes, which is perfect and arbitrarily
extensible with zero backwards compatibility concerns.

If he wants to provide a new attribute, he just adds it
without any issues.

When new attributes are added, older apps simply ignore the attributes
they don't understand.

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  4:13 ` Andrew Morton
  2007-08-29  4:54   ` David Miller
@ 2007-08-29  5:41   ` Eric W. Biederman
  2007-08-29  6:30   ` Balbir Singh
  2007-08-29 12:26   ` Jan Kara
  3 siblings, 0 replies; 33+ messages in thread
From: Eric W. Biederman @ 2007-08-29  5:41 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, linux-kernel, Balbir Singh, Serge E. Hallyn, containers

Andrew Morton <akpm@linux-foundation.org> writes:

> On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
>
>>   Hello,
>> 
>> I'm sending rediffed patch implementing sending of quota messages via netlink
>> interface (some rationale in patch description). I've already posted it to
>> LKML some time ago and there were no objections, so I guess it's fine to put
>> it to -mm. Andrew, would you be so kind? Thanks.
>>   Userspace deamon reading the messages from the kernel and sending them to
>> dbus and/or user console is also written (it's part of quota-tools). The
>> only remaining problem is there are a few changes needed to libnl needed for
>> the userspace daemon. They were basically acked by the maintainer but it
>> seems he has not merged the patches yet. So this will take a bit more time.
>> 
>
> So it's a new kernel->userspace interface.
>
> But we have no description of the interface :(
>
>> +/* Send warning to userspace about user which exceeded quota */
>> +static void send_warning(const struct dquot *dquot, const char warntype)
>> +{
>> +	static unsigned long seq;
>> +	struct sk_buff *skb;
>> +	void *msg_head;
>> +	int ret;
>> +
>> +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
>> +	if (!skb) {
>> +		printk(KERN_ERR
>> +		  "VFS: Not enough memory to send quota warning.\n");
>> +		return;
>> +	}
>> + msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0,
> QUOTA_NL_C_WARNING);
>> +	if (!msg_head) {
>> +		printk(KERN_ERR
>> +		  "VFS: Cannot store netlink header in quota warning.\n");
>> +		goto err_out;
>> +	}
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
>> +		MAJOR(dquot->dq_sb->s_dev));
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
>> +		MINOR(dquot->dq_sb->s_dev));
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	genlmsg_end(skb, msg_head);
>> +
>> +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
>> +	if (ret < 0 && ret != -ESRCH)
>> +		printk(KERN_ERR
>> +			"VFS: Failed to send notification message: %d\n", ret);
>> +	return;
>> +attr_err_out:
>> +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
>> +err_out:
>> +	kfree_skb(skb);
>> +}
>> +#endif
>
> This is it.  Normally netlink payloads are represented as a struct.  How
> come this one is built-by-hand?

No.  Netlink fields (unless I'm confused) are represented as a struct,
not the entire netlink payload.

> It doesn't appear to be versioned.  Should it be?

Well.  If it is using netlink properly each field should have a tag.
So it should not need to be versioned, because each field is strictly
controlled.

> Does it have (or need) reserved-set-to-zero space for expansion?  Again,
> hard to tell..

Not if netlink is used properly.  Just another nested tag.

> I guess it's OK to send a major and minor out of the kernel like this. 
> What's it for?  To represent a filesytem?  I wonder if there's a more
> modern and useful way of describing the fs.  Path to mountpoint or
> something?

Or perhaps the string the fs was mounted with.

> I suspect the namespace virtualisation guys would be interested in a new
> interface which is sending current->user->uid up to userspace.  uids are
> per-namespace now.  What are the implications?  (cc's added)

That we definitely would be.  Although user namespace support is rather
incomplete at the moment.

> Is it worth adding a comment explaining why GFP_NOFS is used here?

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  4:13 ` Andrew Morton
  2007-08-29  4:54   ` David Miller
  2007-08-29  5:41   ` Eric W. Biederman
@ 2007-08-29  6:30   ` Balbir Singh
  2007-08-29 12:46     ` Jan Kara
  2007-08-29 12:26   ` Jan Kara
  3 siblings, 1 reply; 33+ messages in thread
From: Balbir Singh @ 2007-08-29  6:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jan Kara, linux-kernel, Balbir Singh, Serge E. Hallyn,
	Eric W. Biederman, containers

Andrew Morton wrote:
> On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
> 
>>   Hello,
>>
>>   I'm sending rediffed patch implementing sending of quota messages via netlink
>> interface (some rationale in patch description). I've already posted it to
>> LKML some time ago and there were no objections, so I guess it's fine to put
>> it to -mm. Andrew, would you be so kind? Thanks.
>>   Userspace deamon reading the messages from the kernel and sending them to
>> dbus and/or user console is also written (it's part of quota-tools). The
>> only remaining problem is there are a few changes needed to libnl needed for
>> the userspace daemon. They were basically acked by the maintainer but it
>> seems he has not merged the patches yet. So this will take a bit more time.
>>
> 
> So it's a new kernel->userspace interface.
> 
> But we have no description of the interface :(
> 

And could we have some description of the context under which all the message
exchanges take place? When are these messages sent out -- what event
is the user space notified of?

>> +/* Send warning to userspace about user which exceeded quota */
>> +static void send_warning(const struct dquot *dquot, const char warntype)
>> +{
>> +	static unsigned long seq;
>> +	struct sk_buff *skb;
>> +	void *msg_head;
>> +	int ret;
>> +
>> +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
>> +	if (!skb) {
>> +		printk(KERN_ERR
>> +		  "VFS: Not enough memory to send quota warning.\n");
>> +		return;
>> +	}
>> +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
>> +	if (!msg_head) {
>> +		printk(KERN_ERR
>> +		  "VFS: Cannot store netlink header in quota warning.\n");
>> +		goto err_out;

One problem we've seen is losing notifications. It does not happen for us
thanks to the cpumask interface (which allows us to have parallel sockets
for each cpu or a set of cpus). How frequent are your notifications?

>> +	}
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
>> +		MAJOR(dquot->dq_sb->s_dev));
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
>> +		MINOR(dquot->dq_sb->s_dev));
>> +	if (ret)
>> +		goto attr_err_out;
>> +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
>> +	if (ret)
>> +		goto attr_err_out;
>> +	genlmsg_end(skb, msg_head);
>> +

Have you looked at ensuring that the data structure works across 32 bit
and 64 bit systems (in terms of binary compatibility)? That's usually
a nice to have feature.

>> +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
>> +	if (ret < 0 && ret != -ESRCH)
>> +		printk(KERN_ERR
>> +			"VFS: Failed to send notification message: %d\n", ret);
>> +	return;
>> +attr_err_out:
>> +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
>> +err_out:
>> +	kfree_skb(skb);
>> +}
>> +#endif
> 
> This is it.  Normally netlink payloads are represented as a struct.  How
> come this one is built-by-hand?
> 
> It doesn't appear to be versioned.  Should it be?
> 

Yes, versioning is always nice and genetlink supports it.

> Does it have (or need) reserved-set-to-zero space for expansion?  Again,
> hard to tell..
> 
> I guess it's OK to send a major and minor out of the kernel like this. 
> What's it for?  To represent a filesytem?  I wonder if there's a more
> modern and useful way of describing the fs.  Path to mountpoint or
> something?
> 
> I suspect the namespace virtualisation guys would be interested in a new
> interface which is sending current->user->uid up to userspace.  uids are
> per-namespace now.  What are the implications?  (cc's added)
> 

The memory controller or VM would also be interested in notifications
of OOM. At OLS this year interest was shown in getting OOM notifications
and allow the user space a chance to handle the notification and take
action (especially for containers). We already have containerstats for
containers (which I was planning to reuse), but I was told that we would
be interested in user space OOM notifications in general.

> Is it worth adding a comment explaining why GFP_NOFS is used here?
> 
> 


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  6:30   ` Balbir Singh
@ 2007-08-29 12:46     ` Jan Kara
  2007-08-31  6:59       ` Balbir Singh
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-08-29 12:46 UTC (permalink / raw)
  To: Balbir Singh
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	Eric W. Biederman, containers

On Wed 29-08-07 12:00:07, Balbir Singh wrote:
> Andrew Morton wrote:
> > On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
> >>   I'm sending rediffed patch implementing sending of quota messages via netlink
> >> interface (some rationale in patch description). I've already posted it to
> >> LKML some time ago and there were no objections, so I guess it's fine to put
> >> it to -mm. Andrew, would you be so kind? Thanks.
> >>   Userspace deamon reading the messages from the kernel and sending them to
> >> dbus and/or user console is also written (it's part of quota-tools). The
> >> only remaining problem is there are a few changes needed to libnl needed for
> >> the userspace daemon. They were basically acked by the maintainer but it
> >> seems he has not merged the patches yet. So this will take a bit more time.
> >>
> > 
> > So it's a new kernel->userspace interface.
> > 
> > But we have no description of the interface :(
> > 
> 
> And could we have some description of the context under which all the message
> exchanges take place. When are these messages sent out -- what event
> is the user space notified of?
  The user is notified about either exceeding his quota softlimit or
reaching hardlimit. If you are interested in more details, please ask.

> >> +/* Send warning to userspace about user which exceeded quota */
> >> +static void send_warning(const struct dquot *dquot, const char warntype)
> >> +{
> >> +	static unsigned long seq;
> >> +	struct sk_buff *skb;
> >> +	void *msg_head;
> >> +	int ret;
> >> +
> >> +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
> >> +	if (!skb) {
> >> +		printk(KERN_ERR
> >> +		  "VFS: Not enough memory to send quota warning.\n");
> >> +		return;
> >> +	}
> >> +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
> >> +	if (!msg_head) {
> >> +		printk(KERN_ERR
> >> +		  "VFS: Cannot store netlink header in quota warning.\n");
> >> +		goto err_out;
> 
> One problem, we've been is losing notifications. It does not happen for us
> due to the cpumask interface (which allows us to have parallel sockets
> for each cpu or a set of cpus). How frequent are your notifications?
  Quite infrequent... Users won't exceed their quotas too often :).

> >> +	}
> >> +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
> >> +	if (ret)
> >> +		goto attr_err_out;
> >> +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
> >> +	if (ret)
> >> +		goto attr_err_out;
> >> +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
> >> +	if (ret)
> >> +		goto attr_err_out;
> >> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
> >> +		MAJOR(dquot->dq_sb->s_dev));
> >> +	if (ret)
> >> +		goto attr_err_out;
> >> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
> >> +		MINOR(dquot->dq_sb->s_dev));
> >> +	if (ret)
> >> +		goto attr_err_out;
> >> +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
> >> +	if (ret)
> >> +		goto attr_err_out;
> >> +	genlmsg_end(skb, msg_head);
> >> +
> 
> Have you looked at ensuring that the data structure works across 32 bit
> and 64 bit systems (in terms of binary compatibility)? That's usually
> a nice to have feature.
  Generic netlink should take care of this - arguments are typed so it
knows how many bits the numbers have. So this should be no issue. Are there any
other problems that you have in mind?

> >> +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
> >> +	if (ret < 0 && ret != -ESRCH)
> >> +		printk(KERN_ERR
> >> +			"VFS: Failed to send notification message: %d\n", ret);
> >> +	return;
> >> +attr_err_out:
> >> +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
> >> +err_out:
> >> +	kfree_skb(skb);
> >> +}
> >> +#endif
> > 
> > This is it.  Normally netlink payloads are represented as a struct.  How
> > come this one is built-by-hand?
> > 
> > It doesn't appear to be versioned.  Should it be?
> > 
> 
> Yes, versioning is always nice and genetlink supports it.
> 
> > Does it have (or need) reserved-set-to-zero space for expansion?  Again,
> > hard to tell..
> > 
> > I guess it's OK to send a major and minor out of the kernel like this. 
> > What's it for?  To represent a filesytem?  I wonder if there's a more
> > modern and useful way of describing the fs.  Path to mountpoint or
> > something?
> > 
> > I suspect the namespace virtualisation guys would be interested in a new
> > interface which is sending current->user->uid up to userspace.  uids are
> > per-namespace now.  What are the implications?  (cc's added)
> 
> The memory controller or VM would also be interested in notifications
> of OOM. At OLS this year interest was shown in getting OOM notifications
> and allow the user space a chance to handle the notification and take
> action (especially for containers). We already have containerstats for
> containers (which I was planning to reuse), but I was told that we would
> be interested in user space OOM notifications in general.
  Generic netlink can be used to pass this information (although in OOM
situation, it may be a bit hairy to get the network stack working...). But
I guess it's not related to my patch.

									Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 12:46     ` Jan Kara
@ 2007-08-31  6:59       ` Balbir Singh
  2007-09-03 10:18         ` Jan Kara
  0 siblings, 1 reply; 33+ messages in thread
From: Balbir Singh @ 2007-08-31  6:59 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	Eric W. Biederman, containers

Jan Kara wrote:
>>>> +	}
>>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
>>>> +	if (ret)
>>>> +		goto attr_err_out;
>>>> +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
>>>> +	if (ret)
>>>> +		goto attr_err_out;
>>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
>>>> +	if (ret)
>>>> +		goto attr_err_out;
>>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
>>>> +		MAJOR(dquot->dq_sb->s_dev));
>>>> +	if (ret)
>>>> +		goto attr_err_out;
>>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
>>>> +		MINOR(dquot->dq_sb->s_dev));
>>>> +	if (ret)
>>>> +		goto attr_err_out;
>>>> +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
>>>> +	if (ret)
>>>> +		goto attr_err_out;
>>>> +	genlmsg_end(skb, msg_head);
>>>> +
>> Have you looked at ensuring that the data structure works across 32 bit
>> and 64 bit systems (in terms of binary compatibility)? That's usually
>> a nice to have feature.
>   Generic netlink should take care of this - arguments are typed so it
> knows how much bits numbers have. So this should be no issue. Are there any
> other problems that you have in mind?
> 

Yes, but apart from that, if I remember Jamal Hadi's initial comments
on taskstats, he recommended that we align everything to 64 bit so
that the data is well aligned for 64 bit systems. You could also consider
creating a data structure, documenting its members, aligning them and using
that to send out the data.

>>>> +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
>>>> +	if (ret < 0 && ret != -ESRCH)
>>>> +		printk(KERN_ERR
>>>> +			"VFS: Failed to send notification message: %d\n", ret);
>>>> +	return;
>>>> +attr_err_out:
>>>> +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
>>>> +err_out:
>>>> +	kfree_skb(skb);
>>>> +}
>>>> +#endif
>>> This is it.  Normally netlink payloads are represented as a struct.  How
>>> come this one is built-by-hand?
>>>
>>> It doesn't appear to be versioned.  Should it be?
>>>
>> Yes, versioning is always nice and genetlink supports it.
>>

It would be nice for you to use the versioning feature.

>> The memory controller or VM would also be interested in notifications
>> of OOM. At OLS this year interest was shown in getting OOM notifications
>> and allow the user space a chance to handle the notification and take
>> action (especially for containers). We already have containerstats for
>> containers (which I was planning to reuse), but I was told that we would
>> be interested in user space OOM notifications in general.

>  Generic netlink can be used to pass this information (although in OOM
> situation, it may be a bit hairy to get the network stack working...). But
> I guess it's not related to my patch.

We could have a pre-allocated buffer stored at startup and use that for
OOM notification. In the case of container OOM, we are likely to have
free global memory. Working towards an infrastructure so that anybody can
build on top of it and sending notifications on interesting events becomes
easier would be nice. We can reuse code that way and add fewer bugs :-)


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-31  6:59       ` Balbir Singh
@ 2007-09-03 10:18         ` Jan Kara
  0 siblings, 0 replies; 33+ messages in thread
From: Jan Kara @ 2007-09-03 10:18 UTC (permalink / raw)
  To: Balbir Singh
  Cc: Andrew Morton, linux-kernel, Serge E. Hallyn, Eric W. Biederman,
	containers

On Fri 31-08-07 12:29:53, Balbir Singh wrote:
> Jan Kara wrote:
> >>>> +	}
> >>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
> >>>> +	if (ret)
> >>>> +		goto attr_err_out;
> >>>> +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
> >>>> +	if (ret)
> >>>> +		goto attr_err_out;
> >>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
> >>>> +	if (ret)
> >>>> +		goto attr_err_out;
> >>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
> >>>> +		MAJOR(dquot->dq_sb->s_dev));
> >>>> +	if (ret)
> >>>> +		goto attr_err_out;
> >>>> +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
> >>>> +		MINOR(dquot->dq_sb->s_dev));
> >>>> +	if (ret)
> >>>> +		goto attr_err_out;
> >>>> +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
> >>>> +	if (ret)
> >>>> +		goto attr_err_out;
> >>>> +	genlmsg_end(skb, msg_head);
> >>>> +
> >> Have you looked at ensuring that the data structure works across 32 bit
> >> and 64 bit systems (in terms of binary compatibility)? That's usually
> >> a nice to have feature.
> >   Generic netlink should take care of this - arguments are typed so it
> > knows how much bits numbers have. So this should be no issue. Are there any
> > other problems that you have in mind?
> > 
> Yes, but apart from that, if I remember Jamal Hadi's initial comments
> on taskstats, he recommended that we align everything to 64 bit so
> that the data is well aligned for 64 bit systems. You could also consider
  But each attribute is just one number (either 32 or 64 bit) so there's
not much to align. Also each attribute has its netlink header so alignment
is anyway hard to predict. Finally, this is by no means performance
critical - average system using quotas may get say 1 notification per user
per month?

> creating a data structure, document it's members, align them and use
> that to send out the data.
  I don't like sending one structure - by doing that you lose the
flexibility of netlink attributes...

> >>>> +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
> >>>> +	if (ret < 0 && ret != -ESRCH)
> >>>> +		printk(KERN_ERR
> >>>> +			"VFS: Failed to send notification message: %d\n", ret);
> >>>> +	return;
> >>>> +attr_err_out:
> >>>> +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
> >>>> +err_out:
> >>>> +	kfree_skb(skb);
> >>>> +}
> >>>> +#endif
> >>> This is it.  Normally netlink payloads are represented as a struct.  How
> >>> come this one is built-by-hand?
> >>>
> >>> It doesn't appear to be versioned.  Should it be?
> >>>
> >> Yes, versioning is always nice and genetlink supports it.
> >>
> It would nice for you to use the versioning feature.
  How does generic netlink support versioning? I have not found this
feature. Looking into Documentation/accounting/taskstats.txt it seems that
taskstats are versioning only the structure taskstats itself but not the
bunch of attributes as a whole...

> >> The memory controller or VM would also be interested in notifications
> >> of OOM. At OLS this year interest was shown in getting OOM notifications
> >> and allow the user space a chance to handle the notification and take
> >> action (especially for containers). We already have containerstats for
> >> containers (which I was planning to reuse), but I was told that we would
> >> be interested in user space OOM notifications in general.
> 
> >  Generic netlink can be used to pass this information (although in OOM
> > situation, it may be a bit hairy to get the network stack working...). But
> > I guess it's not related to my patch.
> 
> We could have a pre-allocated buffer stored at startup and use that for
> OOM notification. In the case of container OOM, we are likely to have
> free global memory. Working towards an infrastructure so that anybody can
> build on top of it and sending notifications on interesting events becomes
> easier would be nice. We can reuse code that way and add fewer bugs :-)
  Yes, but generic netlink itself is such an infrastructure, isn't it? It
is about 70 lines of code to implement notification for quota subsystem so
it's really simple...

									Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  4:13 ` Andrew Morton
                     ` (2 preceding siblings ...)
  2007-08-29  6:30   ` Balbir Singh
@ 2007-08-29 12:26   ` Jan Kara
  2007-08-29 15:57     ` Randy Dunlap
  2007-08-29 18:31     ` Eric W. Biederman
  3 siblings, 2 replies; 33+ messages in thread
From: Jan Kara @ 2007-08-29 12:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, Balbir Singh, Serge E. Hallyn, Eric W. Biederman,
	containers

On Tue 28-08-07 21:13:35, Andrew Morton wrote:
> On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
> 
> >   Hello,
> > 
> >   I'm sending rediffed patch implementing sending of quota messages via netlink
> > interface (some rationale in patch description). I've already posted it to
> > LKML some time ago and there were no objections, so I guess it's fine to put
> > it to -mm. Andrew, would you be so kind? Thanks.
> >   Userspace deamon reading the messages from the kernel and sending them to
> > dbus and/or user console is also written (it's part of quota-tools). The
> > only remaining problem is there are a few changes needed to libnl needed for
> > the userspace daemon. They were basically acked by the maintainer but it
> > seems he has not merged the patches yet. So this will take a bit more time.
> > 
> 
> So it's a new kernel->userspace interface.
> 
> But we have no description of the interface :(
  Oops, forgotten about it. I'll write one. Do we have some standard place
where to document such interfaces? I could create some file in
Documentation/filesystems/ but that seems a bit superfluous...

> > +/* Send warning to userspace about user which exceeded quota */
> > +static void send_warning(const struct dquot *dquot, const char warntype)
> > +{
> > +	static unsigned long seq;
> > +	struct sk_buff *skb;
> > +	void *msg_head;
> > +	int ret;
> > +
> > +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
> > +	if (!skb) {
> > +		printk(KERN_ERR
> > +		  "VFS: Not enough memory to send quota warning.\n");
> > +		return;
> > +	}
> > +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
> > +	if (!msg_head) {
> > +		printk(KERN_ERR
> > +		  "VFS: Cannot store netlink header in quota warning.\n");
> > +		goto err_out;
> > +	}
> > +	ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type);
> > +	if (ret)
> > +		goto attr_err_out;
> > +	ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id);
> > +	if (ret)
> > +		goto attr_err_out;
> > +	ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype);
> > +	if (ret)
> > +		goto attr_err_out;
> > +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR,
> > +		MAJOR(dquot->dq_sb->s_dev));
> > +	if (ret)
> > +		goto attr_err_out;
> > +	ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR,
> > +		MINOR(dquot->dq_sb->s_dev));
> > +	if (ret)
> > +		goto attr_err_out;
> > +	ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
> > +	if (ret)
> > +		goto attr_err_out;
> > +	genlmsg_end(skb, msg_head);
> > +
> > +	ret = genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS);
> > +	if (ret < 0 && ret != -ESRCH)
> > +		printk(KERN_ERR
> > +			"VFS: Failed to send notification message: %d\n", ret);
> > +	return;
> > +attr_err_out:
> > +	printk(KERN_ERR "VFS: Failed to compose quota message: %d\n", ret);
> > +err_out:
> > +	kfree_skb(skb);
> > +}
> > +#endif
> 
> This is it.  Normally netlink payloads are represented as a struct.  How
> come this one is built-by-hand?
  I use "generic netlink", which is in fact a layer built on top of
netlink. As far as I've read its documentation, creating a message
argument by argument is the preferred way. As David writes, this way
we can add new arguments without worries about backward compatibility,
alignment issues or such things.

> It doesn't appear to be versioned.  Should it be?
  We don't need a version for future additions. Also each attribute sent
has its identifier (e.g. QUOTA_NL_A_CAUSED_ID) and userspace checks these
identifiers and unknown attributes are ignored. But in case we would like
to remove some attribute, versioning would be probably useful so that
userspace won't break silently... So I'll add it.

> Does it have (or need) reserved-set-to-zero space for expansion?  Again,
> hard to tell..
  No, we don't need it as I wrote above.

> I guess it's OK to send a major and minor out of the kernel like this. 
> What's it for?  To represent a filesytem?  I wonder if there's a more
> modern and useful way of describing the fs.  Path to mountpoint or
> something?
  I also find the major/minor pair a bit old-fashioned. But identifying it
by a mountpoint is problematic - quota does not care about namespaces and
such and so it works with superblocks. It's not trivial to get a mountpoint
from a superblock (and generally it's frowned upon, isn't it?). Also if a
filesystem is mounted on several places, we have to pick one (OK, userspace
has to do this choice anyway when displaying the message but still...).

> I suspect the namespace virtualisation guys would be interested in a new
> interface which is sending current->user->uid up to userspace.  uids are
> per-namespace now.  What are the implications?  (cc's added)
  I know there's something going on in this area but I don't know any
details. If somebody has some advice what should be passed into userspace
so that user/group can be identified, it is welcome.
  
> Is it worth adding a comment explaining why GFP_NOFS is used here?
  Probably yes. Added.

  Thanks for all your comments.

									Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 12:26   ` Jan Kara
@ 2007-08-29 15:57     ` Randy Dunlap
  2007-08-29 18:31     ` Eric W. Biederman
  1 sibling, 0 replies; 33+ messages in thread
From: Randy Dunlap @ 2007-08-29 15:57 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	Eric W. Biederman, containers

On Wed, 29 Aug 2007 14:26:47 +0200 Jan Kara wrote:

> On Tue 28-08-07 21:13:35, Andrew Morton wrote:
> > On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
> > 
> > >   Hello,
> > > 
> > >   I'm sending rediffed patch implementing sending of quota messages via netlink
> > > interface (some rationale in patch description). I've already posted it to
> > > LKML some time ago and there were no objections, so I guess it's fine to put
> > > it to -mm. Andrew, would you be so kind? Thanks.
> > >   Userspace deamon reading the messages from the kernel and sending them to
> > > dbus and/or user console is also written (it's part of quota-tools). The
> > > only remaining problem is there are a few changes needed to libnl needed for
> > > the userspace daemon. They were basically acked by the maintainer but it
> > > seems he has not merged the patches yet. So this will take a bit more time.
> > > 
> > 
> > So it's a new kernel->userspace interface.
> > 
> > But we have no description of the interface :(
>   Oops, forgotten about it. I'll write one. Do we have some standard place
> where to document such interfaces? I could create some file in
> Documentation/filesystems/ but that seems a bit superfluous...

It looks like other quota documentation is in Documentation/filesystems/,
and that seems reasonable to me for the other quota docs & this one.

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 12:26   ` Jan Kara
  2007-08-29 15:57     ` Randy Dunlap
@ 2007-08-29 18:31     ` Eric W. Biederman
  2007-08-29 19:26       ` Jan Kara
  1 sibling, 1 reply; 33+ messages in thread
From: Eric W. Biederman @ 2007-08-29 18:31 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	containers

Jan Kara <jack@suse.cz> writes:

>> I suspect the namespace virtualisation guys would be interested in a new
>> interface which is sending current->user->uid up to userspace.  uids are
>> per-namespace now.  What are the implications?  (cc's added)

>   I know there's something going on in this area but I don't know any
> details. If somebody has some advice what should be passed into userspace
> so that user/group can be idenitified, it is welcome.

For non networking stuff netlink is a pain to use in this area.

Although if we are very careful we may be ok.  But this requires
some thinking through.

In principle the uid that corresponds to a struct user depends
on which user namespace you are in.

Now there is a cheap trick we can play.  A traditional filesystem
belongs to exactly one user namespace. So we can return the uid
in the filesystems user namespace.

Wait you are returning current->user->uid?  Shouldn't we return
the user whose quota is exceeded?  I.e. if alice owns a file
and makes it world writable.  And bob writes to the file wouldn't
that file still be billed to alice's quota?  So shouldn't we complain
about alice and not bob?

Anyway if the goal is to return a user who maps to the filesystem we
can just always return uids in the filesystems uid namespace.

Although if filesystems start supporting multiple user namespaces
natively we might have a challenge on our hands.

Let me see if I can think of a concrete example here.

We have a nfs server with quotas.
We have clients who mount the nfs filesystem without synchronizing
their /etc/password files, so we have separate user namespaces.

What are the ways to make this work?
- Everyone who has write access to the NFS mount on all
  machines must have their uid synchronized across all machines
  (the easiest case).

- Each different kernel has a mapping from its local uids to
  the uids of the nfs filesystem. (ick if we do much more than
  root squash).

- The nfs filesystem knows about the situation and remembers the
  uid source (the uid namespace) as well as the uid when storing
  owners of files.  NFSv4 allows for this by treating users
  as user@domain.

Generally synchronizing uid namespaces (with possibly a root squash
exception) is the sanest and simplest thing to do in a case like this,
but it isn't always what is done.

As long as we are returning the filesystems idea of users we
shouldn't have to worry much about uid namespaces.  However
for non-traditional filesystems that don't store the user
as just a uid, say 9p and NFSv4, this implies that we want
to use the filesystems string identifier.  However I don't think
the quota system supports these filesystems yet.  So that
isn't an issue just yet.

However I'm still confused about the use of current->user.  If that
is what we really want and not the user whose quota will be charged
it gets to be a really tricky business, because potentially the uid
we want to deliver varies depending on who opened the netlink socket.

Eric

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 18:31     ` Eric W. Biederman
@ 2007-08-29 19:26       ` Jan Kara
  2007-08-29 21:06         ` Eric W. Biederman
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-08-29 19:26 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	containers

On Wed 29-08-07 12:31:52, Eric W. Biederman wrote:
> Jan Kara <jack@suse.cz> writes:
> 
> >> I suspect the namespace virtualisation guys would be interested in a new
> >> interface which is sending current->user->uid up to userspace.  uids are
> >> per-namespace now.  What are the implications?  (cc's added)
> 
> >   I know there's something going on in this area but I don't know any
> > details. If somebody has some advice what should be passed into userspace
> > so that user/group can be idenitified, it is welcome.
> 
> For non networking stuff netlink is a pain to use in this area.
> 
> Although if we are very careful we may be ok.  But this requires
> some thinking through.
> 
> In principle the uid that corresponds to a struct user depends
> on which user namespace you are in.
> 
> Now there is a cheap trick we can play.  A traditional filesystem
> belongs to exactly one user namespace. So we can return the uid
> in the filesystems user namespace.
> 
> Wait you are returning current->user->uid?  Shouldn't we return
> the user who's quota is exceeded?  I.e. if alice owns a file
> and makes it world writable.  And bob writes to the file wouldn't
> that file still be billed to alice's quota?  So shouldn't we complain
> about alice and not bob?
  Yes, the quota will still be billed to Alice and originally we complained
only about Alice. Now, we are actually passing identities of two users: The
one who actually caused the quota to be exceeded and the one whose quota is
exceeded. Userspace app can then decide what to do with the information...
For example it makes sense to display the message to both Alice and Bob in
the case you've described...

> Anyway if the goal is to return a user who maps to the filesystem we
> can just always return uids in the filesystems uid namespace.
> 
> Although if filesystems start supporting multiple user namespaces
> natively we might have a challenge on our hands.
> 
> Let me see if I can think of a concrete example here.
> 
> We have a nfs server with quotas.
> We have clients who mount the nfs filesystem without synchronizing
> their /etc/password files, so we have separate user namespaces.
> 
> What are the ways to make this work?
> - Everyone who has right access to the NFS mount on all
>   machines must have their uid synchronized across all machines
>   (the easiest case).
> 
> - Each different kernel has a mapping from it's local uids to
>   the uids of the nfs filesystem. (ick if we do much more the
>   root squash).
> 
> - The nfs filesystem knows about the situation and remembers the
>   uid source (the uid namespace) as well as the uid when storing
>   owners of files.  NFSv4 allows for this by treating users
>   as user@domain.
> 
> Generally synchronizing uid namespaces (with possibly a root squash
> exception) is the sanest and simplest thing to do in a case like this,
> but it isn't always what is done.
> 
> As long as we are returning the filesystems idea of users we
> shouldn't have to worry much about uid namespaces.  However
> for non-traditional filesystems that don't store the user
> as just a uid, say 9p and NFSv4, this implies that we want
> to use the filesystems string identifier.  However I don't think
> the quota system supports these filesystems yet.  So that
> isn't an issue just yet.
  OK, quota kind of works for NFSv4 - we simply enforce quotas on the
server on a traditional filesystem and there are some RPC calls to get
quota status. For 9p, it does not work. But we should probably design the
interface generic enough so that it accommodates those untraditional
cases anyway.

> However I'm still confused about the use of current->user.  If that
> is what we really want and not the user who's quota will be charged
> it gets to be a really trick business, because potentially the uid
> we want to deliver varies depending on who opened the netlink socket.
  I see it's a complicated matter :). What I need to somehow pass to
userspace is something (and I don't really care whether it will be number,
string or whatever) that userspace can read and e.g. find a terminal
window or desktop the affected user has open and also translate the
identity to some user-understandable name (average user Joe has to
understand that he should quickly cleanup his home directory ;).
  Thinking more about it, we could probably pass a string to userspace in
the format:
  <namespace type>:<user identification>

So for example we can have something like:
  unix:1000 (traditional unix UIDs)
  nfs4:joe@machine

The problem is: Are we able to find out in which "namespace type" we are
and send enough identifying information from a context of unprivileged
user?

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 19:26       ` Jan Kara
@ 2007-08-29 21:06         ` Eric W. Biederman
  2007-08-29 21:19           ` Valdis.Kletnieks
  2007-08-30  9:25           ` Jan Kara
  0 siblings, 2 replies; 33+ messages in thread
From: Eric W. Biederman @ 2007-08-29 21:06 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	containers

Jan Kara <jack@suse.cz> writes:

>
>> However I'm still confused about the use of current->user.  If that
>> is what we really want and not the user who's quota will be charged
>> it gets to be a really trick business, because potentially the uid
>> we want to deliver varies depending on who opened the netlink socket.
>   I see it's a complicated matter :). What I need to somehow pass to
> userspace is something (and I don't really care whether it will be number,
> string or whatever) that userspace can read and e.g. find a terminal
> window or desktop the affected user has open and also translate the
> identity to some user-understandable name (average user Joe has to
> understand that he should quickly cleanup his home directory ;).
>   Thinking more about it, we could probably pass a string to userspace in
> the format:
>   <namespace type>:<user identification>
>
> So for example we can have something like:
>   unix:1000 (traditional unix UIDs)
>   nfs4:joe@machine
>
> The problem is: Are we able to find out in which "namespace type" we are
> and send enough identifying information from a context of unpriviledged
> user?

Ok.  This provides enough context to understand what you are trying to do.
You do want the unix user id, not the filesystem notion.  Because you
are looking for the user.

So we have to figure out how to do the hard thing which is look at
who opened our netlink broadcast see if they are in the same user
namespace as current->user.  Which is a pain and we don't currently
have the infrastructure for.

Eric

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 21:06         ` Eric W. Biederman
@ 2007-08-29 21:19           ` Valdis.Kletnieks
  2007-08-30  9:25           ` Jan Kara
  1 sibling, 0 replies; 33+ messages in thread
From: Valdis.Kletnieks @ 2007-08-29 21:19 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jan Kara, Andrew Morton, linux-kernel, Balbir Singh,
	Serge E. Hallyn, containers

[-- Attachment #1: Type: text/plain, Size: 500 bytes --]

On Wed, 29 Aug 2007 15:06:43 MDT, Eric W. Biederman said:

> So we have to figure out how to do the hard thing which is look at
> who opened our netlink broadcast see if they are in the same user
> namespace as current->user.  Which is a pain and we don't currently
> have the infrastructure for.

Provision also needs to be made for things that are listening to the
netlink broadcasts that don't match the user doing the operation or
the owner of the file - similar to the way auditd wants events.


[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29 21:06         ` Eric W. Biederman
  2007-08-29 21:19           ` Valdis.Kletnieks
@ 2007-08-30  9:25           ` Jan Kara
  2007-08-30 17:33             ` Eric W. Biederman
  2007-08-30 19:10             ` Serge E. Hallyn
  1 sibling, 2 replies; 33+ messages in thread
From: Jan Kara @ 2007-08-30  9:25 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	containers

On Wed 29-08-07 15:06:43, Eric W. Biederman wrote:
> Jan Kara <jack@suse.cz> writes:
> >> However I'm still confused about the use of current->user.  If that
> >> is what we really want and not the user who's quota will be charged
> >> it gets to be a really trick business, because potentially the uid
> >> we want to deliver varies depending on who opened the netlink socket.
> >   I see it's a complicated matter :). What I need to somehow pass to
> > userspace is something (and I don't really care whether it will be number,
> > string or whatever) that userspace can read and e.g. find a terminal
> > window or desktop the affected user has open and also translate the
> > identity to some user-understandable name (average user Joe has to
> > understand that he should quickly cleanup his home directory ;).
> >   Thinking more about it, we could probably pass a string to userspace in
> > the format:
> >   <namespace type>:<user identification>
> >
> > So for example we can have something like:
> >   unix:1000 (traditional unix UIDs)
> >   nfs4:joe@machine
> >
> > The problem is: Are we able to find out in which "namespace type" we are
> > and send enough identifying information from a context of unpriviledged
> > user?
> 
> Ok.  This provides enough context to understand what you are trying to do.
> You do want the unix user id, not the filesystem notion.  Because you
> are looking for the user.
> 
> So we have to figure out how to do the hard thing which is look at
> who opened our netlink broadcast see if they are in the same user
> namespace as current->user.  Which is a pain and we don't currently
> have the infrastructure for.
  There can be an arbitrary number of listeners (potentially from different
namespaces if I understand it correctly) listening to broadcasts. So I
think we should pass some universal identifier rather than try to find out
who is listening etc. I think such identifiers would be useful for other
things too, won't they?
  BTW: Do you have some idea when the infrastructure would be clearer?
Whether it makes sense to currently proceed with UIDs and later change it
to something generic or whether I should wait before you sort it out :).

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30  9:25           ` Jan Kara
@ 2007-08-30 17:33             ` Eric W. Biederman
  2007-08-30 18:54               ` Serge E. Hallyn
  2007-08-30 19:18               ` Serge E. Hallyn
  2007-08-30 19:10             ` Serge E. Hallyn
  1 sibling, 2 replies; 33+ messages in thread
From: Eric W. Biederman @ 2007-08-30 17:33 UTC (permalink / raw)
  To: Jan Kara
  Cc: Andrew Morton, linux-kernel, Balbir Singh, Serge E. Hallyn,
	containers

Jan Kara <jack@suse.cz> writes:
>   There can be arbitrary number of listeners (potentially from different
> namespaces if I understand it correctly) listening to broadcasts. So I
> think we should pass some universal identifier rather than try to find out
> who is listening etc. I think such identifiers would be useful for other
> things too, won't they?

So internal to the kernel we have such a universal identifier.
struct user.

There are two practical questions.
1) How do we present that information to user space?
2) How does user space want to process this information?

If we only want user space to be able to look up a user and send
him a message.  It probably makes sense to do the struct user to
uid conversion in the proper context in the kernel because we have
that information.

If this is a general feature that happens to allow us to look up
the user, then giving the filesystem's view of what is going on would be
easier in the kernel, and would not require translation.  But it means
that we can't support 9p and nfs for now.  But since we don't support
quotas on the client end anyway that doesn't sound like a big deal.

The problem with the filesystem view is that there will be occasions
where we simply can not map a user into it, because the filesystem
won't have a concept of that particular user.

So we could run into the situation where alice owns the file.  Bob
writes to the file and pushes it over quota.  But the filesystem
has no concept of who bob is.  So we won't be able to report that
it was bob that pushed things over the edge.

> BTW: Do you have some idea, when would be the infrastructure clearer?

So the plan is to get to the point where all uid comparisons in the
kernel are (user namespace, uid) comparisons.  Or possibly struct
user comparisons (depending on the context).  And struct mount will
contain the user namespace of whoever mounted the filesystem.

Adding infrastructure to netlink to allow us to do conversions 
as the packets are enqueued for a specific user is something I
would rather avoid, but that is a path we can go down if we have
to.

> Whether it makes sence to currently proceed with UIDs and later change it
> to something generic or whether I should wait before you sort it out :).

A good question.  I think things are clear enough that it at least
makes sense to sketch a solution to the problem even if we don't
implement it at this point.

I have been hoping Cedric or Serge would jump in because I think those
are the guys who have been working on the implementation. 

Eric

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30 17:33             ` Eric W. Biederman
@ 2007-08-30 18:54               ` Serge E. Hallyn
  2007-08-30 19:18               ` Serge E. Hallyn
  1 sibling, 0 replies; 33+ messages in thread
From: Serge E. Hallyn @ 2007-08-30 18:54 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jan Kara, Andrew Morton, linux-kernel, Balbir Singh,
	Serge E. Hallyn, containers

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Jan Kara <jack@suse.cz> writes:
> >   There can be arbitrary number of listeners (potentially from different
> > namespaces if I understand it correctly) listening to broadcasts. So I
> > think we should pass some universal identifier rather than try to find out
> > who is listening etc. I think such identifiers would be useful for other
> > things too, won't they?
> 
> So internal to the kernel we have such a universal identifier.
> struct user.
> 
> There are to practical questions.
> 1) How do we present that information to user space?
> 2) How does user space want to process this information?
> 
> If we only want user space to be able to look up a user and send
> him a message.  It probably makes sense to do the struct user to
> uid conversion in the proper context in the kernel because we have
> that information.
> 
> If this is a general feature that happens to allows us to look up
> the user given the filesystems view of what is going on would be
> easier in the kernel, and not require translation.  But it means
> that we can't support 9p and nfs for now.  But since we don't support
> quotas on the client end anyway that doesn't sound like a big deal.
> 
> The problem with the filesystem view is that there will be occasions
> where we simply can not map a user into it, because the filesystem
> won't have a concept of that particular user.
> 
> So we could run into the situation where alice owns the file.  Bob
> writes to the file and pushes it over quota.  But the filesystem
> has no concept of who bob is.  So we won't be able to report that
> it was bob that pushed things over the edge.
> 
> > BTW: Do you have some idea, when would be the infrastructure clearer?
> 
> So the plan is to get to the point where are uid comparisons in the
> kernel are (user namespace, uid) comparisons.  Or possibly struct
> user comparisons (depending on the context.  And struct mount will
> contain the user namespace of whoever mounted the filesystem.
> 
> Adding infrastructure to netlink to allow us to do conversions 
> as the packets are enqueued for a specific user is something I
> would rather avoid, but that is a path we can go down if we have
> to.
> 
> > Whether it makes sence to currently proceed with UIDs and later change it
> > to something generic or whether I should wait before you sort it out :).
> 
> A good question.  I think things are clear enough that it at least
> makes sense to sketch a solution to the problem even if we don't
> implement it at this point.
> 
> I have been hoping Cedric or Serge would jump in because I think those
> are the guys who have been working on the implementation. 

Sorry, I've lost the original patch from two separate mailboxes...

The proper behavior depends on how we end up tying filesystems to user
namespaces, which isn't actually decided yet.

The way I was recommending doing that was:

A filesystem is tied to a user namespace.  If a uid in another namespace
is to be allowed to access the filesystem, it will actually - through a
key in its keyring (which acts like a capability) - be mapped to a uid
in the filesystem's uid namespace.  So in Eric's example, if Alice
brings Bob over quota, Alice would have done so through some user
Charlie who she is authorized to act as through her keyring.  So Charlie
should be the id which would be logged over netlink.

Of course there is currently no support for this.  So I'd recommend one
of two options:  either just punt on uid namespace for now and we'll fix
it when we improve user namespaces - so log Alice's userid.  Or we can
try to do it somewhat correct now, which might be done as follows:

	1. introduce get_uid_in_userns(tsk).  For now this just returns
	   tsk->uid if current->userns == tsk->userns, else it returns
	   0.
	   This way in Eric's scenario, Bob would be told that root,
	   not an invalid user (Alice) had brought him over quota.
	   Eventually, this would walk tsk's keychain for a uid entry
	   in current's active user namespace.
	
	2. Add the userns to the netlink message.

Again I need to find Jan's original patch, but I'll take a look at this.

-serge

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30 17:33             ` Eric W. Biederman
  2007-08-30 18:54               ` Serge E. Hallyn
@ 2007-08-30 19:18               ` Serge E. Hallyn
  1 sibling, 0 replies; 33+ messages in thread
From: Serge E. Hallyn @ 2007-08-30 19:18 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Jan Kara, Andrew Morton, linux-kernel, Balbir Singh,
	Serge E. Hallyn, containers

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Jan Kara <jack@suse.cz> writes:
> >   There can be arbitrary number of listeners (potentially from different
> > namespaces if I understand it correctly) listening to broadcasts. So I
> > think we should pass some universal identifier rather than try to find out
> > who is listening etc. I think such identifiers would be useful for other
> > things too, won't they?
> 
> So internal to the kernel we have such a universal identifier.
> struct user.
> 
> There are to practical questions.
> 1) How do we present that information to user space?
> 2) How does user space want to process this information?
> 
> If we only want user space to be able to look up a user and send
> him a message.  It probably makes sense to do the struct user to
> uid conversion in the proper context in the kernel because we have
> that information.
> 
> If this is a general feature that happens to allows us to look up
> the user given the filesystems view of what is going on would be
> easier in the kernel, and not require translation.  But it means
> that we can't support 9p and nfs for now.  But since we don't support
> quotas on the client end anyway that doesn't sound like a big deal.
> 
> The problem with the filesystem view is that there will be occasions
> where we simply can not map a user into it, because the filesystem
> won't have a concept of that particular user.
> 
> So we could run into the situation where alice owns the file.  Bob
> writes to the file and pushes it over quota.  But the filesystem
> has no concept of who bob is.  So we won't be able to report that
> it was bob that pushed things over the edge.
> 
> > BTW: Do you have some idea, when would be the infrastructure clearer?
> 
> So the plan is to get to the point where are uid comparisons in the
> kernel are (user namespace, uid) comparisons.  Or possibly struct

Just fyi Eric,

Note that given the amount of churn going on due to pid and network
namespaces, I was seeing completion of user namespaces as something to
be done sometime next year.  In the meantime I was only going to do
something with capabilities to restrict root in user namespaces (which I
think will take the form of per-process non-expandable cap_bsets, which
I plan to start basically right now).

But I'll gladly do the userns enhancements earlier if it's actually
wanted.  They promise to be great fun  :)

-serge

> user comparisons (depending on the context.  And struct mount will
> contain the user namespace of whoever mounted the filesystem.
> 
> Adding infrastructure to netlink to allow us to do conversions 
> as the packets are enqueued for a specific user is something I
> would rather avoid, but that is a path we can go down if we have
> to.
> 
> > Whether it makes sence to currently proceed with UIDs and later change it
> > to something generic or whether I should wait before you sort it out :).
> 
> A good question.  I think things are clear enough that it at least
> makes sense to sketch a solution to the problem even if we don't
> implement it at this point.
> 
> I have been hoping Cedric or Serge would jump in because I think those
> are the guys who have been working on the implementation. 
> 
> Eric
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30  9:25           ` Jan Kara
  2007-08-30 17:33             ` Eric W. Biederman
@ 2007-08-30 19:10             ` Serge E. Hallyn
  2007-08-30 22:18               ` Jan Kara
  1 sibling, 1 reply; 33+ messages in thread
From: Serge E. Hallyn @ 2007-08-30 19:10 UTC (permalink / raw)
  To: Jan Kara
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, Balbir Singh,
	Serge E. Hallyn, containers

Quoting Jan Kara (jack@suse.cz):
> On Wed 29-08-07 15:06:43, Eric W. Biederman wrote:
> > Jan Kara <jack@suse.cz> writes:
> > >> However I'm still confused about the use of current->user.  If that
> > >> is what we really want and not the user who's quota will be charged
> > >> it gets to be a really trick business, because potentially the uid
> > >> we want to deliver varies depending on who opened the netlink socket.
> > >   I see it's a complicated matter :). What I need to somehow pass to
> > > userspace is something (and I don't really care whether it will be number,
> > > string or whatever) that userspace can read and e.g. find a terminal
> > > window or desktop the affected user has open and also translate the
> > > identity to some user-understandable name (average user Joe has to
> > > understand that he should quickly cleanup his home directory ;).
> > >   Thinking more about it, we could probably pass a string to userspace in
> > > the format:
> > >   <namespace type>:<user identification>
> > >
> > > So for example we can have something like:
> > >   unix:1000 (traditional unix UIDs)
> > >   nfs4:joe@machine
> > >
> > > The problem is: Are we able to find out in which "namespace type" we are
> > > and send enough identifying information from a context of unpriviledged
> > > user?
> > 
> > Ok.  This provides enough context to understand what you are trying to do.
> > You do want the unix user id, not the filesystem notion.  Because you
> > are looking for the user.
> > 
> > So we have to figure out how to do the hard thing which is look at
> > who opened our netlink broadcast see if they are in the same user
> > namespace as current->user.  Which is a pain and we don't currently
> > have the infrastructure for.
>   There can be arbitrary number of listeners (potentially from different
> namespaces if I understand it correctly) listening to broadcasts. So I

Currently that is true, but I think isolating netlink sockets is going
to have to be done pretty soon.

On the one hand cloning a new netlink socket ns when you unshare
CLONE_NEWNET may seem 'obvious', but I think doing so when you unshare
CLONE_NEWUSER makes much more sense considering netlink's use for audit
and now for quota.

> think we should pass some universal identifier rather than try to find out

Even with isolating netlink we still may want to send out an identifier.
However, just as with mounts extensions we're printing out the memory
address of vfsmounts, we might just want to print out the memory address
of the userns.  It's not universal, but should be good enough.

-serge

> who is listening etc. I think such identifiers would be useful for other
> things too, won't they?
>   BTW: Do you have some idea, when would be the infrastructure clearer?
> Whether it makes sence to currently proceed with UIDs and later change it
> to something generic or whether I should wait before you sort it out :).
> 
> 								Honza
> -- 
> Jan Kara <jack@suse.cz>
> SuSE CR Labs
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30 19:10             ` Serge E. Hallyn
@ 2007-08-30 22:18               ` Jan Kara
  2007-08-30 22:14                 ` Serge E. Hallyn
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-08-30 22:18 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, Balbir Singh,
	Serge E. Hallyn, containers

On Thu 30-08-07 14:10:10, Serge E. Hallyn wrote:
> Quoting Jan Kara (jack@suse.cz):
> > On Wed 29-08-07 15:06:43, Eric W. Biederman wrote:
> > > Jan Kara <jack@suse.cz> writes:
> > > >> However I'm still confused about the use of current->user.  If that
> > > >> is what we really want and not the user who's quota will be charged
> > > >> it gets to be a really trick business, because potentially the uid
> > > >> we want to deliver varies depending on who opened the netlink socket.
> > > >   I see it's a complicated matter :). What I need to somehow pass to
> > > > userspace is something (and I don't really care whether it will be number,
> > > > string or whatever) that userspace can read and e.g. find a terminal
> > > > window or desktop the affected user has open and also translate the
> > > > identity to some user-understandable name (average user Joe has to
> > > > understand that he should quickly cleanup his home directory ;).
> > > >   Thinking more about it, we could probably pass a string to userspace in
> > > > the format:
> > > >   <namespace type>:<user identification>
> > > >
> > > > So for example we can have something like:
> > > >   unix:1000 (traditional unix UIDs)
> > > >   nfs4:joe@machine
> > > >
> > > > The problem is: Are we able to find out in which "namespace type" we are
> > > > and send enough identifying information from a context of unpriviledged
> > > > user?
> > > 
> > > Ok.  This provides enough context to understand what you are trying to do.
> > > You do want the unix user id, not the filesystem notion.  Because you
> > > are looking for the user.
> > > 
> > > So we have to figure out how to do the hard thing which is look at
> > > who opened our netlink broadcast see if they are in the same user
> > > namespace as current->user.  Which is a pain and we don't currently
> > > have the infrastructure for.
> >   There can be arbitrary number of listeners (potentially from different
> > namespaces if I understand it correctly) listening to broadcasts. So I
> 
> Currently that is true, but i think isolating netlink sockets is going
> to have to be done pretty soon.
> 
> On the one hand cloning a new netlink socket ns when you unshare
> CLONE_NEWNET may seem 'obvious', but I think doing so when you unshare
> CLONE_NEWUSER make much more sense considering netlink's use for audit
> and now for quota.
> 
> > think we should pass some universal identifier rather than try to find out
> 
> Even with isolating netlink we still may want to send out an identifier.
> However, just as with mounts extensions we're printing out the memory
> address of vfsmounts, we might just want to print out the memory address
> of the userns.  It's not universal, but should be good enough.
  Maybe before proceeding further with the discussion I'd like to
understand following: What are these user namespaces supposed to be good
for?
  I imagine it so that you have a machine and on it several virtual
machines which are sharing a filesystem (or it could be a cluster). Now you
want UIDs to be independent between these virtual machines. That's it,
right?
  Now to continue the example: Alice has UID 100 on machineA, Bob has
 UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
filesystem. A process of Alice's writes to a file and Bob ends up over
quota. In this situation, there would probably be two processes (from
machineA and machineB) listening on the netlink socket. We want to send a
message so that on Alice's desktop we can show a message: "You caused
Bob to exceed his quotas" and on Bob's desktop: "Alice has caused you
to go over quota.".
  Because there may not be a notion of Bob on machineA or of Alice on
machineB, we are in trouble, right? What I like the most is to use the
filesystem identities (as you suggested in some other email). I.e., because
both Alice and Bob share a filesystem, identities of both have to make sense
to it (for example for purposes of permission checking). So we can probably
send via netlink these (in our example ids 1000 and 1001) and hope that
inside machineA and machineB there will be a way to translate these
identities to names "Alice" and "Bob". So that user can understand what
is happening. Does this sound plausible?
  If we go this route, then we only need a kernel function that will,
for a pair ($filesystem, $task), return the identity of that $task used
for operations on $filesystem...

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30 22:18               ` Jan Kara
@ 2007-08-30 22:14                 ` Serge E. Hallyn
  2007-09-03 14:21                   ` Jan Kara
  0 siblings, 1 reply; 33+ messages in thread
From: Serge E. Hallyn @ 2007-08-30 22:14 UTC (permalink / raw)
  To: Jan Kara
  Cc: Serge E. Hallyn, Eric W. Biederman, Andrew Morton, linux-kernel,
	Balbir Singh, Serge E. Hallyn, containers

Quoting Jan Kara (jack@suse.cz):
> On Thu 30-08-07 14:10:10, Serge E. Hallyn wrote:
> > Quoting Jan Kara (jack@suse.cz):
> > > On Wed 29-08-07 15:06:43, Eric W. Biederman wrote:
> > > > Jan Kara <jack@suse.cz> writes:
> > > > >> However I'm still confused about the use of current->user.  If that
> > > > >> is what we really want and not the user who's quota will be charged
> > > > >> it gets to be a really trick business, because potentially the uid
> > > > >> we want to deliver varies depending on who opened the netlink socket.
> > > > >   I see it's a complicated matter :). What I need to somehow pass to
> > > > > userspace is something (and I don't really care whether it will be number,
> > > > > string or whatever) that userspace can read and e.g. find a terminal
> > > > > window or desktop the affected user has open and also translate the
> > > > > identity to some user-understandable name (average user Joe has to
> > > > > understand that he should quickly cleanup his home directory ;).
> > > > >   Thinking more about it, we could probably pass a string to userspace in
> > > > > the format:
> > > > >   <namespace type>:<user identification>
> > > > >
> > > > > So for example we can have something like:
> > > > >   unix:1000 (traditional unix UIDs)
> > > > >   nfs4:joe@machine
> > > > >
> > > > > The problem is: Are we able to find out in which "namespace type" we are
> > > > > and send enough identifying information from a context of unpriviledged
> > > > > user?
> > > > 
> > > > Ok.  This provides enough context to understand what you are trying to do.
> > > > You do want the unix user id, not the filesystem notion.  Because you
> > > > are looking for the user.
> > > > 
> > > > So we have to figure out how to do the hard thing which is look at
> > > > who opened our netlink broadcast see if they are in the same user
> > > > namespace as current->user.  Which is a pain and we don't currently
> > > > have the infrastructure for.
> > >   There can be arbitrary number of listeners (potentially from different
> > > namespaces if I understand it correctly) listening to broadcasts. So I
> > 
> > Currently that is true, but i think isolating netlink sockets is going
> > to have to be done pretty soon.
> > 
> > On the one hand cloning a new netlink socket ns when you unshare
> > CLONE_NEWNET may seem 'obvious', but I think doing so when you unshare
> > CLONE_NEWUSER make much more sense considering netlink's use for audit
> > and now for quota.
> > 
> > > think we should pass some universal identifier rather than try to find out
> > 
> > Even with isolating netlink we still may want to send out an identifier.
> > However, just as with mounts extensions we're printing out the memory
> > address of vfsmounts, we might just want to print out the memory address
> > of the userns.  It's not universal, but should be good enough.
>   Maybe before proceeding further with the discussion I'd like to
> understand following: What are these user namespaces supposed to be good
> for?

(Please skip to the message end first, as I think you may not care about
the next bit of my blathering)

Right now they are only good for providing some separate accounting for
uid 1000 in one user namespace versus uid 1000 in another namespace.
All security enforcement must be done by actually providing separate
filesystems and separate pid namespaces and, hopefully, with a selinux
policy.

Eventually the idea will be that uid 1000 in one user namespace and uid
1000 in another namespace will be completely separate entities.  A
mounted filesystem will be tied to a particular user namespace, and
the kernel will provide any cross-userns access perhaps the way I
described, with uid equivalence implemented through the keyring.

But note that this isn't really relevant when we get to NFS.  Two user
namespaces on one machine should have different network namespaces and
network addresses as well, and so should look to the NFS server like two
separate machines.

So the user namespaces are only really relevant when talking about local
filesystems.

>   I imagine it so that you have a machine and on it several virtual
> machines which are sharing a filesystem (or it could be a cluster). Now you
> want UIDs to be independent between these virtual machines. That's it,
> right?
>   Now to continue the example: Alice has UID 100 on machineA, Bob has
>  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> filesystem. Process of Alice writes to a file and Bob becomes to be over
> quota. In this situation, there would be probably two processes (from
> machineA and machineB) listening on the netlink socket. We want to send a
> message so that on Alice's desktop we can show a message: "You caused
> Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> are over quota.".

Since this is over NFS, you handle it the way you would any other time
that user Alice on some other machine managed to do this.

>   Because there may be is not a notion of Bob on machineA or of Alice on
> machineB, we are in trouble, right? What I like the most is to use the
> filesystem identities (as you suggested in some other email). I. e. because
> both Alice and Bob share a filesystem, identities of both have to make sense
> to it (for example for purposes of permission checking). So we can probably

Right, so long as we're talking about local filesystems that's the way
to go.  If a file write was allowed which brought bob over quota,
clearly the person responsible had some uid valid on the filesystem to
allow him to do so.

> send via netlink these (in our example ids 1000 and 1001) and hope that
> inside machineA and machineB there will be a way to translate these
> identities to names "Alice" and "Bob". So that user can understand what
> is happenning. Does this sound plausible?
>   If we go this route, then we only need a kernel function, that will
> for a pair ($filesystem, $task) return indentity of that $task used
> for operations on $filesystem...

Ok, now I see.  This is again unrelated to user namespaces, it's an
issue regardless.

Is there no way to just report Alice as the guilty party to Bob on his
machine as (host=nfsserver,uid=1000)?

-serge

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-30 22:14                 ` Serge E. Hallyn
@ 2007-09-03 14:21                   ` Jan Kara
  2007-09-04 21:32                     ` Serge E. Hallyn
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-09-03 14:21 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Eric W. Biederman, Andrew Morton, linux-kernel, Balbir Singh,
	Serge E. Hallyn, containers

On Thu 30-08-07 17:14:47, Serge E. Hallyn wrote:
> Quoting Jan Kara (jack@suse.cz):
> >   Maybe before proceeding further with the discussion I'd like to
> > understand following: What are these user namespaces supposed to be good
> > for?
> 
> (Please skip to the message end first, as I think you may not care about
> the next bit of my blathering)
> 
> Right now they are only good for providing some separate accounting for
> uid 1000 in one user namespace versus uid 1000 in another namespace.
> All security enforcement must be done by actually providing separate
> filesystems and separate pid namespaces and, hopefully, with a selinux
> policy.
> 
> Eventually the idea will be that uid 1000 in one user namespace and uid
> 1000 in another namespace will be completely separate entities.  A
> mounted filesystem will be tied to a particuler user namespace, and
> the kernel will provide any cross-userns access perhaps the way I
> described, with uid equivalence implemented through the keyring.
  I see. Thanks for explanation.

> But note that this isn't really relevant when we get to NFS.  Two user
> namespaces on one machine should have different network namespaces and
> network addresses as well, and so should look to the NFS server like two
> separate machines.
> 
> So the user namespaces are only really relevant when talking about local
> filesystems.
> 
> >   I imagine it so that you have a machine and on it several virtual
> > machines which are sharing a filesystem (or it could be a cluster). Now you
> > want UIDs to be independent between these virtual machines. That's it,
> > right?
> >   Now to continue the example: Alice has UID 100 on machineA, Bob has
> >  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> > filesystem. Process of Alice writes to a file and Bob becomes to be over
> > quota. In this situation, there would be probably two processes (from
> > machineA and machineB) listening on the netlink socket. We want to send a
> > message so that on Alice's desktop we can show a message: "You caused
> > Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> > are over quota.".
> 
> Since this is over NFS, you handle it the way you would any other time
> that user Alice on some other machine managed to do this.
  I meant this would actually happen over a local filesystem (imagine
something like "hostfs" from UML).

> >   Because there may be is not a notion of Bob on machineA or of Alice on
> > machineB, we are in trouble, right? What I like the most is to use the
> > filesystem identities (as you suggested in some other email). I. e. because
> > both Alice and Bob share a filesystem, identities of both have to make sense
> > to it (for example for purposes of permission checking). So we can probably
> 
> Right, so long as we're talking about local filesystems that's the way
> to go.  If a file write was allowed which brought bob over quota,
> clearly the person responsible had some uid valid on the filesystem to
> allow him to do so.
  Fine. So I'll keep UID in the quota netlink protocol with the meaning
"the identity of the user for filesystem operations".

> > send via netlink these (in our example ids 1000 and 1001) and hope that
> > inside machineA and machineB there will be a way to translate these
> > identities to names "Alice" and "Bob". So that user can understand what
> > is happenning. Does this sound plausible?
> >   If we go this route, then we only need a kernel function, that will
> > for a pair ($filesystem, $task) return indentity of that $task used
> > for operations on $filesystem...
> 
> Ok, now I see.  This is again unrelated to user namespaces, it's an
> issue regardless.
> 
> Is there no way to just report Alice as the guilty party to Bob on his
> machine as (host=nfsserver,uid=1000)?
  You know, in fact this contains all the information but it is quite useless
for an ordinary user. The message should be understandable to average desktop
user so it should contain some name rather than a UID - but resolving the
"filesystem" UID to some meaningful name is a completely different issue
and I'd probably leave that for the moment when the kernel infrastructure
and use cases are clearer...

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-03 14:21                   ` Jan Kara
@ 2007-09-04 21:32                     ` Serge E. Hallyn
  2007-09-04 22:49                       ` Jan Kara
  0 siblings, 1 reply; 33+ messages in thread
From: Serge E. Hallyn @ 2007-09-04 21:32 UTC (permalink / raw)
  To: Jan Kara
  Cc: Serge E. Hallyn, Eric W. Biederman, Andrew Morton, linux-kernel,
	Balbir Singh, Serge E. Hallyn, containers

Quoting Jan Kara (jack@suse.cz):
> On Thu 30-08-07 17:14:47, Serge E. Hallyn wrote:
> > Quoting Jan Kara (jack@suse.cz):
> > >   Maybe before proceeding further with the discussion I'd like to
> > > understand following: What are these user namespaces supposed to be good
> > > for?
> > 
> > (Please skip to the message end first, as I think you may not care about
> > the next bit of my blathering)
> > 
> > Right now they are only good for providing some separate accounting for
> > uid 1000 in one user namespace versus uid 1000 in another namespace.
> > All security enforcement must be done by actually providing separate
> > filesystems and separate pid namespaces and, hopefully, with a selinux
> > policy.
> > 
> > Eventually the idea will be that uid 1000 in one user namespace and uid
> > 1000 in another namespace will be completely separate entities.  A
> > mounted filesystem will be tied to a particuler user namespace, and
> > the kernel will provide any cross-userns access perhaps the way I
> > described, with uid equivalence implemented through the keyring.
>   I see. Thanks for explanation.
> 
> > But note that this isn't really relevant when we get to NFS.  Two user
> > namespaces on one machine should have different network namespaces and
> > network addresses as well, and so should look to the NFS server like two
> > separate machines.
> > 
> > So the user namespaces are only really relevant when talking about local
> > filesystems.
> > 
> > >   I imagine it so that you have a machine and on it several virtual
> > > machines which are sharing a filesystem (or it could be a cluster). Now you
> > > want UIDs to be independent between these virtual machines. That's it,
> > > right?
> > >   Now to continue the example: Alice has UID 100 on machineA, Bob has
> > >  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> > > filesystem. Process of Alice writes to a file and Bob becomes to be over
> > > quota. In this situation, there would be probably two processes (from
> > > machineA and machineB) listening on the netlink socket. We want to send a
> > > message so that on Alice's desktop we can show a message: "You caused
> > > Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> > > are over quota.".
> > 
> > Since this is over NFS, you handle it the way you would any other time
> > that user Alice on some other machine managed to do this.
>   I meant this would actually happen over a local filesystem (imagine
> something like "hostfs" from UML).

Ok, then that is where I was previously suggesting that we use an api to
report a uid meaningful in bob's context, where we currently (in the
absence of meaningful mount uids and uid equivalence) tell Bob that root
was the one who brought him over quota.  From a user pov 'nobody' would
make more sense, but I don't think we want the kernel to know about user
nobody, right?

So if the msg weren't broadcast, or netlink sockets were tied to one
user namespace, we could call a
	int uid_in_user_ns(struct user *, struct user_ns *)
sending in Alice's user struct and Bob's userns, and use the result in
the netlink message.  Otherwise I'm not sure what is the right answer.
We just might need the equivalent of 'struct pid' to struct user, or
persistent global user namespace ids (persistent after user namespace
destruction, not across reboot) so we can safely send the user_ns * in a
netlink msg.

> > >   Because there may be is not a notion of Bob on machineA or of Alice on
> > > machineB, we are in trouble, right? What I like the most is to use the
> > > filesystem identities (as you suggested in some other email). I. e. because
> > > both Alice and Bob share a filesystem, identities of both have to make sense
> > > to it (for example for purposes of permission checking). So we can probably
> > 
> > Right, so long as we're talking about local filesystems that's the way
> > to go.  If a file write was allowed which brought bob over quota,
> > clearly the person responsible had some uid valid on the filesystem to
> > allow him to do so.
>   Fine. So I'll keep UID in the quota netlink protocol with the meaning
> "the identity of the user for filesystem operations".

I think that's ok.

Hopefully when that changes to accommodate user namespaces, we can use
netlink field versioning to make that transition pretty seamless?

If not, then we probably should in fact make some decision now so as not
to change the api.

> > > send via netlink these (in our example ids 1000 and 1001) and hope that
> > > inside machineA and machineB there will be a way to translate these
> > > identities to names "Alice" and "Bob". So that user can understand what
> > > is happenning. Does this sound plausible?
> > >   If we go this route, then we only need a kernel function, that will
> > > for a pair ($filesystem, $task) return indentity of that $task used
> > > for operations on $filesystem...
> > 
> > Ok, now I see.  This is again unrelated to user namespaces, it's an
> > issue regardless.
> > 
> > Is there no way to just report Alice as the guilty party to Bob on his
> > machine as (host=nfsserver,uid=1000)?
>   You know, in fact this contains all the information but it is quite useless
> for an ordinary user. The message should be understandable to average desktop

What is the ordinary user going to do about it?  If the user didn't set
up the nfsserver and/or the second client, the only thing he can do is
report the guilty user to an admin.  In which case the tuple
(host=nfsserver,uid=1000) is exactly the data he needs to report.

> user so it should contain some name rather than UID - but resolving the
> "filesystem" UID to some meaningful name is completely different issue
> and I'd probably leave that for the moment when the kernel infrastructure
> and use cases would be clearer...
> 
> 								Honza
> -- 
> Jan Kara <jack@suse.cz>
> SuSE CR Labs

thanks,
-serge

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-04 21:32                     ` Serge E. Hallyn
@ 2007-09-04 22:49                       ` Jan Kara
  2007-09-04 23:48                         ` Serge E. Hallyn
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-09-04 22:49 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Jan Kara, Serge E. Hallyn, Eric W. Biederman, Andrew Morton,
	linux-kernel, Balbir Singh, containers

On Tue 04-09-07 16:32:10, Serge E. Hallyn wrote:
> Quoting Jan Kara (jack@suse.cz):
> > On Thu 30-08-07 17:14:47, Serge E. Hallyn wrote:
> > > Quoting Jan Kara (jack@suse.cz):
> > > >   I imagine it so that you have a machine and on it several virtual
> > > > machines which are sharing a filesystem (or it could be a cluster). Now you
> > > > want UIDs to be independent between these virtual machines. That's it,
> > > > right?
> > > >   Now to continue the example: Alice has UID 100 on machineA, Bob has
> > > >  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> > > > filesystem. Process of Alice writes to a file and Bob becomes to be over
> > > > quota. In this situation, there would be probably two processes (from
> > > > machineA and machineB) listening on the netlink socket. We want to send a
> > > > message so that on Alice's desktop we can show a message: "You caused
> > > > Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> > > > are over quota.".
> > > 
> > > Since this is over NFS, you handle it the way you would any other time
> > > that user Alice on some other machine managed to do this.
> >   I meant this would actually happen over a local filesystem (imagine
> > something like "hostfs" from UML).
> 
> Ok, then that is where I was previously suggesting that we use an api to
> report a uid meaningful in bob's context, where we currently (in the
> absense of meaningful mount uids and uid equivalence) tell Bob that root
> was the one who brought him over quota.  From a user pov 'nobody' would
> make more sense, but I don't think we want the kernel to know about user
> nobody, right?
  But what is the problem with using the filesystem ids? All virtual
machines in my example should have a notion of those...

> So if the msg weren't broadcast, or netlink sockets were tied to one
> user namespace, we could call a
> 	int uid_in_user_ns(struct user *, struct user_ns *)
> sending in Alice's user struct and Bob's userns, and use the result in
> the netlink message.  Otherwise I'm not sure what is the right answer.
> We just might need the equivalent of 'struct pid' to struct user, or
> persistant global user namespace ids (persistant after user namespace
> destruction, not across reboot) so we can safely send the user_ns * in a
> netlink msg.
  Yes, that could also be a solution.

> > > >   Because there may be is not a notion of Bob on machineA or of Alice on
> > > > machineB, we are in trouble, right? What I like the most is to use the
> > > > filesystem identities (as you suggested in some other email). I. e. because
> > > > both Alice and Bob share a filesystem, identities of both have to make sense
> > > > to it (for example for purposes of permission checking). So we can probably
> > > 
> > > Right, so long as we're talking about local filesystems that's the way
> > > to go.  If a file write was allowed which brought bob over quota,
> > > clearly the person responsible had some uid valid on the filesystem to
> > > allow him to do so.
> >   Fine. So I'll keep UID in the quota netlink protocol with the meaning
> > "the identity of the user for filesystem operations".
> 
> I think that's ok.
> 
> Hopefully when that changes to accomodate user namespaces, we can use
> netlink field versioning to make that transition pretty seamless?
  Yes, we'd just assign the attribute a different number and teach
userspace about the new attribute format...

> If not, then we probably should in fact make some decision now so as not
> to change the api.
> 
> > > > send via netlink these (in our example ids 1000 and 1001) and hope that
> > > > inside machineA and machineB there will be a way to translate these
> > > > identities to names "Alice" and "Bob". So that user can understand what
> > > > is happenning. Does this sound plausible?
> > > >   If we go this route, then we only need a kernel function, that will
> > > > for a pair ($filesystem, $task) return indentity of that $task used
> > > > for operations on $filesystem...
> > > 
> > > Ok, now I see.  This is again unrelated to user namespaces, it's an
> > > issue regardless.
> > > 
> > > Is there no way to just report Alice as the guilty party to Bob on his
> > > machine as (host=nfsserver,uid=1000)?
> >   You know, in fact this contains all the information but it is quite useless
> > for an ordinary user. The message should be understandable to average desktop
> 
> What is the ordinary user going to do about it?  If the user didn't set
> up the nfsserver and/or the second client, the only thing he can do is
> report the guilty user to an admin.  In which case the tuple
> (host=nfsserver,uid=1000) is exactly the data he needs to report.
  Maybe write him an email or go and bang him with a baseball bat ;)
Seriously, if someone (like an admin) is able to find the physical identity of the
guilty user, then we should be able to do this in software too, shouldn't
we?

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-04 22:49                       ` Jan Kara
@ 2007-09-04 23:48                         ` Serge E. Hallyn
  2007-09-05 13:32                           ` Jan Kara
  0 siblings, 1 reply; 33+ messages in thread
From: Serge E. Hallyn @ 2007-09-04 23:48 UTC (permalink / raw)
  To: Jan Kara
  Cc: Serge E. Hallyn, Serge E. Hallyn, Eric W. Biederman,
	Andrew Morton, linux-kernel, Balbir Singh, containers

Quoting Jan Kara (jack@suse.cz):
> On Tue 04-09-07 16:32:10, Serge E. Hallyn wrote:
> > Quoting Jan Kara (jack@suse.cz):
> > > On Thu 30-08-07 17:14:47, Serge E. Hallyn wrote:
> > > > Quoting Jan Kara (jack@suse.cz):
> > > > >   I imagine it so that you have a machine and on it several virtual
> > > > > machines which are sharing a filesystem (or it could be a cluster). Now you
> > > > > want UIDs to be independent between these virtual machines. That's it,
> > > > > right?
> > > > >   Now to continue the example: Alice has UID 100 on machineA, Bob has
> > > > >  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> > > > > filesystem. Process of Alice writes to a file and Bob becomes to be over
> > > > > quota. In this situation, there would be probably two processes (from
> > > > > machineA and machineB) listening on the netlink socket. We want to send a
> > > > > message so that on Alice's desktop we can show a message: "You caused
> > > > > Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> > > > > are over quota.".
> > > > 
> > > > Since this is over NFS, you handle it the way you would any other time
> > > > that user Alice on some other machine managed to do this.
> > >   I meant this would actually happen over a local filesystem (imagine
> > > something like "hostfs" from UML).
> > 
> > Ok, then that is where I was previously suggesting that we use an api to
> > report a uid meaningful in bob's context, where we currently (in the
> > absense of meaningful mount uids and uid equivalence) tell Bob that root
> > was the one who brought him over quota.  From a user pov 'nobody' would
> > make more sense, but I don't think we want the kernel to know about user
> > nobody, right?
>   But what is the problem with using the filesystem ids? All virtual
> machines in my example should have a notion of those...

I don't know what you mean by filesystem ids.  Do you mean the uid
stored on the fs?  I imagine a network fs could get fancy and store
something more detailed than the unix uid, based on the user's keys.

Do you mean the inode->i_uid?  Nothing wrong with that.  Then we just
assume that either you are in the superblock or mount's user namespace
(depending on how we implement it, probably superblock), or can figure
out what that is.

> > So if the msg weren't broadcast, or netlink sockets were tied to one
> > user namespace, we could call a
> > 	int uid_in_user_ns(struct user *, struct user_ns *)
> > sending in Alice's user struct and Bob's userns, and use the result in
> > the netlink message.  Otherwise I'm not sure what is the right answer.
> > We just might need the equivalent of 'struct pid' to struct user, or
> > persistant global user namespace ids (persistant after user namespace
> > destruction, not across reboot) so we can safely send the user_ns * in a
> > netlink msg.
>   Yes, that could also be a solution.
> 
> > > > >   Because there may be is not a notion of Bob on machineA or of Alice on
> > > > > machineB, we are in trouble, right? What I like the most is to use the
> > > > > filesystem identities (as you suggested in some other email). I. e. because
> > > > > both Alice and Bob share a filesystem, identities of both have to make sense
> > > > > to it (for example for purposes of permission checking). So we can probably
> > > > 
> > > > Right, so long as we're talking about local filesystems that's the way
> > > > to go.  If a file write was allowed which brought bob over quota,
> > > > clearly the person responsible had some uid valid on the filesystem to
> > > > allow him to do so.
> > >   Fine. So I'll keep UID in the quota netlink protocol with the meaning
> > > "the identity of the user for filesystem operations".
> > 
> > I think that's ok.
> > 
> > Hopefully when that changes to accomodate user namespaces, we can use
> > netlink field versioning to make that transition pretty seamless?
>   Yes, we'd just assign the attribute a different number and teach
> userspace about the new attribute format...

Ok.

> > If not, then we probably should in fact make some decision now so as not
> > to change the api.
> > 
> > > > > send via netlink these (in our example ids 1000 and 1001) and hope that
> > > > > inside machineA and machineB there will be a way to translate these
> > > > > identities to names "Alice" and "Bob". So that user can understand what
> > > > > is happenning. Does this sound plausible?
> > > > >   If we go this route, then we only need a kernel function, that will
> > > > > for a pair ($filesystem, $task) return indentity of that $task used
> > > > > for operations on $filesystem...
> > > > 
> > > > Ok, now I see.  This is again unrelated to user namespaces, it's an
> > > > issue regardless.
> > > > 
> > > > Is there no way to just report Alice as the guilty party to Bob on his
> > > > machine as (host=nfsserver,uid=1000)?
> > >   You know, in fact this contains all the information but it is quite useless
> > > for an ordinary user. The message should be understandable to average desktop
> > 
> > What is the ordinary user going to do about it?  If the user didn't set
> > up the nfsserver and/or the second client, the only thing he can do is
> > report the guilty user to an admin.  In which case the tuple
> > (host=nfsserver,uid=1000) is exactly the data he needs to report.
>   Maybe write him an email or go and bang him with a baseball bat ;)
> Seriously, if someone (like admin) is able to find a physical identity of the
> guilty user, then we should be able to do this in a software too, shouldn't
> we?

Sure, and in many ways.  But if working with NFS, as far as I know the
most common way to solve it is to enforce a common /etc/passwd across
all the valid NFS clients  :)

-serge

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-04 23:48                         ` Serge E. Hallyn
@ 2007-09-05 13:32                           ` Jan Kara
  2007-09-05 14:28                             ` Serge E. Hallyn
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-09-05 13:32 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: Serge E. Hallyn, Eric W. Biederman, Andrew Morton, linux-kernel,
	Balbir Singh, containers

On Tue 04-09-07 18:48:52, Serge E. Hallyn wrote:
> Quoting Jan Kara (jack@suse.cz):
> > On Tue 04-09-07 16:32:10, Serge E. Hallyn wrote:
> > > Quoting Jan Kara (jack@suse.cz):
> > > > On Thu 30-08-07 17:14:47, Serge E. Hallyn wrote:
> > > > > Quoting Jan Kara (jack@suse.cz):
> > > > > >   I imagine it so that you have a machine and on it several virtual
> > > > > > machines which are sharing a filesystem (or it could be a cluster). Now you
> > > > > > want UIDs to be independent between these virtual machines. That's it,
> > > > > > right?
> > > > > >   Now to continue the example: Alice has UID 100 on machineA, Bob has
> > > > > >  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> > > > > > filesystem. Process of Alice writes to a file and Bob becomes to be over
> > > > > > quota. In this situation, there would be probably two processes (from
> > > > > > machineA and machineB) listening on the netlink socket. We want to send a
> > > > > > message so that on Alice's desktop we can show a message: "You caused
> > > > > > Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> > > > > > are over quota.".
> > > > > 
> > > > > Since this is over NFS, you handle it the way you would any other time
> > > > > that user Alice on some other machine managed to do this.
> > > >   I meant this would actually happen over a local filesystem (imagine
> > > > something like "hostfs" from UML).
> > > 
> > > Ok, then that is where I was previously suggesting that we use an api to
> > > report a uid meaningful in bob's context, where we currently (in the
> > > absense of meaningful mount uids and uid equivalence) tell Bob that root
> > > was the one who brought him over quota.  From a user pov 'nobody' would
> > > make more sense, but I don't think we want the kernel to know about user
> > > nobody, right?
> >   But what is the problem with using the filesystem ids? All virtual
> > machines in my example should have a notion of those...
> 
> I don't know what you mean by filesystem ids.  Do you mean the uid
> stored on the fs?  I imagine a network fs could get fancy and store
> something more detailed than the unix uid, based on the user's keys.
> 
> Do you mean the inode->i_uid?  Nothing wrong with that.  Then we just
> assume that either you are in the superblock or mount's user namespace
> (depending on how we implement it, probably superblock), or can figure
> out what that is.
  I meant the identity the process uses to access the filesystem (to
identify the user who caused the limit excess) and also the identity stored
in the quota file (to identify whose quota was exceeded).
  Anyway, any identity more complicated than just a number needs changes in
both quota file format and filesystems so at that moment, we can also
change the netlink interface...

> Sure, and in many ways.  But if working with NFS, as far as I know the
> most common way to solve it is to enforce a common /etc/passwd across
> all the valid NFS clients  :)
  Then one wonders whether user namespaces are really what users want ;).

								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-05 13:32                           ` Jan Kara
@ 2007-09-05 14:28                             ` Serge E. Hallyn
  0 siblings, 0 replies; 33+ messages in thread
From: Serge E. Hallyn @ 2007-09-05 14:28 UTC (permalink / raw)
  To: Jan Kara
  Cc: Serge E. Hallyn, Eric W. Biederman, Andrew Morton, linux-kernel,
	Balbir Singh, containers, Daniel Lezcano

Quoting Jan Kara (jack@suse.cz):
> On Tue 04-09-07 18:48:52, Serge E. Hallyn wrote:
> > Quoting Jan Kara (jack@suse.cz):
> > > On Tue 04-09-07 16:32:10, Serge E. Hallyn wrote:
> > > > Quoting Jan Kara (jack@suse.cz):
> > > > > On Thu 30-08-07 17:14:47, Serge E. Hallyn wrote:
> > > > > > Quoting Jan Kara (jack@suse.cz):
> > > > > > >   I imagine it so that you have a machine and on it several virtual
> > > > > > > machines which are sharing a filesystem (or it could be a cluster). Now you
> > > > > > > want UIDs to be independent between these virtual machines. That's it,
> > > > > > > right?
> > > > > > >   Now to continue the example: Alice has UID 100 on machineA, Bob has
> > > > > > >  UID 100 on machineB. These translate to UIDs 1000 and 1001 on the common
> > > > > > > filesystem. Process of Alice writes to a file and Bob becomes to be over
> > > > > > > quota. In this situation, there would be probably two processes (from
> > > > > > > machineA and machineB) listening on the netlink socket. We want to send a
> > > > > > > message so that on Alice's desktop we can show a message: "You caused
> > > > > > > Bob to exceed his quotas" and of Bob's desktop: "Alice has caused that you
> > > > > > > are over quota.".
> > > > > > 
> > > > > > Since this is over NFS, you handle it the way you would any other time
> > > > > > that user Alice on some other machine managed to do this.
> > > > >   I meant this would actually happen over a local filesystem (imagine
> > > > > something like "hostfs" from UML).
> > > > 
> > > > Ok, then that is where I was previously suggesting that we use an api to
> > > > report a uid meaningful in bob's context, where we currently (in the
> > > > absense of meaningful mount uids and uid equivalence) tell Bob that root
> > > > was the one who brought him over quota.  From a user pov 'nobody' would
> > > > make more sense, but I don't think we want the kernel to know about user
> > > > nobody, right?
> > >   But what is the problem with using the filesystem ids? All virtual
> > > machines in my example should have a notion of those...
> > 
> > I don't know what you mean by filesystem ids.  Do you mean the uid
> > stored on the fs?  I imagine a network fs could get fancy and store
> > something more detailed than the unix uid, based on the user's keys.
> > 
> > Do you mean the inode->i_uid?  Nothing wrong with that.  Then we just
> > assume that either you are in the superblock or mount's user namespace
> > (depending on how we implement it, probably superblock), or can figure
> > out what that is.
>   I meant the identity the process uses to access the filesystem (to
> identify the user who caused the limit excess) and also the identity stored
> in the quota file (to identify whose quota was exceeded).
>   Anyway, any identity more complicated than just a number needs changes in
> both quota file format and filesystems so at that moment, we can also
> change the netlink interface...
> 
> > Sure, and in many ways.  But if working with NFS, as far as I know the
> > most common way to solve it is to enforce a common /etc/passwd across
> > all the valid NFS clients  :)
>   Then one wonders whether user namespaces are really what users want ;).

Absolutely.

You use nfs to share filesystems among separate machines that you want
to have look similar.

You use user namespaces to pretend one machine is a bunch of separate
machines.  So if you're just going to split up your machine into 5
vms and then have them all share disk over nfs, you may just want to
keep it as one machine :)

Ideally each vm would have completely separate disk space, so file
access across user namespaces wouldn't happen.  More realistically,
file trees will be shared read-only - i.e. /lib, /usr, etc.  Some of
that can be handled simply using read-only bind mounts.  We'd like
to allow users to create vm's as well, so then we want uid 500 in
the initial user namespace to be uid 0 in a newly created user
namespace.

So what Eric and I are worried about are corner cases and admin
mistakes, not regular function.

(And again I really do think we'll want to tie netlink sockets to a user
namespace, not a network namespace, so there may be no issue at all
so long as proper filesystem access checks are implemented so that every
action on some filesystem is done with credentials valid in that
filesystem's user namespace)

-serge

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-28 14:13 [PATCH] Send quota messages via netlink Jan Kara
  2007-08-29  4:13 ` Andrew Morton
@ 2007-08-29  4:51 ` Andrew Morton
  2007-08-29 10:03   ` Jan Kara
  2007-09-03 14:43   ` Jan Kara
  1 sibling, 2 replies; 33+ messages in thread
From: Andrew Morton @ 2007-08-29  4:51 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-kernel

On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:

> +static void send_warning(const struct dquot *dquot, const char warntype)
> +{
> +	static unsigned long seq;
> +	struct sk_buff *skb;
> +	void *msg_head;
> +	int ret;
> +
> +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
> +	if (!skb) {
> +		printk(KERN_ERR
> +		  "VFS: Not enough memory to send quota warning.\n");
> +		return;
> +	}
> +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);

The access to seq is racy, isn't it?

If so, that can be solved with a lock, or with atomic_add_return().

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  4:51 ` Andrew Morton
@ 2007-08-29 10:03   ` Jan Kara
  2007-09-03 14:43   ` Jan Kara
  1 sibling, 0 replies; 33+ messages in thread
From: Jan Kara @ 2007-08-29 10:03 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

On Tue 28-08-07 21:51:28, Andrew Morton wrote:
> On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
> 
> > +static void send_warning(const struct dquot *dquot, const char warntype)
> > +{
> > +	static unsigned long seq;
> > +	struct sk_buff *skb;
> > +	void *msg_head;
> > +	int ret;
> > +
> > +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
> > +	if (!skb) {
> > +		printk(KERN_ERR
> > +		  "VFS: Not enough memory to send quota warning.\n");
> > +		return;
> > +	}
> > +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
> 
> The access to seq is racy, isn't it?
> 
> If so, that can be solved with a lock, or with atomic_add_return().
  You're right. I've made atomic_t from seq. Thanks for spotting this.

										Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-08-29  4:51 ` Andrew Morton
  2007-08-29 10:03   ` Jan Kara
@ 2007-09-03 14:43   ` Jan Kara
  2007-09-03 17:12     ` Randy Dunlap
  1 sibling, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-09-03 14:43 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1454 bytes --]

On Tue 28-08-07 21:51:28, Andrew Morton wrote:
> On Tue, 28 Aug 2007 16:13:18 +0200 Jan Kara <jack@suse.cz> wrote:
> 
> > +static void send_warning(const struct dquot *dquot, const char warntype)
> > +{
> > +	static unsigned long seq;
> > +	struct sk_buff *skb;
> > +	void *msg_head;
> > +	int ret;
> > +
> > +	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
> > +	if (!skb) {
> > +		printk(KERN_ERR
> > +		  "VFS: Not enough memory to send quota warning.\n");
> > +		return;
> > +	}
> > +	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
> 
> The access to seq is racy, isn't it?
> 
> If so, that can be solved with a lock, or with atomic_add_return().
  Attached is an incremental patch solving the issues you've spotted.
Thanks for review.  The result of the discussion with namespace guys is
that the id used as an identity for filesystem operations should be fine.
If it will ever be something different than a number, we can change the
protocol which should be no problem...
  Also after some more reading, I've found out that we can even easily find
out, which attributes have been sent in the netlink message. So I don't see
a real reason for some versioning of the protocol - either the message has
all the attributes we are interested in and then we report it, or it does not
and then we complain that tools are too old and don't understand the
protocol...

									Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

[-- Attachment #2: quota-2.6.23-rc4-1-quota_messages_update.diff --]
[-- Type: text/x-patch, Size: 4774 bytes --]

Add documentation of quota netlink interface, fix possible race in handling of
packet numbers, add a comment about GFP_NOFS allocation.

Signed-off-by: Jan Kara <jack@suse.cz>

diff -NrupX /home/jack/.kerndiffexclude linux-2.6.23-rc4-1-quota_messages_old/Documentation/filesystems/quota.txt linux-2.6.23-rc4-1-quota_messages/Documentation/filesystems/quota.txt
--- linux-2.6.23-rc4-1-quota_messages_old/Documentation/filesystems/quota.txt	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.23-rc4-1-quota_messages/Documentation/filesystems/quota.txt	2007-08-30 16:23:55.000000000 +0200
@@ -0,0 +1,59 @@
+
+Quota subsystem
+===============
+
+Quota subsystem allows system administrator to set limits on used space and
+number of used inodes (inode is a filesystem structure which is associated
+with each file or directory) for users and/or groups. For both used space and
+number of used inodes there are actually two limits. The first one is called
+softlimit and the second one hardlimit.  A user can never exceed a hardlimit
+for any resource. User is allowed to exceed softlimit but only for limited
+period of time. This period is called "grace period" or "grace time". When
+grace time is over, user is not able to allocate more space/inodes until he
+frees enough of them to get below softlimit.
+
+Quota limits (and amount of grace time) are set independently for each
+filesystem.
+
+For more details about quota design, see the documentation in quota-tools package
+(http://sourceforge.net/projects/linuxquota).
+
+Quota netlink interface
+=======================
+When user exceeds a softlimit, runs out of grace time or reaches hardlimit,
+quota subsystem traditionally printed a message to the controlling terminal of
+the process which caused the excess. This method has the disadvantage that
+when user is using a graphical desktop he usually cannot see the message.
+Thus quota netlink interface has been designed to pass information about
+the above events to userspace. There they can be captured by an application
+and processed accordingly.
+
+The interface uses generic netlink framework (see
+http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more
+details about this layer). The name of the quota generic netlink interface
+is "VFS_DQUOT". Definitions of constants below are in <linux/quota.h>.
+  Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
+This command is used to send a notification about any of the above mentioned
+events. Each message has six attributes. These are (type of the argument is
+in parentheses):
+        QUOTA_NL_A_QTYPE (u32)
+	  - type of quota being exceeded (one of USRQUOTA, GRPQUOTA)
+        QUOTA_NL_A_EXCESS_ID (u64)
+	  - UID/GID (depends on quota type) of user / group whose limit
+	    is being exceeded.
+        QUOTA_NL_A_CAUSED_ID (u64)
+	  - UID of a user who caused the event
+        QUOTA_NL_A_WARNING (u32)
+	  - what kind of limit is exceeded:
+		QUOTA_NL_IHARDWARN - inode hardlimit
+		QUOTA_NL_ISOFTLONGWARN - inode softlimit is exceeded longer
+		  than given grace period
+		QUOTA_NL_ISOFTWARN - inode softlimit
+		QUOTA_NL_BHARDWARN - space (block) hardlimit
+		QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded
+		  longer than given grace period.
+		QUOTA_NL_BSOFTWARN - space (block) softlimit
+        QUOTA_NL_A_DEV_MAJOR (u32)
+	  - major number of a device with the affected filesystem
+        QUOTA_NL_A_DEV_MINOR (u32)
+	  - minor number of a device with the affected filesystem
diff -NrupX /home/jack/.kerndiffexclude linux-2.6.23-rc4-1-quota_messages_old/fs/dquot.c linux-2.6.23-rc4-1-quota_messages/fs/dquot.c
--- linux-2.6.23-rc4-1-quota_messages_old/fs/dquot.c	2007-09-03 16:25:21.000000000 +0200
+++ linux-2.6.23-rc4-1-quota_messages/fs/dquot.c	2007-08-29 19:42:23.000000000 +0200
@@ -910,18 +910,22 @@ static struct genl_family quota_genl_fam
 /* Send warning to userspace about user which exceeded quota */
 static void send_warning(const struct dquot *dquot, const char warntype)
 {
-	static unsigned long seq;
+	static atomic_t seq;
 	struct sk_buff *skb;
 	void *msg_head;
 	int ret;
 
+	/* We have to allocate using GFP_NOFS as we are called from a
+	 * filesystem performing write and thus further recursion into
+	 * the fs to free some data could cause deadlocks. */
 	skb = genlmsg_new(QUOTA_NL_MSG_SIZE, GFP_NOFS);
 	if (!skb) {
 		printk(KERN_ERR
 		  "VFS: Not enough memory to send quota warning.\n");
 		return;
 	}
-	msg_head = genlmsg_put(skb, 0, seq++, &quota_genl_family, 0, QUOTA_NL_C_WARNING);
+	msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq),
+			&quota_genl_family, 0, QUOTA_NL_C_WARNING);
 	if (!msg_head) {
 		printk(KERN_ERR
 		  "VFS: Cannot store netlink header in quota warning.\n");

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-03 14:43   ` Jan Kara
@ 2007-09-03 17:12     ` Randy Dunlap
  2007-09-03 17:48       ` Jan Kara
  0 siblings, 1 reply; 33+ messages in thread
From: Randy Dunlap @ 2007-09-03 17:12 UTC (permalink / raw)
  To: Jan Kara; +Cc: Andrew Morton, linux-kernel

On Mon, 3 Sep 2007 16:43:36 +0200 Jan Kara wrote:

> User-Agent: Mutt/1.5.13 (2006-08-11)

Mutt knows how to send patches inline vs. attachments... :(


Anyway, on to the patch.  Thanks for adding the new doc file.


+This command is used to send a notification about any of the above mentioned
+events. Each message has six attributes. These are (type of the argument is
+in braces):

s/braces/parentheses/

+        QUOTA_NL_A_QTYPE (u32)
+	  - type of quota beging exceeded (one of USRQUOTA, GRPQUOTA)

s/beging/being/


---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-03 17:12     ` Randy Dunlap
@ 2007-09-03 17:48       ` Jan Kara
  2007-09-03 18:41         ` Andrew Morton
  0 siblings, 1 reply; 33+ messages in thread
From: Jan Kara @ 2007-09-03 17:48 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Andrew Morton, linux-kernel

On Mon 03-09-07 10:12:34, Randy Dunlap wrote:
> On Mon, 3 Sep 2007 16:43:36 +0200 Jan Kara wrote:
> 
> > User-Agent: Mutt/1.5.13 (2006-08-11)
> 
> Mutt knows how to send patches inline vs. attachments... :(
  Hmm, I thought Andrew either does not mind or prefers attachments. If
it isn't the case, I can inline patches. Andrew? BTW: I personally prefer
attachments - mutt inlines text attachments for me anyway and sometimes
it's useful to have the file structured by MIME.

> +This command is used to send a notification about any of the above mentioned
> +events. Each message has six attributes. These are (type of the argument is
> +in braces):
> 
> s/braces/parentheses/
> 
> +        QUOTA_NL_A_QTYPE (u32)
> +	  - type of quota beging exceeded (one of USRQUOTA, GRPQUOTA)
> 
> s/beging/being/
  Thanks for reading it. Andrew, should I resend the patch or will you
substitute it in the patch?

									Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Send quota messages via netlink
  2007-09-03 17:48       ` Jan Kara
@ 2007-09-03 18:41         ` Andrew Morton
  0 siblings, 0 replies; 33+ messages in thread
From: Andrew Morton @ 2007-09-03 18:41 UTC (permalink / raw)
  To: Jan Kara; +Cc: randy.dunlap, linux-kernel

> On Mon, 3 Sep 2007 19:48:46 +0200 Jan Kara <jack@suse.cz> wrote:
> On Mon 03-09-07 10:12:34, Randy Dunlap wrote:
> > On Mon, 3 Sep 2007 16:43:36 +0200 Jan Kara wrote:
> > 
> > > User-Agent: Mutt/1.5.13 (2006-08-11)
> > 
> > Mutt knows how to send patches inline vs. attachments... :(
>   Hmm, I thought Andrew either does not mind or prefers attachments. If
> it isn't the case, I can inline patches. Andrew? BTW: I personally prefer
> attachments - mutt inlines text attachments for me anyway and sometimes
> it's useful to have the file structured by MIME.

inlined is a bit better, mainly because one can reply to it and the email
client will quote the whole thing.

> > +This command is used to send a notification about any of the above mentioned
> > +events. Each message has six attributes. These are (type of the argument is
> > +in braces):
> > 
> > s/braces/parentheses/
> > 
> > +        QUOTA_NL_A_QTYPE (u32)
> > +	  - type of quota beging exceeded (one of USRQUOTA, GRPQUOTA)
> > 
> > s/beging/being/
>   Thanks for reading it. Andrew, should I resend the patch or will you
> substitute it in the patch?

I'll sort it out, thanks.

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2007-09-05 14:28 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-28 14:13 [PATCH] Send quota messages via netlink Jan Kara
2007-08-29  4:13 ` Andrew Morton
2007-08-29  4:54   ` David Miller
2007-08-29  5:41   ` Eric W. Biederman
2007-08-29  6:30   ` Balbir Singh
2007-08-29 12:46     ` Jan Kara
2007-08-31  6:59       ` Balbir Singh
2007-09-03 10:18         ` Jan Kara
2007-08-29 12:26   ` Jan Kara
2007-08-29 15:57     ` Randy Dunlap
2007-08-29 18:31     ` Eric W. Biederman
2007-08-29 19:26       ` Jan Kara
2007-08-29 21:06         ` Eric W. Biederman
2007-08-29 21:19           ` Valdis.Kletnieks
2007-08-30  9:25           ` Jan Kara
2007-08-30 17:33             ` Eric W. Biederman
2007-08-30 18:54               ` Serge E. Hallyn
2007-08-30 19:18               ` Serge E. Hallyn
2007-08-30 19:10             ` Serge E. Hallyn
2007-08-30 22:18               ` Jan Kara
2007-08-30 22:14                 ` Serge E. Hallyn
2007-09-03 14:21                   ` Jan Kara
2007-09-04 21:32                     ` Serge E. Hallyn
2007-09-04 22:49                       ` Jan Kara
2007-09-04 23:48                         ` Serge E. Hallyn
2007-09-05 13:32                           ` Jan Kara
2007-09-05 14:28                             ` Serge E. Hallyn
2007-08-29  4:51 ` Andrew Morton
2007-08-29 10:03   ` Jan Kara
2007-09-03 14:43   ` Jan Kara
2007-09-03 17:12     ` Randy Dunlap
2007-09-03 17:48       ` Jan Kara
2007-09-03 18:41         ` Andrew Morton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox