* [PATCH 1/4] hashtable: Add __HASHTABLE_INITIALIZER
[not found] ` <1467207425-22072-1-git-send-email-kernel-6AxghH7DbtA@public.gmane.org>
@ 2016-06-29 13:37 ` Nikolay Borisov
2016-06-29 13:37 ` [PATCH 2/4] misc: Rename the HASH_SIZE macro Nikolay Borisov
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Nikolay Borisov @ 2016-06-29 13:37 UTC (permalink / raw)
To: ebiederm-aS9lmoZGLiVWk0Htik3J/w
Cc: jack-AlSwsSmVLrQ, avagin-GEFAQzZX7r8dnm+yROfE0A,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA,
operations-/eCPMmvKun9pLGFMi4vTTA, Nikolay Borisov,
gorcunov-GEFAQzZX7r8dnm+yROfE0A
This is used so that one can initialize a hashtbale declared in a
struct.
Signed-off-by: Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org>
---
include/linux/hashtable.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 661e5c2a8e2a..92d6a791b218 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -23,6 +23,9 @@
#define DECLARE_HASHTABLE(name, bits) \
struct hlist_head name[1 << (bits)]
+#define __HASHTABLE_INITIALIZER(bits) \
+ { [0 ... (( 1 << bits)) - 1] = HLIST_HEAD_INIT }
+
#define HASH_SIZE(name) (ARRAY_SIZE(name))
#define HASH_BITS(name) ilog2(HASH_SIZE(name))
--
2.5.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 2/4] misc: Rename the HASH_SIZE macro
[not found] ` <1467207425-22072-1-git-send-email-kernel-6AxghH7DbtA@public.gmane.org>
2016-06-29 13:37 ` [PATCH 1/4] hashtable: Add __HASHTABLE_INITIALIZER Nikolay Borisov
@ 2016-06-29 13:37 ` Nikolay Borisov
2016-06-29 13:37 ` [PATCH 3/4] userns/inotify: Initial implementation of inotify per-userns Nikolay Borisov
` (2 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Nikolay Borisov @ 2016-06-29 13:37 UTC (permalink / raw)
To: ebiederm-aS9lmoZGLiVWk0Htik3J/w
Cc: jack-AlSwsSmVLrQ, avagin-GEFAQzZX7r8dnm+yROfE0A,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA,
operations-/eCPMmvKun9pLGFMi4vTTA, Nikolay Borisov,
gorcunov-GEFAQzZX7r8dnm+yROfE0A
This change is required since the inotify-per-namespace code added
hashtable.h to the include list of sched.h. This in turn causes
compiler warnings since HASH_SIZE is being defined in multiple
locations
Signed-off-by: Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org>
Acked-by: David S. Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
---
fs/logfs/dir.c | 6 +++---
net/ipv6/ip6_gre.c | 8 ++++----
net/ipv6/ip6_tunnel.c | 10 +++++-----
net/ipv6/ip6_vti.c | 10 +++++-----
net/ipv6/sit.c | 10 +++++-----
security/keys/encrypted-keys/encrypted.c | 32 ++++++++++++++++----------------
6 files changed, 38 insertions(+), 38 deletions(-)
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 2d5336bd4efd..bcd754d216bd 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -95,7 +95,7 @@ static int beyond_eof(struct inode *inode, loff_t bix)
* of each character and pick a prime nearby, preferably a bit-sparse
* one.
*/
-static u32 hash_32(const char *s, int len, u32 seed)
+static u32 logfs_hash_32(const char *s, int len, u32 seed)
{
u32 hash = seed;
int i;
@@ -159,7 +159,7 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
struct qstr *name = &dentry->d_name;
struct page *page;
struct logfs_disk_dentry *dd;
- u32 hash = hash_32(name->name, name->len, 0);
+ u32 hash = logfs_hash_32(name->name, name->len, 0);
pgoff_t index;
int round;
@@ -370,7 +370,7 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
{
struct page *page;
struct logfs_disk_dentry *dd;
- u32 hash = hash_32(dentry->d_name.name, dentry->d_name.len, 0);
+ u32 hash = logfs_hash_32(dentry->d_name.name, dentry->d_name.len, 0);
pgoff_t index;
int round, err;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index fdc9de276ab1..56bb4df088cd 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -62,11 +62,11 @@ module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6G_HASH_SIZE (1 << HASH_SIZE_SHIFT)
static int ip6gre_net_id __read_mostly;
struct ip6gre_net {
- struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+ struct ip6_tnl __rcu *tunnels[4][IP6G_HASH_SIZE];
struct net_device *fb_tunnel_dev;
};
@@ -96,7 +96,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
will match fallback tunnel.
*/
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6G_HASH_SIZE - 1))
static u32 HASH_ADDR(const struct in6_addr *addr)
{
u32 hash = ipv6_addr_hash(addr);
@@ -1089,7 +1089,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for (prio = 0; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6G_HASH_SIZE; h++) {
struct ip6_tnl *t;
t = rtnl_dereference(ign->tunnels[prio][h]);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 7b0481e3738f..50b57a435f05 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -64,8 +64,8 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ip6tnl");
MODULE_ALIAS_NETDEV("ip6tnl0");
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_HASH_SIZE_SHIFT 5
+#define IP6_HASH_SIZE (1 << IP6_HASH_SIZE_SHIFT)
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
@@ -75,7 +75,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, IP6_HASH_SIZE_SHIFT);
}
static int ip6_tnl_dev_init(struct net_device *dev);
@@ -87,7 +87,7 @@ struct ip6_tnl_net {
/* the IPv6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[IP6_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
};
@@ -2031,7 +2031,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
if (dev->rtnl_link_ops == &ip6_link_ops)
unregister_netdevice_queue(dev, &list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < IP6_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
/* If dev is in the same netns, it has already
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f14040..30e242140909 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -50,14 +50,14 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#define HASH_SIZE_SHIFT 5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define VTI_HASH_SIZE_SHIFT 5
+#define VTI_HASH_SIZE (1 << VTI_HASH_SIZE_SHIFT)
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
- return hash_32(hash, HASH_SIZE_SHIFT);
+ return hash_32(hash, VTI_HASH_SIZE_SHIFT);
}
static int vti6_dev_init(struct net_device *dev);
@@ -69,7 +69,7 @@ struct vti6_net {
/* the vti6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
- struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ struct ip6_tnl __rcu *tnls_r_l[VTI_HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
};
@@ -1040,7 +1040,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
struct ip6_tnl *t;
LIST_HEAD(list);
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
unregister_netdevice_queue(t->dev, &list);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0a5a255277e5..757ec087ce01 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -62,7 +62,7 @@
For comments look at net/ipv4/ip_gre.c --ANK
*/
-#define HASH_SIZE 16
+#define SIT_HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
static bool log_ecn_error = true;
@@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly;
static int sit_net_id __read_mostly;
struct sit_net {
- struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r_l[SIT_HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_r[SIT_HASH_SIZE];
+ struct ip_tunnel __rcu *tunnels_l[SIT_HASH_SIZE];
struct ip_tunnel __rcu *tunnels_wc[1];
struct ip_tunnel __rcu **tunnels[4];
@@ -1773,7 +1773,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
for (prio = 1; prio < 4; prio++) {
int h;
- for (h = 0; h < HASH_SIZE; h++) {
+ for (h = 0; h < SIT_HASH_SIZE; h++) {
struct ip_tunnel *t;
t = rtnl_dereference(sitn->tunnels[prio][h]);
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 5adbfc32242f..1c2271db2918 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -49,7 +49,7 @@ static int blksize;
#define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1)
#define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1)
#define KEY_ECRYPTFS_DESC_LEN 16
-#define HASH_SIZE SHA256_DIGEST_SIZE
+#define E_HASH_SIZE SHA256_DIGEST_SIZE
#define MAX_DATA_SIZE 4096
#define MIN_DATA_SIZE 20
@@ -380,8 +380,8 @@ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type,
int ret;
derived_buf_len = strlen("AUTH_KEY") + 1 + master_keylen;
- if (derived_buf_len < HASH_SIZE)
- derived_buf_len = HASH_SIZE;
+ if (derived_buf_len < E_HASH_SIZE)
+ derived_buf_len = E_HASH_SIZE;
derived_buf = kzalloc(derived_buf_len, GFP_KERNEL);
if (!derived_buf) {
@@ -517,7 +517,7 @@ out:
static int datablob_hmac_append(struct encrypted_key_payload *epayload,
const u8 *master_key, size_t master_keylen)
{
- u8 derived_key[HASH_SIZE];
+ u8 derived_key[E_HASH_SIZE];
u8 *digest;
int ret;
@@ -529,7 +529,7 @@ static int datablob_hmac_append(struct encrypted_key_payload *epayload,
ret = calc_hmac(digest, derived_key, sizeof derived_key,
epayload->format, epayload->datablob_len);
if (!ret)
- dump_hmac(NULL, digest, HASH_SIZE);
+ dump_hmac(NULL, digest, E_HASH_SIZE);
out:
return ret;
}
@@ -539,8 +539,8 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
const u8 *format, const u8 *master_key,
size_t master_keylen)
{
- u8 derived_key[HASH_SIZE];
- u8 digest[HASH_SIZE];
+ u8 derived_key[E_HASH_SIZE];
+ u8 digest[E_HASH_SIZE];
int ret;
char *p;
unsigned short len;
@@ -565,8 +565,8 @@ static int datablob_hmac_verify(struct encrypted_key_payload *epayload,
ret = -EINVAL;
dump_hmac("datablob",
epayload->format + epayload->datablob_len,
- HASH_SIZE);
- dump_hmac("calc", digest, HASH_SIZE);
+ E_HASH_SIZE);
+ dump_hmac("calc", digest, E_HASH_SIZE);
}
out:
return ret;
@@ -651,12 +651,12 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key,
+ strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen;
ret = key_payload_reserve(key, payload_datalen + datablob_len
- + HASH_SIZE + 1);
+ + E_HASH_SIZE + 1);
if (ret < 0)
return ERR_PTR(ret);
epayload = kzalloc(sizeof(*epayload) + payload_datalen +
- datablob_len + HASH_SIZE + 1, GFP_KERNEL);
+ datablob_len + E_HASH_SIZE + 1, GFP_KERNEL);
if (!epayload)
return ERR_PTR(-ENOMEM);
@@ -670,7 +670,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
const char *format, const char *hex_encoded_iv)
{
struct key *mkey;
- u8 derived_key[HASH_SIZE];
+ u8 derived_key[E_HASH_SIZE];
const u8 *master_key;
u8 *hmac;
const char *hex_encoded_data;
@@ -680,7 +680,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
int ret;
encrypted_datalen = roundup(epayload->decrypted_datalen, blksize);
- asciilen = (ivsize + 1 + encrypted_datalen + HASH_SIZE) * 2;
+ asciilen = (ivsize + 1 + encrypted_datalen + E_HASH_SIZE) * 2;
if (strlen(hex_encoded_iv) != asciilen)
return -EINVAL;
@@ -695,7 +695,7 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload,
hmac = epayload->format + epayload->datablob_len;
ret = hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2),
- HASH_SIZE);
+ E_HASH_SIZE);
if (ret < 0)
return -EINVAL;
@@ -918,7 +918,7 @@ static long encrypted_read(const struct key *key, char __user *buffer,
struct key *mkey;
const u8 *master_key;
size_t master_keylen;
- char derived_key[HASH_SIZE];
+ char derived_key[E_HASH_SIZE];
char *ascii_buf;
size_t asciiblob_len;
int ret;
@@ -928,7 +928,7 @@ static long encrypted_read(const struct key *key, char __user *buffer,
/* returns the hex encoded iv, encrypted-data, and hmac as ascii */
asciiblob_len = epayload->datablob_len + ivsize + 1
+ roundup(epayload->decrypted_datalen, blksize)
- + (HASH_SIZE * 2);
+ + (E_HASH_SIZE * 2);
if (!buffer || buflen < asciiblob_len)
return asciiblob_len;
--
2.5.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 3/4] userns/inotify: Initial implementation of inotify per-userns
[not found] ` <1467207425-22072-1-git-send-email-kernel-6AxghH7DbtA@public.gmane.org>
2016-06-29 13:37 ` [PATCH 1/4] hashtable: Add __HASHTABLE_INITIALIZER Nikolay Borisov
2016-06-29 13:37 ` [PATCH 2/4] misc: Rename the HASH_SIZE macro Nikolay Borisov
@ 2016-06-29 13:37 ` Nikolay Borisov
[not found] ` <1467207425-22072-4-git-send-email-kernel-6AxghH7DbtA@public.gmane.org>
2016-06-29 13:37 ` [PATCH 4/4] inotify: Convert to using new userns infrastructure Nikolay Borisov
2016-07-06 16:47 ` [RFC PATCH 0/4 v2] Inotify limits per usernamespace Eric W. Biederman
4 siblings, 1 reply; 11+ messages in thread
From: Nikolay Borisov @ 2016-06-29 13:37 UTC (permalink / raw)
To: ebiederm-aS9lmoZGLiVWk0Htik3J/w
Cc: jack-AlSwsSmVLrQ, avagin-GEFAQzZX7r8dnm+yROfE0A,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA,
operations-/eCPMmvKun9pLGFMi4vTTA, Nikolay Borisov,
gorcunov-GEFAQzZX7r8dnm+yROfE0A
This is still very much a WIP and there are certain things which need
to be addressed:
Changes include:
* Added 2 new sysctls:
- inotify_reserved_user_instances and inotify_reserved_user_watches
these essentially control the distribution of instances/watches
down the hierarchy. For example if we have instances/watches limit of
1024/256 and reserved instances/watches are set to 128/32 then at every
level of the hierarchy instances/watches are going to be reduced by 128/32,
so at userns level of 1 (e.g. init_user_ns->level_1_user_ns) each user would
have 896/224 respectively. Currently the defaults are calculated so that at
least 8 levels of indirection are allowed. Those can be set only by global
root user.
* When a user inside a NS creates an inotify context for the first time,
the code locks the whole namespace hierarchy up to the parent of the last
namespace which has a mapping for a user. And then proceeds to create all
the intermediary levels. And then unlocks them, this is a bit cumbersome
IMO but at least ensures that no hard-to-trace races occur.
There are also some things which need to be considered:
* One problem currently not resovled is the cleanup of the intermediary
state in namespaces. E.g. if we have a hierarchy of 4 namespaces and an
inotify instance is created in the bottom-most namespace, and later is
destroyed this guarantees that only the state in the bottom-most ns is
freed and not in the intermediary nodes. I guess some sort of hierarchical
mechanism is needed to connect several inotify_state structs. I'm very
much open to discussing how to fix this. Also due to the way locks are
acquired lockdep prints a splat, but it is a false-positive.
* Another thing which I think needs serious consideration is the semantic
of the reserved sysctls. What should happen when they are changed - should
those changes propagate to existing counter or shouldn't they? Depending
on the outcome of that decision it's possible to blow the complexity of the
solution through the roof.
Signed-off-by: Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org>
---
fs/notify/inotify/inotify.h | 25 +++++
fs/notify/inotify/inotify_user.c | 194 +++++++++++++++++++++++++++++++++++++++
include/linux/fsnotify_backend.h | 3 +
include/linux/user_namespace.h | 10 ++
kernel/user.c | 5 +
kernel/user_namespace.c | 22 ++++-
6 files changed, 258 insertions(+), 1 deletion(-)
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ed855ef6f077..64dd281e28a4 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -1,6 +1,8 @@
#include <linux/fsnotify_backend.h>
#include <linux/inotify.h>
#include <linux/slab.h> /* struct kmem_cache */
+#include <linux/page_counter.h>
+#include <linux/user_namespace.h>
struct inotify_event_info {
struct fsnotify_event fse;
@@ -15,6 +17,14 @@ struct inotify_inode_mark {
int wd;
};
+struct inotify_state {
+ struct hlist_node node;
+ uid_t uid; /* user_namespace ptr */
+ struct page_counter watches; /* How many inotify watches does this user */
+ struct page_counter instances; /* How many inotify devs does this user have opened? */
+};
+
+
static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
{
return container_of(fse, struct inotify_event_info, fse);
@@ -30,3 +40,18 @@ extern int inotify_handle_event(struct fsnotify_group *group,
const unsigned char *file_name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;
+
+/* Helpers for manipulating various inotify state, stored in user_struct */
+static inline struct inotify_state *__find_inotify_state(struct user_namespace *ns,
+ uid_t uid)
+{
+ struct inotify_state *state;
+
+ WARN_ON(!mutex_is_locked(&ns->inotify_lock));
+
+ hash_for_each_possible(ns->inotify_counts, state, node, uid)
+ if (state->uid == uid)
+ return state;
+
+ return NULL;
+}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index b8d08d0d0a4d..06797ae76527 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -48,6 +48,8 @@
static int inotify_max_user_instances __read_mostly;
static int inotify_max_queued_events __read_mostly;
static int inotify_max_user_watches __read_mostly;
+int inotify_reserved_user_instances __read_mostly;
+int inotify_reserved_user_watches __read_mostly;
static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
@@ -57,6 +59,17 @@ static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
static int zero;
+
+static int proc_dointvec_minmax_root(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ /* Allows writes only from the root user of the machine */
+ if (write && !uid_eq(current_uid(), GLOBAL_ROOT_UID))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
@@ -82,10 +95,186 @@ struct ctl_table inotify_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero
},
+ {
+ .procname = "reserved_user_instances",
+ .data = &inotify_reserved_user_instances,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_root,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "reserved_user_watches",
+ .data = &inotify_reserved_user_watches,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax_root,
+ .extra1 = &zero,
+ },
{ }
};
#endif /* CONFIG_SYSCTL */
+static inline void __init_counters(struct inotify_state *state,
+ struct inotify_state *parent,
+ struct user_namespace *ns)
+{
+ if (ns == &init_user_ns) {
+ page_counter_init(&state->watches, NULL);
+ page_counter_init(&state->instances, NULL);
+ page_counter_limit(&state->watches,
+ init_user_ns.inotify_max_user_watches);
+ page_counter_limit(&state->instances,
+ init_user_ns.inotify_max_user_instances);
+ } else {
+
+ page_counter_init(&state->watches, &parent->watches);
+ page_counter_init(&state->instances,&parent->instances);
+ page_counter_limit(&state->watches, ns->inotify_max_user_watches);
+ page_counter_limit(&state->instances, ns->inotify_max_user_instances);
+ }
+}
+
+struct creation_ctx {
+ uid_t uid;
+ struct user_namespace *ns;
+};
+
+static noinline int __create_parent_state(struct user_namespace *ns, uid_t uid)
+{
+ int i = 1, j = 0, k = 0;
+ int ret = 0;
+ struct inotify_state *state;
+ struct user_namespace *parent_ns, *current_ns = ns;
+ struct page_counter *cnt;
+ bool locked_parent = false;
+ struct creation_ctx *pns = kzalloc(32*sizeof(struct creation_ctx), GFP_KERNEL);
+ if (!pns)
+ return -ENOMEM;
+
+ pns[0].ns = ns;
+ pns[0].uid = uid;
+
+ /* Walk up the hierarchy to see in which NS we need to build state */
+ for (parent_ns = ns->parent; parent_ns;
+ current_ns = parent_ns, parent_ns = parent_ns->parent) {
+
+ uid_t owner_uid = from_kuid(parent_ns, current_ns->owner);
+
+ mutex_lock(&parent_ns->inotify_lock);
+ state = __find_inotify_state(parent_ns, owner_uid);
+
+ pns[i].ns = parent_ns;
+ pns[i].uid = owner_uid;
+ ++i;
+
+ /* When we stop at a particular level in the hierarchy also make
+ * sure the parent is also locked
+ */
+ if (state) {
+ if (parent_ns->parent) {
+ mutex_lock(&parent_ns->parent->inotify_lock);
+ locked_parent = true;
+ }
+ break;
+ }
+ }
+
+ j = k = i;
+
+ /* For every remembered NS in which we do not have state, create some,
+ * by walking backwards (towards the child namespace we wanted to create
+ * inotify state in the first place)*/
+ for (--i; i!=-1; i--) {
+ uid_t parent_uid;
+ struct inotify_state *parent_state = NULL;
+
+ /* 1. Get state from parent */
+ current_ns = pns[i].ns;
+ if (current_ns != &init_user_ns) {
+ parent_uid = from_kuid(current_ns->parent, current_ns->owner);
+ parent_state = __find_inotify_state(current_ns->parent, parent_uid);
+ BUG_ON(!parent_state);
+ }
+
+ state = kzalloc(sizeof(struct inotify_state), GFP_KERNEL);
+ if (!state) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* 2. Init */
+ state->uid = pns[i].uid;
+ __init_counters(state, parent_state, current_ns);
+
+ /* 3. Finally add to the hash table */
+ hash_add(current_ns->inotify_counts, &state->node, state->uid);
+ }
+
+ state = __find_inotify_state(ns, uid);
+ BUG_ON(!state);
+ BUG_ON(page_counter_read(&state->instances) != 0);
+
+ /* This can fail if the hierarchical limits are exceeded */
+ if (!page_counter_try_charge(&state->instances, 1, &cnt))
+ ret = -EMFILE;
+out:
+ /* This doesn't unlock pns[0] since it is the child ns which is going to
+ * be unlocked in inotify_init_state
+ */
+ for (--j; j; j--) {
+ mutex_unlock(&pns[j].ns->inotify_lock);
+ }
+
+ if (locked_parent) {
+ --k;
+ mutex_unlock(&pns[k].ns->parent->inotify_lock);
+ }
+ kfree(pns);
+ return ret;
+}
+
+static noinline int inotify_init_state(struct user_namespace *ns, uid_t uid)
+{
+ int ret = 0;
+ struct inotify_state *state;
+ struct page_counter *cnt;
+
+ mutex_lock(&ns->inotify_lock);
+ state = __find_inotify_state(ns, uid);
+
+ if (!state) {
+
+ if (ns == &init_user_ns) {
+ state = kzalloc(sizeof(struct inotify_state), GFP_KERNEL);
+ if (!state) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ state->uid = uid;
+ __init_counters(state, NULL, ns);
+ page_counter_charge(&state->instances, 1);
+ hash_add(ns->inotify_counts, &state->node, uid);
+ } else {
+ ret = __create_parent_state(ns, uid);
+ if (ret < 0)
+ goto out;
+ }
+
+ } else {
+ if (!page_counter_try_charge(&state->instances, 1, &cnt)) {
+ ret = -EMFILE;
+ goto out;
+ }
+ }
+
+out:
+ mutex_unlock(&ns->inotify_lock);
+ return ret;
+}
+
+
static inline __u32 inotify_arg_to_mask(u32 arg)
{
__u32 mask;
@@ -821,6 +1010,11 @@ static int __init inotify_user_setup(void)
inotify_max_queued_events = 16384;
inotify_max_user_instances = 128;
inotify_max_user_watches = 8192;
+ init_user_ns.inotify_max_user_instances = 256;
+ init_user_ns.inotify_max_user_watches = 8192;
+ /* These reserves should allow for 8 levels of nesting in userns */
+ inotify_reserved_user_instances = 32;
+ inotify_reserved_user_watches = 1024;
return 0;
}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 29f917517299..a4723b7e08bb 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -170,6 +170,9 @@ struct fsnotify_group {
spinlock_t idr_lock;
struct idr idr;
struct user_struct *user;
+ struct user_namespace *userns;
+ uid_t uid; /* id in the userns this group is
+ associated with */
} inotify_data;
#endif
#ifdef CONFIG_FANOTIFY
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 8297e5b341d8..d34d8ef68e27 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -6,6 +6,8 @@
#include <linux/ns_common.h>
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/hashtable.h>
+#include <linux/spinlock.h>
#define UID_GID_MAP_MAX_EXTENTS 5
@@ -39,6 +41,14 @@ struct user_namespace {
struct key *persistent_keyring_register;
struct rw_semaphore persistent_keyring_register_sem;
#endif
+
+#ifdef CONFIG_INOTIFY_USER
+#define INOTIFY_HASHTABLE_BITS 6
+ struct mutex inotify_lock;
+ DECLARE_HASHTABLE(inotify_counts, INOTIFY_HASHTABLE_BITS);
+ int inotify_max_user_instances;
+ int inotify_max_user_watches;
+#endif
};
extern struct user_namespace init_user_ns;
diff --git a/kernel/user.c b/kernel/user.c
index b069ccbfb0b0..6dcb8eea358d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -59,6 +59,11 @@ struct user_namespace init_user_ns = {
.persistent_keyring_register_sem =
__RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem),
#endif
+
+#ifdef CONFIG_INOTIFY_USER
+ .inotify_lock =__MUTEX_INITIALIZER(init_user_ns.inotify_lock),
+ .inotify_counts = __HASHTABLE_INITIALIZER(INOTIFY_HASHTABLE_BITS),
+#endif
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9bafc211930c..bce1b7427ec4 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -22,10 +22,17 @@
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);
+#ifdef CONFIG_INOTIFY_USER
+extern int inotify_reserved_user_instances;
+extern int inotify_reserved_user_watches;
+#endif
+
static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
struct uid_gid_map *map);
@@ -63,7 +70,9 @@ int create_user_ns(struct cred *new)
kuid_t owner = new->euid;
kgid_t group = new->egid;
int ret;
-
+#ifdef CONFIG_INOTIFY_USER
+ int tmp;
+#endif
if (parent_ns->level > 32)
return -EUSERS;
@@ -112,6 +121,17 @@ int create_user_ns(struct cred *new)
#ifdef CONFIG_PERSISTENT_KEYRINGS
init_rwsem(&ns->persistent_keyring_register_sem);
#endif
+
+#ifdef CONFIG_INOTIFY_USER
+ mutex_init(&ns->inotify_lock);
+ hash_init(ns->inotify_counts);
+
+ tmp = parent_ns->inotify_max_user_instances - inotify_reserved_user_instances;
+ ns->inotify_max_user_instances = max(0, tmp);
+
+ tmp = parent_ns->inotify_max_user_watches - inotify_reserved_user_watches;
+ ns->inotify_max_user_watches = max(0, tmp);
+#endif
return 0;
}
--
2.5.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 4/4] inotify: Convert to using new userns infrastructure
[not found] ` <1467207425-22072-1-git-send-email-kernel-6AxghH7DbtA@public.gmane.org>
` (2 preceding siblings ...)
2016-06-29 13:37 ` [PATCH 3/4] userns/inotify: Initial implementation of inotify per-userns Nikolay Borisov
@ 2016-06-29 13:37 ` Nikolay Borisov
2016-07-06 16:47 ` [RFC PATCH 0/4 v2] Inotify limits per usernamespace Eric W. Biederman
4 siblings, 0 replies; 11+ messages in thread
From: Nikolay Borisov @ 2016-06-29 13:37 UTC (permalink / raw)
To: ebiederm-aS9lmoZGLiVWk0Htik3J/w
Cc: jack-AlSwsSmVLrQ, avagin-GEFAQzZX7r8dnm+yROfE0A,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA,
operations-/eCPMmvKun9pLGFMi4vTTA, Nikolay Borisov,
gorcunov-GEFAQzZX7r8dnm+yROfE0A
Signed-off-by: Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org>
---
fs/notify/inotify/inotify_fsnotify.c | 16 ++++++++++++-
fs/notify/inotify/inotify_user.c | 46 ++++++++++++++++++++++++------------
include/linux/sched.h | 4 ----
3 files changed, 46 insertions(+), 20 deletions(-)
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 2cd900c2c737..bc2441c64809 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -166,7 +166,21 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
idr_for_each(&group->inotify_data.idr, idr_callback, group);
idr_destroy(&group->inotify_data.idr);
if (group->inotify_data.user) {
- atomic_dec(&group->inotify_data.user->inotify_devs);
+ struct inotify_state *state;
+
+ mutex_lock(&group->inotify_data.userns->inotify_lock);
+ state = __find_inotify_state(group->inotify_data.userns,
+ group->inotify_data.uid);
+
+ BUG_ON(!state);
+
+ page_counter_uncharge(&state->instances, 1);
+ if (page_counter_read(&state->instances) == 0) {
+ hash_del(&state->node);
+ kfree(state);
+ }
+ mutex_unlock(&group->inotify_data.userns->inotify_lock);
+
free_uid(group->inotify_data.user);
}
}
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 06797ae76527..9c857428a2e8 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -45,9 +45,7 @@
#include <asm/ioctls.h>
/* these are configurable via /proc/sys/fs/inotify/ */
-static int inotify_max_user_instances __read_mostly;
static int inotify_max_queued_events __read_mostly;
-static int inotify_max_user_watches __read_mostly;
int inotify_reserved_user_instances __read_mostly;
int inotify_reserved_user_watches __read_mostly;
@@ -73,7 +71,7 @@ static int proc_dointvec_minmax_root(struct ctl_table *table, int write,
struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
- .data = &inotify_max_user_instances,
+ .data = &init_user_ns.inotify_max_user_instances,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -81,7 +79,7 @@ struct ctl_table inotify_table[] = {
},
{
.procname = "max_user_watches",
- .data = &inotify_max_user_watches,
+ .data = &init_user_ns.inotify_max_user_watches,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -680,6 +678,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
+ struct inotify_state *state;
/* Queue ignore event for the watch */
inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
@@ -689,7 +688,13 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
/* remove this mark from the idr */
inotify_remove_from_idr(group, i_mark);
- atomic_dec(&group->inotify_data.user->inotify_watches);
+ mutex_lock(&group->inotify_data.userns->inotify_lock);
+ state = __find_inotify_state(group->inotify_data.userns,
+ group->inotify_data.uid);
+ BUG_ON(!state);
+ page_counter_uncharge(&state->watches, 1);
+
+ mutex_unlock(&group->inotify_data.userns->inotify_lock);
}
/* ding dong the mark is dead */
@@ -762,6 +767,8 @@ static int inotify_new_watch(struct fsnotify_group *group,
int ret;
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+ struct inotify_state *state;
+ struct page_counter *cnt;
mask = inotify_arg_to_mask(arg);
@@ -773,10 +780,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
tmp_i_mark->fsn_mark.mask = mask;
tmp_i_mark->wd = -1;
- ret = -ENOSPC;
- if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
- goto out_err;
-
ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
if (ret)
goto out_err;
@@ -791,7 +794,18 @@ static int inotify_new_watch(struct fsnotify_group *group,
}
/* increment the number of watches the user has */
- atomic_inc(&group->inotify_data.user->inotify_watches);
+ mutex_lock(&group->inotify_data.userns->inotify_lock);
+ state = __find_inotify_state(group->inotify_data.userns,
+ group->inotify_data.uid);
+ BUG_ON(!state);
+ ret = -ENOSPC;
+ if (!page_counter_try_charge(&state->watches, 1, &cnt)) {
+ mutex_unlock(&group->inotify_data.userns->inotify_lock);
+ inotify_remove_from_idr(group, tmp_i_mark);
+ goto out_err;
+ }
+
+ mutex_unlock(&group->inotify_data.userns->inotify_lock);
/* return the watch descriptor for this new mark */
ret = tmp_i_mark->wd;
@@ -822,6 +836,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
{
struct fsnotify_group *group;
struct inotify_event_info *oevent;
+ int ret;
group = fsnotify_alloc_group(&inotify_fsnotify_ops);
if (IS_ERR(group))
@@ -843,11 +858,14 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
group->inotify_data.user = get_current_user();
+ group->inotify_data.userns = current_user_ns();
+ group->inotify_data.uid = from_kuid(current_user_ns(), current_uid());
+
+ ret = inotify_init_state(current_user_ns(), group->inotify_data.uid);
- if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
- inotify_max_user_instances) {
+ if (ret < 0) {
fsnotify_destroy_group(group);
- return ERR_PTR(-EMFILE);
+ return ERR_PTR(ret);
}
return group;
@@ -1008,8 +1026,6 @@ static int __init inotify_user_setup(void)
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
inotify_max_queued_events = 16384;
- inotify_max_user_instances = 128;
- inotify_max_user_watches = 8192;
init_user_ns.inotify_max_user_instances = 256;
init_user_ns.inotify_max_user_watches = 8192;
/* These reserves should allow for 8 levels of nesting in userns */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada26345..04ba3443aa36 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -838,10 +838,6 @@ struct user_struct {
atomic_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
-#ifdef CONFIG_INOTIFY_USER
- atomic_t inotify_watches; /* How many inotify watches does this user have? */
- atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
-#endif
#ifdef CONFIG_FANOTIFY
atomic_t fanotify_listeners;
#endif
--
2.5.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [RFC PATCH 0/4 v2] Inotify limits per usernamespace
[not found] ` <1467207425-22072-1-git-send-email-kernel-6AxghH7DbtA@public.gmane.org>
` (3 preceding siblings ...)
2016-06-29 13:37 ` [PATCH 4/4] inotify: Convert to using new userns infrastructure Nikolay Borisov
@ 2016-07-06 16:47 ` Eric W. Biederman
4 siblings, 0 replies; 11+ messages in thread
From: Eric W. Biederman @ 2016-07-06 16:47 UTC (permalink / raw)
To: Nikolay Borisov
Cc: jack-AlSwsSmVLrQ, avagin-GEFAQzZX7r8dnm+yROfE0A,
containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA,
operations-/eCPMmvKun9pLGFMi4vTTA,
gorcunov-GEFAQzZX7r8dnm+yROfE0A
Nikolay Borisov <kernel-6AxghH7DbtA@public.gmane.org> writes:
> So this is the 2nd incarnation of the inotify-limits per namespace,
> following the lenghty discussions with Eric Biederman. The core of
> the series lies in patch 3, as this contains most of the code to
> implement the new semantics. The major difference is now that
> inotify limits are going to be accounted per-user/per-namespace.
>
> Patch 1 adds a __HASHTABLE_INITIALIZER, much in the same way as
> other kernel constructs have, so that one can use them directly
> into structure definitions. It's a self-contained patch
>
> Patch 2 is unchanged from the previous submissions and just
> renames some defines in various networking files, implementing
> their own hashtable as this creates certain warnings due to
> hashtable.h inclusion in linux/user_namespace.h. This has already
> been acked-by David Miller.
>
> Patch 3 is the core, it implements all the necessary changes
> to allow. More information about the implementation in the patch
> changelog. This has been completely changed than in the first
> submission to cope with the requirements that emerged in
> discussion with Eric Biederman.
>
> Patch 4 is plain conversion, to the new interface inotify code
> structure.
>
> The series has received moderate testing in a KVM guest, using
> the stress-ng to create multiple inotify instances and test
> whether the locking is correct which seems to be the case. I've
> tested with 2/3 level hierarchies of namespaces.
Thanks for getting this out. I am just starting to look at these
changes. I have been deep in another set of changes and haven't had the
brain cells to start reviewing this before now.
Eric
> Nikolay Borisov (4):
> hashtable: Add __HASHTABLE_INITIALIZER
> misc: Rename the HASH_SIZE macro
> userns/inotify: Initial implementation of inotify per-userns
> inotify: Convert to using new userns infrastructure
>
> fs/logfs/dir.c | 6 +-
> fs/notify/inotify/inotify.h | 25 ++++
> fs/notify/inotify/inotify_fsnotify.c | 16 ++-
> fs/notify/inotify/inotify_user.c | 240 +++++++++++++++++++++++++++++--
> include/linux/fsnotify_backend.h | 3 +
> include/linux/hashtable.h | 3 +
> include/linux/sched.h | 4 -
> include/linux/user_namespace.h | 10 ++
> kernel/user.c | 5 +
> kernel/user_namespace.c | 22 ++-
> net/ipv6/ip6_gre.c | 8 +-
> net/ipv6/ip6_tunnel.c | 10 +-
> net/ipv6/ip6_vti.c | 10 +-
> net/ipv6/sit.c | 10 +-
> security/keys/encrypted-keys/encrypted.c | 32 ++---
> 15 files changed, 345 insertions(+), 59 deletions(-)
^ permalink raw reply [flat|nested] 11+ messages in thread