* [PATCH 1/3] lib/kobject_uevent.c: disable broadcast of uevents to other namespaces [not found] ` <cover.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> @ 2015-09-09 18:53 ` Michael J. Coss 2015-09-09 18:53 ` [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function Michael J. Coss 2015-09-09 18:53 ` [PATCH 3/3] net/udevns: Netlink module to forward uevent to containers Michael J. Coss 2 siblings, 0 replies; 10+ messages in thread From: Michael J. Coss @ 2015-09-09 18:53 UTC (permalink / raw) To: containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I; +Cc: Michael J. Coss Restrict sending uevents to only those listeners operating in the same network namespace as the system init process. This is the first step toward allowing policy control of the forwarding of events to other namespaces in userspace. Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> --- lib/kobject_uevent.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index f6c2c1e..d791e33 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -295,6 +295,10 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, if (!netlink_has_listeners(uevent_sock, 1)) continue; + /* forward event only to the host systems network namespaces */ + if (!net_eq(sock_net(uevent_sock), &init_net)) + continue; + /* allocate message with the maximum possible size */ len = strlen(action_string) + strlen(devpath) + 2; skb = alloc_skb(len + env->buflen, GFP_KERNEL); -- 2.4.6 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <cover.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> 2015-09-09 18:53 ` [PATCH 1/3] lib/kobject_uevent.c: disable broadcast of uevents to other namespaces Michael J. Coss @ 2015-09-09 18:53 ` Michael J. Coss 2015-09-09 18:53 ` [PATCH 3/3] net/udevns: Netlink module to forward uevent to containers Michael J. Coss 2 siblings, 0 replies; 10+ messages in thread From: Michael J. Coss @ 2015-09-09 18:53 UTC (permalink / raw) To: containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I; +Cc: Michael J. Coss Adds capability to allow userspace programs to forward a given event to a specific network namespace as determined by the provided pid. In addition, support for a per-namespace kobject_sequence counter was added. Sysfs was modified to return the correct event counter based on the current network namespace. Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> --- include/linux/kobject.h | 3 ++ include/net/net_namespace.h | 3 ++ kernel/ksysfs.c | 12 ++++++ lib/kobject_uevent.c | 90 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+) diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 637f670..d1bb509 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -215,6 +215,9 @@ extern struct kobject *firmware_kobj; int kobject_uevent(struct kobject *kobj, enum kobject_action action); int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, char *envp[]); +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) +int kobject_uevent_forward(char *buf, size_t len, pid_t pid); +#endif __printf(2, 3) int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e951453..a4013e5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -134,6 +134,9 @@ struct net { #if IS_ENABLED(CONFIG_MPLS) struct netns_mpls mpls; #endif +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) + u64 kevent_seqnum; +#endif struct sock *diag_nlsk; atomic_t fnhe_genid; }; diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 6683cce..4bc15fd 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -21,6 +21,9 @@ #include <linux/compiler.h> #include <linux/rcupdate.h> /* rcu_expedited */ +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) +#include <net/net_namespace.h> +#endif #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) @@ -33,6 +36,15 @@ static struct kobj_attribute _name##_attr = \ static ssize_t uevent_seqnum_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) + pid_t p = task_pid_vnr(current); + struct net *n = get_net_ns_by_pid(p); + + if (n != ERR_PTR(-ESRCH)) { + if (!net_eq(n, &init_net)) + return sprintf(buf, "%llu\n", n->kevent_seqnum); + } +#endif return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum); } KERNEL_ATTR_RO(uevent_seqnum); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index d791e33..7589745 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -379,6 +379,96 @@ int kobject_uevent(struct kobject *kobj, enum kobject_action action) } EXPORT_SYMBOL_GPL(kobject_uevent); +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) +/** + * kobject_uevent_forward - forward event to specified network namespace + * + * @buf: event buffer + * @len: event length + * @pid: pid of network namespace + * + * Returns 0 if kobject_uevent_forward() is completed with success or the + * corresponding error when it fails. + */ +int kobject_uevent_forward(char *buf, size_t len, pid_t pid) +{ + int retval = 0; +#if defined(CONFIG_NET) + struct uevent_sock *ue_sk; + struct net *pns; + char *p; + u64 num; + + /* grab the network namespace of the provided pid */ + pns = get_net_ns_by_pid(pid); + if (pns == ERR_PTR(-ESRCH)) + return -ESRCH; + + /* find sequence number in buffer */ + p = buf; + num = 0; + while (p < (buf + len)) { + if (strncmp(p, "SEQNUM=", 7) == 0) { + int r; + + p += 7; + r = kstrtoull(p, 10, &num); + if (r) { + put_net(pns); + return r; + } + break; + } + p += (strlen(p) + 1); + } + + /* if we didn't see a valid seqnum, or none was present, return error */ + if (num == 0) { + put_net(pns); + return -EINVAL; + } + /* update per namespace sequence number as needed */ + if (pns->kevent_seqnum < num) + pns->kevent_seqnum = num; + + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; + struct sk_buff *skb; + + if (!netlink_has_listeners(uevent_sock, 1)) + continue; + /* + * only send to sockets share the same network namespace + * as the passed pid + */ + if (!net_eq(sock_net(uevent_sock), pns)) + continue; + + /* allocate message with the maximum possible size */ + skb = alloc_skb(len, GFP_KERNEL); + if (skb) { + char *p; + + p = skb_put(skb, len); + memcpy(p, buf, len); + NETLINK_CB(skb).dst_group = 1; + retval = netlink_broadcast(uevent_sock, skb, 0, 1, + GFP_KERNEL); + + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS || retval == -ESRCH) + retval = 0; + } else { + retval = -ENOMEM; + } + } + put_net(pns); +#endif + return retval; +} +EXPORT_SYMBOL_GPL(kobject_uevent_forward); +#endif + /** * add_uevent_var - add key value string to the environment buffer * @env: environment buffer structure -- 2.4.6 ^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/3] net/udevns: Netlink module to forward uevent to containers [not found] ` <cover.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> 2015-09-09 18:53 ` [PATCH 1/3] lib/kobject_uevent.c: disable broadcast of uevents to other namespaces Michael J. Coss 2015-09-09 18:53 ` [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function Michael J. Coss @ 2015-09-09 18:53 ` Michael J. Coss 2 siblings, 0 replies; 10+ messages in thread From: Michael J. Coss @ 2015-09-09 18:53 UTC (permalink / raw) To: containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I; +Cc: Michael J. Coss New generic netlink module to provide an interface with the new forwarding interface for uevent. The driver allows a user to direct a uevent as read from the kernel to a specific network namespace by providing the uevent message, and a target process id. The uapi header file provides the message format. Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/udevns.h | 19 ++++++++ net/Kconfig | 1 + net/Makefile | 1 + net/udevns/Kconfig | 9 ++++ net/udevns/Makefile | 5 ++ net/udevns/udevns.c | 112 ++++++++++++++++++++++++++++++++++++++++++++ net/udevns/udevns.h | 19 ++++++++ 8 files changed, 167 insertions(+) create mode 100644 include/uapi/linux/udevns.h create mode 100644 net/udevns/Kconfig create mode 100644 net/udevns/Makefile create mode 100644 net/udevns/udevns.c create mode 100644 net/udevns/udevns.h diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1ff9942..9fb9c59 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -404,6 +404,7 @@ header-y += toshiba.h header-y += tty_flags.h header-y += tty.h header-y += types.h +header-y += udevns.h header-y += udf_fs_i.h header-y += udp.h header-y += uhid.h diff --git a/include/uapi/linux/udevns.h b/include/uapi/linux/udevns.h new file mode 100644 index 0000000..f5702f5 --- /dev/null +++ b/include/uapi/linux/udevns.h @@ -0,0 +1,19 @@ +#ifndef _UDEVNS_H_ +#define _UDEVNS_H_ + +enum udevns_msg_types { + UDEVNS_FORWARD_MSG = 0x1, + UDEVNS_CMD_MAX, +}; + +enum udevns_attr { + UDEVNS_UNSPEC, + UDEVNS_PID, + UDEVNS_MSG, + __UDEVNS_ATTR_MAX, +}; + +#define UDEVNS_ATTR_MAX (__UDEVNS_ATTR_MAX - 1) +#define UDEVNS_VERSION 0x1 + +#endif diff --git a/net/Kconfig b/net/Kconfig index 57a7c5a..465e288 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -54,6 +54,7 @@ source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" source "net/iucv/Kconfig" +source "net/udevns/Kconfig" config INET bool "TCP/IP networking" diff --git a/net/Makefile b/net/Makefile index 3995613..bde7775 100644 --- a/net/Makefile +++ b/net/Makefile @@ -74,3 +74,4 @@ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ endif +obj-$(CONFIG_UDEVNS) += udevns/ diff --git a/net/udevns/Kconfig b/net/udevns/Kconfig new file mode 100644 index 0000000..367e650 --- /dev/null +++ b/net/udevns/Kconfig @@ -0,0 +1,9 @@ +config UDEVNS + tristate "UDEV namespace bridge" + depends on SYSFS + default n + help + This option enables support for explicit forwarding of UDEV events to + other network namespaces + + If unsure, say N. diff --git a/net/udevns/Makefile b/net/udevns/Makefile new file mode 100644 index 0000000..44c6b12 --- /dev/null +++ b/net/udevns/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the uevent namespace aware forwarder +# +# +obj-$(CONFIG_UDEVNS) += udevns.o diff --git a/net/udevns/udevns.c b/net/udevns/udevns.c new file mode 100644 index 0000000..8b23751 --- /dev/null +++ b/net/udevns/udevns.c @@ -0,0 +1,112 @@ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <net/sock.h> +#include <net/genetlink.h> +#include <linux/netlink.h> +#include <linux/skbuff.h> +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include "udevns.h" + +#define DRIVER_AUTHOR "Michael J Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>" +#define DRIVER_DESC "new udev namespace bridge" +#define DEVICE_NAME "udevns" + +#ifdef MODULE +#define UDEVNS_NAME (THIS_MODULE->name) +#else +#define UDEVNS_NAME "udevns" +#endif + +#define UDEVNS_INFO(fmt, args...) \ + pr_info("[%s] " fmt, UDEVNS_NAME, ## args) + +#define UDEVNS_ERROR(fmt, args...) \ + pr_err("[ERROR:%s:%s] " fmt, UDEVNS_NAME, __func__, ## args) + +#define UDEVNS_WARNING(fmt, args...) \ + pr_warn("[WARNING:%s:%s] " fmt, UDEVNS_NAME, __func__, ## args) + +static struct genl_family udevns_genl_family = { + .id = GENL_ID_GENERATE, + .name = DEVICE_NAME, + .hdrsize = 0, + .version = UDEVNS_VERSION, + .maxattr = UDEVNS_ATTR_MAX, +}; + +static const struct nla_policy udevns_fmsgpolicy[UDEVNS_ATTR_MAX + 1] = { + [UDEVNS_PID] = { .type = NLA_U32 }, + [UDEVNS_MSG] = { .type = NLA_STRING, .len = UEVENT_BUFFER_SIZE}, +}; + +static int udevns_forwardmsg(struct sk_buff *skb, struct genl_info *info) +{ + pid_t pid; + char *msg; + int msglen; + int err; + + if (!info->attrs[UDEVNS_PID]) { + UDEVNS_WARNING("missing PID from UDEVNS_FORWARD_MSG.\n"); + return -EINVAL; + } + + if (!info->attrs[UDEVNS_MSG]) { + UDEVNS_WARNING("missing uevent from UDEVNS_FORWARD_MSG.\n"); + return -EINVAL; + } + + pid = nla_get_u32(info->attrs[UDEVNS_PID]); + msg = nla_data(info->attrs[UDEVNS_MSG]); + msglen = nla_len(info->attrs[UDEVNS_MSG]); + + if (msglen < 0) { + UDEVNS_ERROR("Malformed uevent from UDEVNS_FORWARD_MSG.\n"); + return -EINVAL; + } + + err = kobject_uevent_forward(msg, msglen, pid); + return err; +} + +static struct genl_ops udevns_genl_ops[] = { + { + .cmd = UDEVNS_FORWARD_MSG, + .flags = GENL_ADMIN_PERM, + .doit = udevns_forwardmsg, + .policy = udevns_fmsgpolicy, + }, +}; + +static int __init udevns_init(void) +{ + int rc; + + UDEVNS_INFO("Starting udevns module\n"); + rc = genl_register_family_with_ops(&udevns_genl_family, + udevns_genl_ops); + if (rc) { + UDEVNS_ERROR("Failed to register netlink interface\n"); + return rc; + } + return 0; +} + +static void __exit udevns_exit(void) +{ + UDEVNS_INFO("Exiting udevns module\n"); + genl_unregister_family(&udevns_genl_family); +} + +module_init(udevns_init); +module_exit(udevns_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_AUTHOR(DRIVER_AUTHOR); diff --git a/net/udevns/udevns.h b/net/udevns/udevns.h new file mode 100644 index 0000000..f5702f5 --- /dev/null +++ b/net/udevns/udevns.h @@ -0,0 +1,19 @@ +#ifndef _UDEVNS_H_ +#define _UDEVNS_H_ + +enum udevns_msg_types { + UDEVNS_FORWARD_MSG = 0x1, + UDEVNS_CMD_MAX, +}; + +enum udevns_attr { + UDEVNS_UNSPEC, + UDEVNS_PID, + UDEVNS_MSG, + __UDEVNS_ATTR_MAX, +}; + +#define UDEVNS_ATTR_MAX (__UDEVNS_ATTR_MAX - 1) +#define UDEVNS_VERSION 0x1 + +#endif -- 2.4.6 ^ permalink raw reply related [flat|nested] 10+ messages in thread
[parent not found: <51c185b6fa89f0b8e9e7dcaffb3c21c975c84302.1441762578.git.michael.coss@alcatel-lucent.com>]
[parent not found: <51c185b6fa89f0b8e9e7dcaffb3c21c975c84302.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>]
* Re: [PATCH 1/3] lib/kobject_uevent.c: disable broadcast of uevents to other namespaces [not found] ` <51c185b6fa89f0b8e9e7dcaffb3c21c975c84302.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> @ 2015-10-02 17:40 ` Oren Laadan 0 siblings, 0 replies; 10+ messages in thread From: Oren Laadan @ 2015-10-02 17:40 UTC (permalink / raw) To: Michael J. Coss Cc: Serge Hallyn, containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I, Eric Biederman Hi Michael, While experimenting with your patches, I discovered a couple of issues: 1) One problem is that the test to disable broadcast has an undesired side-effect: it silently drops kernel uevents designated to specific net namespace(s). For example, uevents related to the "net" subsystem are now gone. More specifically, kobject_uevent_env() eventually calls netlink_broadcast_filtered() with "kobj_bcast_filter()" as the @filter argument; This filter is invoked by the netlink delivery code (for each target socket): if the respective kobject has a valid "struct kobj_ns_type_operations ops" then it will use the ops->netlink_ns() as the target network namespace, and only post to sockets that belong to that target network namespace. To remedy this, I suggest to move the test into "kobj_bcast_filter()", by replacing the final "return 0;" with "return !net_eq(sock_net(dsk), &init_net);". 2) Another problem is that when a task writes to the special file "uevent" in /sys/..., e.g. "/sys/devices/virtual/block/dm-0/uevent", it should ideally expect to see the resulting uevent in the network namespace to which it belongs, and only there. With broadcast disabled it will instead reach only the init network namespace (while before the patch it would reach all network namespaces). This could be fixed by having the userspace daemon that listens in the init network namespace forward such uevents to the "origin" network namespace (i.e. where the task belongs). However, I couldn't figure out a way for userspace to tell whether a particular uevent was "task made" via the respective "uevent" file and if so, in which network namespace - or by which task/pid - it was done. So I can't think of another solution but to do it in the kernel: handle writes to "uevent" in a way that only posts them in the network namespace of the writer task. Do you see a better option? Thanks, Oren. On Wed, Sep 9, 2015 at 2:53 PM, Michael J. Coss < michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> wrote: > Restrict sending uevents to only those listeners operating in the same > network namespace as the system init process. This is the first step > toward allowing policy control of the forwarding of events to other > namespaces in userspace. > > Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> > --- > lib/kobject_uevent.c | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c > index f6c2c1e..d791e33 100644 > --- a/lib/kobject_uevent.c > +++ b/lib/kobject_uevent.c > @@ -295,6 +295,10 @@ int kobject_uevent_env(struct kobject *kobj, enum > kobject_action action, > if (!netlink_has_listeners(uevent_sock, 1)) > continue; > > + /* forward event only to the host systems network > namespaces */ > + if (!net_eq(sock_net(uevent_sock), &init_net)) > + continue; > + > /* allocate message with the maximum possible size */ > len = strlen(action_string) + strlen(devpath) + 2; > skb = alloc_skb(len + env->buflen, GFP_KERNEL); > -- > 2.4.6 > > _______________________________________________ > Containers mailing list > Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org > https://lists.linuxfoundation.org/mailman/listinfo/containers > ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <3456750fe7a5a5eb709e315618facf5704cc1885.1441762578.git.michael.coss@alcatel-lucent.com>]
[parent not found: <20150909035527.GB5153@kroah.com>]
[parent not found: <20150909035527.GB5153-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <20150909035527.GB5153-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org> @ 2015-09-09 19:24 ` Michael J Coss [not found] ` <55F0875C.6060108-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> 0 siblings, 1 reply; 10+ messages in thread From: Michael J Coss @ 2015-09-09 19:24 UTC (permalink / raw) To: Greg KH Cc: serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA, containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I, davem-fT/PcQaiUtIeIZ0/mPfg9Q, linux-kernel-u79uwXL29TY76Z2rM5mHXA On 9/8/2015 11:55 PM, Greg KH wrote: > On Tue, Sep 08, 2015 at 10:10:29PM -0400, Michael J. Coss wrote: >> Adds capability to allow userspace programs to forward a given event to >> a specific network namespace as determined by the provided pid. In >> addition, support for a per-namespace kobject_sequence counter was >> added. Sysfs was modified to return the correct event counter based on >> the current network namespace. >> >> Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> >> --- >> include/linux/kobject.h | 3 ++ >> include/net/net_namespace.h | 3 ++ >> kernel/ksysfs.c | 12 ++++++ >> lib/kobject_uevent.c | 90 +++++++++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 108 insertions(+) >> >> diff --git a/include/linux/kobject.h b/include/linux/kobject.h >> index 637f670..d1bb509 100644 >> --- a/include/linux/kobject.h >> +++ b/include/linux/kobject.h >> @@ -215,6 +215,9 @@ extern struct kobject *firmware_kobj; >> int kobject_uevent(struct kobject *kobj, enum kobject_action action); >> int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, >> char *envp[]); >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid); >> +#endif >> >> __printf(2, 3) >> int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); >> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h >> index e951453..a4013e5 100644 >> --- a/include/net/net_namespace.h >> +++ b/include/net/net_namespace.h >> @@ -134,6 +134,9 @@ struct net { >> #if IS_ENABLED(CONFIG_MPLS) >> struct netns_mpls mpls; >> #endif >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> + u64 kevent_seqnum; >> +#endif >> struct sock *diag_nlsk; >> atomic_t fnhe_genid; >> }; >> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c >> index 6683cce..4bc15fd 100644 >> --- a/kernel/ksysfs.c >> +++ b/kernel/ksysfs.c >> @@ -21,6 +21,9 @@ >> #include <linux/compiler.h> >> >> #include <linux/rcupdate.h> /* rcu_expedited */ >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> +#include <net/net_namespace.h> >> +#endif > #if isn't needed here, right? > >> >> #define KERNEL_ATTR_RO(_name) \ >> static struct kobj_attribute _name##_attr = __ATTR_RO(_name) >> @@ -33,6 +36,15 @@ static struct kobj_attribute _name##_attr = \ >> static ssize_t uevent_seqnum_show(struct kobject *kobj, >> struct kobj_attribute *attr, char *buf) >> { >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> + pid_t p = task_pid_vnr(current); >> + struct net *n = get_net_ns_by_pid(p); >> + >> + if (n != ERR_PTR(-ESRCH)) { >> + if (!net_eq(n, &init_net)) >> + return sprintf(buf, "%llu\n", n->kevent_seqnum); >> + } >> +#endif >> return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum); >> } >> KERNEL_ATTR_RO(uevent_seqnum); >> diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c >> index d791e33..7589745 100644 >> --- a/lib/kobject_uevent.c >> +++ b/lib/kobject_uevent.c >> @@ -379,6 +379,96 @@ int kobject_uevent(struct kobject *kobj, enum kobject_action action) >> } >> EXPORT_SYMBOL_GPL(kobject_uevent); >> >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> +/** >> + * kobject_uevent_forward - forward event to specified network namespace >> + * >> + * @buf: event buffer >> + * @len: event length >> + * @pid: pid of network namespace >> + * >> + * Returns 0 if kobject_uevent_forward() is completed with success or the >> + * corresponding error when it fails. >> + */ >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid) >> +{ >> + int retval = 0; >> +#if defined(CONFIG_NET) >> + struct uevent_sock *ue_sk; >> + struct net *pns; >> + char *p; >> + u64 num; >> + >> + /* grab the network namespace of the provided pid */ >> + pns = get_net_ns_by_pid(pid); >> + if (pns == ERR_PTR(-ESRCH)) >> + return -ESRCH; >> + >> + /* find sequence number in buffer */ >> + p = buf; >> + num = 0; >> + while (p < (buf + len)) { >> + if (strncmp(p, "SEQNUM=", 7) == 0) { >> + int r; >> + >> + p += 7; >> + r = kstrtoull(p, 10, &num); >> + if (r) { >> + put_net(pns); >> + return r; >> + } >> + break; >> + } >> + p += (strlen(p) + 1); >> + } > Ok, that's crazy, replacing an existing sequence number with a sequence > number of the namespace? An interesting hack, yes, but something we > want to maintain for the next 20+ years, no. Surely there's a better > way to do this? > > But again, I'm not sold on this whole idea anyway. > > greg k-h > Re: the #if The #if is only needed because the new udevns code references a structure defined in that header. Obviously it could be included without consequences but I #if it to show it was part of the forwarding code. Re: sequence # I wanted it as transparent as possible, without this the udevd running inside the container has issues with the values of the sequence numbers seen, by making it tied to the namespace, udevd could run unchanged. Our goal was to minimize the changes to a linux distro and still be able to run a full desktop inside a container. But even in absence of our use case, the first question is should uevents be broadcasts to every network namespace? I don't think that broadcasting is the correct action. And follow on questions are what if anything should be done with regards to uevents and containers. -- ---Michael J Coss ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <55F0875C.6060108-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>]
* Re: [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <55F0875C.6060108-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> @ 2015-09-09 20:11 ` Greg KH [not found] ` <20150909201123.GC9328-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org> 0 siblings, 1 reply; 10+ messages in thread From: Greg KH @ 2015-09-09 20:11 UTC (permalink / raw) To: Michael J Coss Cc: serge.hallyn-GeWIH/nMZzLQT0dZR+AlfA, containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I, davem-fT/PcQaiUtIeIZ0/mPfg9Q, linux-kernel-u79uwXL29TY76Z2rM5mHXA On Wed, Sep 09, 2015 at 03:24:12PM -0400, Michael J Coss wrote: > On 9/8/2015 11:55 PM, Greg KH wrote: > > On Tue, Sep 08, 2015 at 10:10:29PM -0400, Michael J. Coss wrote: > >> Adds capability to allow userspace programs to forward a given event to > >> a specific network namespace as determined by the provided pid. In > >> addition, support for a per-namespace kobject_sequence counter was > >> added. Sysfs was modified to return the correct event counter based on > >> the current network namespace. > >> > >> Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> > >> --- > >> include/linux/kobject.h | 3 ++ > >> include/net/net_namespace.h | 3 ++ > >> kernel/ksysfs.c | 12 ++++++ > >> lib/kobject_uevent.c | 90 +++++++++++++++++++++++++++++++++++++++++++++ > >> 4 files changed, 108 insertions(+) > >> > >> diff --git a/include/linux/kobject.h b/include/linux/kobject.h > >> index 637f670..d1bb509 100644 > >> --- a/include/linux/kobject.h > >> +++ b/include/linux/kobject.h > >> @@ -215,6 +215,9 @@ extern struct kobject *firmware_kobj; > >> int kobject_uevent(struct kobject *kobj, enum kobject_action action); > >> int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, > >> char *envp[]); > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid); > >> +#endif > >> > >> __printf(2, 3) > >> int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); > >> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h > >> index e951453..a4013e5 100644 > >> --- a/include/net/net_namespace.h > >> +++ b/include/net/net_namespace.h > >> @@ -134,6 +134,9 @@ struct net { > >> #if IS_ENABLED(CONFIG_MPLS) > >> struct netns_mpls mpls; > >> #endif > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > >> + u64 kevent_seqnum; > >> +#endif > >> struct sock *diag_nlsk; > >> atomic_t fnhe_genid; > >> }; > >> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c > >> index 6683cce..4bc15fd 100644 > >> --- a/kernel/ksysfs.c > >> +++ b/kernel/ksysfs.c > >> @@ -21,6 +21,9 @@ > >> #include <linux/compiler.h> > >> > >> #include <linux/rcupdate.h> /* rcu_expedited */ > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > >> +#include <net/net_namespace.h> > >> +#endif > > #if isn't needed here, right? > > > >> > >> #define KERNEL_ATTR_RO(_name) \ > >> static struct kobj_attribute _name##_attr = __ATTR_RO(_name) > >> @@ -33,6 +36,15 @@ static struct kobj_attribute _name##_attr = \ > >> static ssize_t uevent_seqnum_show(struct kobject *kobj, > >> struct kobj_attribute *attr, char *buf) > >> { > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > >> + pid_t p = task_pid_vnr(current); > >> + struct net *n = get_net_ns_by_pid(p); > >> + > >> + if (n != ERR_PTR(-ESRCH)) { > >> + if (!net_eq(n, &init_net)) > >> + return sprintf(buf, "%llu\n", n->kevent_seqnum); > >> + } > >> +#endif > >> return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum); > >> } > >> KERNEL_ATTR_RO(uevent_seqnum); > >> diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c > >> index d791e33..7589745 100644 > >> --- a/lib/kobject_uevent.c > >> +++ b/lib/kobject_uevent.c > >> @@ -379,6 +379,96 @@ int kobject_uevent(struct kobject *kobj, enum kobject_action action) > >> } > >> EXPORT_SYMBOL_GPL(kobject_uevent); > >> > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > >> +/** > >> + * kobject_uevent_forward - forward event to specified network namespace > >> + * > >> + * @buf: event buffer > >> + * @len: event length > >> + * @pid: pid of network namespace > >> + * > >> + * Returns 0 if kobject_uevent_forward() is completed with success or the > >> + * corresponding error when it fails. > >> + */ > >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid) > >> +{ > >> + int retval = 0; > >> +#if defined(CONFIG_NET) > >> + struct uevent_sock *ue_sk; > >> + struct net *pns; > >> + char *p; > >> + u64 num; > >> + > >> + /* grab the network namespace of the provided pid */ > >> + pns = get_net_ns_by_pid(pid); > >> + if (pns == ERR_PTR(-ESRCH)) > >> + return -ESRCH; > >> + > >> + /* find sequence number in buffer */ > >> + p = buf; > >> + num = 0; > >> + while (p < (buf + len)) { > >> + if (strncmp(p, "SEQNUM=", 7) == 0) { > >> + int r; > >> + > >> + p += 7; > >> + r = kstrtoull(p, 10, &num); > >> + if (r) { > >> + put_net(pns); > >> + return r; > >> + } > >> + break; > >> + } > >> + p += (strlen(p) + 1); > >> + } > > Ok, that's crazy, replacing an existing sequence number with a sequence > > number of the namespace? An interesting hack, yes, but something we > > want to maintain for the next 20+ years, no. Surely there's a better > > way to do this? > > > > But again, I'm not sold on this whole idea anyway. > > > > greg k-h > > > Re: the #if > The #if is only needed because the new udevns code references a > structure defined in that header. Obviously it could be included > without consequences but I #if it to show it was part of the forwarding > code. That's not an issue, we don't put #ifdefs in .c code if at all possible, you will note that you added a bunch of them :( > Re: sequence # > I wanted it as transparent as possible, without this the udevd running > inside the container has issues with the values of the sequence numbers > seen, by making it tied to the namespace, udevd could run unchanged. Oh I know why you did it, I just don't like it :) > Our goal was to minimize the changes to a linux distro and still be able > to run a full desktop inside a container. But even in absence of our > use case, the first question is should uevents be broadcasts to every > network namespace? I don't think that broadcasting is the correct > action. And follow on questions are what if anything should be done > with regards to uevents and containers. I don't think you should be running udevd within a container, as a device is never bound to a namespace (network devices are the only exception), it's a false thing to think that a uevent is only for a single namespace as well. I understand your wish to change only the kernel to get unmodified userspace to run, but I suggest modifying your userspace instead :) sorry, greg k-h ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <20150909201123.GC9328-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>]
* Re: [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <20150909201123.GC9328-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org> @ 2015-09-10 5:43 ` Amir Goldstein [not found] ` <CAA2m6vcnUz4EeS-FH2P=GjKSquXit=j1NE5Yut8_baLA+TvjJA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 10+ messages in thread From: Amir Goldstein @ 2015-09-10 5:43 UTC (permalink / raw) To: Greg KH Cc: Michael J Coss, containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I, Serge Hallyn, linux-kernel-u79uwXL29TY76Z2rM5mHXA, Eric W. Biederman, davem-fT/PcQaiUtIeIZ0/mPfg9Q On Wed, Sep 9, 2015 at 11:11 PM, Greg KH <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org> wrote: > > On Wed, Sep 09, 2015 at 03:24:12PM -0400, Michael J Coss wrote: > > On 9/8/2015 11:55 PM, Greg KH wrote: > > > On Tue, Sep 08, 2015 at 10:10:29PM -0400, Michael J. Coss wrote: > > >> Adds capability to allow userspace programs to forward a given event to > > >> a specific network namespace as determined by the provided pid. In > > >> addition, support for a per-namespace kobject_sequence counter was > > >> added. Sysfs was modified to return the correct event counter based on > > >> the current network namespace. > > >> > > >> Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> > > >> --- > > >> include/linux/kobject.h | 3 ++ > > >> include/net/net_namespace.h | 3 ++ > > >> kernel/ksysfs.c | 12 ++++++ > > >> lib/kobject_uevent.c | 90 +++++++++++++++++++++++++++++++++++++++++++++ > > >> 4 files changed, 108 insertions(+) > > >> > > >> diff --git a/include/linux/kobject.h b/include/linux/kobject.h > > >> index 637f670..d1bb509 100644 > > >> --- a/include/linux/kobject.h > > >> +++ b/include/linux/kobject.h > > >> @@ -215,6 +215,9 @@ extern struct kobject *firmware_kobj; > > >> int kobject_uevent(struct kobject *kobj, enum kobject_action action); > > >> int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, > > >> char *envp[]); > > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > > >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid); > > >> +#endif > > >> > > >> __printf(2, 3) > > >> int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); > > >> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h > > >> index e951453..a4013e5 100644 > > >> --- a/include/net/net_namespace.h > > >> +++ b/include/net/net_namespace.h > > >> @@ -134,6 +134,9 @@ struct net { > > >> #if IS_ENABLED(CONFIG_MPLS) > > >> struct netns_mpls mpls; > > >> #endif > > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > > >> + u64 kevent_seqnum; > > >> +#endif > > >> struct sock *diag_nlsk; > > >> atomic_t fnhe_genid; > > >> }; > > >> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c > > >> index 6683cce..4bc15fd 100644 > > >> --- a/kernel/ksysfs.c > > >> +++ b/kernel/ksysfs.c > > >> @@ -21,6 +21,9 @@ > > >> #include <linux/compiler.h> > > >> > > >> #include <linux/rcupdate.h> /* rcu_expedited */ > > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > > >> +#include <net/net_namespace.h> > > >> +#endif > > > #if isn't needed here, right? > > > > > >> > > >> #define KERNEL_ATTR_RO(_name) \ > > >> static struct kobj_attribute _name##_attr = __ATTR_RO(_name) > > >> @@ -33,6 +36,15 @@ static struct kobj_attribute _name##_attr = \ > > >> static ssize_t uevent_seqnum_show(struct kobject *kobj, > > >> struct kobj_attribute *attr, char *buf) > > >> { > > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > > >> + pid_t p = task_pid_vnr(current); > > >> + struct net *n = get_net_ns_by_pid(p); > > >> + > > >> + if (n != ERR_PTR(-ESRCH)) { > > >> + if (!net_eq(n, &init_net)) > > >> + return sprintf(buf, "%llu\n", n->kevent_seqnum); > > >> + } > > >> +#endif > > >> return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum); > > >> } > > >> KERNEL_ATTR_RO(uevent_seqnum); > > >> diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c > > >> index d791e33..7589745 100644 > > >> --- a/lib/kobject_uevent.c > > >> +++ b/lib/kobject_uevent.c > > >> @@ -379,6 +379,96 @@ int kobject_uevent(struct kobject *kobj, enum kobject_action action) > > >> } > > >> EXPORT_SYMBOL_GPL(kobject_uevent); > > >> > > >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > > >> +/** > > >> + * kobject_uevent_forward - forward event to specified network namespace > > >> + * > > >> + * @buf: event buffer > > >> + * @len: event length > > >> + * @pid: pid of network namespace > > >> + * > > >> + * Returns 0 if kobject_uevent_forward() is completed with success or the > > >> + * corresponding error when it fails. > > >> + */ > > >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid) > > >> +{ > > >> + int retval = 0; > > >> +#if defined(CONFIG_NET) > > >> + struct uevent_sock *ue_sk; > > >> + struct net *pns; > > >> + char *p; > > >> + u64 num; > > >> + > > >> + /* grab the network namespace of the provided pid */ > > >> + pns = get_net_ns_by_pid(pid); > > >> + if (pns == ERR_PTR(-ESRCH)) > > >> + return -ESRCH; > > >> + > > >> + /* find sequence number in buffer */ > > >> + p = buf; > > >> + num = 0; > > >> + while (p < (buf + len)) { > > >> + if (strncmp(p, "SEQNUM=", 7) == 0) { > > >> + int r; > > >> + > > >> + p += 7; > > >> + r = kstrtoull(p, 10, &num); > > >> + if (r) { > > >> + put_net(pns); > > >> + return r; > > >> + } > > >> + break; > > >> + } > > >> + p += (strlen(p) + 1); > > >> + } > > > Ok, that's crazy, replacing an existing sequence number with a sequence > > > number of the namespace? An interesting hack, yes, but something we > > > want to maintain for the next 20+ years, no. Surely there's a better > > > way to do this? > > > > > > But again, I'm not sold on this whole idea anyway. > > > > > > greg k-h > > > > > Re: the #if > > The #if is only needed because the new udevns code references a > > structure defined in that header. Obviously it could be included > > without consequences but I #if it to show it was part of the forwarding > > code. > > That's not an issue, we don't put #ifdefs in .c code if at all possible, > you will note that you added a bunch of them :( > > > Re: sequence # > > I wanted it as transparent as possible, without this the udevd running > > inside the container has issues with the values of the sequence numbers > > seen, by making it tied to the namespace, udevd could run unchanged. > > Oh I know why you did it, I just don't like it :) > > > Our goal was to minimize the changes to a linux distro and still be able > > to run a full desktop inside a container. But even in absence of our > > use case, the first question is should uevents be broadcasts to every > > network namespace? I don't think that broadcasting is the correct > > action. And follow on questions are what if anything should be done > > with regards to uevents and containers. > > I don't think you should be running udevd within a container, as a > device is never bound to a namespace (network devices are the only > exception), it's a false thing to think that a uevent is only for a > single namespace as well. I understand your wish to change only the > kernel to get unmodified userspace to run, but I suggest modifying your > userspace instead :) Greg, I fully agree with you that running unmodified distro inside a container is not a justified cause for kernel changes. However, I would like to highlight the point that udev is not the only consumer of uevents, so changing userspace, as you rightfully suggested, may not be as simple as you imagine. For example, in our Android use case, we have no intention of running unmodified Android inside container and modifying Android's ueventd is not an issue for us. Android userspace uses uevents extensively. For example, vold uses uevents to be notified of SDCARD insert event and that is just one example. We can get around vold and maybe we can get around every other open source Android tool that make use of uevents, but phone vendors tend to use uevents to communicate messages between their drivers and proprietary software and this is were we must have a way to filter those uevents in the kernel. You argue that "device is never bound to a namespace" (and that it never will be) and I don't disagree with that. However, as Eric said, the "broadcast everything" logic does not make much sense. In a way, the broadcast logic is opposite to your suggestion to modify userspace. In almost every existing implementation of containers, only the root namespace owns responsibility for booting the machine and configuring devices, so if all containers were running modified userspace, there would have been no need to broadcast uevents to all namespaces. At the very least, you should be able to consent with the idea of having the broadcast policy decided by userspace. Legacy distros can keep broadcast on by default to not break unmodified userspace programs. Modern distros, that were modified to run inside containers, be them systemd driven or other, can turn broadcast off and let the daemons running in root ns be in charge. Many of us would like the feature that this patch set is set to achieve. Can you please provide constructive feedback to Michael's work, pointing at the parts you think can go into kernel this way or another and the parts for which you see a valid userspace alternative. Thanks, Amir. > > sorry, > > greg k-h > _______________________________________________ > Containers mailing list > Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org > https://lists.linuxfoundation.org/mailman/listinfo/containers ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <CAA2m6vcnUz4EeS-FH2P=GjKSquXit=j1NE5Yut8_baLA+TvjJA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <CAA2m6vcnUz4EeS-FH2P=GjKSquXit=j1NE5Yut8_baLA+TvjJA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2015-09-10 5:58 ` Greg KH 0 siblings, 0 replies; 10+ messages in thread From: Greg KH @ 2015-09-10 5:58 UTC (permalink / raw) To: Amir Goldstein Cc: Michael J Coss, containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I, Serge Hallyn, linux-kernel-u79uwXL29TY76Z2rM5mHXA, Eric W. Biederman, davem-fT/PcQaiUtIeIZ0/mPfg9Q On Thu, Sep 10, 2015 at 08:43:28AM +0300, Amir Goldstein wrote: > I fully agree with you that running unmodified distro inside a > container is not a justified > cause for kernel changes. Great, end of discussion :) > However, I would like to highlight the point that udev is not the only > consumer of uevents, so changing userspace, as you rightfully > suggested, may not be as simple as you imagine. But you have control over who consumes uevents, so if you really want to create such a "only send some uevents to some containers" you can do so from the "host" container today. Exactly like udev does this today if you look at how udevd works, udevd uses the kernel as the filter to only wake up processes that have subscribed to specific events. It's as if no one actually looks at how this works :( > For example, in our Android use case, we have no intention of running > unmodified Android > inside container and modifying Android's ueventd is not an issue for us. > Android userspace uses uevents extensively. For example, vold uses uevents to be > notified of SDCARD insert event and that is just one example. > We can get around vold and maybe we can get around every other open > source Android tool > that make use of uevents, but phone vendors tend to use uevents to communicate > messages between their drivers and proprietary software and this is > were we must have > a way to filter those uevents in the kernel. If you are going to modify Android, just use udevd and filter things that way, you can do it today. Or just modify the tools that Android uses for listening to uevents. > You argue that "device is never bound to a namespace" (and that it > never will be) and I don't disagree with that. Great, end of discussion :) > However, as Eric said, the "broadcast everything" logic does not make > much sense. Only one thing is listening to these events, put your filter there (again, just like udevd does today...) > In a way, the broadcast logic is opposite to your suggestion to modify > userspace. Nope, see above. > In almost every existing implementation of containers, only the root namespace > owns responsibility for booting the machine and configuring devices, so if all > containers were running modified userspace, there would have been no need to > broadcast uevents to all namespaces. Wonderful, as you said you were going to modify the container applications, this isn't an issue :) > At the very least, you should be able to consent with the idea of > having the broadcast policy decided by userspace. But it already is, userspace can decide what it wants to do with every event today. Again, to sound like a broken record, just like udevd does. greg k-h ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <3456750fe7a5a5eb709e315618facf5704cc1885.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>]
* Re: [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <3456750fe7a5a5eb709e315618facf5704cc1885.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> @ 2015-10-02 18:00 ` Oren Laadan [not found] ` <CAA4jN2br76atf9UuOhJVcoQPZ6GMN91Mk1GsoXcVFC-eFvFafA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 10+ messages in thread From: Oren Laadan @ 2015-10-02 18:00 UTC (permalink / raw) To: Michael J. Coss; +Cc: containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I On Wed, Sep 9, 2015 at 2:53 PM, Michael J. Coss < michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> wrote: > Adds capability to allow userspace programs to forward a given event to > a specific network namespace as determined by the provided pid. In > addition, support for a per-namespace kobject_sequence counter was > added. Sysfs was modified to return the correct event counter based on > the current network namespace. > > Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> > --- > include/linux/kobject.h | 3 ++ > include/net/net_namespace.h | 3 ++ > kernel/ksysfs.c | 12 ++++++ > lib/kobject_uevent.c | 90 > +++++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 108 insertions(+) > > diff --git a/include/linux/kobject.h b/include/linux/kobject.h > index 637f670..d1bb509 100644 > --- a/include/linux/kobject.h > +++ b/include/linux/kobject.h > @@ -215,6 +215,9 @@ extern struct kobject *firmware_kobj; > int kobject_uevent(struct kobject *kobj, enum kobject_action action); > int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, > char *envp[]); > +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > +int kobject_uevent_forward(char *buf, size_t len, pid_t pid); > +#endif > > __printf(2, 3) > int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); > diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h > index e951453..a4013e5 100644 > --- a/include/net/net_namespace.h > +++ b/include/net/net_namespace.h > @@ -134,6 +134,9 @@ struct net { > #if IS_ENABLED(CONFIG_MPLS) > struct netns_mpls mpls; > #endif > +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > + u64 kevent_seqnum; > +#endif > struct sock *diag_nlsk; > atomic_t fnhe_genid; > }; > diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c > index 6683cce..4bc15fd 100644 > --- a/kernel/ksysfs.c > +++ b/kernel/ksysfs.c > @@ -21,6 +21,9 @@ > #include <linux/compiler.h> > > #include <linux/rcupdate.h> /* rcu_expedited */ > +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > +#include <net/net_namespace.h> > +#endif > > #define KERNEL_ATTR_RO(_name) \ > static struct kobj_attribute _name##_attr = __ATTR_RO(_name) > @@ -33,6 +36,15 @@ static struct kobj_attribute _name##_attr = \ > static ssize_t uevent_seqnum_show(struct kobject *kobj, > struct kobj_attribute *attr, char *buf) > { > +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > + pid_t p = task_pid_vnr(current); > + struct net *n = get_net_ns_by_pid(p); > + > + if (n != ERR_PTR(-ESRCH)) { > + if (!net_eq(n, &init_net)) > + return sprintf(buf, "%llu\n", n->kevent_seqnum); > + } > +#endif > return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum); > } > KERNEL_ATTR_RO(uevent_seqnum); > diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c > index d791e33..7589745 100644 > --- a/lib/kobject_uevent.c > +++ b/lib/kobject_uevent.c > @@ -379,6 +379,96 @@ int kobject_uevent(struct kobject *kobj, enum > kobject_action action) > } > EXPORT_SYMBOL_GPL(kobject_uevent); > > +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) > +/** > + * kobject_uevent_forward - forward event to specified network namespace > + * > + * @buf: event buffer > + * @len: event length > + * @pid: pid of network namespace > + * > + * Returns 0 if kobject_uevent_forward() is completed with success or the > + * corresponding error when it fails. > + */ > +int kobject_uevent_forward(char *buf, size_t len, pid_t pid) > +{ > + int retval = 0; > +#if defined(CONFIG_NET) > + struct uevent_sock *ue_sk; > + struct net *pns; > + char *p; > + u64 num; > + > + /* grab the network namespace of the provided pid */ > + pns = get_net_ns_by_pid(pid); > + if (pns == ERR_PTR(-ESRCH)) > + return -ESRCH; > + > + /* find sequence number in buffer */ > + p = buf; > + num = 0; > + while (p < (buf + len)) { > + if (strncmp(p, "SEQNUM=", 7) == 0) { > + int r; > + > + p += 7; > + r = kstrtoull(p, 10, &num); > + if (r) { > + put_net(pns); > + return r; > + } > + break; > + } > + p += (strlen(p) + 1); > + } > + > + /* if we didn't see a valid seqnum, or none was present, return > error */ > + if (num == 0) { > + put_net(pns); > + return -EINVAL; > + } > + /* update per namespace sequence number as needed */ > + if (pns->kevent_seqnum < num) > + pns->kevent_seqnum = num; > + > + list_for_each_entry(ue_sk, &uevent_sock_list, list) { > + struct sock *uevent_sock = ue_sk->sk; > + struct sk_buff *skb; > + > + if (!netlink_has_listeners(uevent_sock, 1)) > + continue; > + /* > + * only send to sockets share the same network namespace > + * as the passed pid > + */ > + if (!net_eq(sock_net(uevent_sock), pns)) > + continue; > + > + /* allocate message with the maximum possible size */ > + skb = alloc_skb(len, GFP_KERNEL); > + if (skb) { > + char *p; > + > + p = skb_put(skb, len); > + memcpy(p, buf, len); > + NETLINK_CB(skb).dst_group = 1; > + retval = netlink_broadcast(uevent_sock, skb, 0, 1, > + GFP_KERNEL); > + > + /* ENOBUFS should be handled in userspace */ > + if (retval == -ENOBUFS || retval == -ESRCH) > + retval = 0; > This may mask an error from an earlier send (to a distinct listener socket). Instead, we should retain and report either the first error seen or some error seen. > + } else { > + retval = -ENOMEM; > + } > + } > + put_net(pns); > +#endif > + return retval; > +} > +EXPORT_SYMBOL_GPL(kobject_uevent_forward); > +#endif > + > /** > * add_uevent_var - add key value string to the environment buffer > * @env: environment buffer structure > -- > 2.4.6 > > _______________________________________________ > Containers mailing list > Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org > https://lists.linuxfoundation.org/mailman/listinfo/containers > ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <CAA4jN2br76atf9UuOhJVcoQPZ6GMN91Mk1GsoXcVFC-eFvFafA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function [not found] ` <CAA4jN2br76atf9UuOhJVcoQPZ6GMN91Mk1GsoXcVFC-eFvFafA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2015-10-14 3:40 ` Oren Laadan 0 siblings, 0 replies; 10+ messages in thread From: Oren Laadan @ 2015-10-14 3:40 UTC (permalink / raw) To: Michael J. Coss; +Cc: containers-cunTk1MwBs98uUxBSJOaYoYkZiVZrdSR2LY78lusg7I On Fri, Oct 2, 2015 at 2:00 PM, Oren Laadan <orenl-3AfRa/s5aFdBDgjK7y7TUQ@public.gmane.org> wrote: > > > On Wed, Sep 9, 2015 at 2:53 PM, Michael J. Coss < > michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> wrote: > >> Adds capability to allow userspace programs to forward a given event to >> a specific network namespace as determined by the provided pid. In >> addition, support for a per-namespace kobject_sequence counter was >> added. Sysfs was modified to return the correct event counter based on >> the current network namespace. >> >> Signed-off-by: Michael J. Coss <michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org> >> --- >> include/linux/kobject.h | 3 ++ >> include/net/net_namespace.h | 3 ++ >> kernel/ksysfs.c | 12 ++++++ >> lib/kobject_uevent.c | 90 >> +++++++++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 108 insertions(+) >> >> diff --git a/include/linux/kobject.h b/include/linux/kobject.h >> index 637f670..d1bb509 100644 >> --- a/include/linux/kobject.h >> +++ b/include/linux/kobject.h >> @@ -215,6 +215,9 @@ extern struct kobject *firmware_kobj; >> int kobject_uevent(struct kobject *kobj, enum kobject_action action); >> int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, >> char *envp[]); >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid); >> +#endif >> >> __printf(2, 3) >> int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...); >> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h >> index e951453..a4013e5 100644 >> --- a/include/net/net_namespace.h >> +++ b/include/net/net_namespace.h >> @@ -134,6 +134,9 @@ struct net { >> #if IS_ENABLED(CONFIG_MPLS) >> struct netns_mpls mpls; >> #endif >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> + u64 kevent_seqnum; >> +#endif >> struct sock *diag_nlsk; >> atomic_t fnhe_genid; >> }; >> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c >> index 6683cce..4bc15fd 100644 >> --- a/kernel/ksysfs.c >> +++ b/kernel/ksysfs.c >> @@ -21,6 +21,9 @@ >> #include <linux/compiler.h> >> >> #include <linux/rcupdate.h> /* rcu_expedited */ >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> +#include <net/net_namespace.h> >> +#endif >> >> #define KERNEL_ATTR_RO(_name) \ >> static struct kobj_attribute _name##_attr = __ATTR_RO(_name) >> @@ -33,6 +36,15 @@ static struct kobj_attribute _name##_attr = \ >> static ssize_t uevent_seqnum_show(struct kobject *kobj, >> struct kobj_attribute *attr, char *buf) >> { >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> + pid_t p = task_pid_vnr(current); >> + struct net *n = get_net_ns_by_pid(p); >> + >> + if (n != ERR_PTR(-ESRCH)) { >> + if (!net_eq(n, &init_net)) >> + return sprintf(buf, "%llu\n", n->kevent_seqnum); >> + } >> +#endif >> return sprintf(buf, "%llu\n", (unsigned long long)uevent_seqnum); >> } >> KERNEL_ATTR_RO(uevent_seqnum); >> diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c >> index d791e33..7589745 100644 >> --- a/lib/kobject_uevent.c >> +++ b/lib/kobject_uevent.c >> @@ -379,6 +379,96 @@ int kobject_uevent(struct kobject *kobj, enum >> kobject_action action) >> } >> EXPORT_SYMBOL_GPL(kobject_uevent); >> >> +#if defined(CONFIG_UDEVNS) || defined(CONFIG_UDEVNS_MODULE) >> +/** >> + * kobject_uevent_forward - forward event to specified network namespace >> + * >> + * @buf: event buffer >> + * @len: event length >> + * @pid: pid of network namespace >> + * >> + * Returns 0 if kobject_uevent_forward() is completed with success or the >> + * corresponding error when it fails. >> + */ >> +int kobject_uevent_forward(char *buf, size_t len, pid_t pid) >> +{ >> + int retval = 0; >> +#if defined(CONFIG_NET) >> + struct uevent_sock *ue_sk; >> + struct net *pns; >> + char *p; >> + u64 num; >> + >> + /* grab the network namespace of the provided pid */ >> + pns = get_net_ns_by_pid(pid); >> + if (pns == ERR_PTR(-ESRCH)) >> + return -ESRCH; >> + >> + /* find sequence number in buffer */ >> + p = buf; >> + num = 0; >> + while (p < (buf + len)) { >> + if (strncmp(p, "SEQNUM=", 7) == 0) { >> + int r; >> + >> + p += 7; >> + r = kstrtoull(p, 10, &num); >> + if (r) { >> + put_net(pns); >> + return r; >> + } >> + break; >> + } >> + p += (strlen(p) + 1); >> + } >> + >> + /* if we didn't see a valid seqnum, or none was present, return >> error */ >> + if (num == 0) { >> + put_net(pns); >> + return -EINVAL; >> + } >> + /* update per namespace sequence number as needed */ >> + if (pns->kevent_seqnum < num) >> + pns->kevent_seqnum = num; >> + >> + list_for_each_entry(ue_sk, &uevent_sock_list, list) { >> + struct sock *uevent_sock = ue_sk->sk; >> + struct sk_buff *skb; >> + >> + if (!netlink_has_listeners(uevent_sock, 1)) >> + continue; >> + /* >> + * only send to sockets share the same network namespace >> + * as the passed pid >> + */ >> + if (!net_eq(sock_net(uevent_sock), pns)) >> + continue; >> + >> + /* allocate message with the maximum possible size */ >> + skb = alloc_skb(len, GFP_KERNEL); >> + if (skb) { >> + char *p; >> + >> + p = skb_put(skb, len); >> + memcpy(p, buf, len); >> + NETLINK_CB(skb).dst_group = 1; >> + retval = netlink_broadcast(uevent_sock, skb, 0, 1, >> + GFP_KERNEL); >> + >> + /* ENOBUFS should be handled in userspace */ >> + if (retval == -ENOBUFS || retval == -ESRCH) >> + retval = 0; >> > > This may mask an error from an earlier send (to a distinct listener > socket). Instead, we should retain and report either the first error seen > or some error seen. > > Ok, scratch that: I see the model function kobject_uevent_env() does the same thing; and returning a meaningful error on a broadcast is questionable anyway... Oren. ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2015-10-14 3:40 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <cover.1441762578.git.michael.coss@alcatel-lucent.com>
[not found] ` <cover.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>
2015-09-09 18:53 ` [PATCH 1/3] lib/kobject_uevent.c: disable broadcast of uevents to other namespaces Michael J. Coss
2015-09-09 18:53 ` [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function Michael J. Coss
2015-09-09 18:53 ` [PATCH 3/3] net/udevns: Netlink module to forward uevent to containers Michael J. Coss
[not found] ` <51c185b6fa89f0b8e9e7dcaffb3c21c975c84302.1441762578.git.michael.coss@alcatel-lucent.com>
[not found] ` <51c185b6fa89f0b8e9e7dcaffb3c21c975c84302.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>
2015-10-02 17:40 ` [PATCH 1/3] lib/kobject_uevent.c: disable broadcast of uevents to other namespaces Oren Laadan
[not found] ` <3456750fe7a5a5eb709e315618facf5704cc1885.1441762578.git.michael.coss@alcatel-lucent.com>
[not found] ` <20150909035527.GB5153@kroah.com>
[not found] ` <20150909035527.GB5153-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>
2015-09-09 19:24 ` [PATCH 2/3] lib/kobject_uevent.c: add uevent forwarding function Michael J Coss
[not found] ` <55F0875C.6060108-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>
2015-09-09 20:11 ` Greg KH
[not found] ` <20150909201123.GC9328-U8xfFu+wG4EAvxtiuMwx3w@public.gmane.org>
2015-09-10 5:43 ` Amir Goldstein
[not found] ` <CAA2m6vcnUz4EeS-FH2P=GjKSquXit=j1NE5Yut8_baLA+TvjJA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-10 5:58 ` Greg KH
[not found] ` <3456750fe7a5a5eb709e315618facf5704cc1885.1441762578.git.michael.coss-cfy2TCaE7SFv+uJa97DSA9BPR1lH4CV8@public.gmane.org>
2015-10-02 18:00 ` Oren Laadan
[not found] ` <CAA4jN2br76atf9UuOhJVcoQPZ6GMN91Mk1GsoXcVFC-eFvFafA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-10-14 3:40 ` Oren Laadan
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox