* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
[not found] <20070711210159.GF24114@agk.fab.redhat.com>
@ 2007-07-11 21:34 ` Andrew Morton
2007-07-12 0:40 ` Mike Anderson
2007-07-12 15:07 ` Eric W. Biederman
0 siblings, 2 replies; 8+ messages in thread
From: Andrew Morton @ 2007-07-11 21:34 UTC
To: Alasdair G Kergon; +Cc: dm-devel, linux-kernel, Mike Anderson, netdev
On Wed, 11 Jul 2007 22:01:59 +0100
Alasdair G Kergon <agk@redhat.com> wrote:
> From: Mike Anderson <andmike@us.ibm.com>
>
> This patch adds support for the dm_path_event and dm_send_event functions,
> which create and send netlink attribute events.
>
> ...
>
> --- linux.orig/drivers/md/dm-netlink.c 2007-07-11 21:37:50.000000000 +0100
> +++ linux/drivers/md/dm-netlink.c 2007-07-11 21:37:51.000000000 +0100
> @@ -40,6 +40,17 @@ struct dm_event_cache {
>
> static struct dm_event_cache _dme_cache;
>
> +struct dm_event {
> + struct dm_event_cache *cdata;
> + struct mapped_device *md;
> + struct sk_buff *skb;
> + struct list_head elist;
> +};
> +
> +static struct sock *_dm_netlink_sock;
> +static uint32_t _dm_netlink_daemon_pid;
> +static DEFINE_SPINLOCK(_dm_netlink_pid_lock);
The usage of this lock makes my head spin a bit. It's a shame it wasn't
documented.
There's obviously something very significant happening with process IDs in
here. A description of the design would be helpful. Especially for the
containerisation guys who no doubt will need to tear their hair out over it
all ;)
> +static int dm_netlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> +{
> + int r = 0;
> +
> + if (security_netlink_recv(skb, CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + spin_lock(&_dm_netlink_pid_lock);
> + if (_dm_netlink_daemon_pid) {
> + if (_dm_netlink_daemon_pid != nlh->nlmsg_pid)
> + r = -EBUSY;
> + } else
> + _dm_netlink_daemon_pid = nlh->nlmsg_pid;
> + spin_unlock(&_dm_netlink_pid_lock);
> +
> + return r;
> +}
This really does need some comments; I have no idea what it's all trying to do here.
> +static void dm_netlink_rcv(struct sock *sk, int len)
> +{
> + unsigned qlen = 0;
stupid gcc.
> +
> + do
> + netlink_run_queue(sk, &qlen, &dm_netlink_rcv_msg);
> + while (qlen);
> +
> +}
stray blank line there.
> +static int dm_netlink_rcv_event(struct notifier_block *this,
> + unsigned long event, void *ptr)
> +{
> + struct netlink_notify *n = ptr;
> +
> + spin_lock(&_dm_netlink_pid_lock);
> +
> + if (event == NETLINK_URELEASE &&
> + n->protocol == NETLINK_DM && n->pid &&
> + n->pid == _dm_netlink_daemon_pid)
> + _dm_netlink_daemon_pid = 0;
> +
> + spin_unlock(&_dm_netlink_pid_lock);
> +
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block dm_netlink_notifier = {
> + .notifier_call = dm_netlink_rcv_event,
> +};
> +
> int __init dm_netlink_init(void)
> {
> int r;
>
> + r = netlink_register_notifier(&dm_netlink_notifier);
> + if (r)
> + return r;
> +
> + _dm_netlink_sock = netlink_kernel_create(NETLINK_DM, 0,
> + dm_netlink_rcv, NULL,
> + THIS_MODULE);
I think we're supposed to use the genetlink APIs here. One for the net
guys to check, please.
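For reference, a rough sketch of what the genetlink registration of this
era looks like (the family name and init function here are illustrative,
not a proposal; one for the net guys to confirm):

#include <net/genetlink.h>

static struct genl_family dm_event_genl_family = {
	.id      = GENL_ID_GENERATE,	/* let genetlink assign an id */
	.name    = "DM_EVENTS",		/* illustrative family name */
	.version = 1,
	.maxattr = DM_EVENT_ATTR_MAX,
};

static int __init dm_genl_init(void)
{
	/* would replace netlink_kernel_create(NETLINK_DM, ...) and the
	 * hand-picked NETLINK_DM protocol number */
	return genl_register_family(&dm_event_genl_family);
}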
> + if (!_dm_netlink_sock) {
> + r = -ENOBUFS;
> + goto notifier_out;
> + }
> r = dme_cache_init(&_dme_cache, DM_EVENT_SKB_SIZE);
> - if (!r)
> - DMINFO("version 1.0.0 loaded");
> + if (r)
> + goto socket_out;
> +
> + DMINFO("version 1.0.0 loaded");
> +
> + return 0;
> +
> +socket_out:
> + sock_release(_dm_netlink_sock->sk_socket);
> +notifier_out:
> + netlink_unregister_notifier(&dm_netlink_notifier);
> + DMERR("%s: dme_cache_init failed: %d", __FUNCTION__, r);
>
> return r;
> }
> @@ -100,4 +292,6 @@ int __init dm_netlink_init(void)
> void dm_netlink_exit(void)
> {
> dme_cache_destroy(&_dme_cache);
> + sock_release(_dm_netlink_sock->sk_socket);
> + netlink_unregister_notifier(&dm_netlink_notifier);
> }
> Index: linux/drivers/md/dm-netlink.h
> ===================================================================
> --- linux.orig/drivers/md/dm-netlink.h 2007-07-11 21:37:50.000000000 +0100
> +++ linux/drivers/md/dm-netlink.h 2007-07-11 21:37:51.000000000 +0100
> @@ -21,19 +21,22 @@
> #ifndef DM_NETLINK_H
> #define DM_NETLINK_H
>
> -struct dm_event_cache;
> +#include <linux/dm-netlink-if.h>
> +
> +struct dm_table;
> struct mapped_device;
> -struct dm_event {
> - struct dm_event_cache *cdata;
> - struct mapped_device *md;
> - struct sk_buff *skb;
> - struct list_head elist;
> -};
> +struct dm_event;
> +
> +void dm_event_add(struct mapped_device *md, struct list_head *elist);
>
> #ifdef CONFIG_DM_NETLINK
>
> int dm_netlink_init(void);
> void dm_netlink_exit(void);
> +void dm_netlink_send_events(struct list_head *events);
> +
> +void dm_path_event(enum dm_netlink_event_type evt_type, struct dm_table *t,
> + const char *path, int nr_valid_paths);
>
> #else /* CONFIG_DM_NETLINK */
>
> @@ -44,6 +47,14 @@ static inline int __init dm_netlink_init
> static inline void dm_netlink_exit(void)
> {
> }
> +static void inline dm_netlink_send_events(struct list_head *events)
> +{
> +}
> +static void inline dm_path_event(enum dm_netlink_event_type evt_type,
> + struct dm_table *t, const char *path,
> + int nr_valid_paths)
> +{
> +}
Please use `static inline void', not `static void inline'. Just a
consistency thing.
> #endif /* CONFIG_DM_NETLINK */
>
> Index: linux/drivers/md/dm.c
> ===================================================================
> --- linux.orig/drivers/md/dm.c 2007-07-11 21:37:50.000000000 +0100
> +++ linux/drivers/md/dm.c 2007-07-11 21:37:51.000000000 +0100
> @@ -111,8 +111,11 @@ struct mapped_device {
> /*
> * Event handling.
> */
> + atomic_t event_seq; /* Used for netlink events */
> atomic_t event_nr;
> wait_queue_head_t eventq;
> + struct list_head event_list;
> + spinlock_t event_lock;
>
> /*
> * freeze/thaw support require holding onto a super block
> @@ -1001,6 +1004,9 @@ static struct mapped_device *alloc_dev(i
> atomic_set(&md->holders, 1);
> atomic_set(&md->open_count, 0);
> atomic_set(&md->event_nr, 0);
> + atomic_set(&md->event_seq, 0);
> + INIT_LIST_HEAD(&md->event_list);
> + spin_lock_init(&md->event_lock);
>
> md->queue = blk_alloc_queue(GFP_KERNEL);
> if (!md->queue)
> @@ -1099,6 +1105,14 @@ static void free_dev(struct mapped_devic
> static void event_callback(void *context)
> {
> struct mapped_device *md = (struct mapped_device *) context;
> + unsigned long flags;
> + LIST_HEAD(events);
> +
> + spin_lock_irqsave(&md->event_lock, flags);
> + list_splice_init(&md->event_list, &events);
> + spin_unlock_irqrestore(&md->event_lock, flags);
> +
> + dm_netlink_send_events(&events);
>
> atomic_inc(&md->event_nr);
> wake_up(&md->eventq);
> @@ -1516,6 +1530,11 @@ out:
> /*-----------------------------------------------------------------
> * Event notification.
> *---------------------------------------------------------------*/
> +uint32_t dm_next_event_seq(struct mapped_device *md)
> +{
> + return atomic_add_return(1, &md->event_seq);
> +}
> +
> uint32_t dm_get_event_nr(struct mapped_device *md)
> {
> return atomic_read(&md->event_nr);
> @@ -1527,6 +1546,15 @@ int dm_wait_event(struct mapped_device *
> (event_nr != atomic_read(&md->event_nr)));
> }
>
> +void dm_event_add(struct mapped_device *md, struct list_head *elist)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&md->event_lock, flags);
> + list_add(elist, &md->event_list);
> + spin_unlock_irqrestore(&md->event_lock, flags);
> +}
> +
>
> ...
>
> +/*
> + * Device Mapper Netlink Interface
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright IBM Corporation, 2005, 2006
> + * Author: Mike Anderson <andmike@us.ibm.com>
> + */
> +#ifndef _LINUX_DM_NETLINK_IF_H
> +#define _LINUX_DM_NETLINK_IF_H
> +
> +#include <linux/netlink.h>
> +
> +/*
> + * Single netlink message type used for all DM events
> + */
> +#define DM_EVENT_MSG NLMSG_MIN_TYPE + 1
> +
> +enum dm_netlink_event_type {
> + DM_EVENT_PATH_FAILED = 1,
> + DM_EVENT_PATH_REINSTATED = 2,
> + DM_EVENT_MAX,
> +};
> +
> +enum dm_netlink_event_attr {
> + DM_EVENT_ATTR_SEQNUM = 1, /* Sequence number */
> + DM_EVENT_ATTR_TSSEC = 2, /* Time Stamp seconds */
> + DM_EVENT_ATTR_TSUSEC = 3, /* Time Stamp micro seconds */
> + DM_EVENT_ATTR_NAME = 4, /* Major:Minor */
> + /* 5 is deprecated */
> + DM_EVENT_ATTR_NUM_VALID_PATHS = 6, /* (Multipath) */
> + DM_EVENT_ATTR_PATH = 7, /* (Multipath) Pathname */
> + DM_EVENT_ATTR_MAX,
> +};
> +
> +#define DM_EVENT_IF_VERSION 0x10
> +
> +struct dm_netlink_msghdr {
> + uint16_t type;
> + uint16_t version;
> + uint16_t reserved1;
> + uint16_t reserved2;
> +} __attribute__((aligned(sizeof(uint64_t))));
> +
> +#endif /* _LINUX_DM_NETLINK_IF_H */
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-11 21:34 ` [2.6.23 PATCH 14/18] dm: netlink add to core Andrew Morton
@ 2007-07-12 0:40 ` Mike Anderson
2007-07-12 23:13 ` Johannes Berg
2007-07-12 15:07 ` Eric W. Biederman
1 sibling, 1 reply; 8+ messages in thread
From: Mike Anderson @ 2007-07-12 0:40 UTC
To: Andrew Morton; +Cc: netdev, dm-devel, linux-kernel, Alasdair G Kergon
Andrew Morton <akpm@linux-foundation.org> wrote:
> On Wed, 11 Jul 2007 22:01:59 +0100
> Alasdair G Kergon <agk@redhat.com> wrote:
>
> > From: Mike Anderson <andmike@us.ibm.com>
> >
> > This patch adds support for the dm_path_event and dm_send_event functions,
> > which create and send netlink attribute events.
> >
> > ...
> >
> > --- linux.orig/drivers/md/dm-netlink.c 2007-07-11 21:37:50.000000000 +0100
> > +++ linux/drivers/md/dm-netlink.c 2007-07-11 21:37:51.000000000 +0100
> > @@ -40,6 +40,17 @@ struct dm_event_cache {
> >
> > static struct dm_event_cache _dme_cache;
> >
> > +struct dm_event {
> > + struct dm_event_cache *cdata;
> > + struct mapped_device *md;
> > + struct sk_buff *skb;
> > + struct list_head elist;
> > +};
> > +
> > +static struct sock *_dm_netlink_sock;
> > +static uint32_t _dm_netlink_daemon_pid;
> > +static DEFINE_SPINLOCK(_dm_netlink_pid_lock);
>
> The usage of this lock makes my head spin a bit. It's a shame it wasn't
> documented.
>
> There's obviously something very significant happening with process IDs in
> here. A description of the design would be helpful. Especially for the
> containerisation guys who no doubt will need to tear their hair out over it
> all ;)
>
ok, answered below.
>
> > +static int dm_netlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> > +{
> > + int r = 0;
> > +
> > + if (security_netlink_recv(skb, CAP_SYS_ADMIN))
> > + return -EPERM;
> > +
> > + spin_lock(&_dm_netlink_pid_lock);
> > + if (_dm_netlink_daemon_pid) {
> > + if (_dm_netlink_daemon_pid != nlh->nlmsg_pid)
> > + r = -EBUSY;
> > + } else
> > + _dm_netlink_daemon_pid = nlh->nlmsg_pid;
> > + spin_unlock(&_dm_netlink_pid_lock);
> > +
> > + return r;
> > +}
>
> This really does need some comments; I have no idea what it's all trying to do here.
>
The code restricts the connection to a single daemon.
I added the lock above because, in some testing of connect/disconnect
cycles with a lot of events, I was receiving errors.
The pid is a holdover from older code. If it will cause issues for other
users, I can switch to using nlmsg_multicast (or genlmsg_multicast, should
I need to switch to the genl interface per the other comment) and remove
this code altogether.
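As a sketch only (DM_EVENT_GROUP is a made-up group number, errors are
ignored here; _dm_netlink_sock is the socket from the patch):

#include <linux/netlink.h>

#define DM_EVENT_GROUP 1	/* hypothetical multicast group */

static void dm_netlink_broadcast(struct sk_buff *skb)
{
	/* deliver to every listener bound to the group; no daemon
	 * pid bookkeeping required at all */
	netlink_broadcast(_dm_netlink_sock, skb, 0, DM_EVENT_GROUP,
			  GFP_ATOMIC);
}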
> > +static void dm_netlink_rcv(struct sock *sk, int len)
> > +{
> > + unsigned qlen = 0;
>
> stupid gcc.
>
> > +
> > + do
> > + netlink_run_queue(sk, &qlen, &dm_netlink_rcv_msg);
> > + while (qlen);
> > +
> > +}
>
> stray blank line there.
>
ok, removed.
> > +static int dm_netlink_rcv_event(struct notifier_block *this,
> > + unsigned long event, void *ptr)
> > +{
> > + struct netlink_notify *n = ptr;
> > +
> > + spin_lock(&_dm_netlink_pid_lock);
> > +
> > + if (event == NETLINK_URELEASE &&
> > + n->protocol == NETLINK_DM && n->pid &&
> > + n->pid == _dm_netlink_daemon_pid)
> > + _dm_netlink_daemon_pid = 0;
> > +
> > + spin_unlock(&_dm_netlink_pid_lock);
> > +
> > + return NOTIFY_DONE;
> > +}
> > +
> > +static struct notifier_block dm_netlink_notifier = {
> > + .notifier_call = dm_netlink_rcv_event,
> > +};
> > +
> > int __init dm_netlink_init(void)
> > {
> > int r;
> >
> > + r = netlink_register_notifier(&dm_netlink_notifier);
> > + if (r)
> > + return r;
> > +
> > + _dm_netlink_sock = netlink_kernel_create(NETLINK_DM, 0,
> > + dm_netlink_rcv, NULL,
> > + THIS_MODULE);
>
> I think we're supposed to use the genetlink APIs here. One for the net
> guys to check, please.
>
ok, I can switch if that is the new recommended method.
> > + if (!_dm_netlink_sock) {
> > + r = -ENOBUFS;
> > + goto notifier_out;
> > + }
> > r = dme_cache_init(&_dme_cache, DM_EVENT_SKB_SIZE);
> > - if (!r)
> > - DMINFO("version 1.0.0 loaded");
> > + if (r)
> > + goto socket_out;
> > +
> > + DMINFO("version 1.0.0 loaded");
> > +
> > + return 0;
> > +
> > +socket_out:
> > + sock_release(_dm_netlink_sock->sk_socket);
> > +notifier_out:
> > + netlink_unregister_notifier(&dm_netlink_notifier);
> > + DMERR("%s: dme_cache_init failed: %d", __FUNCTION__, r);
> >
> > return r;
> > }
> > @@ -100,4 +292,6 @@ int __init dm_netlink_init(void)
> > void dm_netlink_exit(void)
> > {
> > dme_cache_destroy(&_dme_cache);
> > + sock_release(_dm_netlink_sock->sk_socket);
> > + netlink_unregister_notifier(&dm_netlink_notifier);
> > }
> > Index: linux/drivers/md/dm-netlink.h
> > ===================================================================
> > --- linux.orig/drivers/md/dm-netlink.h 2007-07-11 21:37:50.000000000 +0100
> > +++ linux/drivers/md/dm-netlink.h 2007-07-11 21:37:51.000000000 +0100
> > @@ -21,19 +21,22 @@
> > #ifndef DM_NETLINK_H
> > #define DM_NETLINK_H
> >
> > -struct dm_event_cache;
> > +#include <linux/dm-netlink-if.h>
> > +
> > +struct dm_table;
> > struct mapped_device;
> > -struct dm_event {
> > - struct dm_event_cache *cdata;
> > - struct mapped_device *md;
> > - struct sk_buff *skb;
> > - struct list_head elist;
> > -};
> > +struct dm_event;
> > +
> > +void dm_event_add(struct mapped_device *md, struct list_head *elist);
> >
> > #ifdef CONFIG_DM_NETLINK
> >
> > int dm_netlink_init(void);
> > void dm_netlink_exit(void);
> > +void dm_netlink_send_events(struct list_head *events);
> > +
> > +void dm_path_event(enum dm_netlink_event_type evt_type, struct dm_table *t,
> > + const char *path, int nr_valid_paths);
> >
> > #else /* CONFIG_DM_NETLINK */
> >
> > @@ -44,6 +47,14 @@ static inline int __init dm_netlink_init
> > static inline void dm_netlink_exit(void)
> > {
> > }
> > +static void inline dm_netlink_send_events(struct list_head *events)
> > +{
> > +}
> > +static void inline dm_path_event(enum dm_netlink_event_type evt_type,
> > + struct dm_table *t, const char *path,
> > + int nr_valid_paths)
> > +{
> > +}
>
> Please use `static inline void', not `static void inline'. Just a
> consistency thing.
>
Done.
-andmike
--
Michael Anderson
andmike@us.ibm.com
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-11 21:34 ` [2.6.23 PATCH 14/18] dm: netlink add to core Andrew Morton
2007-07-12 0:40 ` Mike Anderson
@ 2007-07-12 15:07 ` Eric W. Biederman
2007-07-12 17:34 ` Mike Anderson
1 sibling, 1 reply; 8+ messages in thread
From: Eric W. Biederman @ 2007-07-12 15:07 UTC
To: Andrew Morton
Cc: Alasdair G Kergon, dm-devel, linux-kernel, Mike Anderson, netdev
Andrew Morton <akpm@linux-foundation.org> writes:
> On Wed, 11 Jul 2007 22:01:59 +0100
> Alasdair G Kergon <agk@redhat.com> wrote:
>
>> From: Mike Anderson <andmike@us.ibm.com>
>>
>> This patch adds support for the dm_path_event and dm_send_event functions,
>> which create and send netlink attribute events.
>>
>> ...
>>
>> --- linux.orig/drivers/md/dm-netlink.c 2007-07-11 21:37:50.000000000 +0100
>> +++ linux/drivers/md/dm-netlink.c 2007-07-11 21:37:51.000000000 +0100
>> @@ -40,6 +40,17 @@ struct dm_event_cache {
>>
>> static struct dm_event_cache _dme_cache;
>>
>> +struct dm_event {
>> + struct dm_event_cache *cdata;
>> + struct mapped_device *md;
>> + struct sk_buff *skb;
>> + struct list_head elist;
>> +};
>> +
>> +static struct sock *_dm_netlink_sock;
>> +static uint32_t _dm_netlink_daemon_pid;
>> +static DEFINE_SPINLOCK(_dm_netlink_pid_lock);
>
> The usage of this lock makes my head spin a bit. It's a shame it wasn't
> documented.
>
> There's obviously something very significant happening with process IDs in
> here. A description of the design would be helpful. Especially for the
> containerisation guys who no doubt will need to tear their hair out over it
> all ;)
Andrew, this actually has nothing to do with process IDs. The pid
referred to here is the netlink port number; it just happens to be called
a pid.
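(For reference, the field in question is nlmsg_pid in struct nlmsghdr,
from linux/netlink.h:)

struct nlmsghdr {
	__u32 nlmsg_len;	/* length of message including header */
	__u16 nlmsg_type;	/* message content */
	__u16 nlmsg_flags;	/* additional flags */
	__u32 nlmsg_seq;	/* sequence number */
	__u32 nlmsg_pid;	/* sending socket's port id, not a task pid */
};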
In this case netlink is being used to simply broadcast events.
I may be a little off, but looking at the event types defined (device
down, device up), defining a completely new interface for this looks
absolutely absurd.
This is device hotplug, isn't it? As such we should be using the
hotplug infrastructure and not reinventing the wheel here.
If it isn't hotplug it looks like something that inotify should
handle.
If that isn't the case I am fairly certain that md already has a
mechanism to handle this, and those two should stay in sync
if at all possible on this kind of thing.
So this appears to be a gratuitous user interface addition.
Why do we need a new user interface for this?
Eric
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-12 15:07 ` Eric W. Biederman
@ 2007-07-12 17:34 ` Mike Anderson
2007-07-12 19:37 ` Eric W. Biederman
0 siblings, 1 reply; 8+ messages in thread
From: Mike Anderson @ 2007-07-12 17:34 UTC
To: Eric W. Biederman
Cc: netdev, Andrew Morton, dm-devel, Alasdair G Kergon, linux-kernel
Eric W. Biederman <ebiederm@xmission.com> wrote:
> I may be a little off, but looking at the event types defined (device
> down, device up), defining a completely new interface for this looks
> absolutely absurd.
>
>
> This is device hotplug, isn't it? As such we should be using the
> hotplug infrastructure and not reinventing the wheel here.
>
I assume device hotplug means kobject_uevent and KOBJ_* events. The
original intent was to have a little more structure in the data format than
the env strings. I also wanted to reduce the number of allocations that were
happening with GFP_KERNEL to send an event.
Currently the patch supports only a couple of events, with the intent of
adding more over time. I see that I could map most events to KOBJ_CHANGE;
previously it did not seem like the correct fit.
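For illustration only, that KOBJ_CHANGE mapping might look something like
this (the DM_* env keys are invented for the sketch, and it assumes the
kobject_uevent_env() helper from recent kernels):

#include <linux/kobject.h>

static void dm_path_failed_uevent(struct kobject *kobj, const char *path,
				  int nr_valid_paths)
{
	char event[32], pathbuf[64], valid[32];
	char *envp[] = { event, pathbuf, valid, NULL };

	/* the structured event data becomes environment strings */
	snprintf(event, sizeof(event), "DM_EVENT=PATH_FAILED");
	snprintf(pathbuf, sizeof(pathbuf), "DM_PATH=%s", path);
	snprintf(valid, sizeof(valid), "DM_NR_VALID_PATHS=%d",
		 nr_valid_paths);

	kobject_uevent_env(kobj, KOBJ_CHANGE, envp);
}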
> If it isn't hotplug it looks like something that inotify should
> handle.
>
> If that isn't the case I am fairly certain that md already has a
> mechanism to handle this, and those two should stay in sync
> if at all possible on this kind of thing.
>
Device mapper does have an "event happened" interface today, but after the
event the user must determine its context (dm also sends a kobject_uevent
KOBJ_CHANGE, but only for a resume event). This patch affects only dm,
though I know md has similar infrastructure. It passes out through netlink
event context that already existed in the kernel but was lost through the
current generic event interface.
The existing event interface was left in place so as not to affect existing
users, allowing migration over to a netlink interface over time.
> So this appears to be a gratuitous user interface addition.
> Why do we need a new user interface for this?
While I understand Evgeniy's and David's comments about utilizing the
genetlink interface, I guess I do not see utilizing a netlink channel for a
subsystem as a gratuitous user interface addition vs. running everything
through kobject_uevent.
Thanks,
-andmike
--
Michael Anderson
andmike@us.ibm.com
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-12 17:34 ` Mike Anderson
@ 2007-07-12 19:37 ` Eric W. Biederman
2007-07-12 21:18 ` Alasdair G Kergon
2007-07-12 23:41 ` Mike Anderson
0 siblings, 2 replies; 8+ messages in thread
From: Eric W. Biederman @ 2007-07-12 19:37 UTC
To: Mike Anderson
Cc: Andrew Morton, Alasdair G Kergon, dm-devel, linux-kernel, netdev
Mike Anderson <andmike@us.ibm.com> writes:
> Eric W. Biederman <ebiederm@xmission.com> wrote:
>> I may be a little off, but looking at the event types defined (device
>> down, device up), defining a completely new interface for this looks
>> absolutely absurd.
>>
>>
>> This is device hotplug, isn't it? As such we should be using the
>> hotplug infrastructure and not reinventing the wheel here.
>>
>
> I assume device hotplug means kobject_uevent and KOBJ_* events. The
> original intent was to have a little more structure in the data format than
> the env strings. I also wanted to reduce the number of allocations that were
> happening with GFP_KERNEL to send an event.
>
> Currently the patch supports only a couple of events, with the intent of
> adding more over time. I see that I could map most events to KOBJ_CHANGE;
> previously it did not seem like the correct fit.
Yes, kobject_uevent is what I was thinking of.
Even if it means adding a few more event types to handle the situation, at
first glance it seems to fit well.
I know that if I were looking for a notification that something was added
to or removed from a raid array, I would look at the generic hotplug
events first.
Possibly KOBJ_CHANGE isn't the proper fit; possibly you have something
sufficiently unique that it needs its own enumeration type.
If so, we can fix the generic code.
At the moment, though, KOBJ_CHANGE does seem to describe what you are
doing, and the event data you have doesn't look like it would be a problem
to fit into the environment strings passed to userspace.
As for worrying about kmallocs: do these events happen often? I would
not expect any of them in the normal course of operation for a system.
Worst case, you handle the extra kmallocs with a library function.
It's not like you are using GFP_ATOMIC.
Plus, there are other little bonuses if these events are reported with
kobject_uevent, such as already existing userspace tools to look at them
and do something, and a scripting infrastructure to take action when
things happen.
>> If it isn't hotplug it looks like something that inotify should
>> handle.
>>
>> If that isn't the case I am fairly certain that md already has a
>> mechanism to handle this, and those two should stay in sync
>> if at all possible on this kind of thing.
>>
>
> Device mapper does have an "event happened" interface today, but after the
> event the user must determine its context (dm also sends a kobject_uevent
> KOBJ_CHANGE, but only for a resume event). This patch affects only dm,
> though I know md has similar infrastructure. It passes out through netlink
> event context that already existed in the kernel but was lost through the
> current generic event interface.
>
> The existing event interface was left in place so as not to affect existing
> users, allowing migration over to a netlink interface over time.
I know sending some of the context with an event is important so
that you can filter out events you don't need. Is the lack of context
actually a problem today?
>> So this appears to be a gratuitous user interface addition.
>> Why do we need a new user interface for this?
>
> While I understand Evgeniy's and David's comments about utilizing the
> genetlink interface, I guess I do not see utilizing a netlink channel for a
> subsystem as a gratuitous user interface addition vs. running everything
> through kobject_uevent.
Gratuitous in the sense that if we already have a mechanism for performing a
function we should not invent a new mechanism for doing that same function
for each different subsystem.
The fewer unique types of user interfaces we have, the easier it is to
maintain the kernel, the easier it is to write programs that work with
several kernel subsystems, and the easier it is for other developers to
understand and use the kernel.
This of course assumes things are a good fit, or the other pieces can be
made to be a good fit. Currently I don't see what makes device mapper's
events not fit the existing models, and I don't see in the description for
this patch why we need to invent something new.
Eric
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-12 19:37 ` Eric W. Biederman
@ 2007-07-12 21:18 ` Alasdair G Kergon
2007-07-12 23:41 ` Mike Anderson
1 sibling, 0 replies; 8+ messages in thread
From: Alasdair G Kergon @ 2007-07-12 21:18 UTC
To: Eric W. Biederman
Cc: Mike Anderson, netdev, linux-kernel, dm-devel, Andrew Morton,
Alasdair G Kergon
On Thu, Jul 12, 2007 at 01:37:43PM -0600, Eric W. Biederman wrote:
> This of course assumes things are a good fit, or the other pieces can be
> made to be a good fit. Currently I don't see what makes device mapper's
> events not fit the existing models, and I don't see in the description for
> this patch why we need to invent something new.
The existing event protocol used by device-mapper is trivial. It just
lets kernel device-mapper say to userspace 'Something happened on mapped
device X in which you might be interested: Check it out!'. Userspace
then runs various checks to work out what happened and decide whether
any action should be taken. Though simple, in practice this often
proves very inefficient.
We want to change the mechanism so device-mapper can pass an arbitrary
small payload to userspace to tell it exactly what happened immediately
so it no longer needs to waste time probing for things that could have
happened but kernel device-mapper knows didn't.
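As a rough sketch of what packing such a payload looks like with the
standard netlink attribute helper (the helper name is invented; the
attribute types are the ones from the patch's dm-netlink-if.h; skb
allocation, sizing, and the message header are elided):

#include <linux/string.h>
#include <net/netlink.h>

static int dm_pack_path_payload(struct sk_buff *skb, uint32_t seq,
				const char *name, const char *path,
				int32_t nr_valid_paths)
{
	/* each nla_put() appends one typed attribute to the skb */
	if (nla_put(skb, DM_EVENT_ATTR_SEQNUM, sizeof(seq), &seq) ||
	    nla_put(skb, DM_EVENT_ATTR_NAME, strlen(name) + 1, name) ||
	    nla_put(skb, DM_EVENT_ATTR_PATH, strlen(path) + 1, path) ||
	    nla_put(skb, DM_EVENT_ATTR_NUM_VALID_PATHS,
		    sizeof(nr_valid_paths), &nr_valid_paths))
		return -EMSGSIZE;
	return 0;
}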
These netlink patches are the first attempt to put such a mechanism in
place for a few specific multipath conditions. Sure, we *could*
shoe-horn this into extended dm ioctls but I see that as a last resort
if we can't make an existing mechanism do what we need.
Examples of events raised against device-mapper block devices that would
switch to whichever new mechanism is chosen:
- a new dm table has gone live (payload holds some basic info about
the new table)
- the dm device name has just been changed (new name in payload)
- a path in a multipath table has been marked as failed e.g. after
an I/O failure (payload says which path)
- a failed path in a multipath table has been reenabled (payload says
which path)
- a specific group of multipath paths has been bypassed (payload says
which group)
- I/O is being directed to a different group of multipath paths
(payload says which group)
- mirror recovery is complete
- a mirror's log device has failed
- a snapshot has been marked invalid (payload indicates reason)
Depending on the context, the events are caught by a userspace
device-mapper process (sometimes a long-lived daemon) - one that is
often locked in memory with preallocated buffers all set to process the
information and resolve the problem etc.
Alasdair
--
agk@redhat.com
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-12 0:40 ` Mike Anderson
@ 2007-07-12 23:13 ` Johannes Berg
0 siblings, 0 replies; 8+ messages in thread
From: Johannes Berg @ 2007-07-12 23:13 UTC
To: Mike Anderson
Cc: Andrew Morton, Alasdair G Kergon, dm-devel, linux-kernel, netdev
On Wed, 2007-07-11 at 17:40 -0700, Mike Anderson wrote:
> The pid is a holdover from older code. If it will cause issues for other
> users, I can switch to using nlmsg_multicast (or genlmsg_multicast, should
> I need to switch to the genl interface per the other comment) and remove
> this code altogether.
Another possible genl multicast user :)
Has anybody had a chance to look at my patches for this?
johannes
* Re: [2.6.23 PATCH 14/18] dm: netlink add to core
2007-07-12 19:37 ` Eric W. Biederman
2007-07-12 21:18 ` Alasdair G Kergon
@ 2007-07-12 23:41 ` Mike Anderson
1 sibling, 0 replies; 8+ messages in thread
From: Mike Anderson @ 2007-07-12 23:41 UTC
To: Eric W. Biederman
Cc: Andrew Morton, Alasdair G Kergon, dm-devel, linux-kernel, netdev
Eric W. Biederman <ebiederm@xmission.com> wrote:
> As for worrying about kmallocs: do these events happen often?
The worst case would most likely be in a dm multipath configuration, where
you could get a burst of N events (N being the number of LUNs times the
number of paths that are having an issue).
> I would
> not expect any of them in the normal course of operation for a system.
Yes, the ones that are part of this patch are unexpected events, or
recovery from an unexpected event.
> Worst case, you handle the extra kmallocs with a library function.
> It's not like you are using GFP_ATOMIC.
I was using GFP_ATOMIC because I did not want __GFP_IO: in some testing
there was a case where heavy filesystem I/O was going on, and an injected
error put the swap device into a temporarily queued condition while an
event was trying to be sent. I may need to go back and investigate this
case on recent kernels, as it has been a while since I ran that test.
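For reference, the flag trade-off as I understand it (the helper name is
invented; DM_EVENT_SKB_SIZE is from the patch):

#include <linux/skbuff.h>

/*
 * 2.6.x gfp.h, for reference:
 *   GFP_ATOMIC = __GFP_HIGH                        never sleeps
 *   GFP_NOIO   = __GFP_WAIT                        may sleep, no I/O
 *   GFP_KERNEL = __GFP_WAIT | __GFP_IO | __GFP_FS  may sleep, may
 *                                                  recurse into I/O
 * so GFP_NOIO is the middle ground if __GFP_IO is the only concern.
 */
static struct sk_buff *dm_event_skb_alloc(gfp_t gfp)
{
	return alloc_skb(DM_EVENT_SKB_SIZE, gfp);
}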
-andmike
--
Michael Anderson
andmike@us.ibm.com