public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] IB Netlink Interface and RDMA CM exports
@ 2010-11-14 16:12 Nir Muchtar
       [not found] ` <4CE00A72.30001-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Nir Muchtar @ 2010-11-14 16:12 UTC (permalink / raw)
  To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-hKgKHo2Ms0FWk0Htik3J/w

This patch set provides means for communicating internal data from IB modules
to the userspace. It is composed of three components:
1. Main ib_netlink module which is independent of IB modules (ib_netlink.ko).
2. "plug-in" modules per client IB module (only ib_netlink_rdma_cm.ko for now).
   Depends on (1) and (3). Their role is to keep (1) and (3) independent
   as well as choosing callbacks to call, based on the requested op.
   This doesn't actually happen in ib_netlink_rdma_cm.ko because at the moment,
   only one callback is implemented.
3. additional callbacks which are implemented inside existing IB modules.
   (only rdma_cm for now).
   No additional dependencies, and existing flows stay untouched.

At the moment the implementation is basic and generic.
ib_netlink uses the standard netlink module and defines a new netlink unit
(NETLINK_INFINIBAND) in netlink.h.
Upon receiving a request from userspace, it finds the target client using a
registration mechanism, allocates a raw buffer (skbuff) for the client IB module
to write its data on, and then, forwards the result back.
The size of the buffer space to be allocated is returned by the IB module,
which is also responsible to write no more than the given size.
The exact format of the returned data is unknown to ib_netlink itself.
It is shared between the kernel and userspace in the form of common headers.
The current choice of format is for reasons of simplicity.

A quick and dirty userspace demo application output+source is attached for reference.
Sample output:
Type  Device   Port  PID    Net_dev    Src Address          Dst Address          Space  State           QPN      
IB    mthca0   1     27404  ib0        192.168.168.3/7174   N/A                  TCP    LISTEN          0        
IB    mthca0   2     27415  ib1        192.168.2.3/7174     N/A                  TCP    LISTEN          0        
IB    mthca0   1     30     ib0        192.168.168.3/7174   192.168.168.2/57354  TCP    CONNECT         590854   
IB    mthca0   2     15     ib1        192.168.2.3/7174     192.168.2.4/33290    TCP    CONNECT         590855   

Source:
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>

#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include "rdma_cma.h"
#include "ib_netlink.h"

#include <sys/ioctl.h>
#include <net/if.h>
#include <net/if_arp.h>

/* Payload capacity, in bytes, of the one netlink buffer main() uses for both the request and the reply. */
#define MAX_PAYLOAD 1024  

/* Netlink addresses: main() binds src_addr to this process and points dest_addr at pid 0 (the kernel). */
struct sockaddr_nl src_addr, dest_addr;
/* Request/reply message buffer; main() allocates it with room for MAX_PAYLOAD bytes of payload. */
struct nlmsghdr *nlh = NULL;
/* Message descriptor handed to sendmsg()/recvmsg(); it refers to dest_addr and iov. */
struct msghdr msg;
/* Single I/O vector covering the nlh buffer. */
struct iovec iov;
/* Netlink socket descriptor opened by main(). */
int sock_fd;
/* Start of the reply payload (the data area that follows the netlink header). */
struct rdma_cm_stats *stats;
/* Device record currently being printed while main() walks the reply. */
struct rdma_cm_device_stats *cur_device_stats;
/* Id record currently being printed while main() walks the reply. */
struct rdma_cm_id_stats *cur_id_stats;
/* Parse cursor: address of the next record in the reply that has not been read yet. */
void *buff_head;
/* Loop counters used by main(): i iterates over devices, j over the ids of one device. */
int i, j;

/*
 * Translate a network interface index into the interface name.
 *
 * index: interface index to look up; 0 is treated as "no interface".
 *
 * Returns "N/A" when index is 0 or when the lookup fails. Otherwise returns a
 * pointer to the name held in a static buffer, which the next call overwrites,
 * so the caller must use or copy the result before calling again.
 */
char *get_ifname(int index)
{
	static struct ifreq req;
	int sock;
	int rc;

	/* Nothing to look up, so do not open a socket at all. */
	if (index == 0) {
		return "N/A";
	}

	/* SIOCGIFNAME only needs some socket to issue the ioctl on. */
	sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0) {
		fprintf(stderr, "socket failed for index %d\n", index);
		return "N/A";
	}

	req.ifr_ifindex = index;
	rc = ioctl(sock, SIOCGIFNAME, &req);
	/* The socket is per call; close it on every path so descriptors do not leak. */
	close(sock);
	if (rc < 0) {
		fprintf(stderr, "SIOCGIFNAME failed for index %d\n", index);
		return "N/A";
	}
	return req.ifr_name;
}

/*
 * Map a connection-manager state to the label shown in the "State" column.
 *
 * s: state value to describe.
 *
 * Returns a pointer to a constant string; any value that is not listed in the
 * table below yields "N/A".
 */
static const char *format_cma_state(enum cma_state s)
{
	/* Enumeration constants have type int, so an int column holds them exactly. */
	static const struct {
		int value;
		const char *label;
	} labels[] = {
		{ CMA_IDLE,           "IDLE" },
		{ CMA_ADDR_QUERY,     "ADDR_QUERY" },
		{ CMA_ADDR_RESOLVED,  "ADDR_RESOLVED" },
		{ CMA_ROUTE_QUERY,    "ROUTE_QUERY" },
		{ CMA_ROUTE_RESOLVED, "ROUTE_RESOLVED" },
		{ CMA_CONNECT,        "CONNECT" },
		{ CMA_DISCONNECT,     "DISCONNECT" },
		{ CMA_ADDR_BOUND,     "ADDR_BOUND" },
		{ CMA_LISTEN,         "LISTEN" },
		{ CMA_DEVICE_REMOVAL, "DEVICE_REMOVAL" },
		{ CMA_DESTROYING,     "DESTROYING" },
	};
	unsigned int k;

	for (k = 0; k < sizeof(labels) / sizeof(labels[0]); k++) {
		if (s == labels[k].value) {
			return labels[k].label;
		}
	}
	return "N/A";
}

/*
 * Map an RDMA port space to the label shown in the "Space" column.
 *
 * ps: port space value to describe.
 *
 * Returns a pointer to a constant string; values other than the four handled
 * here yield "N/A".
 */
static const char *format_port_space(enum rdma_port_space ps)
{
	if (ps == RDMA_PS_SDP) {
		return "SDP";
	}
	if (ps == RDMA_PS_IPOIB) {
		return "IPOIB";
	}
	if (ps == RDMA_PS_TCP) {
		return "TCP";
	}
	if (ps == RDMA_PS_UDP) {
		return "UDP";
	}
	return "N/A";
}

/*
 * Map a node type to the label shown in the "Type" column.
 *
 * nt: value to describe.
 *
 * Returns "IB" for ARPHRD_INFINIBAND, "IW" for ARPHRD_ETHER and "N/A" for
 * anything else.
 *
 * NOTE(review): the parameter is declared as enum rdma_node_type but is
 * compared against ARPHRD_* hardware-type constants - confirm which value
 * space the caller really passes.
 */
static const char *format_node_type(enum rdma_node_type nt)
{
	return (nt == ARPHRD_INFINIBAND) ? "IB"
	     : (nt == ARPHRD_ETHER)      ? "IW"
	     :                             "N/A";
}

/*
 * Render a socket address as "a.b.c.d/port" into buff.
 *
 * addr: address to print; it is read as a struct sockaddr_in without looking
 *       at the address family.
 * buff: destination buffer. No size is passed in, so the caller must supply
 *       enough room for the dotted-quad address, '/', the port and the NUL.
 *
 * An all-zero IPv4 address is printed as "N/A". Always returns 0.
 */
static int format_address(struct sockaddr *addr, char *buff)
{
	struct sockaddr_in *ipv4 = (struct sockaddr_in *)addr;

	/* A zero address means "not set" for this report. */
	if (!ipv4->sin_addr.s_addr) {
		sprintf(buff, "N/A");
		return 0;
	}

	sprintf(buff, "%s/%d", inet_ntoa(ipv4->sin_addr), ntohs(ipv4->sin_port));
	return 0;
}

int main()
{
	char tmp_buff[64];

	sock_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_INFINIBAND);
	if (sock_fd < 0) {
		printf("Failed to create socket. Error: %s (%d)\n", strerror(errno), errno);
		return -1;
	}

	memset(&src_addr, 0, sizeof(src_addr));
	src_addr.nl_family = AF_NETLINK;
	src_addr.nl_pid = getpid();  /* self pid */
	src_addr.nl_groups = 0;  /* not in mcast groups */
	bind(sock_fd, (struct sockaddr*)&src_addr, sizeof(src_addr));

	memset(&dest_addr, 0, sizeof(dest_addr));
	dest_addr.nl_family = AF_NETLINK;
	dest_addr.nl_pid = 0;   /* For Linux Kernel */
	dest_addr.nl_groups = 0; /* unicast */

	nlh=(struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
	/* Fill the netlink message header */
	nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
	nlh->nlmsg_pid = getpid();  /* self pid */
	nlh->nlmsg_flags = NLM_F_REQUEST;
	nlh->nlmsg_type = IBNL_GET_TYPE(IBNL_RDMA_CM, IBNL_RDMA_CM_STATS);

	iov.iov_base = (void *)nlh;
	iov.iov_len = nlh->nlmsg_len;
	msg.msg_name = (void *)&dest_addr;
	msg.msg_namelen = sizeof(dest_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	sendmsg(sock_fd, &msg, 0);

	memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
	recvmsg(sock_fd, &msg, 0);
	stats = NLMSG_DATA(nlh);
	buff_head = stats + 1;
	for (i = 0; i < stats->num_devices; i++) {
		cur_device_stats = buff_head;
		buff_head = cur_device_stats + 1;
		printf("%-5s %-8s %-5s %-6s %-10s %-20s %-20s %-6s %-15s %-8s \n",
			"Type", "Device", "Port", "PID", "Net_dev", "Src Address",
			"Dst Address", "Space", "State", "QPN");
		for (j = 0; j < cur_device_stats->num_ids; j++) {
			cur_id_stats = buff_head;
			buff_head = cur_id_stats + 1;
			printf("%-5s %-8s %-5d %-6u %-10s ", 
				format_node_type(cur_id_stats->nt), 
				cur_device_stats->name, 
				cur_id_stats->port_num,
				cur_id_stats->pid,
				get_ifname(cur_id_stats->bound_dev_if));
			format_address(&cur_id_stats->local_addr, tmp_buff);
			printf("%-20s ",tmp_buff);
			format_address(&cur_id_stats->remote_addr, tmp_buff);
			printf("%-20s ",tmp_buff);
			printf("%-6s %-15s %-8d \n",
				format_port_space(cur_id_stats->ps),
				format_cma_state(cur_id_stats->cma_state),
				cur_id_stats->qp_num);
		}
	}
	close(sock_fd);
	return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found] ` <4CE00A72.30001-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-11-15  3:19   ` Jason Gunthorpe
       [not found]     ` <20101115031904.GA3566-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
       [not found]     ` <1289836238.2258.1088.camel@nirm-desktop>
  0 siblings, 2 replies; 13+ messages in thread
From: Jason Gunthorpe @ 2010-11-15  3:19 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	monis-smomgflXvOZWk0Htik3J/w, ogerlitz-hKgKHo2Ms0FWk0Htik3J/w

On Sun, Nov 14, 2010 at 06:12:34PM +0200, Nir Muchtar wrote:
> This patch set provides means for communicating internal data from IB modules
> to the userspace.It is composed of three components:

I would like to review this more closely when I am not at a trade
show, but thanks for getting this done! IMHO some netlink support can
make a big difference to the visibility of the IB stuff.

> 1. Main ib_netlink module which is independent of IB modules.(ib_netlink.ko).
> 2. "plug-in" modules per client IB module.(only ib_netlink_rdma_cm.ko for now).
>    Depends on (1) and (3). Their role is to keep (1) and (3) independent
>    as well as choosing callbacks to call, based on the requested op.
>    This doesn't actually happen in ib_netlink_rdma_cm.ko because at the moment,
>    only one callback is implemented.
> 3. additional callbacks which are implemented inside existing IB modules.
>    (only rdma_cm for now).
>    No additional dependencies, and existing flows stay untouched.

I'd really prefer that this not be separate modules. I think it would
be good to stick the core stuff as part of ib_uverbs. Especially since
it doesn't look too big. For embedded you can have a
CONFIG_RDMA_NETLINK or something. (For embedded I think it would use
less memory to be able to forgo sysfs and use netlink entirely, someday.)

Ideally I think the netlink schema should build up from QPs and add on
IB CM, and IB RDMA CM information separately as appropriate. Getting
info on non-CM QPs is very important as well, IMHO. Maybe the first
cut only reports the RDMA CM QPs but the schema should support
reporting everything. 

I'll comment on what you have specifically later, but just a quick
glance makes me wonder if you reviewed how the 'ss' program exchanges
very similar information over netlink for IP sockets when you designed
this??

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]     ` <20101115031904.GA3566-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-11-15 15:56       ` Nir Muchtar
  2010-11-16  6:26         ` Jason Gunthorpe
  0 siblings, 1 reply; 13+ messages in thread
From: Nir Muchtar @ 2010-11-15 15:56 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	monis-smomgflXvOZWk0Htik3J/w, Or Gerlitz


> I'd really prefer that this not be seperate modules. I think it would
> be good to stick the core stuff as part of ib_uverbs. Especially since
> it doesn't look too big. For embedded you can have a
> CONFIG_RDMA_NETLINK or something. (For embedded I think it would use
> less memory to be able to forgo sysfs and use netlink entirely, someday.)
> 
Well, the main reason for the module separation is to allow
extensibility and independence.
Code separation is just a bonus. I like the idea of having the runtime
option to use this interface (or not).
A part of the patch is CONFIG_IB_NETLINK, which compiles all of the new
modules. 
What I wanted to achieve is an IB independent infrastructure that can be
used 
in parts. I.e a plugin for every module (the first example is rdma_cm).
This way only the modules of interest are joined to the infrastructure. 
The necessity of this flexibility can be examined of course.
> Ideally I think the netlink schema should build up from QPs and add on
> IB CM, and IB RDMA CM information seperately as appropriate. Getting
> info on non-CM QPs is very important as well, IMHO. Maybe the first
> cut only reports the RDMA CM QPs but the schema should support
> reporting everything. 
> 
I agree. That's one of the main goals.
For example, I have plans for adding ipoib exports as well as other
ideas.
> I'll comment on what you have specifically later, but just a quick
> glance makes me wonder if you reviewed how the 'ss' program exchanges
> very similar information over netlink for IP sockets when you designed
> this??
> 
> Jason
Yes I have actually. Some ideas are from NETLINK_INET_DIAG which is the
back-end for ss.
There are a few differences here that made the result different. 
I'd say this is a mix between NETLINK_INET_DIAG and NETLINK_NETFILTER.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
  2010-11-15 15:56       ` Nir Muchtar
@ 2010-11-16  6:26         ` Jason Gunthorpe
       [not found]           ` <20101116062602.GA488-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Gunthorpe @ 2010-11-16  6:26 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	monis-smomgflXvOZWk0Htik3J/w, Or Gerlitz

On Mon, Nov 15, 2010 at 05:56:44PM +0200, Nir Muchtar wrote:

> > I'd really prefer that this not be seperate modules. I think it would
> > be good to stick the core stuff as part of ib_uverbs. Especially since
> > it doesn't look too big. For embedded you can have a
> > CONFIG_RDMA_NETLINK or something. (For embedded I think it would use
> > less memory to be able to forgo sysfs and use netlink entirely, someday.)

> Well, the main reason for the module separation is to allow
> extensibility and independence.

Those are orthogonal issues, you can keep a pluggable API without
having modules around it.

> Code separation is just a bonus. I like the idea of having the runtime
> option to use this interface (or not).

Well, I do not like that option at all. This means you can't rely on
netlink being available so people won't use it. If you have to rely on
an admin to add a bunch of module names to /etc/modules then you have
already lost. IMHO. Modules are best used when auto-detection is
possible, other cases are troublesome. I've already seen that (for
instance) using ib_ucm's interface is virtually impossible because
most sites don't load the module, and the end-users that want to use
the software that relies on it can't get an admin to install it.

More non-automatic modules == bad.

> What I wanted to achieve is an IB independent infrastructure that can be
> used in parts. 

That doesn't seem useful, what is really needed here is an RDMA
*dependent* netlink interface. Ie I think your plug-in point is at the
wrong place, ib_verbs should be enumerating all QPs and calling back
to the code that owns them to fill in additional information for that
QP. Ie SRP can annotate what the host ID is, RDMA-CM can include the
IP address, IB-CM can include the PRs/etc/etc

> This way only the modules of interest are joined to the infrastructure. 
> The necessity of this flexibility can be examined of course.

*shrug* why would anyone care except for embedded?

> I agree. That's one of the main goals.
> For example, I have plans for adding ipoib exports as well as other
> ideas.

I have a patch someplace that exports the IPOIB path as part of the
normal netlink neighbour dump, which, IMHO, is appropriate for most
IPOIB information. I can send it to you if you like. A similar
approach can be done for the multicast paths. The locking was
problematic which is why it never was sent to the list..

> > I'll comment on what you have specifically later, but just a quick
> > glance makes me wonder if you reviewed how the 'ss' program exchanges
> > very similar information over netlink for IP sockets when you designed
> > this??

> Yes I have actually. Some ideas are from NETLINK_INET_DIAG which is the
> back-end for ss.
> There are a few differences here that made the result different. 
> I'd say this is a mix between NETLINK_INET_DIAG and NETLINK_NETFILTER.

Well, I don't see it, your code should have calls to
netlink_dump_start, and lots of calls to 
RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))

ie the NETLINK_INET_DIAG reply is returned as a series of netlink
messages for inet_diag_msg structures with sub structures of things
like INET_DIAG_INFO/INET_DIAG_VEGASINFO/etc terminated by NLMSG_DONE.

What you have done is just concatenate rdma_cm_id_stats structures,
which is not extensible, doesn't have natural netlink message
boundaries to let userspace re-call recv, and introduces a 32/64 bit
issue (which is a big no-go).

So for QP's I'd imagine similar, a netlink message for each QP. Basic
information like QPN and RDMA device ID, then sub-structures like
QP_RDMA_CM (port numbers and IP addresses), QP_IB_CM (path records),
etc that include additional information provided by that service.

And like I said, maybe today you only dump the RDMACM table, but
the userspace API should be built to dump the entire QP table and
support QPs created without CM, with UCM, and with RDMACM, which is a
trivial API to build if you use netlink the way it was meant to be used.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]           ` <20101116062602.GA488-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-11-16 15:57             ` Nir Muchtar
  2010-11-17 21:34               ` Roland Dreier
  0 siblings, 1 reply; 13+ messages in thread
From: Nir Muchtar @ 2010-11-16 15:57 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	monis-smomgflXvOZWk0Htik3J/w, Or Gerlitz

On Mon, 2010-11-15 at 23:26 -0700, Jason Gunthorpe wrote:
> > Code separation is just a bonus. I like the idea of having the runtime
> > option to use this interface (or not).
> 
> Well, I do not like that option at all. This means you can't rely on
> netlink being available so people won't use it. If you have to rely on
> an admin to add a bunch of module names to /etc/modules then you have
> already lost. IMHO. Modules are best used when auto-detection is
> possible, other cases are troublesome. I've already seen that (for
> instance) using ib_ucm's interface is virtually impossible because
> most sites don't load the module, and the end-users that want to use
> the software that relies on it can't get an admin to install it.
> 
> More non-automatic modules == bad.
I was under the impression that a different module is appropriate here
based on the structure of the IB modules and similar modules like
netfilter_netlink, and other architectures in the kernel. However, I'm
naturally open to other views. If the other way around is the consensus
nowadays, and I'm in minority here, then certainly, things can be done
the other way. I'd be happy to hear other views to concur/reject my
view.
> 
> > What I wanted to achieve is an IB independent infrastructure that can be
> > used in parts. 
> 
> That doesn't seem useful, what is really needed here is an RDMA
> *dependent* netlink interface. Ie I think your plug-in point is at the
> wrong place, ib_verbs should be enumerating all QPs and calling back
> to the code that owns them to fill in additional information for that
> QP. Ie SRP can annotate what the host ID is, RDMA-CM can include the
> IP address, IB-CM can include the PRs/etc/etc

What do you mean by RDMA dependent? RDMA CM? All QP's that are related
to RDMA CM?
Also, why should we limit ourselves to QP related information only? What
if we need access to other data? (Even HW specific)

> > Yes I have actually. Some ideas are from NETLINK_INET_DIAG which is the
> > back-end for ss.
> > There are a few differences here that made the result different. 
> > I'd say this is a mix between NETLINK_INET_DIAG and NETLINK_NETFILTER.
> 
> Well, I don't see it, your code should have calls to
> netlink_dump_start, and lots of calls to 
> RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
> 
> ie the NETLINK_INET_DIAG reply is returend as a series of netlink
> messages for inet_diag_msg structures with sub structures of things
> like INET_DIAG_INFO/INET_DIAG_VEGASINFO/etc terminated by NLMSG_DONE.
> 
> What you have done is just concatenate rdma_cm_id_stats structures,
> which is not extensible, doesn't have natural netlink message
> boundaries to let userspace re-call recv, and introduces a 32/64 bit
> issue (which is an big no-go).

I can change the message transport itself to behave like inet_diag if
that is acceptable. It will add some complexity to the infrastructure
and callbacks and make plugins harder to add, but I agree that it will
present a cleaner API to the userspace. 

Thanks for your input,
Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
  2010-11-16 15:57             ` Nir Muchtar
@ 2010-11-17 21:34               ` Roland Dreier
       [not found]                 ` <aday68riqhm.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Roland Dreier @ 2010-11-17 21:34 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: Jason Gunthorpe, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	Or Gerlitz

 > I was under the impression that a different module is appropriate here
 > based on the structure of the IB modules and similar modules like
 > netfilter_netlink, and other architectures in the kernel. However, I'm
 > naturally open to other views. If the other way around is the consensus
 > nowadays, and I'm in minority here, then certainly, things can be done
 > the other way. I'd be happy to hear other views to concur/reject my
 > view.

I think we should probably put this in the core module rather than
creating yet another module.  I don't really see what the advantage of
separating netlink into its own module is.

We probably over-modularized the RDMA stack early on, it might in fact
make sense to collapse things down from where they are now.

In fact I'm not sure even for embedded it's worth making this something
that can be configured out.  We probably have too many config options
that no one ever uses as it is.  Are there any embedded systems that are
both so small that a few K of code matters and also use RDMA?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]     ` <1289836238.2258.1088.camel@nirm-desktop>
@ 2010-11-17 21:39       ` Roland Dreier
       [not found]         ` <adapqu3iq9i.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Roland Dreier @ 2010-11-17 21:39 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: Jason Gunthorpe, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-hKgKHo2Ms0FWk0Htik3J/w

 > Well, the main reason for the module separation is to allow
 > extensibility and independence.

I don't understand how the physical location of object code has anything
to do with extensibility and independence?  ie what is the relevance of
what .ko file the netlink support code sits in?

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]                 ` <aday68riqhm.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2010-11-18  7:18                   ` Jason Gunthorpe
       [not found]                     ` <20101118071828.GA19281-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
  2010-11-18 14:23                   ` Or Gerlitz
  1 sibling, 1 reply; 13+ messages in thread
From: Jason Gunthorpe @ 2010-11-18  7:18 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Nir Muchtar, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	Or Gerlitz

On Wed, Nov 17, 2010 at 01:34:29PM -0800, Roland Dreier wrote:

> In fact I'm not sure even for embedded it's worth making this something
> that can be configured out.  We probably have too many config options
> that no one ever uses as it is.  Are there any embedded systems that are
> both so small that a few K of code matters and also use RDMA?

I experimented with re-tasking rxe to act as a soft-IB. The only
reason to do this was to run IPOIB, and the system was very small. So
in that instance I did benefit from running without many of the IB
modules - but I would have been entirely happy with a CONFIG_XX
controlled by CONFIG_EMBEDDED, since the system didn't use modules at
all anyhow.

I agree that there are too many IB modules - the main problem is that
they do not demand-load. Just today I had to walk someone through
loading rdma_ucm because the system was brand new Ubuntu and there is
no automatic script to load them.

There is sort of a general failing of the module dependency/autoload
system here that would be awesome to fix..

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]                     ` <20101118071828.GA19281-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
@ 2010-11-18 13:09                       ` Nir Muchtar
  2010-11-18 20:44                         ` Roland Dreier
  0 siblings, 1 reply; 13+ messages in thread
From: Nir Muchtar @ 2010-11-18 13:09 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Roland Dreier, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	Or Gerlitz

On Thu, 2010-11-18 at 00:18 -0700, Jason Gunthorpe wrote:
> On Wed, Nov 17, 2010 at 01:34:29PM -0800, Roland Dreier wrote:
> 
> > In fact I'm not sure even for embedded it's worth making this something
> > that can be configured out.  We probably have too many config options
> > that no one ever uses as it is.  Are there any embedded systems that are
> > both so small that a few K of code matters and also use RDMA?
> 
> I experimented with re-tasking rxe to act as a soft-IB. The only
> reason to do this was to run IPOIB, and the system was very small. So
> in that instance I did benefit from running without may of the IB
> modules - but I would have been entirely happy with a CONFIG_XX
> controlled by CONFIG_EMBEDDED, since the system didn't use modules at
> all anyhow.
> 
So, in an attempt to arrive at a consensus:
1. ib_netlink will not be in a different module. Instead it will be a
part of ib_uverbs.
2. I will rework the message serialization in both sides to use nl
attributes and to be transported one record at a time.
3. For now, there will be no config option for ib_netlink.

Is that acceptable? Are there any more issues to attend to?

Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]         ` <adapqu3iq9i.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
@ 2010-11-18 13:24           ` Nir Muchtar
  0 siblings, 0 replies; 13+ messages in thread
From: Nir Muchtar @ 2010-11-18 13:24 UTC (permalink / raw)
  To: Roland Dreier
  Cc: Jason Gunthorpe, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	ogerlitz-hKgKHo2Ms0FWk0Htik3J/w

On Wed, 2010-11-17 at 13:39 -0800, Roland Dreier wrote:
> > Well, the main reason for the module separation is to allow
>  > extensibility and independence.
> 
> I don't understand how the physical location of object code has anything
> to do with extensibility and independence?  ie what is the relevance of
> what .ko file the netlink support code sits in?
> 
>  - R.
Using a different ib_netlink module is an easy way to keep other IB
modules independent of the linux netlink mechanism and allows for
runtime loading and unloading of the netlink interface separately for
each possible IB module. (RDMA CM in the current patch set)

Anyway, I realize now that these qualities are not very desirable (by
anyone but me apparently...), so I'll rework the patch set.

Thanks,
Nir

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]                 ` <aday68riqhm.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
  2010-11-18  7:18                   ` Jason Gunthorpe
@ 2010-11-18 14:23                   ` Or Gerlitz
       [not found]                     ` <4CE536CE.6010705-smomgflXvOZWk0Htik3J/w@public.gmane.org>
  1 sibling, 1 reply; 13+ messages in thread
From: Or Gerlitz @ 2010-11-18 14:23 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: Roland Dreier, Jason Gunthorpe, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w

Roland Dreier wrote:
> I think we should probably put this in the core module rather than creating yet another module.  I don't really see what the advantage of separating netlink into its own module is.
If we're going on putting the netlink code in an existing module, 
ib_core would fit better, since there are occasions  where uverbs will 
not be loaded but still we'd like very much (...) to trace things such 
as rdma_cm connections made by kernel ULPs such as nfs-rdma, iser, rds 
and alike.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
       [not found]                     ` <4CE536CE.6010705-smomgflXvOZWk0Htik3J/w@public.gmane.org>
@ 2010-11-18 16:13                       ` Jason Gunthorpe
  0 siblings, 0 replies; 13+ messages in thread
From: Jason Gunthorpe @ 2010-11-18 16:13 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Nir Muchtar, Roland Dreier, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w

On Thu, Nov 18, 2010 at 04:23:10PM +0200, Or Gerlitz wrote:
> Roland Dreier wrote:
> >I think we should probably put this in the core module rather than 
> >creating yet another module.  I don't really see what the advantage of 
> >separating netlink into its own module is.

> If we're going on putting the netlink code in an existing module, 
> ib_core would fit better, since there are occasions  where uverbs will 
> not be loaded but still we'd like very much (...) to trace things such 
> as rdma_cm connections made by kernel ULPs such as nfs-rdma, iser, rds 
> and alike.

Nir's patch is really in two parts -
 1) The creation of NETLINK_INFINIBAND and associated dispatch of
    ops carried on the class
 2) The processing of IBNL_RDMA_CM_STATS op

The #1 should live in ib_core, but #2 should have parts in ib_verbs,
ib_uverbs, rdma_cm, and ib_cm, at least. To get a complete picture of
the QPs we will need information from all those places ..

What I would like to see is for ib_verbs to own the IBNL_RDMA_CM_STATS
op, to walk the QP list and call out to the other modules to add
appropriate netlink attributes.

Some other ops that would be great to see in future:
 - Report CQ status and information - enough to correlate with lsof
 - Report RDMA devices and the information we see in sysfs today
 - Report IPOIB path information through the usual IP netlink
 - netlink notifications for IB events, like port up/down/etc

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] IB Netlink Interface and RDMA CM exports
  2010-11-18 13:09                       ` Nir Muchtar
@ 2010-11-18 20:44                         ` Roland Dreier
  0 siblings, 0 replies; 13+ messages in thread
From: Roland Dreier @ 2010-11-18 20:44 UTC (permalink / raw)
  To: Nir Muchtar
  Cc: Jason Gunthorpe, rolandd-FYB4Gu1CFyUAvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, monis-smomgflXvOZWk0Htik3J/w,
	Or Gerlitz

 > 1. ib_netlink will not be in a different module. Instead it will be a
 > part of ib_uverbs.

Not sure what Jason thinks, but I would say ib_core -- we would want to
use this independent of user verbs.

 - R.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-11-18 20:44 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-11-14 16:12 [PATCH 0/3] IB Netlink Interface and RDMA CM exports Nir Muchtar
     [not found] ` <4CE00A72.30001-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-11-15  3:19   ` Jason Gunthorpe
     [not found]     ` <20101115031904.GA3566-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-11-15 15:56       ` Nir Muchtar
2010-11-16  6:26         ` Jason Gunthorpe
     [not found]           ` <20101116062602.GA488-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-11-16 15:57             ` Nir Muchtar
2010-11-17 21:34               ` Roland Dreier
     [not found]                 ` <aday68riqhm.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2010-11-18  7:18                   ` Jason Gunthorpe
     [not found]                     ` <20101118071828.GA19281-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
2010-11-18 13:09                       ` Nir Muchtar
2010-11-18 20:44                         ` Roland Dreier
2010-11-18 14:23                   ` Or Gerlitz
     [not found]                     ` <4CE536CE.6010705-smomgflXvOZWk0Htik3J/w@public.gmane.org>
2010-11-18 16:13                       ` Jason Gunthorpe
     [not found]     ` <1289836238.2258.1088.camel@nirm-desktop>
2010-11-17 21:39       ` Roland Dreier
     [not found]         ` <adapqu3iq9i.fsf-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
2010-11-18 13:24           ` Nir Muchtar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox