Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] IPv6 support for NFS server
From: J. Bruce Fields @ 2008-01-17 20:38 UTC (permalink / raw)
  To: Aurélien Charbon; +Cc: netdev ML, Brian Haley, Mailing list NFSv4
In-Reply-To: <478F8AA3.9060707@ext.bull.net>

On Thu, Jan 17, 2008 at 06:04:35PM +0100, Aurélien Charbon wrote:
> Hi Bruce.
>
> Thanks for your comments.
> Here is the patch with some cleanups.

Thanks for the revisions.  We need to submit this with a patch comment
that:

	- Explains more precisely what this does (fixes export
	  interfaces to allow IPv6) and what remains to be done (?)
	- Credits the folks (like Brian Haley) who have provided
	  feedback.

I'll help clean up that comment if needed but please make sure it's
always included with the patch when you resend it.

--b.

>
> Regards,
> Aurélien
>
>
> -- 
>
> ********************************
>       Aurelien Charbon
>       Linux NFSv4 team
>           Bull SAS
>     Echirolles - France
> http://nfsv4.bullopensource.org/
> ********************************
>

> >From 51755892e19186cd18230bac3f783b0382bf9ae0 Mon Sep 17 00:00:00 2001
> From: Aurelien Charbon <aurelien.charbon@ext.bull.net>
> Date: Thu, 17 Jan 2008 14:55:03 +0100
> Subject: [PATCH 1/1] IPv6 support for NFS server
> 
> ---
>  fs/nfsd/export.c               |    9 ++-
>  fs/nfsd/nfsctl.c               |   15 +++++-
>  include/linux/sunrpc/svcauth.h |    4 +-
>  include/net/ipv6.h             |    9 +++
>  net/sunrpc/svcauth_unix.c      |  118 +++++++++++++++++++++++++++-------------
>  5 files changed, 110 insertions(+), 45 deletions(-)
> 
> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
> index 66d0aeb..208db3a 100644
> --- a/fs/nfsd/export.c
> +++ b/fs/nfsd/export.c
> @@ -35,6 +35,7 @@
>  #include <linux/lockd/bind.h>
>  #include <linux/sunrpc/msg_prot.h>
>  #include <linux/sunrpc/gss_api.h>
> +#include <net/ipv6.h>
>  
>  #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
>  
> @@ -1556,6 +1557,7 @@ exp_addclient(struct nfsctl_client *ncp)
>  {
>  	struct auth_domain	*dom;
>  	int			i, err;
> +	struct in6_addr addr6;
>  
>  	/* First, consistency check. */
>  	err = -EINVAL;
> @@ -1574,9 +1576,10 @@ exp_addclient(struct nfsctl_client *ncp)
>  		goto out_unlock;
>  
>  	/* Insert client into hashtable. */
> -	for (i = 0; i < ncp->cl_naddr; i++)
> -		auth_unix_add_addr(ncp->cl_addrlist[i], dom);
> -
> +	for (i = 0; i < ncp->cl_naddr; i++) {
> +		ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
> +		auth_unix_add_addr(&addr6, dom);
> +	}
>  	auth_unix_forget_old(dom);
>  	auth_domain_put(dom);
>  
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 77dc989..13d6b6b 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -37,6 +37,7 @@
>  #include <linux/nfsd/syscall.h>
>  
>  #include <asm/uaccess.h>
> +#include <net/ipv6.h>
>  
>  /*
>   *	We have a single directory with 9 nodes in it.
> @@ -222,6 +223,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
>  	struct auth_domain *clp;
>  	int err = 0;
>  	struct knfsd_fh *res;
> +	struct in6_addr in6;
>  
>  	if (size < sizeof(*data))
>  		return -EINVAL;
> @@ -236,7 +238,11 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
>  	res = (struct knfsd_fh*)buf;
>  
>  	exp_readlock();
> -	if (!(clp = auth_unix_lookup(sin->sin_addr)))
> +
> +	ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
> +
> +	clp = auth_unix_lookup(&in6);
> +	if (!clp)
>  		err = -EPERM;
>  	else {
>  		err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen);
> @@ -257,6 +263,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
>  	int err = 0;
>  	struct knfsd_fh fh;
>  	char *res;
> +	struct in6_addr in6;
>  
>  	if (size < sizeof(*data))
>  		return -EINVAL;
> @@ -271,7 +278,11 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
>  	res = buf;
>  	sin = (struct sockaddr_in *)&data->gd_addr;
>  	exp_readlock();
> -	if (!(clp = auth_unix_lookup(sin->sin_addr)))
> +
> +	ipv6_addr_set_v4mapped(sin->sin_addr.s_addr,&in6);
> +
> +	clp = auth_unix_lookup(&in6);
> +	if (!clp)
>  		err = -EPERM;
>  	else {
>  		err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE);
> diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
> index 22e1ef8..9e6fb86 100644
> --- a/include/linux/sunrpc/svcauth.h
> +++ b/include/linux/sunrpc/svcauth.h
> @@ -120,10 +120,10 @@ extern void	svc_auth_unregister(rpc_authflavor_t flavor);
>  
>  extern struct auth_domain *unix_domain_find(char *name);
>  extern void auth_domain_put(struct auth_domain *item);
> -extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom);
> +extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom);
>  extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
>  extern struct auth_domain *auth_domain_find(char *name);
> -extern struct auth_domain *auth_unix_lookup(struct in_addr addr);
> +extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr);
>  extern int auth_unix_forget_old(struct auth_domain *dom);
>  extern void svcauth_unix_purge(void);
>  extern void svcauth_unix_info_release(void *);
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index ae328b6..9394710 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -400,6 +400,15 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
>  		 a->s6_addr32[2] == htonl(0x0000ffff));
>  }
>  
> +static inline void ipv6_addr_set_v4mapped(const __be32 addr,
> +					  struct in6_addr *v4mapped)
> +{
> +	ipv6_addr_set(v4mapped,
> +			0, 0,
> +			htonl(0x0000FFFF),
> +			addr);
> +}
> +
>  /*
>   * find the first different bit between two addresses
>   * length of address must be a multiple of 32bits
> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
> index 4114794..10ba208 100644
> --- a/net/sunrpc/svcauth_unix.c
> +++ b/net/sunrpc/svcauth_unix.c
> @@ -11,7 +11,8 @@
>  #include <linux/hash.h>
>  #include <linux/string.h>
>  #include <net/sock.h>
> -
> +#include <net/ipv6.h>
> +#include <linux/kernel.h>
>  #define RPCDBG_FACILITY	RPCDBG_AUTH
>  
>  
> @@ -84,7 +85,7 @@ static void svcauth_unix_domain_release(struct auth_domain *dom)
>  struct ip_map {
>  	struct cache_head	h;
>  	char			m_class[8]; /* e.g. "nfsd" */
> -	struct in_addr		m_addr;
> +	struct in6_addr		m_addr;
>  	struct unix_domain	*m_client;
>  	int			m_add_change;
>  };
> @@ -112,12 +113,19 @@ static inline int hash_ip(__be32 ip)
>  	return (hash ^ (hash>>8)) & 0xff;
>  }
>  #endif
> +static inline int hash_ip6(struct in6_addr ip)
> +{
> +	return (hash_ip(ip.s6_addr32[0]) ^
> +		hash_ip(ip.s6_addr32[1]) ^
> +		hash_ip(ip.s6_addr32[2]) ^
> +		hash_ip(ip.s6_addr32[3]));
> +}
>  static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
>  {
>  	struct ip_map *orig = container_of(corig, struct ip_map, h);
>  	struct ip_map *new = container_of(cnew, struct ip_map, h);
>  	return strcmp(orig->m_class, new->m_class) == 0
> -		&& orig->m_addr.s_addr == new->m_addr.s_addr;
> +		&& ipv6_addr_equal(&orig->m_addr, &new->m_addr);
>  }
>  static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
>  {
> @@ -125,7 +133,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
>  	struct ip_map *item = container_of(citem, struct ip_map, h);
>  
>  	strcpy(new->m_class, item->m_class);
> -	new->m_addr.s_addr = item->m_addr.s_addr;
> +	ipv6_addr_copy(&new->m_addr, &item->m_addr);
>  }
>  static void update(struct cache_head *cnew, struct cache_head *citem)
>  {
> @@ -149,22 +157,24 @@ static void ip_map_request(struct cache_detail *cd,
>  				  struct cache_head *h,
>  				  char **bpp, int *blen)
>  {
> -	char text_addr[20];
> +	char text_addr[40];
>  	struct ip_map *im = container_of(h, struct ip_map, h);
> -	__be32 addr = im->m_addr.s_addr;
> -
> -	snprintf(text_addr, 20, "%u.%u.%u.%u",
> -		 ntohl(addr) >> 24 & 0xff,
> -		 ntohl(addr) >> 16 & 0xff,
> -		 ntohl(addr) >>  8 & 0xff,
> -		 ntohl(addr) >>  0 & 0xff);
>  
> +	if (ipv6_addr_v4mapped(&(im->m_addr))) {
> +		snprintf(text_addr, 20, NIPQUAD_FMT,
> +				ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff,
> +				ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff,
> +				ntohl(im->m_addr.s6_addr32[3]) >>  8 & 0xff,
> +				ntohl(im->m_addr.s6_addr32[3]) >>  0 & 0xff);
> +	} else {
> +		snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr));
> +	}
>  	qword_add(bpp, blen, im->m_class);
>  	qword_add(bpp, blen, text_addr);
>  	(*bpp)[-1] = '\n';
>  }
>  
> -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr);
> +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr);
>  static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
>  
>  static int ip_map_parse(struct cache_detail *cd,
> @@ -175,10 +185,10 @@ static int ip_map_parse(struct cache_detail *cd,
>  	 * for scratch: */
>  	char *buf = mesg;
>  	int len;
> -	int b1,b2,b3,b4;
> +	int b1, b2, b3, b4, b5, b6, b7, b8;
>  	char c;
>  	char class[8];
> -	struct in_addr addr;
> +	struct in6_addr addr;
>  	int err;
>  
>  	struct ip_map *ipmp;
> @@ -197,7 +207,23 @@ static int ip_map_parse(struct cache_detail *cd,
>  	len = qword_get(&mesg, buf, mlen);
>  	if (len <= 0) return -EINVAL;
>  
> -	if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
> +	if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) {
> +		addr.s6_addr32[0] = 0;
> +		addr.s6_addr32[1] = 0;
> +		addr.s6_addr32[2] = htonl(0xffff);
> +		addr.s6_addr32[3] =
> +			htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
> +       } else if (sscanf(buf, NIP6_FMT "%c",
> +			&b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) {
> +		addr.s6_addr16[0] = htons(b1);
> +		addr.s6_addr16[1] = htons(b2);
> +		addr.s6_addr16[2] = htons(b3);
> +		addr.s6_addr16[3] = htons(b4);
> +		addr.s6_addr16[4] = htons(b5);
> +		addr.s6_addr16[5] = htons(b6);
> +		addr.s6_addr16[6] = htons(b7);
> +		addr.s6_addr16[7] = htons(b8);
> +       } else
>  		return -EINVAL;
>  
>  	expiry = get_expiry(&mesg);
> @@ -215,10 +241,7 @@ static int ip_map_parse(struct cache_detail *cd,
>  	} else
>  		dom = NULL;
>  
> -	addr.s_addr =
> -		htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
> -
> -	ipmp = ip_map_lookup(class,addr);
> +	ipmp = ip_map_lookup(class, &addr);
>  	if (ipmp) {
>  		err = ip_map_update(ipmp,
>  			     container_of(dom, struct unix_domain, h),
> @@ -238,7 +261,7 @@ static int ip_map_show(struct seq_file *m,
>  		       struct cache_head *h)
>  {
>  	struct ip_map *im;
> -	struct in_addr addr;
> +	struct in6_addr addr;
>  	char *dom = "-no-domain-";
>  
>  	if (h == NULL) {
> @@ -247,20 +270,24 @@ static int ip_map_show(struct seq_file *m,
>  	}
>  	im = container_of(h, struct ip_map, h);
>  	/* class addr domain */
> -	addr = im->m_addr;
> +	ipv6_addr_copy(&addr, &im->m_addr);
>  
>  	if (test_bit(CACHE_VALID, &h->flags) &&
>  	    !test_bit(CACHE_NEGATIVE, &h->flags))
>  		dom = im->m_client->h.name;
>  
> -	seq_printf(m, "%s %d.%d.%d.%d %s\n",
> -		   im->m_class,
> -		   ntohl(addr.s_addr) >> 24 & 0xff,
> -		   ntohl(addr.s_addr) >> 16 & 0xff,
> -		   ntohl(addr.s_addr) >>  8 & 0xff,
> -		   ntohl(addr.s_addr) >>  0 & 0xff,
> -		   dom
> -		   );
> +	if (ipv6_addr_v4mapped(&addr)) {
> +		seq_printf(m, "%s" NIPQUAD_FMT "%s\n",
> +			im->m_class,
> +			ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
> +			ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
> +			ntohl(addr.s6_addr32[3]) >>  8 & 0xff,
> +			ntohl(addr.s6_addr32[3]) >>  0 & 0xff,
> +			dom);
> +	} else {
> +		seq_printf(m, "%s" NIP6_FMT "%s\n",
> +			im->m_class, NIP6(addr), dom);
> +	}
>  	return 0;
>  }
>  
> @@ -280,16 +307,16 @@ struct cache_detail ip_map_cache = {
>  	.alloc		= ip_map_alloc,
>  };
>  
> -static struct ip_map *ip_map_lookup(char *class, struct in_addr addr)
> +static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
>  {
>  	struct ip_map ip;
>  	struct cache_head *ch;
>  
>  	strcpy(ip.m_class, class);
> -	ip.m_addr = addr;
> +	ipv6_addr_copy(&ip.m_addr, addr);
>  	ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h,
>  				 hash_str(class, IP_HASHBITS) ^
> -				 hash_ip(addr.s_addr));
> +				 hash_ip6(*addr));
>  
>  	if (ch)
>  		return container_of(ch, struct ip_map, h);
> @@ -318,14 +345,14 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
>  	ch = sunrpc_cache_update(&ip_map_cache,
>  				 &ip.h, &ipm->h,
>  				 hash_str(ipm->m_class, IP_HASHBITS) ^
> -				 hash_ip(ipm->m_addr.s_addr));
> +				 hash_ip6(ipm->m_addr));
>  	if (!ch)
>  		return -ENOMEM;
>  	cache_put(ch, &ip_map_cache);
>  	return 0;
>  }
>  
> -int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
> +int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
>  {
>  	struct unix_domain *udom;
>  	struct ip_map *ipmp;
> @@ -352,7 +379,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
>  	return 0;
>  }
>  
> -struct auth_domain *auth_unix_lookup(struct in_addr addr)
> +struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
>  {
>  	struct ip_map *ipm;
>  	struct auth_domain *rv;
> @@ -641,9 +668,24 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
>  int
>  svcauth_unix_set_client(struct svc_rqst *rqstp)
>  {
> -	struct sockaddr_in *sin = svc_addr_in(rqstp);
> +	struct sockaddr_in *sin;
> +	struct sockaddr_in6 *sin6, sin6_storage;
>  	struct ip_map *ipm;
>  
> +	switch (rqstp->rq_addr.ss_family) {
> +	case AF_INET:
> +		sin = svc_addr_in(rqstp);
> +		sin6 = &sin6_storage;
> +		ipv6_addr_set(&sin6->sin6_addr, 0, 0,
> +				htonl(0x0000FFFF), sin->sin_addr.s_addr);
> +		break;
> +	case AF_INET6:
> +		sin6 = svc_addr_in6(rqstp);
> +		break;
> +	default:
> +		BUG();
> +	}
> +
>  	rqstp->rq_client = NULL;
>  	if (rqstp->rq_proc == 0)
>  		return SVC_OK;
> @@ -651,7 +693,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
>  	ipm = ip_map_cached_get(rqstp);
>  	if (ipm == NULL)
>  		ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
> -				    sin->sin_addr);
> +				    &sin6->sin6_addr);
>  
>  	if (ipm == NULL)
>  		return SVC_DENIED;
> -- 
> 1.5.3.8
> 

_______________________________________________
NFSv4 mailing list
NFSv4@linux-nfs.org
http://linux-nfs.org/cgi-bin/mailman/listinfo/nfsv4

^ permalink raw reply

* [PATCH][TRIVIAL] ibm_emac/ibm_emac_mal.c:mal_poll: Fix MAL_DBG2 invocation
From: Hal Rosenstock @ 2008-01-17 18:47 UTC (permalink / raw)
  To: ebs; +Cc: linuxppc-dev, netdev

ibm_emac/ibm_emac_mal.c:mal_poll: Fix MAL_DBG2 invocation

Signed-off-by: Hal Rosenstock <hal@xsigo.com>

diff --git a/drivers/net/ibm_emac/ibm_emac_mal.c b/drivers/net/ibm_emac/ibm_emac_mal.c
index dcd8826..1977791 100644
--- a/drivers/net/ibm_emac/ibm_emac_mal.c
+++ b/drivers/net/ibm_emac/ibm_emac_mal.c
@@ -279,7 +279,7 @@ static int mal_poll(struct napi_struct *napi, int budget)
 	struct list_head *l;
 	int received = 0;
 
-	MAL_DBG2("%d: poll(%d) %d ->" NL, mal->def->index, *budget,
+	MAL_DBG2("%d: poll(%d) %d ->" NL, mal->def->index, budget,
 		 rx_work_limit);
       again:
 	/* Process TX skbs */


^ permalink raw reply related

* Eternity of Your Love
From: neil @ 2008-01-17 17:30 UTC (permalink / raw)
  To: netdev

You're in my Soul http://217.121.5.145/


^ permalink raw reply

* Re: [PATCH] IPv6 support for NFS server
From: Brian Haley @ 2008-01-17 17:17 UTC (permalink / raw)
  To: Aurélien Charbon; +Cc: J. Bruce Fields, netdev ML, Mailing list NFSv4
In-Reply-To: <478F8AA3.9060707@ext.bull.net>

Aurélien Charbon wrote:
> Thanks for your comments.
> Here is the patch with some cleanups.

Hi Aurelien,

Just two nits.

> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -400,6 +400,15 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
>  		 a->s6_addr32[2] == htonl(0x0000ffff));
>  }
>  
> +static inline void ipv6_addr_set_v4mapped(const __be32 addr,
> +					  struct in6_addr *v4mapped)
> +{
> +	ipv6_addr_set(v4mapped,
> +			0, 0,
> +			htonl(0x0000FFFF),
> +			addr);
> +}

I think Bruce wanted you to put as much on one line here as possible.

> @@ -641,9 +668,24 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
>  int
>  svcauth_unix_set_client(struct svc_rqst *rqstp)
>  {
> -	struct sockaddr_in *sin = svc_addr_in(rqstp);
> +	struct sockaddr_in *sin;
> +	struct sockaddr_in6 *sin6, sin6_storage;
>  	struct ip_map *ipm;
>  
> +	switch (rqstp->rq_addr.ss_family) {
> +	case AF_INET:
> +		sin = svc_addr_in(rqstp);
> +		sin6 = &sin6_storage;
> +		ipv6_addr_set(&sin6->sin6_addr, 0, 0,
> +				htonl(0x0000FFFF), sin->sin_addr.s_addr);
> +		break;

ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr);

-Brian

^ permalink raw reply

* Re: [PATCH] IPv6 support for NFS server
From: Aurélien Charbon @ 2008-01-17 17:04 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: netdev ML, Brian Haley, Mailing list NFSv4
In-Reply-To: <20080115231636.GB19658@fieldses.org>

[-- Attachment #1: Type: text/plain, Size: 294 bytes --]

Hi Bruce.

Thanks for your comments.
Here is the patch with some cleanups.

Regards,
Aurélien


-- 

********************************
       Aurelien Charbon
       Linux NFSv4 team
           Bull SAS
     Echirolles - France
http://nfsv4.bullopensource.org/
********************************


[-- Attachment #2: 0001-IPv6-support-for-NFS-server.patch --]
[-- Type: text/x-patch, Size: 12287 bytes --]

>From 51755892e19186cd18230bac3f783b0382bf9ae0 Mon Sep 17 00:00:00 2001
From: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Date: Thu, 17 Jan 2008 14:55:03 +0100
Subject: [PATCH 1/1] IPv6 support for NFS server

---
 fs/nfsd/export.c               |    9 ++-
 fs/nfsd/nfsctl.c               |   15 +++++-
 include/linux/sunrpc/svcauth.h |    4 +-
 include/net/ipv6.h             |    9 +++
 net/sunrpc/svcauth_unix.c      |  118 +++++++++++++++++++++++++++-------------
 5 files changed, 110 insertions(+), 45 deletions(-)

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 66d0aeb..208db3a 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -35,6 +35,7 @@
 #include <linux/lockd/bind.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/gss_api.h>
+#include <net/ipv6.h>
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
@@ -1556,6 +1557,7 @@ exp_addclient(struct nfsctl_client *ncp)
 {
 	struct auth_domain	*dom;
 	int			i, err;
+	struct in6_addr addr6;
 
 	/* First, consistency check. */
 	err = -EINVAL;
@@ -1574,9 +1576,10 @@ exp_addclient(struct nfsctl_client *ncp)
 		goto out_unlock;
 
 	/* Insert client into hashtable. */
-	for (i = 0; i < ncp->cl_naddr; i++)
-		auth_unix_add_addr(ncp->cl_addrlist[i], dom);
-
+	for (i = 0; i < ncp->cl_naddr; i++) {
+		ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
+		auth_unix_add_addr(&addr6, dom);
+	}
 	auth_unix_forget_old(dom);
 	auth_domain_put(dom);
 
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 77dc989..13d6b6b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -37,6 +37,7 @@
 #include <linux/nfsd/syscall.h>
 
 #include <asm/uaccess.h>
+#include <net/ipv6.h>
 
 /*
  *	We have a single directory with 9 nodes in it.
@@ -222,6 +223,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
 	struct auth_domain *clp;
 	int err = 0;
 	struct knfsd_fh *res;
+	struct in6_addr in6;
 
 	if (size < sizeof(*data))
 		return -EINVAL;
@@ -236,7 +238,11 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
 	res = (struct knfsd_fh*)buf;
 
 	exp_readlock();
-	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+
+	ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
+
+	clp = auth_unix_lookup(&in6);
+	if (!clp)
 		err = -EPERM;
 	else {
 		err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen);
@@ -257,6 +263,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
 	int err = 0;
 	struct knfsd_fh fh;
 	char *res;
+	struct in6_addr in6;
 
 	if (size < sizeof(*data))
 		return -EINVAL;
@@ -271,7 +278,11 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
 	res = buf;
 	sin = (struct sockaddr_in *)&data->gd_addr;
 	exp_readlock();
-	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+
+	ipv6_addr_set_v4mapped(sin->sin_addr.s_addr,&in6);
+
+	clp = auth_unix_lookup(&in6);
+	if (!clp)
 		err = -EPERM;
 	else {
 		err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 22e1ef8..9e6fb86 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -120,10 +120,10 @@ extern void	svc_auth_unregister(rpc_authflavor_t flavor);
 
 extern struct auth_domain *unix_domain_find(char *name);
 extern void auth_domain_put(struct auth_domain *item);
-extern int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom);
+extern int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom);
 extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
 extern struct auth_domain *auth_domain_find(char *name);
-extern struct auth_domain *auth_unix_lookup(struct in_addr addr);
+extern struct auth_domain *auth_unix_lookup(struct in6_addr *addr);
 extern int auth_unix_forget_old(struct auth_domain *dom);
 extern void svcauth_unix_purge(void);
 extern void svcauth_unix_info_release(void *);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ae328b6..9394710 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -400,6 +400,15 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
 		 a->s6_addr32[2] == htonl(0x0000ffff));
 }
 
+static inline void ipv6_addr_set_v4mapped(const __be32 addr,
+					  struct in6_addr *v4mapped)
+{
+	ipv6_addr_set(v4mapped,
+			0, 0,
+			htonl(0x0000FFFF),
+			addr);
+}
+
 /*
  * find the first different bit between two addresses
  * length of address must be a multiple of 32bits
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 4114794..10ba208 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -11,7 +11,8 @@
 #include <linux/hash.h>
 #include <linux/string.h>
 #include <net/sock.h>
-
+#include <net/ipv6.h>
+#include <linux/kernel.h>
 #define RPCDBG_FACILITY	RPCDBG_AUTH
 
 
@@ -84,7 +85,7 @@ static void svcauth_unix_domain_release(struct auth_domain *dom)
 struct ip_map {
 	struct cache_head	h;
 	char			m_class[8]; /* e.g. "nfsd" */
-	struct in_addr		m_addr;
+	struct in6_addr		m_addr;
 	struct unix_domain	*m_client;
 	int			m_add_change;
 };
@@ -112,12 +113,19 @@ static inline int hash_ip(__be32 ip)
 	return (hash ^ (hash>>8)) & 0xff;
 }
 #endif
+static inline int hash_ip6(struct in6_addr ip)
+{
+	return (hash_ip(ip.s6_addr32[0]) ^
+		hash_ip(ip.s6_addr32[1]) ^
+		hash_ip(ip.s6_addr32[2]) ^
+		hash_ip(ip.s6_addr32[3]));
+}
 static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
 {
 	struct ip_map *orig = container_of(corig, struct ip_map, h);
 	struct ip_map *new = container_of(cnew, struct ip_map, h);
 	return strcmp(orig->m_class, new->m_class) == 0
-		&& orig->m_addr.s_addr == new->m_addr.s_addr;
+		&& ipv6_addr_equal(&orig->m_addr, &new->m_addr);
 }
 static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
 {
@@ -125,7 +133,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
 	struct ip_map *item = container_of(citem, struct ip_map, h);
 
 	strcpy(new->m_class, item->m_class);
-	new->m_addr.s_addr = item->m_addr.s_addr;
+	ipv6_addr_copy(&new->m_addr, &item->m_addr);
 }
 static void update(struct cache_head *cnew, struct cache_head *citem)
 {
@@ -149,22 +157,24 @@ static void ip_map_request(struct cache_detail *cd,
 				  struct cache_head *h,
 				  char **bpp, int *blen)
 {
-	char text_addr[20];
+	char text_addr[40];
 	struct ip_map *im = container_of(h, struct ip_map, h);
-	__be32 addr = im->m_addr.s_addr;
-
-	snprintf(text_addr, 20, "%u.%u.%u.%u",
-		 ntohl(addr) >> 24 & 0xff,
-		 ntohl(addr) >> 16 & 0xff,
-		 ntohl(addr) >>  8 & 0xff,
-		 ntohl(addr) >>  0 & 0xff);
 
+	if (ipv6_addr_v4mapped(&(im->m_addr))) {
+		snprintf(text_addr, 20, NIPQUAD_FMT,
+				ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff,
+				ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff,
+				ntohl(im->m_addr.s6_addr32[3]) >>  8 & 0xff,
+				ntohl(im->m_addr.s6_addr32[3]) >>  0 & 0xff);
+	} else {
+		snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr));
+	}
 	qword_add(bpp, blen, im->m_class);
 	qword_add(bpp, blen, text_addr);
 	(*bpp)[-1] = '\n';
 }
 
-static struct ip_map *ip_map_lookup(char *class, struct in_addr addr);
+static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr);
 static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
 
 static int ip_map_parse(struct cache_detail *cd,
@@ -175,10 +185,10 @@ static int ip_map_parse(struct cache_detail *cd,
 	 * for scratch: */
 	char *buf = mesg;
 	int len;
-	int b1,b2,b3,b4;
+	int b1, b2, b3, b4, b5, b6, b7, b8;
 	char c;
 	char class[8];
-	struct in_addr addr;
+	struct in6_addr addr;
 	int err;
 
 	struct ip_map *ipmp;
@@ -197,7 +207,23 @@ static int ip_map_parse(struct cache_detail *cd,
 	len = qword_get(&mesg, buf, mlen);
 	if (len <= 0) return -EINVAL;
 
-	if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
+	if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) {
+		addr.s6_addr32[0] = 0;
+		addr.s6_addr32[1] = 0;
+		addr.s6_addr32[2] = htonl(0xffff);
+		addr.s6_addr32[3] =
+			htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
+       } else if (sscanf(buf, NIP6_FMT "%c",
+			&b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) {
+		addr.s6_addr16[0] = htons(b1);
+		addr.s6_addr16[1] = htons(b2);
+		addr.s6_addr16[2] = htons(b3);
+		addr.s6_addr16[3] = htons(b4);
+		addr.s6_addr16[4] = htons(b5);
+		addr.s6_addr16[5] = htons(b6);
+		addr.s6_addr16[6] = htons(b7);
+		addr.s6_addr16[7] = htons(b8);
+       } else
 		return -EINVAL;
 
 	expiry = get_expiry(&mesg);
@@ -215,10 +241,7 @@ static int ip_map_parse(struct cache_detail *cd,
 	} else
 		dom = NULL;
 
-	addr.s_addr =
-		htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
-
-	ipmp = ip_map_lookup(class,addr);
+	ipmp = ip_map_lookup(class, &addr);
 	if (ipmp) {
 		err = ip_map_update(ipmp,
 			     container_of(dom, struct unix_domain, h),
@@ -238,7 +261,7 @@ static int ip_map_show(struct seq_file *m,
 		       struct cache_head *h)
 {
 	struct ip_map *im;
-	struct in_addr addr;
+	struct in6_addr addr;
 	char *dom = "-no-domain-";
 
 	if (h == NULL) {
@@ -247,20 +270,24 @@ static int ip_map_show(struct seq_file *m,
 	}
 	im = container_of(h, struct ip_map, h);
 	/* class addr domain */
-	addr = im->m_addr;
+	ipv6_addr_copy(&addr, &im->m_addr);
 
 	if (test_bit(CACHE_VALID, &h->flags) &&
 	    !test_bit(CACHE_NEGATIVE, &h->flags))
 		dom = im->m_client->h.name;
 
-	seq_printf(m, "%s %d.%d.%d.%d %s\n",
-		   im->m_class,
-		   ntohl(addr.s_addr) >> 24 & 0xff,
-		   ntohl(addr.s_addr) >> 16 & 0xff,
-		   ntohl(addr.s_addr) >>  8 & 0xff,
-		   ntohl(addr.s_addr) >>  0 & 0xff,
-		   dom
-		   );
+	if (ipv6_addr_v4mapped(&addr)) {
+		seq_printf(m, "%s" NIPQUAD_FMT "%s\n",
+			im->m_class,
+			ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
+			ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
+			ntohl(addr.s6_addr32[3]) >>  8 & 0xff,
+			ntohl(addr.s6_addr32[3]) >>  0 & 0xff,
+			dom);
+	} else {
+		seq_printf(m, "%s" NIP6_FMT "%s\n",
+			im->m_class, NIP6(addr), dom);
+	}
 	return 0;
 }
 
@@ -280,16 +307,16 @@ struct cache_detail ip_map_cache = {
 	.alloc		= ip_map_alloc,
 };
 
-static struct ip_map *ip_map_lookup(char *class, struct in_addr addr)
+static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr)
 {
 	struct ip_map ip;
 	struct cache_head *ch;
 
 	strcpy(ip.m_class, class);
-	ip.m_addr = addr;
+	ipv6_addr_copy(&ip.m_addr, addr);
 	ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h,
 				 hash_str(class, IP_HASHBITS) ^
-				 hash_ip(addr.s_addr));
+				 hash_ip6(*addr));
 
 	if (ch)
 		return container_of(ch, struct ip_map, h);
@@ -318,14 +345,14 @@ static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t ex
 	ch = sunrpc_cache_update(&ip_map_cache,
 				 &ip.h, &ipm->h,
 				 hash_str(ipm->m_class, IP_HASHBITS) ^
-				 hash_ip(ipm->m_addr.s_addr));
+				 hash_ip6(ipm->m_addr));
 	if (!ch)
 		return -ENOMEM;
 	cache_put(ch, &ip_map_cache);
 	return 0;
 }
 
-int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
+int auth_unix_add_addr(struct in6_addr *addr, struct auth_domain *dom)
 {
 	struct unix_domain *udom;
 	struct ip_map *ipmp;
@@ -352,7 +379,7 @@ int auth_unix_forget_old(struct auth_domain *dom)
 	return 0;
 }
 
-struct auth_domain *auth_unix_lookup(struct in_addr addr)
+struct auth_domain *auth_unix_lookup(struct in6_addr *addr)
 {
 	struct ip_map *ipm;
 	struct auth_domain *rv;
@@ -641,9 +668,24 @@ static int unix_gid_find(uid_t uid, struct group_info **gip,
 int
 svcauth_unix_set_client(struct svc_rqst *rqstp)
 {
-	struct sockaddr_in *sin = svc_addr_in(rqstp);
+	struct sockaddr_in *sin;
+	struct sockaddr_in6 *sin6, sin6_storage;
 	struct ip_map *ipm;
 
+	switch (rqstp->rq_addr.ss_family) {
+	case AF_INET:
+		sin = svc_addr_in(rqstp);
+		sin6 = &sin6_storage;
+		ipv6_addr_set(&sin6->sin6_addr, 0, 0,
+				htonl(0x0000FFFF), sin->sin_addr.s_addr);
+		break;
+	case AF_INET6:
+		sin6 = svc_addr_in6(rqstp);
+		break;
+	default:
+		BUG();
+	}
+
 	rqstp->rq_client = NULL;
 	if (rqstp->rq_proc == 0)
 		return SVC_OK;
@@ -651,7 +693,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 	ipm = ip_map_cached_get(rqstp);
 	if (ipm == NULL)
 		ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
-				    sin->sin_addr);
+				    &sin6->sin6_addr);
 
 	if (ipm == NULL)
 		return SVC_DENIED;
-- 
1.5.3.8


^ permalink raw reply related

* Re: [PATCH] request_irq() always returns -EINVAL with a NULL handler.
From: Stephen Hemminger @ 2008-01-17 16:22 UTC (permalink / raw)
  To: Rusty Russell; +Cc: netdev, linux-kernel
In-Reply-To: <200801171757.59026.rusty@rustcorp.com.au>

On Thu, 17 Jan 2008 17:57:58 +1100
Rusty Russell <rusty@rustcorp.com.au> wrote:

> I assume that these ancient network drivers were trying to find out if
> an irq is available.  eepro.c expecting +EBUSY was doubly wrong.
> 
> I'm not sure that can_request_irq() is the right thing, but these drivers
> are definitely wrong.
> 
> request_irq should BUG() on bad input, and these would have been found
> earlier.
> 
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> ---
>  drivers/net/3c503.c |    2 +-
>  drivers/net/e2100.c |    2 +-
>  drivers/net/eepro.c |    2 +-
>  drivers/net/hp.c    |    2 +-
>  kernel/irq/manage.c |    1 +
>  5 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff -r 0b7e4fbb6238 drivers/net/3c503.c
> --- a/drivers/net/3c503.c	Thu Jan 17 15:49:34 2008 +1100
> +++ b/drivers/net/3c503.c	Thu Jan 17 16:40:28 2008 +1100
> @@ -379,7 +379,7 @@ el2_open(struct net_device *dev)
>  
>  	outb(EGACFR_NORM, E33G_GACFR);	/* Enable RAM and interrupts. */
>  	do {
> -	    if (request_irq (*irqp, NULL, 0, "bogus", dev) != -EBUSY) {
> +	    if (can_request_irq(*irqp, 0)) {
>  		/* Twinkle the interrupt, and check if it's seen. */
>  		unsigned long cookie = probe_irq_on();
>  		outb_p(0x04 << ((*irqp == 9) ? 2 : *irqp), E33G_IDCFR);
> diff -r 0b7e4fbb6238 drivers/net/e2100.c
> --- a/drivers/net/e2100.c	Thu Jan 17 15:49:34 2008 +1100
> +++ b/drivers/net/e2100.c	Thu Jan 17 16:40:28 2008 +1100
> @@ -202,7 +202,7 @@ static int __init e21_probe1(struct net_
>  	if (dev->irq < 2) {
>  		int irqlist[] = {15,11,10,12,5,9,3,4}, i;
>  		for (i = 0; i < 8; i++)
> -			if (request_irq (irqlist[i], NULL, 0, "bogus", NULL) != -EBUSY) {
> +			if (can_request_irq(irqlist[i], 0)) {
>  				dev->irq = irqlist[i];
>  				break;
>  			}
> diff -r 0b7e4fbb6238 drivers/net/eepro.c
> --- a/drivers/net/eepro.c	Thu Jan 17 15:49:34 2008 +1100
> +++ b/drivers/net/eepro.c	Thu Jan 17 16:40:28 2008 +1100
> @@ -914,7 +914,7 @@ static int	eepro_grab_irq(struct net_dev
>  
>  		eepro_sw2bank0(ioaddr); /* Switch back to Bank 0 */
>  
> -		if (request_irq (*irqp, NULL, IRQF_SHARED, "bogus", dev) != EBUSY) {
> +		if (can_request_irq(*irqp, IRQF_SHARED)) {
>  			unsigned long irq_mask;
>  			/* Twinkle the interrupt, and check if it's seen */
>  			irq_mask = probe_irq_on();
> diff -r 0b7e4fbb6238 drivers/net/hp.c
> --- a/drivers/net/hp.c	Thu Jan 17 15:49:34 2008 +1100
> +++ b/drivers/net/hp.c	Thu Jan 17 16:40:28 2008 +1100
> @@ -170,7 +170,7 @@ static int __init hp_probe1(struct net_d
>  		int *irqp = wordmode ? irq_16list : irq_8list;
>  		do {
>  			int irq = *irqp;
> -			if (request_irq (irq, NULL, 0, "bogus", NULL) != -EBUSY) {
> +			if (can_request_irq(irq, 0)) {
>  				unsigned long cookie = probe_irq_on();
>  				/* Twinkle the interrupt, and check if it's seen. */
>  				outb_p(irqmap[irq] | HP_RUN, ioaddr + HP_CONFIGURE);
> diff -r 0b7e4fbb6238 kernel/irq/manage.c
> --- a/kernel/irq/manage.c	Thu Jan 17 15:49:34 2008 +1100
> +++ b/kernel/irq/manage.c	Thu Jan 17 16:40:28 2008 +1100
> @@ -252,6 +252,7 @@ int can_request_irq(unsigned int irq, un
>  
>  	return !action;
>  }
> +EXPORT_SYMBOL(can_request_irq);
>  
>  void compat_irq_chip_set_default_handler(struct irq_desc *desc)
>  {

Isn't this just inherently racy, like the old check_resource stuff that got pulled
out in 2.5?

^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: Timo Teräs @ 2008-01-17 13:31 UTC (permalink / raw)
  To: Herbert Xu; +Cc: jamal, netdev, David Miller
In-Reply-To: <20080117125016.GA9820@gondor.apana.org.au>

Herbert Xu wrote:
> On Thu, Jan 17, 2008 at 07:42:30AM -0500, jamal wrote:
>> Looking at the pfkey RFC one more time, heres a funny quote:
>> "
>> The dump message is used for debugging
>> purposes only and is not intended for production use.
>> "
> 
> In fact it goes much further:
> 
>    Support for the dump message MAY be discontinued in future versions
>    of PF_KEY.  Key management applications MUST NOT depend on this
>    message for basic operation.

I guess the idea was that application should know about the SAs it
created. Though a SA dump needs to be done if you want to check
for existing entries (created by other processes, or if you are
recovering from a crash).

SPD dumping is still a must if you want to work nicely with kernel.

As noted earlier, pfkey is not really standardized. E.g. the SPD
dumping messages are not in the RFC, as David noted. The above RFC
comments and the fact that the SPD stuff is unspecified made me think
that making non-atomic dumps would be a much better alternative than
leaving the socket in a bad state, which would make the application
completely unusable.


^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: jamal @ 2008-01-17 13:18 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Timo Teräs, netdev, David Miller
In-Reply-To: <20080117125016.GA9820@gondor.apana.org.au>

On Thu, 2008-17-01 at 23:50 +1100, Herbert Xu wrote:

> In fact it goes much further:
> 
>    Support for the dump message MAY be discontinued in future versions
>    of PF_KEY.  Key management applications MUST NOT depend on this
>    message for basic operation.

No doubt PF_KEY being an RFC has caused a lot of damage. 
Once something is deployed, unfortunately, it grows a foot and sometimes
a head[1]. 
Note: it's a big dilemma in my mind as well and i agree in principle with
both Dave and you (we really should not be helping pfkey grow another
ear on the forehead); the only way i am convincing myself otherwise is
to note that Racoon/ipsec-tools is out there, shipped as the default ipsec
user management tools by most if not all linux distros. If we really
want to stop the beast let's cut out the umbilical code - just take out
pfkey altogether from Linux ;->


cheers,
jamal

[1] Whatever fix/approach Timo has will eventually show up in the BSDs
and solaris for example 


^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: Herbert Xu @ 2008-01-17 12:50 UTC (permalink / raw)
  To: jamal; +Cc: Timo Teräs, netdev, David Miller
In-Reply-To: <1200573750.4508.29.camel@localhost>

On Thu, Jan 17, 2008 at 07:42:30AM -0500, jamal wrote:
> 
> Looking at the pfkey RFC one more time, heres a funny quote:
> "
> The dump message is used for debugging
> purposes only and is not intended for production use.
> "

In fact it goes much further:

   Support for the dump message MAY be discontinued in future versions
   of PF_KEY.  Key management applications MUST NOT depend on this
   message for basic operation.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: jamal @ 2008-01-17 12:42 UTC (permalink / raw)
  To: Timo Teräs; +Cc: Herbert Xu, netdev, David Miller
In-Reply-To: <478EED98.6080603@iki.fi>

On Thu, 2008-17-01 at 07:54 +0200, Timo Teräs wrote:

> You listen for the events. It is guaranteed that if the dumping code
> does return the entry to be deleted, the deletion notification will
> occur after that dump entry.

Ok, sounds reasonable - as long as there is a known order for
occurrences, then there will be no ambiguity.
I am assuming that the same ordering will happen with
updates/modifications?
To go back to what i suggested earlier - is it possible to have this in
two stages? First pfkey with expected behavior being the same as current
netlink; then later the optimizations you are talking about.

Looking at the pfkey RFC one more time, here's a funny quote:
"
The dump message is used for debugging
purposes only and is not intended for production use.
"

One thing Dave mentioned that's extremely important is to ensure no ABI breakage.
Think of racoon 0.6 which knows nothing of this; it should continue to work.

Dave: One reason i paid attention to this is because it was on your TODO
list from netconf 2005 ;-> It has just been sitting in the background
memory cells since.

cheers,
jamal


^ permalink raw reply

* Re: [PATCH 1/8] [TCP]: Uninline tcp_set_state
From: Andi Kleen @ 2008-01-17 12:41 UTC (permalink / raw)
  To: Ilpo Järvinen; +Cc: Stephen Hemminger, Netdev
In-Reply-To: <Pine.LNX.4.64.0801140858040.31652@kivilampi-30.cs.helsinki.fi>

"Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi> writes:
>
> Besides, it not always that obvious that gcc is able to determine "the
> constant state", considering e.g., the complexity in the cases with
> tcp_rcv_synsent_state_process, tcp_close_state, tcp_done. In such cases
> uninlining should be done and gcc is probably not able to mix both cases
> nicely for a single function?

I think it would be cleanest to completely unswitch the function 
and split into tcp_set_closed()  / tcp_set_established() / tcp_other_state() 
called by the callers directly.

That would probably lose the state trace, but I never found 
that useful for anything.

-Andi

^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: Timo Teräs @ 2008-01-17 12:37 UTC (permalink / raw)
  To: David Miller; +Cc: herbert, hadi, netdev
In-Reply-To: <20080117.030827.72477184.davem@davemloft.net>

David Miller wrote:
> From: Timo_Teräs <timo.teras@iki.fi>
> Date: Thu, 17 Jan 2008 13:00:09 +0200
> 
>> IMHO, it's a lot better then losing >50% of entries and the end
>> of sequence message on big dumps. SPD and SADB are not that
>> volatile; in most of the cases the dump would be as good as an
>> atomic one.
> 
> I humbly disagree with you.  Interface behavior stability
> is more important.

Small SPDs/SADBs would still be dumped atomically. The patch
affects only the cases when the receive queue is getting full.

>> I'm not sure if there's other major applications that we should
>> be concerned about, but at least ipsec-tools racoon does not
>> expect to get atomic dumps (which btw, comes originally from BSD).
> 
> Racoon was written as an addon to the BSD stack by an IPV6/IPSEC
> project in Japan named KAME, it did not "come from BSD".  It was
> added to BSD.
> 
> There are also other BSD based IPSEC daemons such as the one written
> by the OpenBSD folks.

Yes. I meant that it was originally written to be used in BSD. The
Linux port came later. Sorry for the ambiguous wording.

> I don't think this is arguable at all.  We're not changing semantics
> over what we've done for 4+ years and applications might depend upon.
> It's for a deprecated interface, which makes any semantic changes that
> much less inviting.
> 
> You can argue all you want, but it will not change the invariants in
> the previous paragraph.

True. If no one else agrees with me, I'll drop it. I can always run
my own patched kernel.

I'd appreciate feedback on the xfrm changes. I'll try to make that
part usable patch against net-2.6.25 git tree next week.

^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: jamal @ 2008-01-17 12:26 UTC (permalink / raw)
  To: Herbert Xu; +Cc: Timo Teräs, netdev
In-Reply-To: <20080117111106.GA8932@gondor.apana.org.au>

On Thu, 2008-17-01 at 22:11 +1100, Herbert Xu wrote:

> Sure racoon uses pfkey but the question is does it use pfkey dumping?
> 

it does when trying to purge phase 2 SAs...

cheers,
jamal


^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: Timo Teräs @ 2008-01-17 12:21 UTC (permalink / raw)
  To: Herbert Xu; +Cc: jamal, netdev
In-Reply-To: <20080117111106.GA8932@gondor.apana.org.au>

Herbert Xu wrote:
> On Thu, Jan 17, 2008 at 07:54:32AM +0200, Timo Teräs wrote:
>>> Racoon doesn't use pfkey dumping as far as I know.
>> ipsec-tools racoon uses pfkey and only pfkey. And it's non trivial to
>> make it use netlink; it relies heavily all around the code to pfkey
>> structs. It also runs on BSD so we cannot rip pfkey away; adding a
>> layer to work with both pfkey and netlink would be doable, but just a
>> lot of work.
> 
> Sure racoon uses pfkey but the question is does it use pfkey dumping?

Yes it does.

It does SPD dump at startup and keeps the SP database in sync by
listening to notifications.

It also does SA dumps when it is figuring out which SAs to purge.

I started to work on the xfrm/pfkey patch only because racoon is
having problems with it (as is anything else using pfkey).

- Timo


^ permalink raw reply

* Re: Broken "Make ip6_frags per namespace" patch
From: Daniel Lezcano @ 2008-01-17 12:01 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: davem, den, netdev, devel, Pavel Emelianov
In-Reply-To: <20080117113041.GG6217@localhost.sw.ru>

Alexey Dobriyan wrote:
> On Thu, Jan 17, 2008 at 11:40:42AM +0100, Daniel Lezcano wrote:
>> Alexey Dobriyan wrote:
>>>> commit c064c4811b3e87ff8202f5a966ff4eea0bc54575
>>>> Author: Daniel Lezcano <dlezcano@fr.ibm.com>
>>>> Date:   Thu Jan 10 02:56:03 2008 -0800
>>>>
>>>>    [NETNS][IPV6]: Make ip6_frags per namespace.
>>>>    
>>>>    The ip6_frags is moved to the network namespace structure.  Because
>>>>    there can be multiple instances of the network namespaces, and the
>>>>    ip6_frags is no longer a global static variable, a helper function has
>>>>    been added to facilitate the initialization of the variables.
>>>>    
>>>>    Until the ipv6 protocol is not per namespace, the variables are
>>>>    accessed relatively from the initial network namespace.
>>>> --- a/include/net/netns/ipv6.h
>>>> +++ b/include/net/netns/ipv6.h
>>>> @@ -11,6 +13,7 @@ struct netns_sysctl_ipv6 {
>>>> #ifdef CONFIG_SYSCTL
>>>> 	struct ctl_table_header *table;
>>>> #endif
>>>> +	struct inet_frags_ctl frags;
>>>> --- a/net/ipv6/reassembly.c
>>>> +++ b/net/ipv6/reassembly.c
>>>> @@ -632,6 +625,11 @@ static struct inet6_protocol frag_protocol =
>>>> 	.flags		=	INET6_PROTO_NOPOLICY,
>>>> };
>>>>
>>>> +void ipv6_frag_sysctl_init(struct net *net)
>>>> +{
>>>> +	ip6_frags.ctl = &net->ipv6.sysctl.frags;
>>>> +}
>>> _This_ can't work. ip6frags is only one and ->ctl pointer is flipped
>>> onto per-netns data. Changelog is also misleading: ip6_frags_ctl is
>>> moved to netns not all ip6_frags.
>>>
>>> Oopsing place below -- f->ctl dereference in preparation of mod_timer() 
>>> call.
>>>
>>>
>>>
>>> BUG: unable to handle kernel paging request at virtual address f5da8fc8
>>> printing eip: c11d868a *pdpt = 0000000000003001 *pde = 0000000001728067 
>>> *pte = 0000000035da8000 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
>>> Modules linked in: ebt_ip ebt_dnat ebt_arpreply ebt_arp ebt_among 
>>> ebtable_nat ip6t_REJECT ip6table_filter ip6_tables ebtable_filter 
>>> ebtable_broute ebt_802_3 ebtables des_generic nf_conntrack_netbios_ns 
>>> nf_conntrack_ipv4 xt_state nf_conntrack xt_tcpudp ipt_REJECT 
>>> iptable_filter ip_tables deflate zlib_deflate zlib_inflate cryptomgr 
>>> crypto_hash cpufreq_stats cpufreq_ondemand cdrom cbc bridge llc blkcipher 
>>> crypto_algapi arpt_mangle arptable_filter arp_tables x_tables ah6 
>>> af_packet ipv6
>>>
>>> Pid: 0, comm: swapper Not tainted (2.6.24-rc7-net-2.6.25-nf-sysfs-n #30)
>>> EIP: 0060:[<c11d868a>] EFLAGS: 00010246 CPU: 1
>>> EIP is at inet_frag_secret_rebuild+0xaa/0xd0
>>> EAX: f5da8fbc EBX: 00000000 ECX: c1310000 EDX: 00000100
>>> ESI: f7cba000 EDI: f898f7a0 EBP: 00000040 ESP: c1310f90
>>> DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
>>> Process swapper (pid: 0, ti=c1310000 task=f7c9a580 task.ti=f7c9b000)
>>> Stack: f898f7a8 f898f8a8 000ddcbd f898f7a0 f7cba000 c1310fc4 00000100 
>>> c1026d60 00000002 00000001 c1191183 c4779ddc c11d85e0 f898c860 
>>>       f898c860 c12c4a88 00000001 c1308da0 0000000a c1023477 00000001 
>>>       c130b640 c130b640 f7c9bf34 Call Trace:
>>> [<c1026d60>] run_timer_softirq+0x120/0x190
>>> [<c1191183>] net_rx_action+0x53/0x220
>>> [<c11d85e0>] inet_frag_secret_rebuild+0x0/0xd0
>>> [<c1023477>] __do_softirq+0x87/0x100
>>> [<c10059cf>] do_softirq+0xaf/0x110
>>> [<c10233e3>] irq_exit+0x83/0x90
>>> [<c1010ce7>] smp_apic_timer_interrupt+0x57/0x90
>>> [<c10036e1>] apic_timer_interrupt+0x29/0x38
>>> [<c10036eb>] apic_timer_interrupt+0x33/0x38
>>> [<c1001460>] default_idle+0x0/0x60
>>> [<c10014a0>] default_idle+0x40/0x60
>>> [<c1000ea3>] cpu_idle+0x73/0xb0
>>> =======================
>>> Code: 8b 10 85 d2 89 13 74 03 89 5a 04 89 18 89 43 04 85 f6 89 f3 75 bb 45 
>>> 83 fd 40 75 a5 8b 44 24 04 e8 4c 3f 01 00 8b 87 50 01 00 00 <8b> 50 0c 01 
>>> 54 24 08 8d 87 38 01 00 00 8b 54 24 08 83 c4 0c 5b EIP: [<c11d868a>] 
>>> inet_frag_secret_rebuild+0xaa/0xd0 SS:ESP 0068:c1310f90
>>> Kernel panic - not syncing: Fatal exception in interrupt
>> Hi Alexey,
>>
>> does it happen after unsharing the network ?
> 
> Yep. clone(CLONE_NEWNET) in a loop and sooner or later you'll see this.

Thanks.

The network namespace work is not yet complete; it is normal that you do
not have ip6_frags per namespace yet. Pavel is working on that.

Perhaps I should disable ipv6_frag_sysctl_init when not in the init_net
and enable it again when Pavel sends his fragment patchset?

^ permalink raw reply

* Re: [PATCH 4/4] bonding: Fix some RTNL taking
From: Jarek Poplawski @ 2008-01-17 11:46 UTC (permalink / raw)
  To: Makito SHIOKAWA; +Cc: netdev
In-Reply-To: <478EE7FD.3010802@miraclelinux.com>

On Thu, Jan 17, 2008 at 02:30:37PM +0900, Makito SHIOKAWA wrote:
>> Maybe I'm wrong, but since this read_lock() is given and taken anyway,
>> it seems this looks a bit better to me (why hold this rtnl longer
>> than needed?):
>> 		read_unlock(&bond->lock);
>> 		rtnl_unlock();
>> 		read_lock(&bond->lock);
> Seems better.
>
>> On the other hand, probably 'if (bond->kill_timers)' could be repeated
>> after this read_lock() retaking.
> Sorry, what do you mean? (A case that bond->kill_timers = 1 is done during 
> lock retaking, and work being queued only to do 'if (bond->kill_timers)'? 
> If so, I think that won't differ much.)

Probably the difference is not much, but since all this double locking,
unlocking and whatever happens in between could take a while, such a check
looks cheaper than re-queueing... But I won't insist on this.

Jarek P.

^ permalink raw reply

* Re: Broken "Make ip6_frags per namespace" patch
From: Alexey Dobriyan @ 2008-01-17 11:30 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: davem, den, netdev, devel
In-Reply-To: <478F30AA.7080704@fr.ibm.com>

On Thu, Jan 17, 2008 at 11:40:42AM +0100, Daniel Lezcano wrote:
> Alexey Dobriyan wrote:
> >>commit c064c4811b3e87ff8202f5a966ff4eea0bc54575
> >>Author: Daniel Lezcano <dlezcano@fr.ibm.com>
> >>Date:   Thu Jan 10 02:56:03 2008 -0800
> >>
> >>    [NETNS][IPV6]: Make ip6_frags per namespace.
> >>    
> >>    The ip6_frags is moved to the network namespace structure.  Because
> >>    there can be multiple instances of the network namespaces, and the
> >>    ip6_frags is no longer a global static variable, a helper function has
> >>    been added to facilitate the initialization of the variables.
> >>    
> >>    Until the ipv6 protocol is not per namespace, the variables are
> >>    accessed relatively from the initial network namespace.
> >
> >>--- a/include/net/netns/ipv6.h
> >>+++ b/include/net/netns/ipv6.h
> >
> >>@@ -11,6 +13,7 @@ struct netns_sysctl_ipv6 {
> >> #ifdef CONFIG_SYSCTL
> >> 	struct ctl_table_header *table;
> >> #endif
> >>+	struct inet_frags_ctl frags;
> >
> >>--- a/net/ipv6/reassembly.c
> >>+++ b/net/ipv6/reassembly.c
> >
> >>@@ -632,6 +625,11 @@ static struct inet6_protocol frag_protocol =
> >> 	.flags		=	INET6_PROTO_NOPOLICY,
> >> };
> >> 
> >>+void ipv6_frag_sysctl_init(struct net *net)
> >>+{
> >>+	ip6_frags.ctl = &net->ipv6.sysctl.frags;
> >>+}
> >
> >_This_ can't work. ip6frags is only one and ->ctl pointer is flipped
> >onto per-netns data. Changelog is also misleading: ip6_frags_ctl is
> >moved to netns not all ip6_frags.
> >
> >Oopsing place below -- f->ctl dereference in preparation of mod_timer() 
> >call.
> >
> >
> >
> >BUG: unable to handle kernel paging request at virtual address f5da8fc8
> >printing eip: c11d868a *pdpt = 0000000000003001 *pde = 0000000001728067 
> >*pte = 0000000035da8000 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
> >Modules linked in: ebt_ip ebt_dnat ebt_arpreply ebt_arp ebt_among 
> >ebtable_nat ip6t_REJECT ip6table_filter ip6_tables ebtable_filter 
> >ebtable_broute ebt_802_3 ebtables des_generic nf_conntrack_netbios_ns 
> >nf_conntrack_ipv4 xt_state nf_conntrack xt_tcpudp ipt_REJECT 
> >iptable_filter ip_tables deflate zlib_deflate zlib_inflate cryptomgr 
> >crypto_hash cpufreq_stats cpufreq_ondemand cdrom cbc bridge llc blkcipher 
> >crypto_algapi arpt_mangle arptable_filter arp_tables x_tables ah6 
> >af_packet ipv6
> >
> >Pid: 0, comm: swapper Not tainted (2.6.24-rc7-net-2.6.25-nf-sysfs-n #30)
> >EIP: 0060:[<c11d868a>] EFLAGS: 00010246 CPU: 1
> >EIP is at inet_frag_secret_rebuild+0xaa/0xd0
> >EAX: f5da8fbc EBX: 00000000 ECX: c1310000 EDX: 00000100
> >ESI: f7cba000 EDI: f898f7a0 EBP: 00000040 ESP: c1310f90
> > DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
> >Process swapper (pid: 0, ti=c1310000 task=f7c9a580 task.ti=f7c9b000)
> >Stack: f898f7a8 f898f8a8 000ddcbd f898f7a0 f7cba000 c1310fc4 00000100 
> >c1026d60 00000002 00000001 c1191183 c4779ddc c11d85e0 f898c860 
> >       f898c860 c12c4a88 00000001 c1308da0 0000000a c1023477 00000001 
> >       c130b640 c130b640 f7c9bf34 Call Trace:
> > [<c1026d60>] run_timer_softirq+0x120/0x190
> > [<c1191183>] net_rx_action+0x53/0x220
> > [<c11d85e0>] inet_frag_secret_rebuild+0x0/0xd0
> > [<c1023477>] __do_softirq+0x87/0x100
> > [<c10059cf>] do_softirq+0xaf/0x110
> > [<c10233e3>] irq_exit+0x83/0x90
> > [<c1010ce7>] smp_apic_timer_interrupt+0x57/0x90
> > [<c10036e1>] apic_timer_interrupt+0x29/0x38
> > [<c10036eb>] apic_timer_interrupt+0x33/0x38
> > [<c1001460>] default_idle+0x0/0x60
> > [<c10014a0>] default_idle+0x40/0x60
> > [<c1000ea3>] cpu_idle+0x73/0xb0
> >=======================
> >Code: 8b 10 85 d2 89 13 74 03 89 5a 04 89 18 89 43 04 85 f6 89 f3 75 bb 45 
> >83 fd 40 75 a5 8b 44 24 04 e8 4c 3f 01 00 8b 87 50 01 00 00 <8b> 50 0c 01 
> >54 24 08 8d 87 38 01 00 00 8b 54 24 08 83 c4 0c 5b EIP: [<c11d868a>] 
> >inet_frag_secret_rebuild+0xaa/0xd0 SS:ESP 0068:c1310f90
> >Kernel panic - not syncing: Fatal exception in interrupt
> 
> Hi Alexey,
> 
> does it happen after unsharing the network ?

Yep. clone(CLONE_NEWNET) in a loop and sooner or later you'll see this.


^ permalink raw reply

* [PATCH net-2.6.25] net: Improve cache line coherency of ingress qdisc
From: Neil Turton @ 2008-01-17 11:04 UTC (permalink / raw)
  To: netdev; +Cc: linux-net-drivers

Move the ingress qdisc members of struct net_device from the transmit
cache line to the receive cache line to avoid cache line ping-pong.
These members are only used on the receive path.

Signed-off-by: Neil Turton <nturton@solarflare.com>
---

--- net-2.6.25.git-orig/include/linux/netdevice.h	2008-01-15 17:43:08.000000000 +0000
+++ net-2.6.25.git-ndt1/include/linux/netdevice.h	2008-01-16 09:46:19.000000000 +0000
@@ -597,37 +597,37 @@ struct net_device
 /*
  * Cache line mostly used on receive path (including eth_type_trans())
  */
 	unsigned long		last_rx;	/* Time of last Rx	*/
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast 
 							because most packets are unicast) */
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+	/* ingress path synchronizer */
+	spinlock_t		ingress_lock;
+	struct Qdisc		*qdisc_ingress;
+
 /*
  * Cache line mostly used on queue transmit path (qdisc)
  */
 	/* device queue lock */
 	spinlock_t		queue_lock ____cacheline_aligned_in_smp;
 	struct Qdisc		*qdisc;
 	struct Qdisc		*qdisc_sleeping;
 	struct list_head	qdisc_list;
 	unsigned long		tx_queue_len;	/* Max frames per queue allowed */
 
 	/* Partially transmitted GSO packet. */
 	struct sk_buff		*gso_skb;
 
-	/* ingress path synchronizer */
-	spinlock_t		ingress_lock;
-	struct Qdisc		*qdisc_ingress;
-
 /*
  * One part is mostly used on xmit path (device)
  */
 	/* hard_start_xmit synchronizer */
 	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
 	/* cpu id of processor entered to hard_start_xmit or -1,
 	   if nobody entered there.
 	 */
 	int			xmit_lock_owner;
 	void			*priv;	/* pointer to private data	*/


^ permalink raw reply

* Re: [PATCH 3/4] bonding: Fix work rearming
From: Jarek Poplawski @ 2008-01-17 11:18 UTC (permalink / raw)
  To: Makito SHIOKAWA; +Cc: netdev
In-Reply-To: <478EE7FC.4040301@miraclelinux.com>

On Thu, Jan 17, 2008 at 02:30:36PM +0900, Makito SHIOKAWA wrote:
>> But, since during this change from sysfs cancel_delayed_work_sync()
>> could be probably used, and it's rather efficient with killing
>> rearming works, it seems this check could be unnecessary yet.
> What going to be cancelled in bonding_store_miimon() when setting miimon to 
> 0 is arp monitor, not mii monitor. So, this check will be needed to stop 
> rearming mii monitor when value 0 is set to miimon.

Hmm... I'm not sure I understand your point, but it seems both
bonding_store_arp_interval() and bonding_store_miimon() where this
field could be changed, currently use cancel_delayed_work() with
flush_workqueue(), so I presume, there is no rtnl_lock() nor
write_lock(&bond->lock) held, so cancel_delayed_work_sync() could
be used, which doesn't require this additional check.

...Unless you mean that despite miimon value is changed there,
mii_work for some reason can't be cancelled at the same time?

Of course, if there is such a reason for doing this check each time
a work runs instead of controlling where the value changes, then OK!

Regards,
Jarek P.

^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: Herbert Xu @ 2008-01-17 11:11 UTC (permalink / raw)
  To: Timo Teräs; +Cc: jamal, netdev
In-Reply-To: <478EED98.6080603@iki.fi>

On Thu, Jan 17, 2008 at 07:54:32AM +0200, Timo Teräs wrote:
>
> > Racoon doesn't use pfkey dumping as far as I know.
> 
> ipsec-tools racoon uses pfkey and only pfkey. And it's non trivial to
> make it use netlink; it relies heavily all around the code to pfkey
> structs. It also runs on BSD so we cannot rip pfkey away; adding a
> layer to work with both pfkey and netlink would be doable, but just a
> lot of work.

Sure racoon uses pfkey but the question is does it use pfkey dumping?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: David Miller @ 2008-01-17 11:08 UTC (permalink / raw)
  To: timo.teras; +Cc: herbert, hadi, netdev
In-Reply-To: <478F3539.5060903@iki.fi>

From: Timo_Teräs <timo.teras@iki.fi>
Date: Thu, 17 Jan 2008 13:00:09 +0200

> IMHO, it's a lot better then losing >50% of entries and the end
> of sequence message on big dumps. SPD and SADB are not that
> volatile; in most of the cases the dump would be as good as an
> atomic one.

I humbly disagree with you.  Interface behavior stability
is more important.

> I'm not sure if there's other major applications that we should
> be concerned about, but at least ipsec-tools racoon does not
> expect to get atomic dumps (which btw, comes originally from BSD).

Racoon was written as an addon to the BSD stack by an IPV6/IPSEC
project in Japan named KAME, it did not "come from BSD".  It was
added to BSD.

There are also other BSD based IPSEC daemons such as the one written
by the OpenBSD folks.

I don't think this is arguable at all.  We're not changing semantics
over what we've done for 4+ years and applications might depend upon.
It's for a deprecated interface, which makes any semantic changes that
much less inviting.

You can argue all you want, but it will not change the invariants in
the previous paragraph.

All of the time you've spent arguing is time not spent on adding
netlink support to the daemons that do not do so already.  And that
would be 2 steps forwards compared to the 1 step backwards your
desired change would be.

I've stated my position as well as I can at this point so
respectfully, since I have tons of other things to do, I'm stepping
out of this specific discussion for now.

Thank you.


^ permalink raw reply

* Re: [RFC][PATCH] Fixing SA/SP dumps on netlink/af_key
From: Timo Teräs @ 2008-01-17 11:00 UTC (permalink / raw)
  To: David Miller; +Cc: herbert, hadi, netdev
In-Reply-To: <20080117.020616.136852595.davem@davemloft.net>

David Miller wrote:
> From: Timo_Teräs <timo.teras@iki.fi>
> Date: Thu, 17 Jan 2008 12:01:17 +0200
> 
>> David Miller wrote:
>>> This is an inherent aspect of AF_KEY (and what it was
>>> derived from, BSD routing sockets).
>> Yes, this is the way BSD does it.
>>  
>>> It has to provide dumps atomically, and if there is no
>>> space there is no way to provide those entries which
>>> would require more rcvbuf space.
>> RFC does not say it has to be atomic.
> 
> Every application out there in the universe expects BSD socket
> semantics, and therefore atomic dumps.  You cannot "fix" things
> without breaking applications.

IMHO, it's a lot better than losing >50% of entries and the end
of sequence message on big dumps. SPD and SADB are not that
volatile; in most of the cases the dump would be as good as an
atomic one.

Even if it did change during an ongoing dump you still get a usable
dump. All the entries reflect real data and there is no dependency
between different entries.

I'm not sure if there are other major applications that we should
be concerned about, but at least ipsec-tools racoon does not
expect to get atomic dumps (which btw, comes originally from BSD).

Cheers,
  Timo


^ permalink raw reply

* Re: [PATCH 0/3 net-2.6.25] call FIB rule->action in the correct namespace
From: Daniel Lezcano @ 2008-01-17 10:41 UTC (permalink / raw)
  To: Denis V. Lunev; +Cc: David Miller, netdev, Linux Containers, devel
In-Reply-To: <478F2933.1000007@openvz.org>

Denis V. Lunev wrote:
> FIB rule->action should operate in the same namespace as fib_lookup.
> This is definitely missed right now.
> 
> There are two ways to implement this: pass struct net into another rules
> API call (2 levels) or place netns into rule struct directly. The second
> approach seems better as the code will grow less.
> 
> Additionally, the patchset cleanups struct net from
> fib_rules_register/unregister to have network namespace context at the
> time of default rules creation.
> 
> Signed-off-by: Denis V. Lunev <den@openvz.org>

Acked-by: Daniel Lezcano <dlezcano@fr.ibm.com>

-- 






















































Sauf indication contraire ci-dessus:
Compagnie IBM France
Siège Social : Tour Descartes, 2, avenue Gambetta, La Défense 5, 92400
Courbevoie
RCS Nanterre 552 118 465
Forme Sociale : S.A.S.
Capital Social : 542.737.118 €
SIREN/SIRET : 552 118 465 02430

^ permalink raw reply

* Re: Broken "Make ip6_frags per namespace" patch
From: Daniel Lezcano @ 2008-01-17 10:40 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: davem, den, netdev, devel
In-Reply-To: <20080117100524.GF6217@localhost.sw.ru>

Alexey Dobriyan wrote:
>> commit c064c4811b3e87ff8202f5a966ff4eea0bc54575
>> Author: Daniel Lezcano <dlezcano@fr.ibm.com>
>> Date:   Thu Jan 10 02:56:03 2008 -0800
>>
>>     [NETNS][IPV6]: Make ip6_frags per namespace.
>>     
>>     The ip6_frags is moved to the network namespace structure.  Because
>>     there can be multiple instances of the network namespaces, and the
>>     ip6_frags is no longer a global static variable, a helper function has
>>     been added to facilitate the initialization of the variables.
>>     
>>     Until the ipv6 protocol is not per namespace, the variables are
>>     accessed relatively from the initial network namespace.
> 
>> --- a/include/net/netns/ipv6.h
>> +++ b/include/net/netns/ipv6.h
> 
>> @@ -11,6 +13,7 @@ struct netns_sysctl_ipv6 {
>>  #ifdef CONFIG_SYSCTL
>>  	struct ctl_table_header *table;
>>  #endif
>> +	struct inet_frags_ctl frags;
> 
>> --- a/net/ipv6/reassembly.c
>> +++ b/net/ipv6/reassembly.c
> 
>> @@ -632,6 +625,11 @@ static struct inet6_protocol frag_protocol =
>>  	.flags		=	INET6_PROTO_NOPOLICY,
>>  };
>>  
>> +void ipv6_frag_sysctl_init(struct net *net)
>> +{
>> +	ip6_frags.ctl = &net->ipv6.sysctl.frags;
>> +}
> 
> _This_ can't work. There is only one ip6_frags, and its ->ctl pointer is
> flipped onto per-netns data. The changelog is also misleading: only
> ip6_frags_ctl is moved to the netns, not all of ip6_frags.
> 
> Oopsing place below -- f->ctl dereference in preparation of mod_timer() call.
> 
> 
> 
> BUG: unable to handle kernel paging request at virtual address f5da8fc8
> printing eip: c11d868a *pdpt = 0000000000003001 *pde = 0000000001728067 *pte = 0000000035da8000 
> Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
> Modules linked in: ebt_ip ebt_dnat ebt_arpreply ebt_arp ebt_among ebtable_nat ip6t_REJECT ip6table_filter ip6_tables ebtable_filter ebtable_broute ebt_802_3 ebtables des_generic nf_conntrack_netbios_ns nf_conntrack_ipv4 xt_state nf_conntrack xt_tcpudp ipt_REJECT iptable_filter ip_tables deflate zlib_deflate zlib_inflate cryptomgr crypto_hash cpufreq_stats cpufreq_ondemand cdrom cbc bridge llc blkcipher crypto_algapi arpt_mangle arptable_filter arp_tables x_tables ah6 af_packet ipv6
> 
> Pid: 0, comm: swapper Not tainted (2.6.24-rc7-net-2.6.25-nf-sysfs-n #30)
> EIP: 0060:[<c11d868a>] EFLAGS: 00010246 CPU: 1
> EIP is at inet_frag_secret_rebuild+0xaa/0xd0
> EAX: f5da8fbc EBX: 00000000 ECX: c1310000 EDX: 00000100
> ESI: f7cba000 EDI: f898f7a0 EBP: 00000040 ESP: c1310f90
>  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
> Process swapper (pid: 0, ti=c1310000 task=f7c9a580 task.ti=f7c9b000)
> Stack: f898f7a8 f898f8a8 000ddcbd f898f7a0 f7cba000 c1310fc4 00000100 c1026d60 
>        00000002 00000001 c1191183 c4779ddc c11d85e0 f898c860 f898c860 c12c4a88 
>        00000001 c1308da0 0000000a c1023477 00000001 c130b640 c130b640 f7c9bf34 
> Call Trace:
>  [<c1026d60>] run_timer_softirq+0x120/0x190
>  [<c1191183>] net_rx_action+0x53/0x220
>  [<c11d85e0>] inet_frag_secret_rebuild+0x0/0xd0
>  [<c1023477>] __do_softirq+0x87/0x100
>  [<c10059cf>] do_softirq+0xaf/0x110
>  [<c10233e3>] irq_exit+0x83/0x90
>  [<c1010ce7>] smp_apic_timer_interrupt+0x57/0x90
>  [<c10036e1>] apic_timer_interrupt+0x29/0x38
>  [<c10036eb>] apic_timer_interrupt+0x33/0x38
>  [<c1001460>] default_idle+0x0/0x60
>  [<c10014a0>] default_idle+0x40/0x60
>  [<c1000ea3>] cpu_idle+0x73/0xb0
> =======================
> Code: 8b 10 85 d2 89 13 74 03 89 5a 04 89 18 89 43 04 85 f6 89 f3 75 bb 45 83 fd 40 75 a5 8b 44 24 04 e8 4c 3f 01 00 8b 87 50 01 00 00 <8b> 50 0c 01 54 24 08 8d 87 38 01 00 00 8b 54 24 08 83 c4 0c 5b 
> EIP: [<c11d868a>] inet_frag_secret_rebuild+0xaa/0xd0 SS:ESP 0068:c1310f90
> Kernel panic - not syncing: Fatal exception in interrupt

Hi Alexey,

Does it happen after unsharing the network?

^ permalink raw reply

* Re: [PATCH 1/5] spidernet: add missing initialization
From: Ishizaki Kou @ 2008-01-17 10:22 UTC (permalink / raw)
  To: jens; +Cc: netdev, cbe-oss-dev
In-Reply-To: <200801111344.35652.jens@de.ibm.com>

Jens-san,

> Hi Ishizaki,
>
> Linas has left the company and is no longer doing kernel related stuff,
> so I suggest, given Jeff is ok with that, that the two of us take over
> spidernet maintainership.
 (snip)
> Change maintainership for spidernet.
>
> Signed-off-by: Jens Osterkamp <jens@de.ibm.com>

I apologize for my late reply.

I would like to accept your suggestion, but I have to get authorization
from my company to take on maintainership. I have started discussing
this with my boss.


I can't check that the spidernet driver works on Cell Blade, because I
don't have one.  So I hope you will check that the spidernet driver still
works on Cell Blade whenever it changes.

And then, will you review our latest patches?

Best regards,
Kou Ishizaki

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox