netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
@ 2008-08-07 14:43 Julius Volz
  2008-08-07 14:43 ` [PATCHv3 1/2] IPVS: Add genetlink interface definitions to ip_vs.h Julius Volz
                   ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Julius Volz @ 2008-08-07 14:43 UTC (permalink / raw)
  To: julius.volz, netdev, lvs-devel; +Cc: horms, kaber, davem, tgraf, vbusam

This is the third iteration of the IPVS Netlink interface, this time
with only a small fix for a typo found by Thomas Graf. If there are no
further major issues, can this be applied?

The two patches add a Generic Netlink interface to IPVS while keeping
the old get/setsockopt interface for userspace backwards compatibility.
The motivation for this is to have a more extensible interface for
future changes, such as the planned IPv6 support.

An ipvsadm that already uses the new interface is available here:

http://sixpak.org/vince/google/ipvsadm/
(by Vince Busam)

Old ipvsadms continue to work with this change.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCHv3 1/2] IPVS: Add genetlink interface definitions to ip_vs.h
  2008-08-07 14:43 [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Julius Volz
@ 2008-08-07 14:43 ` Julius Volz
  2008-08-07 14:43 ` [PATCHv3 2/2] IPVS: Add genetlink interface implementation Julius Volz
  2008-08-08  2:26 ` [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Simon Horman
  2 siblings, 0 replies; 23+ messages in thread
From: Julius Volz @ 2008-08-07 14:43 UTC (permalink / raw)
  To: julius.volz, netdev, lvs-devel
  Cc: horms, kaber, davem, tgraf, vbusam, Julius Volz

Add IPVS Generic Netlink interface definitions to include/linux/ip_vs.h.

Signed-off-by: Julius Volz <juliusv@google.com>

 1 files changed, 160 insertions(+), 0 deletions(-)

diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index ec6eb49..0f434a2 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -242,4 +242,164 @@ struct ip_vs_daemon_user {
 	int			syncid;
 };
 
+/*
+ *
+ * IPVS Generic Netlink interface definitions
+ *
+ */
+
+/* Generic Netlink family info */
+
+#define IPVS_GENL_NAME		"IPVS"
+#define IPVS_GENL_VERSION	0x1
+
+struct ip_vs_flags {
+	__be32 flags;
+	__be32 mask;
+};
+
+/* Generic Netlink command attributes */
+enum {
+	IPVS_CMD_UNSPEC = 0,
+
+	IPVS_CMD_NEW_SERVICE,		/* add service */
+	IPVS_CMD_SET_SERVICE,		/* modify service */
+	IPVS_CMD_DEL_SERVICE,		/* delete service */
+	IPVS_CMD_GET_SERVICE,		/* get service info */
+
+	IPVS_CMD_NEW_DEST,		/* add destination */
+	IPVS_CMD_SET_DEST,		/* modify destination */
+	IPVS_CMD_DEL_DEST,		/* delete destination */
+	IPVS_CMD_GET_DEST,		/* get destination info */
+
+	IPVS_CMD_NEW_DAEMON,		/* start sync daemon */
+	IPVS_CMD_DEL_DAEMON,		/* stop sync daemon */
+	IPVS_CMD_GET_DAEMON,		/* get sync daemon status */
+
+	IPVS_CMD_SET_CONFIG,		/* set config settings */
+	IPVS_CMD_GET_CONFIG,		/* get config settings */
+
+	IPVS_CMD_SET_INFO,		/* only used in GET_INFO reply */
+	IPVS_CMD_GET_INFO,		/* get general IPVS info */
+
+	IPVS_CMD_ZERO,			/* zero all counters and stats */
+	IPVS_CMD_FLUSH,			/* flush services and dests */
+
+	__IPVS_CMD_MAX,
+};
+
+#define IPVS_CMD_MAX (__IPVS_CMD_MAX - 1)
+
+/* Attributes used in the first level of commands (policy: ip_vs_cmd_policy) */
+enum {
+	IPVS_CMD_ATTR_UNSPEC = 0,
+	IPVS_CMD_ATTR_SERVICE,		/* nested service attribute */
+	IPVS_CMD_ATTR_DEST,		/* nested destination attribute */
+	IPVS_CMD_ATTR_DAEMON,		/* nested sync daemon attribute */
+	IPVS_CMD_ATTR_TIMEOUT_TCP,	/* TCP connection timeout */
+	IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,	/* TCP FIN wait timeout */
+	IPVS_CMD_ATTR_TIMEOUT_UDP,	/* UDP timeout */
+	__IPVS_CMD_ATTR_MAX,		/* sentinel, keep last */
+};
+
+#define IPVS_CMD_ATTR_MAX (__IPVS_CMD_ATTR_MAX - 1)
+
+/*
+ * Attributes used to describe a service
+ *
+ * Used inside nested attribute IPVS_CMD_ATTR_SERVICE
+ */
+enum {
+	IPVS_SVC_ATTR_UNSPEC = 0,
+	IPVS_SVC_ATTR_AF,		/* address family */
+	IPVS_SVC_ATTR_PROTOCOL,		/* virtual service protocol */
+	IPVS_SVC_ATTR_ADDR,		/* virtual service address */
+	IPVS_SVC_ATTR_PORT,		/* virtual service port */
+	IPVS_SVC_ATTR_FWMARK,		/* firewall mark of service */
+
+	IPVS_SVC_ATTR_SCHED_NAME,	/* name of scheduler */
+	IPVS_SVC_ATTR_FLAGS,		/* virtual service flags */
+	IPVS_SVC_ATTR_TIMEOUT,		/* persistent timeout */
+	IPVS_SVC_ATTR_NETMASK,		/* persistent netmask */
+
+	IPVS_SVC_ATTR_STATS,		/* nested attribute for service stats */
+	__IPVS_SVC_ATTR_MAX,
+};
+
+#define IPVS_SVC_ATTR_MAX (__IPVS_SVC_ATTR_MAX - 1)
+
+/*
+ * Attributes used to describe a destination (real server)
+ *
+ * Used inside nested attribute IPVS_CMD_ATTR_DEST
+ */
+enum {
+	IPVS_DEST_ATTR_UNSPEC = 0,
+	IPVS_DEST_ATTR_ADDR,		/* real server address */
+	IPVS_DEST_ATTR_PORT,		/* real server port */
+
+	IPVS_DEST_ATTR_FWD_METHOD,	/* forwarding method */
+	IPVS_DEST_ATTR_WEIGHT,		/* destination weight */
+
+	IPVS_DEST_ATTR_U_THRESH,	/* upper threshold */
+	IPVS_DEST_ATTR_L_THRESH,	/* lower threshold */
+
+	IPVS_DEST_ATTR_ACTIVE_CONNS,	/* active connections */
+	IPVS_DEST_ATTR_INACT_CONNS,	/* inactive connections */
+	IPVS_DEST_ATTR_PERSIST_CONNS,	/* persistent connections */
+
+	IPVS_DEST_ATTR_STATS,		/* nested attribute for dest stats */
+	__IPVS_DEST_ATTR_MAX,
+};
+
+#define IPVS_DEST_ATTR_MAX (__IPVS_DEST_ATTR_MAX - 1)
+
+/*
+ * Attributes describing a sync daemon
+ *
+ * Used inside nested attribute IPVS_CMD_ATTR_DAEMON
+ */
+enum {
+	IPVS_DAEMON_ATTR_UNSPEC = 0,
+	IPVS_DAEMON_ATTR_STATE,		/* sync daemon state (master/backup) */
+	IPVS_DAEMON_ATTR_MCAST_IFN,	/* multicast interface name */
+	IPVS_DAEMON_ATTR_SYNC_ID,	/* SyncID we belong to */
+	__IPVS_DAEMON_ATTR_MAX,
+};
+
+#define IPVS_DAEMON_ATTR_MAX (__IPVS_DAEMON_ATTR_MAX - 1)
+
+/*
+ * Attributes used to describe service or destination entry statistics
+ *
+ * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
+ */
+enum {
+	IPVS_STATS_ATTR_UNSPEC = 0,
+	IPVS_STATS_ATTR_CONNS,		/* connections scheduled */
+	IPVS_STATS_ATTR_INPKTS,		/* incoming packets */
+	IPVS_STATS_ATTR_OUTPKTS,	/* outgoing packets */
+	IPVS_STATS_ATTR_INBYTES,	/* incoming bytes */
+	IPVS_STATS_ATTR_OUTBYTES,	/* outgoing bytes */
+
+	IPVS_STATS_ATTR_CPS,		/* current connection rate */
+	IPVS_STATS_ATTR_INPPS,		/* current in packet rate */
+	IPVS_STATS_ATTR_OUTPPS,		/* current out packet rate */
+	IPVS_STATS_ATTR_INBPS,		/* current in byte rate */
+	IPVS_STATS_ATTR_OUTBPS,		/* current out byte rate */
+	__IPVS_STATS_ATTR_MAX,
+};
+
+#define IPVS_STATS_ATTR_MAX (__IPVS_STATS_ATTR_MAX - 1)
+
+/* Attributes used in response to IPVS_CMD_GET_INFO command */
+enum {
+	IPVS_INFO_ATTR_UNSPEC = 0,
+	IPVS_INFO_ATTR_VERSION,		/* IPVS version number */
+	IPVS_INFO_ATTR_CONN_TAB_SIZE,	/* size of connection hash table */
+	__IPVS_INFO_ATTR_MAX,
+};
+
+#define IPVS_INFO_ATTR_MAX (__IPVS_INFO_ATTR_MAX - 1)
+
 #endif	/* _IP_VS_H */
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-07 14:43 [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Julius Volz
  2008-08-07 14:43 ` [PATCHv3 1/2] IPVS: Add genetlink interface definitions to ip_vs.h Julius Volz
@ 2008-08-07 14:43 ` Julius Volz
  2008-08-08 11:29   ` Julius Volz
  2008-08-08  2:26 ` [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Simon Horman
  2 siblings, 1 reply; 23+ messages in thread
From: Julius Volz @ 2008-08-07 14:43 UTC (permalink / raw)
  To: julius.volz, netdev, lvs-devel
  Cc: horms, kaber, davem, tgraf, vbusam, Julius Volz

Add the implementation of the new Generic Netlink interface to IPVS and
keep the old set/getsockopt interface for userspace backwards
compatibility.

Signed-off-by: Julius Volz <juliusv@google.com>

 1 files changed, 880 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9a5ace0..b4c5cc3 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include <asm/uaccess.h>
 
@@ -2305,6 +2306,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family; maxattr bounds the first-level IPVS_CMD_ATTR_* set */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_ATTR_MAX,	/* attributes, not IPVS_CMD_MAX */
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy for IPVS_CMD_ATTR_SERVICE; FLAGS carries a struct ip_vs_flags */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy for IPVS_CMD_ATTR_DEST; FWD_METHOD is put and read as a plain u32 */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = 0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+/* Parse @nla into @usvc (all fields if @full_entry); returns 0 or -errno */
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/* For now, only support IPv4 */
+	if (nla_get_u16(nla_af) != AF_INET)
+		return -EAFNOSUPPORT;
+
+	memset(usvc, 0, sizeof(*usvc));	/* callers pass uninitialised stack */
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags = { .flags = 0, .mask = 0 };
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it exists (else they stay 0) */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+						  usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		}
+
+		/* set new flags from userland: only bits in mask are changed */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+
+		strlcpy(usvc->sched_name, nla_data(nla_sched),
+			sizeof(usvc->sched_name));
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+					   usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+
+/* doit handler for all modifying IPVS commands; runs under __ip_vs_mutex */
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;	/* early exits reach 'out' unset */
+	struct ip_vs_service_user usvc;
+	struct ip_vs_dest_user udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+/* doit handler for GET_SERVICE/GET_CONFIG/GET_INFO; unicasts a single reply */
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	cmd = info->genlhdr->cmd;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		ret = -EINVAL;
+		goto out_err;	/* msg is already allocated: free it */
+	}
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	if (msg)
+		nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	/* SET commands */
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+int ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
 
 int ip_vs_control_init(void)
 {
@@ -2319,6 +3191,13 @@ int ip_vs_control_init(void)
 		return ret;
 	}
 
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
@@ -2355,6 +3234,7 @@ void ip_vs_control_cleanup(void)
 	unregister_sysctl_table(sysctl_header);
 	proc_net_remove(&init_net, "ip_vs_stats");
 	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-07 14:43 [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Julius Volz
  2008-08-07 14:43 ` [PATCHv3 1/2] IPVS: Add genetlink interface definitions to ip_vs.h Julius Volz
  2008-08-07 14:43 ` [PATCHv3 2/2] IPVS: Add genetlink interface implementation Julius Volz
@ 2008-08-08  2:26 ` Simon Horman
  2008-08-08 12:06   ` Julius Volz
  2 siblings, 1 reply; 23+ messages in thread
From: Simon Horman @ 2008-08-08  2:26 UTC (permalink / raw)
  To: Julius Volz; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Thu, Aug 07, 2008 at 04:43:37PM +0200, Julius Volz wrote:
> This is the third iteration of the IPVS Netlink interface, this time
> with only a small fix for a typo found by Thomas Graf. If there are no
> further major issues, can this be applied?
> 
> The two patches add a Generic Netlink interface to IPVS while keeping
> the old get/setsockopt interface for userspace backwards compatibility.
> The motivation for this is to have a more extensible interface for
> future changes, such as the planned IPv6 support.
> 
> An ipvsadm that already uses the new interface is available here:
> 
> http://sixpak.org/vince/google/ipvsadm/
> (by Vince Busam)
> 
> Old ipvsadms continue to work with this change.

Hi Julius,

these patches seem fine to me, however I am still seeing an oops
when using the new ipvsadm. Are we sure that this bug is in generic
code?

# ipvsadm -A -t 10.4.0.132:80
# ipvsadm -C
BUG: unable to handle kernel NULL pointer dereference at 00000028
IP: [<c021f034>] ip_vs_genl_set_cmd+0x254/0x494
*pde = 00000000 
Oops: 0000 [#1] 

Pid: 33, comm: ipvsadm Not tainted (2.6.27-rc2-kexec-11901-ge6fce5b-dirty #13)
EIP: 0060:[<c021f034>] EFLAGS: 00000206 CPU: 0
EIP is at ip_vs_genl_set_cmd+0x254/0x494
EAX: 00000014 EBX: 00000000 ECX: c7401300 EDX: c02e3018
ESI: 00000000 EDI: 00000011 EBP: c746dce4 ESP: c746dc20
 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Process ipvsadm (pid: 33, ti=c746c000 task=c6f22700 task.ti=c746c000)
Stack: 00000021 c0289340 c0289364 c0289374 c6faf088 00001102 c6faf0b4 c6faf058 
       00000003 c0289340 00000001 00000000 c02e0968 c01e948e 00000002 c6feb960 
       00000001 c6f84160 c6feb960 00000010 00000000 00000246 c01e4c57 00000246 
Call Trace:
 [<c01e948e>] ctrl_dumpfamily+0x86/0xd4
 [<c01e4c57>] sk_filter+0x67/0x7c
 [<c01e693d>] netlink_dump+0x15d/0x1a4
 [<c01e9858>] genl_rcv_msg+0x1dc/0x204
 [<c01e967c>] genl_rcv_msg+0x0/0x204
 [<c01e89d2>] netlink_rcv_skb+0x72/0x80
 [<c01e9671>] genl_rcv+0x19/0x24
 [<c01e84d2>] netlink_unicast+0x232/0x284
 [<c01e86dd>] netlink_sendmsg+0x1b9/0x288
 [<c01d0d3b>] sock_sendmsg+0xb7/0xe0
 [<c0120adc>] autoremove_wake_function+0x0/0x30
 [<c0120adc>] autoremove_wake_function+0x0/0x30
 [<c013a383>] __do_fault+0x15b/0x340
 [<c01d7832>] verify_iovec+0x2a/0x88
 [<c01d0e9f>] sys_sendmsg+0x13b/0x230
 [<c01d184a>] sys_recvmsg+0x1b6/0x240
 [<c01d1665>] move_addr_to_user+0x45/0x74
 [<c01d1b50>] sys_getsockname+0x94/0xa0
 [<c01d2a09>] release_sock+0x9/0x70
 [<c01d3bca>] sock_setsockopt+0x106/0x638
 [<c01d0a41>] sock_attach_fd+0x65/0xa0
 [<c01d1e88>] sys_socketcall+0x80/0x1d8
 [<c0103d42>] syscall_call+0x7/0xb
 =======================
Code: 8b 40 04 89 44 24 1c 8b 47 18 85 c0 74 07 8b 40 04 89 44 24 20 8d 44 24 18 e8 09 ca ff ff 89 c3 8b 84 24 88 00 00 00 85 c0 74 03 <ff> 48 14 b8 f0 90 28 c0 e8 e3 a5 00 00 89 d8 81 c4 8c 00 00 00 
EIP: [<c021f034>] ip_vs_genl_set_cmd+0x254/0x494 SS:ESP 0068:c746dc20
---[ end trace d371c2e324a230e1 ]---
Segmentation fault

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-07 14:43 ` [PATCHv3 2/2] IPVS: Add genetlink interface implementation Julius Volz
@ 2008-08-08 11:29   ` Julius Volz
  2008-08-13 21:51     ` Sven Wegener
  2008-08-14  5:39     ` Sven Wegener
  0 siblings, 2 replies; 23+ messages in thread
From: Julius Volz @ 2008-08-08 11:29 UTC (permalink / raw)
  To: netdev, lvs-devel; +Cc: horms, kaber, davem, tgraf, vbusam

This still had two bugs:
- policies for IPVS_DEST_ATTR_FWD_METHOD and IPVS_SVC_ATTR_FLAGS
  were swapped
- svc not initialized to NULL at the beginning of ip_vs_genl_set_cmd()

The version below fixes this:

----
Add the implementation of the new Generic Netlink interface to IPVS and
keep the old set/getsockopt interface for userspace backwards
compatibility.

Signed-off-by: Julius Volz <juliusv@google.com>

 1 files changed, 880 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9a5ace0..8038420 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include <asm/uaccess.h>
 
@@ -2305,6 +2306,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family; maxattr bounds the top-level IPVS_CMD_ATTR_* space */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_ATTR_MAX,	/* same bound as ip_vs_cmd_policy[] */
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = ~0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+/* Dump every service (both hash tables); cb->args[0] counts entries sent */
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)	/* counted in cb->args[0]: skip */
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;	/* do not count the entry that failed */
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)	/* idx keeps running across tables */
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:	/* also reached on normal completion */
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/* For now, only support IPv4 */
+	if (nla_get_u16(nla_af) != AF_INET)
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+						  usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+
+		strlcpy(usvc->sched_name, nla_data(nla_sched),
+			sizeof(usvc->sched_name));
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+/* Parse identifying service attrs; ERR_PTR() on parse error, else lookup result */
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);	/* 0: identifying fields only */
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+					   usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+/* Dump the destinations of the service given in IPVS_CMD_ATTR_SERVICE */
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;	/* parse error or no such service: dump nothing */
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)	/* counted in cb->args[0]: skip */
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;	/* do not count the entry that failed */
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:	/* also reached on normal completion */
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+/* doit handler for all modifying commands; runs under __ip_vs_mutex */
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user usvc;
+	struct ip_vs_dest_user udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();	/* ZERO without a service: zero everything */
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;	/* NOTE(review): svc is still put at out: - confirm it survives */
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)	/* NULL on the early paths that never looked up a service */
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+/* doit handler for IPVS_CMD_GET_{SERVICE,CONFIG,INFO}: unicast one reply */
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	cmd = info->genlhdr->cmd;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		ret = -EINVAL;
+		goto out_err;	/* not "out": msg is allocated and must be freed */
+	}
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;	/* skip nlmsg_free(): msg was passed to genlmsg_unicast() */
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:	/* every failure after the allocation attempt ends up here */
+	if (msg)
+		nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	/* SET commands */
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+/* Register the IPVS genl family and all of its ops; 0 or a negative error */
+int ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:	/* an op failed to register: drop the family again */
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
 
 int ip_vs_control_init(void)
 {
@@ -2319,6 +3191,13 @@ int ip_vs_control_init(void)
 		return ret;
 	}
 
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
@@ -2355,6 +3234,7 @@ void ip_vs_control_cleanup(void)
 	unregister_sysctl_table(sysctl_header);
 	proc_net_remove(&init_net, "ip_vs_stats");
 	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-08  2:26 ` [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Simon Horman
@ 2008-08-08 12:06   ` Julius Volz
  2008-08-09 14:23     ` Simon Horman
  0 siblings, 1 reply; 23+ messages in thread
From: Julius Volz @ 2008-08-08 12:06 UTC (permalink / raw)
  To: Simon Horman; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Fri, Aug 08, 2008 at 12:26:09PM +1000, Simon Horman wrote:
> these patches seem fine to me, however I am still seeing an oops
> when using the new ipvsadm. Are we sure that this bug is in generic
> code?
> 
> # ipvsadm -A -t 10.4.0.132:80
> # ipvsadm -C
> BUG: unable to handle kernel NULL pointer dereference at 00000028
> IP: [<c021f034>] ip_vs_genl_set_cmd+0x254/0x494
> *pde = 00000000 
> Oops: 0000 [#1] 

Ah, that made me find a bug where svc wasn't initialized to NULL at the
beginning of ip_vs_genl_set_cmd(). I'm not sure though if that was the
problem or if it still is some general genl issue. I just posted a newer
version in response to my earlier post, could you give it a try?

Best with this updated ipvsadm:
http://www-user.tu-chemnitz.de/~volz/ipvsadm-1.25-nl-3.tar.gz
(later also at http://sixpak.org/vince/google/ipvsadm/)

Thanks,
Julius

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-08 12:06   ` Julius Volz
@ 2008-08-09 14:23     ` Simon Horman
  2008-08-09 19:57       ` Julius Volz
  2008-08-13 16:00       ` Julius Volz
  0 siblings, 2 replies; 23+ messages in thread
From: Simon Horman @ 2008-08-09 14:23 UTC (permalink / raw)
  To: Julius Volz; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Fri, Aug 08, 2008 at 02:06:43PM +0200, Julius Volz wrote:
> On Fri, Aug 08, 2008 at 12:26:09PM +1000, Simon Horman wrote:
> > these patches seem fine to me, however I am still seeing an oops
> > when using the new ipvsadm. Are we sure that this bug is in generic
> > code?
> > 
> > # ipvsadm -A -t 10.4.0.132:80
> > # ipvsadm -C
> > BUG: unable to handle kernel NULL pointer dereference at 00000028
> > IP: [<c021f034>] ip_vs_genl_set_cmd+0x254/0x494
> > *pde = 00000000 
> > Oops: 0000 [#1] 
> 
> Ah, that made me find a bug where svc wasn't initialized to NULL at the
> beginning of ip_vs_genl_set_cmd(). I'm not sure though if that was the
> problem or if it still is some general genl issue. I just posted a newer
> version in response to my earlier post, could you give it a try?
> 
> Best with this updated ipvsadm:
> http://www-user.tu-chemnitz.de/~volz/ipvsadm-1.25-nl-3.tar.gz
> (later also at http://sixpak.org/vince/google/ipvsadm/)

Hi Julius,

your latest patch does resolve the problem that I was seeing
with ipvsadm-1.25-nl-2. ipvsadm-1.25-nl-3 also works :-)

I'd like to try and stress it out a bit more.
I will try and get to that tomorrow or on Monday.


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-09 14:23     ` Simon Horman
@ 2008-08-09 19:57       ` Julius Volz
  2008-08-13 16:00       ` Julius Volz
  1 sibling, 0 replies; 23+ messages in thread
From: Julius Volz @ 2008-08-09 19:57 UTC (permalink / raw)
  To: Simon Horman; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Sat, Aug 9, 2008 at 4:23 PM, Simon Horman <horms@verge.net.au> wrote:
> your latest patch does resolve the problem that I was seeing
> with ipvsadm-1.25-nl-2. ipvsadm-1.25-nl-3 also works :-)

Nice!

> I'd like to try and stress it out a bit more.
> I will try and get to that tomorrow or on Monday.

Great, thank you!

Julius

-- 
Google Switzerland GmbH

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-09 14:23     ` Simon Horman
  2008-08-09 19:57       ` Julius Volz
@ 2008-08-13 16:00       ` Julius Volz
  2008-08-13 23:09         ` Simon Horman
  1 sibling, 1 reply; 23+ messages in thread
From: Julius Volz @ 2008-08-13 16:00 UTC (permalink / raw)
  To: Simon Horman; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Sat, Aug 9, 2008 at 4:23 PM, Simon Horman <horms@verge.net.au> wrote:
> Hi Julius,
>
> your latest patch does resolve the problem that I was seeing
> with ipvsadm-1.25-nl-2. ipvsadm-1.25-nl-3 also works :-)
>
> I'd like to try and stress it out a bit more.
> I will try and get to that tomorrow or on Monday.

Hi, you were probably occupied by the other IPVS patches. Is there any
news on this?

Julius

-- 
Google Switzerland GmbH

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-08 11:29   ` Julius Volz
@ 2008-08-13 21:51     ` Sven Wegener
  2008-08-13 21:53       ` Sven Wegener
  2008-08-14  9:32       ` Julius Volz
  2008-08-14  5:39     ` Sven Wegener
  1 sibling, 2 replies; 23+ messages in thread
From: Sven Wegener @ 2008-08-13 21:51 UTC (permalink / raw)
  To: Julius Volz; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Fri, 8 Aug 2008, Julius Volz wrote:

> This still had two bugs:
> - policies for IPVS_DEST_ATTR_FWD_METHOD and IPVS_SVC_ATTR_FLAGS
>   were swapped
> - svc not initialized to NULL at the beginning of ip_vs_genl_set_cmd()
> 
> The version below fixes this:
> 
> ----
> Add the implementation of the new Generic Netlink interface to IPVS and
> keep the old set/getsockopt interface for userspace backwards
> compatibility.
> 
> Signed-off-by: Julius Volz <juliusv@google.com>
> 
>  1 files changed, 880 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
> index 9a5ace0..8038420 100644
> --- a/net/ipv4/ipvs/ip_vs_ctl.c
> +++ b/net/ipv4/ipvs/ip_vs_ctl.c
> @@ -37,6 +37,7 @@
>  #include <net/ip.h>
>  #include <net/route.h>
>  #include <net/sock.h>
> +#include <net/genetlink.h>
>  
>  #include <asm/uaccess.h>
>  
> @@ -2305,6 +2306,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
>  	.owner		= THIS_MODULE,
>  };
>  
> +/*
> + * Generic Netlink interface
> + */
> +
> +/* IPVS genetlink family */
> +static struct genl_family ip_vs_genl_family = {
> +	.id		= GENL_ID_GENERATE,
> +	.hdrsize	= 0,
> +	.name		= IPVS_GENL_NAME,
> +	.version	= IPVS_GENL_VERSION,
> +	.maxattr	= IPVS_CMD_MAX,
> +};
> +
> +/* Policy used for first-level command attributes */
> +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
> +	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
> +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
> +	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
> +	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_IFNAME_MAXLEN },
> +	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
> +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
> +	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_SCHEDNAME_MAXLEN },
> +	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
> +					    .len = sizeof(struct ip_vs_flags) },
> +	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
> +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
> +	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
> +				 struct ip_vs_stats *stats)
> +{
> +	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
> +	if (!nl_stats)
> +		return -EMSGSIZE;
> +
> +	spin_lock_bh(&stats->lock);
> +
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
> +
> +	spin_unlock_bh(&stats->lock);
> +
> +	nla_nest_end(skb, nl_stats);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	spin_unlock_bh(&stats->lock);
> +	nla_nest_cancel(skb, nl_stats);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_fill_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc)
> +{
> +	struct nlattr *nl_service;
> +	struct ip_vs_flags flags = { .flags = svc->flags,
> +				     .mask = ~0 };
> +
> +	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
> +	if (!nl_service)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
> +
> +	if (svc->fwmark) {
> +		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
> +	} else {
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
> +		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
> +	}
> +
> +	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
> +	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_service);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_service);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc,
> +				   struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_SERVICE);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_service(skb, svc) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_services(struct sk_buff *skb,
> +				    struct netlink_callback *cb)
> +{
> +	int idx = 0, i;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +	cb->args[0] = idx;
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
> +				    struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
> +	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
> +
> +	/* Parse mandatory identifying service fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
> +		return -EINVAL;
> +
> +	nla_af		= attrs[IPVS_SVC_ATTR_AF];
> +	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
> +	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
> +	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
> +
> +	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
> +		return -EINVAL;
> +
> +	/* For now, only support IPv4 */
> +	if (nla_get_u16(nla_af) != AF_INET)
> +		return -EAFNOSUPPORT;
> +
> +	if (nla_fwmark) {
> +		usvc->protocol = IPPROTO_TCP;
> +		usvc->fwmark = nla_get_u32(nla_fwmark);
> +	} else {
> +		usvc->protocol = nla_get_u16(nla_protocol);
> +		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
> +		usvc->port = nla_get_u16(nla_port);
> +		usvc->fwmark = 0;
> +	}
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
> +			      *nla_netmask;
> +		struct ip_vs_flags flags;
> +		struct ip_vs_service *svc;
> +
> +		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
> +		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
> +		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
> +		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
> +
> +		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
> +			return -EINVAL;
> +
> +		nla_memcpy(&flags, nla_flags, sizeof(flags));
> +
> +		/* prefill flags from service if it already exists */
> +		if (usvc->fwmark)
> +			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
> +		else
> +			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
> +						  usvc->port);
> +		if (svc) {
> +			usvc->flags = svc->flags;
> +			ip_vs_service_put(svc);
> +		} else
> +			usvc->flags = 0;
> +
> +		/* set new flags from userland */
> +		usvc->flags = (usvc->flags & ~flags.mask) |
> +			      (flags.flags & flags.mask);
> +
> +		strlcpy(usvc->sched_name, nla_data(nla_sched),
> +			sizeof(usvc->sched_name));
> +		usvc->timeout = nla_get_u32(nla_timeout);
> +		usvc->netmask = nla_get_u32(nla_netmask);
> +	}
> +
> +	return 0;
> +}
> +
> +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
> +{
> +	struct ip_vs_service_user usvc;
> +	int ret;
> +
> +	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	if (usvc.fwmark)
> +		return __ip_vs_svc_fwm_get(usvc.fwmark);
> +	else
> +		return __ip_vs_service_get(usvc.protocol, usvc.addr,
> +					   usvc.port);
> +}
> +
> +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
> +{
> +	struct nlattr *nl_dest;
> +
> +	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
> +	if (!nl_dest)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
> +	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
> +
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
> +		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
> +		    atomic_read(&dest->activeconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
> +		    atomic_read(&dest->inactconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
> +		    atomic_read(&dest->persistconns));
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_dest);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_dest);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
> +				struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DEST);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_dest(skb, dest) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dests(struct sk_buff *skb,
> +				 struct netlink_callback *cb)
> +{
> +	int idx = 0;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +	struct ip_vs_dest *dest;
> +	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	/* Try to find the service for which to dump destinations */
> +	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
> +			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
> +		goto out_err;
> +
> +	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
> +	if (IS_ERR(svc) || svc == NULL)
> +		goto out_err;
> +
> +	/* Dump the destinations */
> +	list_for_each_entry(dest, &svc->destinations, n_list) {
> +		if (++idx <= start)
> +			continue;
> +		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
> +			idx--;
> +			goto nla_put_failure;
> +		}
> +	}
> +
> +nla_put_failure:
> +	cb->args[0] = idx;
> +	ip_vs_service_put(svc);
> +
> +out_err:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
> +				 struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
> +	struct nlattr *nla_addr, *nla_port;
> +
> +	/* Parse mandatory identifying destination fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
> +		return -EINVAL;
> +
> +	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
> +
> +	if (!(nla_addr && nla_port))
> +		return -EINVAL;
> +
> +	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
> +	udest->port = nla_get_u16(nla_port);
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
> +			      *nla_l_thresh;
> +
> +		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
> +		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
> +		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
> +		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
> +
> +		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
> +			return -EINVAL;
> +
> +		udest->conn_flags = nla_get_u32(nla_fwd)
> +				    & IP_VS_CONN_F_FWD_MASK;
> +		udest->weight = nla_get_u32(nla_weight);
> +		udest->u_threshold = nla_get_u32(nla_u_thresh);
> +		udest->l_threshold = nla_get_u32(nla_l_thresh);
> +	}
> +
> +	return 0;
> +}
> +
> +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid)
> +{
> +	struct nlattr *nl_daemon;
> +
> +	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
> +	if (!nl_daemon)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
> +	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
> +
> +	nla_nest_end(skb, nl_daemon);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_daemon);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid,
> +				  struct netlink_callback *cb)
> +{
> +	void *hdr;
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DAEMON);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
> +				   struct netlink_callback *cb)
> +{
> +	mutex_lock(&__ip_vs_mutex);
> +	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
> +					   ip_vs_master_mcast_ifn,
> +					   ip_vs_master_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[0] = 1;
> +	}
> +
> +	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
> +					   ip_vs_backup_mcast_ifn,
> +					   ip_vs_backup_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[1] = 1;
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_new_daemon(struct nlattr **attrs)
> +{
> +	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
> +	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
> +	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
> +		return -EINVAL;
> +
> +	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
> +				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
> +				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
> +}
> +
> +static int ip_vs_genl_del_daemon(struct nlattr **attrs)
> +{
> +	if (!attrs[IPVS_DAEMON_ATTR_STATE])
> +		return -EINVAL;
> +
> +	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
> +}
> +
> +static int ip_vs_genl_set_config(struct nlattr **attrs)
> +{
> +	struct ip_vs_timeout_user t;
> +
> +	__ip_vs_get_timeouts(&t);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
> +		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
> +		t.tcp_fin_timeout =
> +			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
> +		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
> +
> +	return ip_vs_set_timeout(&t);
> +}
> +
> +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct ip_vs_service *svc = NULL;
> +	struct ip_vs_service_user usvc;
> +	struct ip_vs_dest_user udest;
> +	int ret = 0, cmd;
> +	int need_full_svc = 0, need_full_dest = 0;
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	if (cmd == IPVS_CMD_FLUSH) {
> +		ret = ip_vs_flush();
> +		goto out;
> +	} else if (cmd == IPVS_CMD_SET_CONFIG) {
> +		ret = ip_vs_genl_set_config(info->attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
> +		   cmd == IPVS_CMD_DEL_DAEMON) {
> +
> +		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
> +
> +		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
> +		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
> +				     info->attrs[IPVS_CMD_ATTR_DAEMON],
> +				     ip_vs_daemon_policy)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (cmd == IPVS_CMD_NEW_DAEMON)
> +			ret = ip_vs_genl_new_daemon(daemon_attrs);
> +		else
> +			ret = ip_vs_genl_del_daemon(daemon_attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_ZERO &&
> +		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
> +		ret = ip_vs_zero_all();
> +		goto out;
> +	}
> +
> +	/* All following commands require a service argument, so check if we
> +	 * received a valid one. We need a full service specification when
> +	 * adding / editing a service. Only identifying members otherwise. */
> +	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
> +		need_full_svc = 1;
> +
> +	ret = ip_vs_genl_parse_service(&usvc,
> +				       info->attrs[IPVS_CMD_ATTR_SERVICE],
> +				       need_full_svc);
> +	if (ret)
> +		goto out;
> +
> +	/* Lookup the exact service by <protocol, addr, port> or fwmark */
> +	if (usvc.fwmark == 0)
> +		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
> +	else
> +		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
> +
> +	/* Unless we're adding a new service, the service must already exist */
> +	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
> +		ret = -ESRCH;
> +		goto out;
> +	}
> +
> +	/* Destination commands require a valid destination argument. For
> +	 * adding / editing a destination, we need a full destination
> +	 * specification. */
> +	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
> +	    cmd == IPVS_CMD_DEL_DEST) {
> +		if (cmd != IPVS_CMD_DEL_DEST)
> +			need_full_dest = 1;
> +
> +		ret = ip_vs_genl_parse_dest(&udest,
> +					    info->attrs[IPVS_CMD_ATTR_DEST],
> +					    need_full_dest);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	switch (cmd) {
> +	case IPVS_CMD_NEW_SERVICE:
> +		if (svc == NULL)
> +			ret = ip_vs_add_service(&usvc, &svc);
> +		else
> +			ret = -EEXIST;
> +		break;
> +	case IPVS_CMD_SET_SERVICE:
> +		ret = ip_vs_edit_service(svc, &usvc);
> +		break;
> +	case IPVS_CMD_DEL_SERVICE:
> +		ret = ip_vs_del_service(svc);
> +		break;
> +	case IPVS_CMD_NEW_DEST:
> +		ret = ip_vs_add_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_SET_DEST:
> +		ret = ip_vs_edit_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_DEL_DEST:
> +		ret = ip_vs_del_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_ZERO:
> +		ret = ip_vs_zero_service(svc);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +out:
> +	if (svc)
> +		ip_vs_service_put(svc);
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct sk_buff *msg;
> +	void *reply;
> +	int ret, cmd, reply_cmd;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!msg) {
> +		ret = -ENOMEM;
> +		goto out_err;

Here you want out...

> +	}
> +
> +	if (cmd == IPVS_CMD_GET_SERVICE)
> +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> +	else if (cmd == IPVS_CMD_GET_INFO)
> +		reply_cmd = IPVS_CMD_SET_INFO;
> +	else if (cmd == IPVS_CMD_GET_CONFIG)
> +		reply_cmd = IPVS_CMD_SET_CONFIG;
> +	else {
> +		IP_VS_ERR("unknown Generic Netlink command\n");
> +		ret = -EINVAL;
> +		goto out;

..and here you want out_err, to not leak msg.

> +	}
> +
> +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> +	if (reply == NULL)
> +		goto nla_put_failure;
> +
> +	switch (cmd) {
> +	case IPVS_CMD_GET_SERVICE:
> +	{
> +		struct ip_vs_service *svc;
> +
> +		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
> +		if (IS_ERR(svc)) {
> +			ret = PTR_ERR(svc);
> +			goto out_err;
> +		} else if (svc) {
> +			ret = ip_vs_genl_fill_service(msg, svc);
> +			ip_vs_service_put(svc);
> +			if (ret)
> +				goto nla_put_failure;
> +		} else {
> +			ret = -ESRCH;
> +			goto out_err;
> +		}
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_CONFIG:
> +	{
> +		struct ip_vs_timeout_user t;
> +
> +		__ip_vs_get_timeouts(&t);
> +#ifdef CONFIG_IP_VS_PROTO_TCP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
> +			    t.tcp_fin_timeout);
> +#endif
> +#ifdef CONFIG_IP_VS_PROTO_UDP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
> +#endif
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_INFO:
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
> +			    IP_VS_CONN_TAB_SIZE);
> +		break;
> +	}
> +
> +	genlmsg_end(msg, reply);
> +	ret = genlmsg_unicast(msg, info->snd_pid);
> +	goto out;
> +
> +nla_put_failure:
> +	IP_VS_ERR("not enough space in Netlink message\n");
> +	ret = -EMSGSIZE;
> +
> +out_err:
> +	if (msg)
> +		nlmsg_free(msg);
> +out:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +
> +static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
> +	/* SET commands */
> +	{
> +		.cmd	= IPVS_CMD_NEW_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +		.dumpit	= ip_vs_genl_dump_services,
> +		.policy	= ip_vs_cmd_policy,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.dumpit	= ip_vs_genl_dump_dests,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.dumpit	= ip_vs_genl_dump_daemons,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_INFO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_ZERO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_FLUSH,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +};
> +
> +int ip_vs_genl_register(void)

static int __init

> +{
> +	int ret, i;
> +
> +	ret = genl_register_family(&ip_vs_genl_family);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
> +		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
> +		if (ret)
> +			goto err_out;
> +	}
> +	return 0;
> +
> +err_out:
> +	genl_unregister_family(&ip_vs_genl_family);
> +	return ret;
> +}
> +
> +void ip_vs_genl_unregister(void)

static void

> +{
> +	genl_unregister_family(&ip_vs_genl_family);
> +}
> +
> +/* End of Generic Netlink interface definitions */
> +
>  
>  int ip_vs_control_init(void)
>  {
> @@ -2319,6 +3191,13 @@ int ip_vs_control_init(void)
>  		return ret;
>  	}
>  
> +	ret = ip_vs_genl_register();
> +	if (ret) {
> +		IP_VS_ERR("cannot register Generic Netlink interface.\n");
> +		nf_unregister_sockopt(&ip_vs_sockopts);
> +		return ret;
> +	}
> +
>  	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
>  	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
>  
> @@ -2355,6 +3234,7 @@ void ip_vs_control_cleanup(void)
>  	unregister_sysctl_table(sysctl_header);
>  	proc_net_remove(&init_net, "ip_vs_stats");
>  	proc_net_remove(&init_net, "ip_vs");
> +	ip_vs_genl_unregister();
>  	nf_unregister_sockopt(&ip_vs_sockopts);
>  	LeaveFunction(2);
>  }

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-13 21:51     ` Sven Wegener
@ 2008-08-13 21:53       ` Sven Wegener
  2008-08-14  9:32       ` Julius Volz
  1 sibling, 0 replies; 23+ messages in thread
From: Sven Wegener @ 2008-08-13 21:53 UTC (permalink / raw)
  To: Julius Volz; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Wed, 13 Aug 2008, Sven Wegener wrote:

> On Fri, 8 Aug 2008, Julius Volz wrote:
> 
> > This still had two bugs:
> > - policies for IPVS_DEST_ATTR_FWD_METHOD and IPVS_SVC_ATTR_FLAGS
> >   were swapped
> > - svc not initialized to NULL at the beginning of ip_vs_genl_set_cmd()
> > 
> > The version below fixes this:
> > 
> > ----
> > Add the implementation of the new Generic Netlink interface to IPVS and
> > keep the old set/getsockopt interface for userspace backwards
> > compatibility.
> > 
> > Signed-off-by: Julius Volz <juliusv@google.com>
> > 
> >  1 files changed, 880 insertions(+), 0 deletions(-)
> > 
> > diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
> > index 9a5ace0..8038420 100644
> > --- a/net/ipv4/ipvs/ip_vs_ctl.c
> > +++ b/net/ipv4/ipvs/ip_vs_ctl.c
> > @@ -37,6 +37,7 @@
> >  #include <net/ip.h>
> >  #include <net/route.h>
> >  #include <net/sock.h>
> > +#include <net/genetlink.h>
> >  
> >  #include <asm/uaccess.h>
> >  
> > @@ -2305,6 +2306,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
> >  	.owner		= THIS_MODULE,
> >  };
> >  
> > +/*
> > + * Generic Netlink interface
> > + */
> > +
> > +/* IPVS genetlink family */
> > +static struct genl_family ip_vs_genl_family = {
> > +	.id		= GENL_ID_GENERATE,
> > +	.hdrsize	= 0,
> > +	.name		= IPVS_GENL_NAME,
> > +	.version	= IPVS_GENL_VERSION,
> > +	.maxattr	= IPVS_CMD_MAX,
> > +};
> > +
> > +/* Policy used for first-level command attributes */
> > +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
> > +	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
> > +	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
> > +	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
> > +	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
> > +	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
> > +	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
> > +};
> > +
> > +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
> > +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
> > +	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
> > +	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
> > +					    .len = IP_VS_IFNAME_MAXLEN },
> > +	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
> > +};
> > +
> > +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
> > +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
> > +	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
> > +	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
> > +	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
> > +					    .len = sizeof(union nf_inet_addr) },
> > +	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
> > +	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
> > +	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
> > +					    .len = IP_VS_SCHEDNAME_MAXLEN },
> > +	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
> > +					    .len = sizeof(struct ip_vs_flags) },
> > +	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
> > +	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
> > +	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
> > +};
> > +
> > +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
> > +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
> > +	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
> > +					    .len = sizeof(union nf_inet_addr) },
> > +	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
> > +	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
> > +	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
> > +};
> > +
> > +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
> > +				 struct ip_vs_stats *stats)
> > +{
> > +	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
> > +	if (!nl_stats)
> > +		return -EMSGSIZE;
> > +
> > +	spin_lock_bh(&stats->lock);
> > +
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
> > +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
> > +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
> > +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
> > +
> > +	spin_unlock_bh(&stats->lock);
> > +
> > +	nla_nest_end(skb, nl_stats);
> > +
> > +	return 0;
> > +
> > +nla_put_failure:
> > +	spin_unlock_bh(&stats->lock);
> > +	nla_nest_cancel(skb, nl_stats);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_fill_service(struct sk_buff *skb,
> > +				   struct ip_vs_service *svc)
> > +{
> > +	struct nlattr *nl_service;
> > +	struct ip_vs_flags flags = { .flags = svc->flags,
> > +				     .mask = ~0 };
> > +
> > +	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
> > +	if (!nl_service)
> > +		return -EMSGSIZE;
> > +
> > +	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
> > +
> > +	if (svc->fwmark) {
> > +		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
> > +	} else {
> > +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
> > +		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
> > +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
> > +	}
> > +
> > +	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
> > +	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
> > +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
> > +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
> > +
> > +	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
> > +		goto nla_put_failure;
> > +
> > +	nla_nest_end(skb, nl_service);
> > +
> > +	return 0;
> > +
> > +nla_put_failure:
> > +	nla_nest_cancel(skb, nl_service);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_dump_service(struct sk_buff *skb,
> > +				   struct ip_vs_service *svc,
> > +				   struct netlink_callback *cb)
> > +{
> > +	void *hdr;
> > +
> > +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> > +			  &ip_vs_genl_family, NLM_F_MULTI,
> > +			  IPVS_CMD_NEW_SERVICE);
> > +	if (!hdr)
> > +		return -EMSGSIZE;
> > +
> > +	if (ip_vs_genl_fill_service(skb, svc) < 0)
> > +		goto nla_put_failure;
> > +
> > +	return genlmsg_end(skb, hdr);
> > +
> > +nla_put_failure:
> > +	genlmsg_cancel(skb, hdr);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_dump_services(struct sk_buff *skb,
> > +				    struct netlink_callback *cb)
> > +{
> > +	int idx = 0, i;
> > +	int start = cb->args[0];
> > +	struct ip_vs_service *svc;
> > +
> > +	mutex_lock(&__ip_vs_mutex);
> > +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> > +		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
> > +			if (++idx <= start)
> > +				continue;
> > +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> > +				idx--;
> > +				goto nla_put_failure;
> > +			}
> > +		}
> > +	}
> > +
> > +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> > +		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
> > +			if (++idx <= start)
> > +				continue;
> > +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> > +				idx--;
> > +				goto nla_put_failure;
> > +			}
> > +		}
> > +	}
> > +
> > +nla_put_failure:
> > +	mutex_unlock(&__ip_vs_mutex);
> > +	cb->args[0] = idx;
> > +
> > +	return skb->len;
> > +}
> > +
> > +static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
> > +				    struct nlattr *nla, int full_entry)
> > +{
> > +	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
> > +	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
> > +
> > +	/* Parse mandatory identifying service fields first */
> > +	if (nla == NULL ||
> > +	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
> > +		return -EINVAL;
> > +
> > +	nla_af		= attrs[IPVS_SVC_ATTR_AF];
> > +	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
> > +	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
> > +	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
> > +	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
> > +
> > +	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
> > +		return -EINVAL;
> > +
> > +	/* For now, only support IPv4 */
> > +	if (nla_get_u16(nla_af) != AF_INET)
> > +		return -EAFNOSUPPORT;
> > +
> > +	if (nla_fwmark) {
> > +		usvc->protocol = IPPROTO_TCP;
> > +		usvc->fwmark = nla_get_u32(nla_fwmark);
> > +	} else {
> > +		usvc->protocol = nla_get_u16(nla_protocol);
> > +		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
> > +		usvc->port = nla_get_u16(nla_port);
> > +		usvc->fwmark = 0;
> > +	}
> > +
> > +	/* If a full entry was requested, check for the additional fields */
> > +	if (full_entry) {
> > +		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
> > +			      *nla_netmask;
> > +		struct ip_vs_flags flags;
> > +		struct ip_vs_service *svc;
> > +
> > +		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
> > +		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
> > +		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
> > +		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
> > +
> > +		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
> > +			return -EINVAL;
> > +
> > +		nla_memcpy(&flags, nla_flags, sizeof(flags));
> > +
> > +		/* prefill flags from service if it already exists */
> > +		if (usvc->fwmark)
> > +			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
> > +		else
> > +			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
> > +						  usvc->port);
> > +		if (svc) {
> > +			usvc->flags = svc->flags;
> > +			ip_vs_service_put(svc);
> > +		} else
> > +			usvc->flags = 0;
> > +
> > +		/* set new flags from userland */
> > +		usvc->flags = (usvc->flags & ~flags.mask) |
> > +			      (flags.flags & flags.mask);
> > +
> > +		strlcpy(usvc->sched_name, nla_data(nla_sched),
> > +			sizeof(usvc->sched_name));
> > +		usvc->timeout = nla_get_u32(nla_timeout);
> > +		usvc->netmask = nla_get_u32(nla_netmask);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
> > +{
> > +	struct ip_vs_service_user usvc;
> > +	int ret;
> > +
> > +	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
> > +	if (ret)
> > +		return ERR_PTR(ret);
> > +
> > +	if (usvc.fwmark)
> > +		return __ip_vs_svc_fwm_get(usvc.fwmark);
> > +	else
> > +		return __ip_vs_service_get(usvc.protocol, usvc.addr,
> > +					   usvc.port);
> > +}
> > +
> > +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
> > +{
> > +	struct nlattr *nl_dest;
> > +
> > +	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
> > +	if (!nl_dest)
> > +		return -EMSGSIZE;
> > +
> > +	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
> > +	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
> > +
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
> > +		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
> > +		    atomic_read(&dest->activeconns));
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
> > +		    atomic_read(&dest->inactconns));
> > +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
> > +		    atomic_read(&dest->persistconns));
> > +
> > +	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
> > +		goto nla_put_failure;
> > +
> > +	nla_nest_end(skb, nl_dest);
> > +
> > +	return 0;
> > +
> > +nla_put_failure:
> > +	nla_nest_cancel(skb, nl_dest);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
> > +				struct netlink_callback *cb)
> > +{
> > +	void *hdr;
> > +
> > +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> > +			  &ip_vs_genl_family, NLM_F_MULTI,
> > +			  IPVS_CMD_NEW_DEST);
> > +	if (!hdr)
> > +		return -EMSGSIZE;
> > +
> > +	if (ip_vs_genl_fill_dest(skb, dest) < 0)
> > +		goto nla_put_failure;
> > +
> > +	return genlmsg_end(skb, hdr);
> > +
> > +nla_put_failure:
> > +	genlmsg_cancel(skb, hdr);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_dump_dests(struct sk_buff *skb,
> > +				 struct netlink_callback *cb)
> > +{
> > +	int idx = 0;
> > +	int start = cb->args[0];
> > +	struct ip_vs_service *svc;
> > +	struct ip_vs_dest *dest;
> > +	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
> > +
> > +	mutex_lock(&__ip_vs_mutex);
> > +
> > +	/* Try to find the service for which to dump destinations */
> > +	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
> > +			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
> > +		goto out_err;
> > +
> > +	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
> > +	if (IS_ERR(svc) || svc == NULL)
> > +		goto out_err;
> > +
> > +	/* Dump the destinations */
> > +	list_for_each_entry(dest, &svc->destinations, n_list) {
> > +		if (++idx <= start)
> > +			continue;
> > +		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
> > +			idx--;
> > +			goto nla_put_failure;
> > +		}
> > +	}
> > +
> > +nla_put_failure:
> > +	cb->args[0] = idx;
> > +	ip_vs_service_put(svc);
> > +
> > +out_err:
> > +	mutex_unlock(&__ip_vs_mutex);
> > +
> > +	return skb->len;
> > +}
> > +
> > +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
> > +				 struct nlattr *nla, int full_entry)
> > +{
> > +	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
> > +	struct nlattr *nla_addr, *nla_port;
> > +
> > +	/* Parse mandatory identifying destination fields first */
> > +	if (nla == NULL ||
> > +	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
> > +		return -EINVAL;
> > +
> > +	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
> > +	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
> > +
> > +	if (!(nla_addr && nla_port))
> > +		return -EINVAL;
> > +
> > +	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
> > +	udest->port = nla_get_u16(nla_port);
> > +
> > +	/* If a full entry was requested, check for the additional fields */
> > +	if (full_entry) {
> > +		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
> > +			      *nla_l_thresh;
> > +
> > +		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
> > +		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
> > +		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
> > +		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
> > +
> > +		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
> > +			return -EINVAL;
> > +
> > +		udest->conn_flags = nla_get_u32(nla_fwd)
> > +				    & IP_VS_CONN_F_FWD_MASK;
> > +		udest->weight = nla_get_u32(nla_weight);
> > +		udest->u_threshold = nla_get_u32(nla_u_thresh);
> > +		udest->l_threshold = nla_get_u32(nla_l_thresh);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
> > +				  const char *mcast_ifn, __be32 syncid)
> > +{
> > +	struct nlattr *nl_daemon;
> > +
> > +	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
> > +	if (!nl_daemon)
> > +		return -EMSGSIZE;
> > +
> > +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
> > +	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
> > +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
> > +
> > +	nla_nest_end(skb, nl_daemon);
> > +
> > +	return 0;
> > +
> > +nla_put_failure:
> > +	nla_nest_cancel(skb, nl_daemon);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
> > +				  const char *mcast_ifn, __be32 syncid,
> > +				  struct netlink_callback *cb)
> > +{
> > +	void *hdr;
> > +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> > +			  &ip_vs_genl_family, NLM_F_MULTI,
> > +			  IPVS_CMD_NEW_DAEMON);
> > +	if (!hdr)
> > +		return -EMSGSIZE;
> > +
> > +	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
> > +		goto nla_put_failure;
> > +
> > +	return genlmsg_end(skb, hdr);
> > +
> > +nla_put_failure:
> > +	genlmsg_cancel(skb, hdr);
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
> > +				   struct netlink_callback *cb)
> > +{
> > +	mutex_lock(&__ip_vs_mutex);
> > +	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
> > +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
> > +					   ip_vs_master_mcast_ifn,
> > +					   ip_vs_master_syncid, cb) < 0)
> > +			goto nla_put_failure;
> > +
> > +		cb->args[0] = 1;
> > +	}
> > +
> > +	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
> > +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
> > +					   ip_vs_backup_mcast_ifn,
> > +					   ip_vs_backup_syncid, cb) < 0)
> > +			goto nla_put_failure;
> > +
> > +		cb->args[1] = 1;
> > +	}
> > +
> > +nla_put_failure:
> > +	mutex_unlock(&__ip_vs_mutex);
> > +
> > +	return skb->len;
> > +}
> > +
> > +static int ip_vs_genl_new_daemon(struct nlattr **attrs)
> > +{
> > +	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
> > +	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
> > +	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
> > +		return -EINVAL;
> > +
> > +	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
> > +				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
> > +				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
> > +}
> > +
> > +static int ip_vs_genl_del_daemon(struct nlattr **attrs)
> > +{
> > +	if (!attrs[IPVS_DAEMON_ATTR_STATE])
> > +		return -EINVAL;
> > +
> > +	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
> > +}
> > +
> > +static int ip_vs_genl_set_config(struct nlattr **attrs)
> > +{
> > +	struct ip_vs_timeout_user t;
> > +
> > +	__ip_vs_get_timeouts(&t);
> > +
> > +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
> > +		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
> > +
> > +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
> > +		t.tcp_fin_timeout =
> > +			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
> > +
> > +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
> > +		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
> > +
> > +	return ip_vs_set_timeout(&t);
> > +}
> > +
> > +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct ip_vs_service *svc = NULL;
> > +	struct ip_vs_service_user usvc;
> > +	struct ip_vs_dest_user udest;
> > +	int ret = 0, cmd;
> > +	int need_full_svc = 0, need_full_dest = 0;
> > +
> > +	cmd = info->genlhdr->cmd;
> > +
> > +	mutex_lock(&__ip_vs_mutex);
> > +
> > +	if (cmd == IPVS_CMD_FLUSH) {
> > +		ret = ip_vs_flush();
> > +		goto out;
> > +	} else if (cmd == IPVS_CMD_SET_CONFIG) {
> > +		ret = ip_vs_genl_set_config(info->attrs);
> > +		goto out;
> > +	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
> > +		   cmd == IPVS_CMD_DEL_DAEMON) {
> > +
> > +		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
> > +
> > +		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
> > +		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
> > +				     info->attrs[IPVS_CMD_ATTR_DAEMON],
> > +				     ip_vs_daemon_policy)) {
> > +			ret = -EINVAL;
> > +			goto out;
> > +		}
> > +
> > +		if (cmd == IPVS_CMD_NEW_DAEMON)
> > +			ret = ip_vs_genl_new_daemon(daemon_attrs);
> > +		else
> > +			ret = ip_vs_genl_del_daemon(daemon_attrs);
> > +		goto out;
> > +	} else if (cmd == IPVS_CMD_ZERO &&
> > +		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
> > +		ret = ip_vs_zero_all();
> > +		goto out;
> > +	}
> > +
> > +	/* All following commands require a service argument, so check if we
> > +	 * received a valid one. We need a full service specification when
> > +	 * adding / editing a service. Only identifying members otherwise. */
> > +	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
> > +		need_full_svc = 1;
> > +
> > +	ret = ip_vs_genl_parse_service(&usvc,
> > +				       info->attrs[IPVS_CMD_ATTR_SERVICE],
> > +				       need_full_svc);
> > +	if (ret)
> > +		goto out;
> > +
> > +	/* Lookup the exact service by <protocol, addr, port> or fwmark */
> > +	if (usvc.fwmark == 0)
> > +		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
> > +	else
> > +		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
> > +
> > +	/* Unless we're adding a new service, the service must already exist */
> > +	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
> > +		ret = -ESRCH;
> > +		goto out;
> > +	}
> > +
> > +	/* Destination commands require a valid destination argument. For
> > +	 * adding / editing a destination, we need a full destination
> > +	 * specification. */
> > +	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
> > +	    cmd == IPVS_CMD_DEL_DEST) {
> > +		if (cmd != IPVS_CMD_DEL_DEST)
> > +			need_full_dest = 1;
> > +
> > +		ret = ip_vs_genl_parse_dest(&udest,
> > +					    info->attrs[IPVS_CMD_ATTR_DEST],
> > +					    need_full_dest);
> > +		if (ret)
> > +			goto out;
> > +	}
> > +
> > +	switch (cmd) {
> > +	case IPVS_CMD_NEW_SERVICE:
> > +		if (svc == NULL)
> > +			ret = ip_vs_add_service(&usvc, &svc);
> > +		else
> > +			ret = -EEXIST;
> > +		break;
> > +	case IPVS_CMD_SET_SERVICE:
> > +		ret = ip_vs_edit_service(svc, &usvc);
> > +		break;
> > +	case IPVS_CMD_DEL_SERVICE:
> > +		ret = ip_vs_del_service(svc);
> > +		break;
> > +	case IPVS_CMD_NEW_DEST:
> > +		ret = ip_vs_add_dest(svc, &udest);
> > +		break;
> > +	case IPVS_CMD_SET_DEST:
> > +		ret = ip_vs_edit_dest(svc, &udest);
> > +		break;
> > +	case IPVS_CMD_DEL_DEST:
> > +		ret = ip_vs_del_dest(svc, &udest);
> > +		break;
> > +	case IPVS_CMD_ZERO:
> > +		ret = ip_vs_zero_service(svc);
> > +		break;
> > +	default:
> > +		ret = -EINVAL;
> > +	}
> > +
> > +out:
> > +	if (svc)
> > +		ip_vs_service_put(svc);
> > +	mutex_unlock(&__ip_vs_mutex);
> > +
> > +	return ret;
> > +}
> > +
> > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct sk_buff *msg;
> > +	void *reply;
> > +	int ret, cmd, reply_cmd;
> > +
> > +	mutex_lock(&__ip_vs_mutex);
> > +
> > +	cmd = info->genlhdr->cmd;
> > +
> > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > +	if (!msg) {
> > +		ret = -ENOMEM;
> > +		goto out_err;
> 
> Here you want out...
> 
> > +	}
> > +
> > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > +	else if (cmd == IPVS_CMD_GET_INFO)
> > +		reply_cmd = IPVS_CMD_SET_INFO;
> > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > +	else {
> > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > +		ret = -EINVAL;
> > +		goto out;
> 
> ...and here you want out_err, so that msg is not leaked.

Actually, exchange the two code blocks, so that the command is checked
before msg is allocated, and jump to out in both.

> > +	}
> > +
> > +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> > +	if (reply == NULL)
> > +		goto nla_put_failure;
> > +
> > +	switch (cmd) {
> > +	case IPVS_CMD_GET_SERVICE:
> > +	{
> > +		struct ip_vs_service *svc;
> > +
> > +		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
> > +		if (IS_ERR(svc)) {
> > +			ret = PTR_ERR(svc);
> > +			goto out_err;
> > +		} else if (svc) {
> > +			ret = ip_vs_genl_fill_service(msg, svc);
> > +			ip_vs_service_put(svc);
> > +			if (ret)
> > +				goto nla_put_failure;
> > +		} else {
> > +			ret = -ESRCH;
> > +			goto out_err;
> > +		}
> > +
> > +		break;
> > +	}
> > +
> > +	case IPVS_CMD_GET_CONFIG:
> > +	{
> > +		struct ip_vs_timeout_user t;
> > +
> > +		__ip_vs_get_timeouts(&t);
> > +#ifdef CONFIG_IP_VS_PROTO_TCP
> > +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
> > +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
> > +			    t.tcp_fin_timeout);
> > +#endif
> > +#ifdef CONFIG_IP_VS_PROTO_UDP
> > +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
> > +#endif
> > +
> > +		break;
> > +	}
> > +
> > +	case IPVS_CMD_GET_INFO:
> > +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
> > +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
> > +			    IP_VS_CONN_TAB_SIZE);
> > +		break;
> > +	}
> > +
> > +	genlmsg_end(msg, reply);
> > +	ret = genlmsg_unicast(msg, info->snd_pid);
> > +	goto out;
> > +
> > +nla_put_failure:
> > +	IP_VS_ERR("not enough space in Netlink message\n");
> > +	ret = -EMSGSIZE;
> > +
> > +out_err:
> > +	if (msg)
> > +		nlmsg_free(msg);
> > +out:
> > +	mutex_unlock(&__ip_vs_mutex);
> > +
> > +	return ret;
> > +}
> > +
> > +
> > +static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
> > +	/* SET commands */
> > +	{
> > +		.cmd	= IPVS_CMD_NEW_SERVICE,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_SET_SERVICE,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_DEL_SERVICE,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_GET_SERVICE,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.doit	= ip_vs_genl_get_cmd,
> > +		.dumpit	= ip_vs_genl_dump_services,
> > +		.policy	= ip_vs_cmd_policy,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_NEW_DEST,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_SET_DEST,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_DEL_DEST,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_GET_DEST,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.dumpit	= ip_vs_genl_dump_dests,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_NEW_DAEMON,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_DEL_DAEMON,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_GET_DAEMON,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.dumpit	= ip_vs_genl_dump_daemons,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_SET_CONFIG,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_GET_CONFIG,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.doit	= ip_vs_genl_get_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_GET_INFO,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.doit	= ip_vs_genl_get_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_ZERO,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.policy	= ip_vs_cmd_policy,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +	{
> > +		.cmd	= IPVS_CMD_FLUSH,
> > +		.flags	= GENL_ADMIN_PERM,
> > +		.doit	= ip_vs_genl_set_cmd,
> > +	},
> > +};
> > +
> > +int ip_vs_genl_register(void)
> 
> static int __init
> 
> > +{
> > +	int ret, i;
> > +
> > +	ret = genl_register_family(&ip_vs_genl_family);
> > +	if (ret)
> > +		return ret;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
> > +		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
> > +		if (ret)
> > +			goto err_out;
> > +	}
> > +	return 0;
> > +
> > +err_out:
> > +	genl_unregister_family(&ip_vs_genl_family);
> > +	return ret;
> > +}
> > +
> > +void ip_vs_genl_unregister(void)
> 
> static void
> 
> > +{
> > +	genl_unregister_family(&ip_vs_genl_family);
> > +}
> > +
> > +/* End of Generic Netlink interface definitions */
> > +
> >  
> >  int ip_vs_control_init(void)
> >  {
> > @@ -2319,6 +3191,13 @@ int ip_vs_control_init(void)
> >  		return ret;
> >  	}
> >  
> > +	ret = ip_vs_genl_register();
> > +	if (ret) {
> > +		IP_VS_ERR("cannot register Generic Netlink interface.\n");
> > +		nf_unregister_sockopt(&ip_vs_sockopts);
> > +		return ret;
> > +	}
> > +
> >  	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
> >  	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
> >  
> > @@ -2355,6 +3234,7 @@ void ip_vs_control_cleanup(void)
> >  	unregister_sysctl_table(sysctl_header);
> >  	proc_net_remove(&init_net, "ip_vs_stats");
> >  	proc_net_remove(&init_net, "ip_vs");
> > +	ip_vs_genl_unregister();
> >  	nf_unregister_sockopt(&ip_vs_sockopts);
> >  	LeaveFunction(2);
> >  }

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-13 16:00       ` Julius Volz
@ 2008-08-13 23:09         ` Simon Horman
  2008-08-14  4:12           ` Simon Horman
  0 siblings, 1 reply; 23+ messages in thread
From: Simon Horman @ 2008-08-13 23:09 UTC (permalink / raw)
  To: Julius Volz; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Wed, Aug 13, 2008 at 06:00:41PM +0200, Julius Volz wrote:
> On Sat, Aug 9, 2008 at 4:23 PM, Simon Horman <horms@verge.net.au> wrote:
> > Hi Julius,
> >
> > your latest patch does resolve the problem that I was seeing
> > with ipvsadm-1.25-nl-2. ipvsadm-1.25-nl-3 also works :-)
> >
> > I'd like to try and stress it out a bit more.
> > I will try and get to that tomorrow or on Monday.
> 
> Hi, you were probably occupied by the other IPVS patches. Is there any
> news on this?

Sorry Julius, I got side-tracked. I'll get onto this today, one way or another.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-13 23:09         ` Simon Horman
@ 2008-08-14  4:12           ` Simon Horman
  2008-08-14  9:34             ` Julius Volz
  0 siblings, 1 reply; 23+ messages in thread
From: Simon Horman @ 2008-08-14  4:12 UTC (permalink / raw)
  To: Julius Volz; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 09:09:29AM +1000, Simon Horman wrote:
> On Wed, Aug 13, 2008 at 06:00:41PM +0200, Julius Volz wrote:
> > On Sat, Aug 9, 2008 at 4:23 PM, Simon Horman <horms@verge.net.au> wrote:
> > > Hi Julius,
> > >
> > > your latest patch does resolve the problem that I was seeing
> > > with ipvsadm-1.25-nl-2. ipvsadm-1.25-nl-3 also works :-)
> > >
> > > I'd like to try and stress it out a bit more.
> > > I will try and get to that tomorrow or on Monday.
> > 
> > Hi, you were probably occupied by the other IPVS patches. Is there any
> > news on this?
> 
> Sorry Julius, I got side-tracked. I'll get onto this today, one way or another.

Hi Julius,

I tried to stress this code a bit more and nothing fell off.
I'm pretty happy with the patches, though Sven's comment about error checking
needs attention.

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-08 11:29   ` Julius Volz
  2008-08-13 21:51     ` Sven Wegener
@ 2008-08-14  5:39     ` Sven Wegener
  1 sibling, 0 replies; 23+ messages in thread
From: Sven Wegener @ 2008-08-14  5:39 UTC (permalink / raw)
  To: Julius Volz; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Fri, 8 Aug 2008, Julius Volz wrote:

> This still had two bugs:
> - policies for IPVS_DEST_ATTR_FWD_METHOD and IPVS_SVC_ATTR_FLAGS
>   were swapped
> - svc not initialized to NULL at the beginning of ip_vs_genl_set_cmd()
> 
> The version below fixes this:
> 
> ----
> Add the implementation of the new Generic Netlink interface to IPVS and
> keep the old set/getsockopt interface for userspace backwards
> compatibility.
> 
> Signed-off-by: Julius Volz <juliusv@google.com>
> 
>  1 files changed, 880 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
> index 9a5ace0..8038420 100644
> --- a/net/ipv4/ipvs/ip_vs_ctl.c
> +++ b/net/ipv4/ipvs/ip_vs_ctl.c
> @@ -37,6 +37,7 @@
>  #include <net/ip.h>
>  #include <net/route.h>
>  #include <net/sock.h>
> +#include <net/genetlink.h>
>  
>  #include <asm/uaccess.h>
>  
> @@ -2305,6 +2306,877 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
>  	.owner		= THIS_MODULE,
>  };
>  
> +/*
> + * Generic Netlink interface
> + */
> +
> +/* IPVS genetlink family */
> +static struct genl_family ip_vs_genl_family = {
> +	.id		= GENL_ID_GENERATE,
> +	.hdrsize	= 0,
> +	.name		= IPVS_GENL_NAME,
> +	.version	= IPVS_GENL_VERSION,
> +	.maxattr	= IPVS_CMD_MAX,
> +};
> +
> +/* Policy used for first-level command attributes */
> +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
> +	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
> +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
> +	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
> +	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_IFNAME_MAXLEN },
> +	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
> +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
> +	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_SCHEDNAME_MAXLEN },
> +	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
> +					    .len = sizeof(struct ip_vs_flags) },
> +	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
> +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
> +	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
> +				 struct ip_vs_stats *stats)
> +{
> +	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
> +	if (!nl_stats)
> +		return -EMSGSIZE;
> +
> +	spin_lock_bh(&stats->lock);
> +
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
> +
> +	spin_unlock_bh(&stats->lock);
> +
> +	nla_nest_end(skb, nl_stats);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	spin_unlock_bh(&stats->lock);
> +	nla_nest_cancel(skb, nl_stats);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_fill_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc)
> +{
> +	struct nlattr *nl_service;
> +	struct ip_vs_flags flags = { .flags = svc->flags,
> +				     .mask = ~0 };
> +
> +	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
> +	if (!nl_service)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
> +
> +	if (svc->fwmark) {
> +		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
> +	} else {
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
> +		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
> +	}
> +
> +	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
> +	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_service);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_service);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc,
> +				   struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_SERVICE);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_service(skb, svc) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_services(struct sk_buff *skb,
> +				    struct netlink_callback *cb)
> +{
> +	int idx = 0, i;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +	cb->args[0] = idx;
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
> +				    struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
> +	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
> +
> +	/* Parse mandatory identifying service fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
> +		return -EINVAL;
> +
> +	nla_af		= attrs[IPVS_SVC_ATTR_AF];
> +	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
> +	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
> +	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
> +
> +	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
> +		return -EINVAL;
> +
> +	/* For now, only support IPv4 */
> +	if (nla_get_u16(nla_af) != AF_INET)
> +		return -EAFNOSUPPORT;
> +
> +	if (nla_fwmark) {
> +		usvc->protocol = IPPROTO_TCP;
> +		usvc->fwmark = nla_get_u32(nla_fwmark);
> +	} else {
> +		usvc->protocol = nla_get_u16(nla_protocol);
> +		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
> +		usvc->port = nla_get_u16(nla_port);
> +		usvc->fwmark = 0;
> +	}
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
> +			      *nla_netmask;
> +		struct ip_vs_flags flags;
> +		struct ip_vs_service *svc;
> +
> +		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
> +		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
> +		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
> +		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
> +
> +		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
> +			return -EINVAL;
> +
> +		nla_memcpy(&flags, nla_flags, sizeof(flags));
> +
> +		/* prefill flags from service if it already exists */
> +		if (usvc->fwmark)
> +			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
> +		else
> +			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
> +						  usvc->port);
> +		if (svc) {
> +			usvc->flags = svc->flags;
> +			ip_vs_service_put(svc);
> +		} else
> +			usvc->flags = 0;
> +
> +		/* set new flags from userland */
> +		usvc->flags = (usvc->flags & ~flags.mask) |
> +			      (flags.flags & flags.mask);
> +
> +		strlcpy(usvc->sched_name, nla_data(nla_sched),
> +			sizeof(usvc->sched_name));
> +		usvc->timeout = nla_get_u32(nla_timeout);
> +		usvc->netmask = nla_get_u32(nla_netmask);
> +	}
> +
> +	return 0;
> +}
> +
> +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
> +{
> +	struct ip_vs_service_user usvc;
> +	int ret;
> +
> +	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	if (usvc.fwmark)
> +		return __ip_vs_svc_fwm_get(usvc.fwmark);
> +	else
> +		return __ip_vs_service_get(usvc.protocol, usvc.addr,
> +					   usvc.port);
> +}
> +
> +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
> +{
> +	struct nlattr *nl_dest;
> +
> +	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
> +	if (!nl_dest)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
> +	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
> +
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
> +		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
> +		    atomic_read(&dest->activeconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
> +		    atomic_read(&dest->inactconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
> +		    atomic_read(&dest->persistconns));
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_dest);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_dest);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
> +				struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DEST);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_dest(skb, dest) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dests(struct sk_buff *skb,
> +				 struct netlink_callback *cb)
> +{
> +	int idx = 0;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +	struct ip_vs_dest *dest;
> +	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	/* Try to find the service for which to dump destinations */
> +	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
> +			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
> +		goto out_err;
> +
> +	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
> +	if (IS_ERR(svc) || svc == NULL)
> +		goto out_err;
> +
> +	/* Dump the destinations */
> +	list_for_each_entry(dest, &svc->destinations, n_list) {
> +		if (++idx <= start)
> +			continue;
> +		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
> +			idx--;
> +			goto nla_put_failure;
> +		}
> +	}
> +
> +nla_put_failure:
> +	cb->args[0] = idx;
> +	ip_vs_service_put(svc);
> +
> +out_err:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
> +				 struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
> +	struct nlattr *nla_addr, *nla_port;
> +
> +	/* Parse mandatory identifying destination fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
> +		return -EINVAL;
> +
> +	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
> +
> +	if (!(nla_addr && nla_port))
> +		return -EINVAL;
> +
> +	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
> +	udest->port = nla_get_u16(nla_port);
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
> +			      *nla_l_thresh;
> +
> +		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
> +		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
> +		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
> +		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
> +
> +		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
> +			return -EINVAL;
> +
> +		udest->conn_flags = nla_get_u32(nla_fwd)
> +				    & IP_VS_CONN_F_FWD_MASK;
> +		udest->weight = nla_get_u32(nla_weight);
> +		udest->u_threshold = nla_get_u32(nla_u_thresh);
> +		udest->l_threshold = nla_get_u32(nla_l_thresh);
> +	}
> +
> +	return 0;
> +}
> +
> +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid)
> +{
> +	struct nlattr *nl_daemon;
> +
> +	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
> +	if (!nl_daemon)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
> +	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
> +
> +	nla_nest_end(skb, nl_daemon);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_daemon);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid,
> +				  struct netlink_callback *cb)
> +{
> +	void *hdr;
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DAEMON);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
> +				   struct netlink_callback *cb)
> +{
> +	mutex_lock(&__ip_vs_mutex);
> +	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
> +					   ip_vs_master_mcast_ifn,
> +					   ip_vs_master_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[0] = 1;
> +	}
> +
> +	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
> +					   ip_vs_backup_mcast_ifn,
> +					   ip_vs_backup_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[1] = 1;
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_new_daemon(struct nlattr **attrs)
> +{
> +	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
> +	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
> +	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
> +		return -EINVAL;
> +
> +	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
> +				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
> +				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
> +}
> +
> +static int ip_vs_genl_del_daemon(struct nlattr **attrs)
> +{
> +	if (!attrs[IPVS_DAEMON_ATTR_STATE])
> +		return -EINVAL;
> +
> +	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
> +}
> +
> +static int ip_vs_genl_set_config(struct nlattr **attrs)
> +{
> +	struct ip_vs_timeout_user t;
> +
> +	__ip_vs_get_timeouts(&t);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
> +		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
> +		t.tcp_fin_timeout =
> +			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
> +		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
> +
> +	return ip_vs_set_timeout(&t);
> +}
> +
> +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct ip_vs_service *svc = NULL;
> +	struct ip_vs_service_user usvc;
> +	struct ip_vs_dest_user udest;
> +	int ret = 0, cmd;
> +	int need_full_svc = 0, need_full_dest = 0;
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	if (cmd == IPVS_CMD_FLUSH) {
> +		ret = ip_vs_flush();
> +		goto out;
> +	} else if (cmd == IPVS_CMD_SET_CONFIG) {
> +		ret = ip_vs_genl_set_config(info->attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
> +		   cmd == IPVS_CMD_DEL_DAEMON) {
> +
> +		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
> +
> +		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
> +		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
> +				     info->attrs[IPVS_CMD_ATTR_DAEMON],
> +				     ip_vs_daemon_policy)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (cmd == IPVS_CMD_NEW_DAEMON)
> +			ret = ip_vs_genl_new_daemon(daemon_attrs);
> +		else
> +			ret = ip_vs_genl_del_daemon(daemon_attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_ZERO &&
> +		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
> +		ret = ip_vs_zero_all();
> +		goto out;
> +	}
> +
> +	/* All following commands require a service argument, so check if we
> +	 * received a valid one. We need a full service specification when
> +	 * adding / editing a service. Only identifying members otherwise. */
> +	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
> +		need_full_svc = 1;
> +
> +	ret = ip_vs_genl_parse_service(&usvc,
> +				       info->attrs[IPVS_CMD_ATTR_SERVICE],
> +				       need_full_svc);
> +	if (ret)
> +		goto out;
> +
> +	/* Lookup the exact service by <protocol, addr, port> or fwmark */
> +	if (usvc.fwmark == 0)
> +		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
> +	else
> +		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
> +
> +	/* Unless we're adding a new service, the service must already exist */
> +	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
> +		ret = -ESRCH;
> +		goto out;
> +	}
> +
> +	/* Destination commands require a valid destination argument. For
> +	 * adding / editing a destination, we need a full destination
> +	 * specification. */
> +	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
> +	    cmd == IPVS_CMD_DEL_DEST) {
> +		if (cmd != IPVS_CMD_DEL_DEST)
> +			need_full_dest = 1;
> +
> +		ret = ip_vs_genl_parse_dest(&udest,
> +					    info->attrs[IPVS_CMD_ATTR_DEST],
> +					    need_full_dest);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	switch (cmd) {
> +	case IPVS_CMD_NEW_SERVICE:
> +		if (svc == NULL)
> +			ret = ip_vs_add_service(&usvc, &svc);
> +		else
> +			ret = -EEXIST;
> +		break;
> +	case IPVS_CMD_SET_SERVICE:
> +		ret = ip_vs_edit_service(svc, &usvc);
> +		break;
> +	case IPVS_CMD_DEL_SERVICE:
> +		ret = ip_vs_del_service(svc);
> +		break;
> +	case IPVS_CMD_NEW_DEST:
> +		ret = ip_vs_add_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_SET_DEST:
> +		ret = ip_vs_edit_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_DEL_DEST:
> +		ret = ip_vs_del_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_ZERO:
> +		ret = ip_vs_zero_service(svc);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +out:
> +	if (svc)
> +		ip_vs_service_put(svc);
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct sk_buff *msg;
> +	void *reply;
> +	int ret, cmd, reply_cmd;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!msg) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
> +
> +	if (cmd == IPVS_CMD_GET_SERVICE)
> +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> +	else if (cmd == IPVS_CMD_GET_INFO)
> +		reply_cmd = IPVS_CMD_SET_INFO;
> +	else if (cmd == IPVS_CMD_GET_CONFIG)
> +		reply_cmd = IPVS_CMD_SET_CONFIG;
> +	else {
> +		IP_VS_ERR("unknown Generic Netlink command\n");
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> +	if (reply == NULL)
> +		goto nla_put_failure;

And for what it's worth, the code above doesn't require __ip_vs_mutex; you
can defer the locking until here to improve concurrency.

> +
> +	switch (cmd) {
> +	case IPVS_CMD_GET_SERVICE:
> +	{
> +		struct ip_vs_service *svc;
> +
> +		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
> +		if (IS_ERR(svc)) {
> +			ret = PTR_ERR(svc);
> +			goto out_err;
> +		} else if (svc) {
> +			ret = ip_vs_genl_fill_service(msg, svc);
> +			ip_vs_service_put(svc);
> +			if (ret)
> +				goto nla_put_failure;
> +		} else {
> +			ret = -ESRCH;
> +			goto out_err;
> +		}
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_CONFIG:
> +	{
> +		struct ip_vs_timeout_user t;
> +
> +		__ip_vs_get_timeouts(&t);
> +#ifdef CONFIG_IP_VS_PROTO_TCP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
> +			    t.tcp_fin_timeout);
> +#endif
> +#ifdef CONFIG_IP_VS_PROTO_UDP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
> +#endif
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_INFO:
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
> +			    IP_VS_CONN_TAB_SIZE);
> +		break;
> +	}
> +
> +	genlmsg_end(msg, reply);
> +	ret = genlmsg_unicast(msg, info->snd_pid);
> +	goto out;
> +
> +nla_put_failure:
> +	IP_VS_ERR("not enough space in Netlink message\n");
> +	ret = -EMSGSIZE;
> +
> +out_err:
> +	if (msg)
> +		nlmsg_free(msg);
> +out:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +
> +static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
> +	/* SET commands */
> +	{
> +		.cmd	= IPVS_CMD_NEW_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +		.dumpit	= ip_vs_genl_dump_services,
> +		.policy	= ip_vs_cmd_policy,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.dumpit	= ip_vs_genl_dump_dests,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.dumpit	= ip_vs_genl_dump_daemons,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_INFO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_ZERO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_FLUSH,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +};
> +
> +int ip_vs_genl_register(void)
> +{
> +	int ret, i;
> +
> +	ret = genl_register_family(&ip_vs_genl_family);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
> +		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
> +		if (ret)
> +			goto err_out;
> +	}
> +	return 0;
> +
> +err_out:
> +	genl_unregister_family(&ip_vs_genl_family);
> +	return ret;
> +}
> +
> +void ip_vs_genl_unregister(void)
> +{
> +	genl_unregister_family(&ip_vs_genl_family);
> +}
> +
> +/* End of Generic Netlink interface definitions */
> +
>  
>  int ip_vs_control_init(void)
>  {
> @@ -2319,6 +3191,13 @@ int ip_vs_control_init(void)
>  		return ret;
>  	}
>  
> +	ret = ip_vs_genl_register();
> +	if (ret) {
> +		IP_VS_ERR("cannot register Generic Netlink interface.\n");
> +		nf_unregister_sockopt(&ip_vs_sockopts);
> +		return ret;
> +	}
> +
>  	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
>  	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
>  
> @@ -2355,6 +3234,7 @@ void ip_vs_control_cleanup(void)
>  	unregister_sysctl_table(sysctl_header);
>  	proc_net_remove(&init_net, "ip_vs_stats");
>  	proc_net_remove(&init_net, "ip_vs");
> +	ip_vs_genl_unregister();
>  	nf_unregister_sockopt(&ip_vs_sockopts);
>  	LeaveFunction(2);
>  }
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-13 21:51     ` Sven Wegener
  2008-08-13 21:53       ` Sven Wegener
@ 2008-08-14  9:32       ` Julius Volz
  2008-08-14  9:52         ` Simon Horman
  2008-08-14 10:04         ` Sven Wegener
  1 sibling, 2 replies; 23+ messages in thread
From: Julius Volz @ 2008-08-14  9:32 UTC (permalink / raw)
  To: Sven Wegener; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

Hi Sven,

On Wed, Aug 13, 2008 at 11:51:06PM +0200, Sven Wegener wrote:
> On Fri, 8 Aug 2008, Julius Volz wrote:
> > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct sk_buff *msg;
> > +	void *reply;
> > +	int ret, cmd, reply_cmd;
> > +
> > +	mutex_lock(&__ip_vs_mutex);
> > +
> > +	cmd = info->genlhdr->cmd;
> > +
> > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > +	if (!msg) {
> > +		ret = -ENOMEM;
> > +		goto out_err;
> 
> Here you want out...
> 
> > +	}
> > +
> > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > +	else if (cmd == IPVS_CMD_GET_INFO)
> > +		reply_cmd = IPVS_CMD_SET_INFO;
> > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > +	else {
> > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > +		ret = -EINVAL;
> > +		goto out;
> 
> > ...and here you want out_err, to not leak msg.

Ouch, thanks! Fixed this and locked the mutex later. I also removed the
"if (msg)" from out_err, as it becomes unneeded now. Here's the updated
patch:

-------------
Add the implementation of the new Generic Netlink interface to IPVS and
keep the old set/getsockopt interface for userspace backwards
compatibility.

Signed-off-by: Julius Volz <juliusv@google.com>

 1 files changed, 878 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 6379705..63a1cbb 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include <asm/uaccess.h>
 
@@ -2320,6 +2321,875 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+/*
+ * Generic Netlink interface
+ */
+
+/*
+ * IPVS genetlink family.
+ *
+ * maxattr limits the top-level attribute types accepted for this family, so
+ * it has to be the highest IPVS_CMD_ATTR_* value rather than the highest
+ * command number: ip_vs_cmd_policy, which the operations of this family use
+ * as their policy, only has IPVS_CMD_ATTR_MAX + 1 entries.
+ */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_ATTR_MAX,
+};
+
+/*
+ * Policy used for first-level command attributes: the service, destination
+ * and daemon containers are nested attributes, the three timeouts are u32
+ * values.
+ */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/*
+ * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON.
+ * The multicast interface name is a NUL-terminated string whose length is
+ * limited by IP_VS_IFNAME_MAXLEN; state and sync id are u32 values.
+ */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/*
+ * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
+ * Address and flags are binary payloads whose length is limited to
+ * sizeof(union nf_inet_addr) and sizeof(struct ip_vs_flags) respectively;
+ * the scheduler name is a NUL-terminated string and the statistics are a
+ * nested attribute.
+ */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/*
+ * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
+ * The address is a binary payload limited to sizeof(union nf_inet_addr),
+ * the statistics are a nested attribute and every other attribute is a
+ * plain u16 or u32 value.
+ */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/*
+ * Append the counters and rate values of @stats to @skb, wrapped in one
+ * nested attribute of type @container_type.
+ *
+ * Returns 0 on success. Returns -EMSGSIZE if the nest cannot be started or
+ * if an attribute cannot be added; in the latter case the partially written
+ * nest is cancelled again.
+ */
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, container_type);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	/* stats->lock is held while the values are copied into the message */
+	spin_lock_bh(&stats->lock);
+
+	/* The NLA_PUT_*() macros jump to nla_put_failure on error */
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+	spin_unlock_bh(&stats->lock);
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Describe @svc in a nested IPVS_CMD_ATTR_SERVICE attribute appended to @skb.
+ *
+ * The address family is always reported as AF_INET. A service with a
+ * non-zero fwmark is identified by that fwmark only, any other service by
+ * protocol, address and port. Scheduler name, flags (with an all-ones mask),
+ * svc->timeout / HZ, netmask and the statistics follow in both cases.
+ *
+ * Returns 0 on success or -EMSGSIZE if something did not fit, in which case
+ * the partially written nest is cancelled.
+ */
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 };
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	/* The NLA_PUT*() macros jump to nla_put_failure on error */
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+	if (!svc->fwmark) {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	} else {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Add one complete IPVS_CMD_NEW_SERVICE message describing @svc to the dump
+ * buffer @skb. The message is flagged NLM_F_MULTI and addressed using the
+ * pid and sequence number of the request stored in @cb.
+ *
+ * Returns the result of genlmsg_end() on success, or -EMSGSIZE if the
+ * header or the service description did not fit; a message that was already
+ * started is cancelled in that case.
+ */
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *msg_hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				    cb->nlh->nlmsg_seq, &ip_vs_genl_family,
+				    NLM_F_MULTI, IPVS_CMD_NEW_SERVICE);
+
+	if (msg_hdr == NULL)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) >= 0)
+		return genlmsg_end(skb, msg_hdr);
+
+	genlmsg_cancel(skb, msg_hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback for services.
+ *
+ * Walks ip_vs_svc_table and then ip_vs_svc_fwm_table under __ip_vs_mutex and
+ * emits one message per service. cb->args[0] holds the number of services
+ * that were dumped by earlier invocations; those are skipped, and the
+ * counter is updated before returning so the next invocation resumes after
+ * the last service that fit. Always returns skb->len.
+ */
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	struct ip_vs_service *svc;
+	int skip = cb->args[0];
+	int count = 0;
+	int bucket;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[bucket], s_list) {
+			count++;
+			if (count <= skip)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				/* not dumped: do not count this service */
+				count--;
+				goto out;
+			}
+		}
+	}
+
+	for (bucket = 0; bucket < IP_VS_SVC_TAB_SIZE; bucket++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[bucket], f_list) {
+			count++;
+			if (count <= skip)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				/* not dumped: do not count this service */
+				count--;
+				goto out;
+			}
+		}
+	}
+
+out:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = count;
+
+	return skb->len;
+}
+
+/*
+ * Fill @usvc from the nested IPVS_CMD_ATTR_SERVICE attribute @nla.
+ *
+ * The address family plus either a fwmark or the <protocol, addr, port>
+ * triple are always required. If @full_entry is non-zero, scheduler name,
+ * flags, timeout and netmask are required as well; the resulting flags are
+ * those of an already existing service (or 0) with the bits selected by the
+ * user-supplied mask replaced by the user-supplied values.
+ *
+ * On success every member of @usvc that was not supplied is zero.
+ *
+ * Returns 0 on success, -EINVAL if @nla is NULL, cannot be parsed or lacks a
+ * required attribute, and -EAFNOSUPPORT for any family other than AF_INET.
+ */
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/* For now, only support IPv4 */
+	if (nla_get_u16(nla_af) != AF_INET)
+		return -EAFNOSUPPORT;
+
+	/*
+	 * Start from an all-zero descriptor. The fwmark branch below sets
+	 * neither addr nor port, although both are used for the lookup when
+	 * the supplied fwmark is 0, and the policy only limits the maximum
+	 * length of the address payload, so nla_memcpy() may copy fewer bytes
+	 * than requested. Without this, stale stack contents would be used.
+	 */
+	*usvc = (struct ip_vs_service_user){ .fwmark = 0 };
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		/* zeroed for the same reason as *usvc: the payload may be short */
+		struct ip_vs_flags flags = { .flags = 0, .mask = 0 };
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+						  usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+
+		strlcpy(usvc->sched_name, nla_data(nla_sched),
+			sizeof(usvc->sched_name));
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the service identified by the nested service attribute @nla.
+ *
+ * Returns an ERR_PTR() carrying the parse error if @nla cannot be parsed.
+ * Otherwise returns whatever the fwmark lookup (non-zero fwmark) or the
+ * <protocol, addr, port> lookup returns. The callers in this file treat
+ * NULL as "no such service" and call ip_vs_service_put() on any other
+ * result when they are done with it.
+ */
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user usvc;
+	int err = ip_vs_genl_parse_service(&usvc, nla, 0);
+
+	if (err != 0)
+		return ERR_PTR(err);
+
+	if (!usvc.fwmark)
+		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+					   usvc.port);
+
+	return __ip_vs_svc_fwm_get(usvc.fwmark);
+}
+
+/*
+ * Describe @dest in a nested IPVS_CMD_ATTR_DEST attribute appended to @skb:
+ * address, port, forwarding method (conn_flags masked with
+ * IP_VS_CONN_F_FWD_MASK), weight, thresholds, connection counters and the
+ * statistics.
+ *
+ * Returns 0 on success or -EMSGSIZE if something did not fit, in which case
+ * the partially written nest is cancelled.
+ */
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	/* The NLA_PUT*() macros jump to nla_put_failure on error */
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Add one complete IPVS_CMD_NEW_DEST message describing @dest to the dump
+ * buffer @skb. The message is flagged NLM_F_MULTI and addressed using the
+ * pid and sequence number of the request stored in @cb.
+ *
+ * Returns the result of genlmsg_end() on success, or -EMSGSIZE if the
+ * header or the destination description did not fit; a message that was
+ * already started is cancelled in that case.
+ */
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *msg_hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				    cb->nlh->nlmsg_seq, &ip_vs_genl_family,
+				    NLM_F_MULTI, IPVS_CMD_NEW_DEST);
+
+	if (msg_hdr == NULL)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) >= 0)
+		return genlmsg_end(skb, msg_hdr);
+
+	genlmsg_cancel(skb, msg_hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback for the destinations of one service.
+ *
+ * The service is taken from the IPVS_CMD_ATTR_SERVICE attribute of the
+ * original request (cb->nlh). If the request cannot be parsed or does not
+ * name an existing service, nothing is dumped. cb->args[0] holds the number
+ * of destinations dumped by earlier invocations; those are skipped, and the
+ * counter is updated once the walk stops. Runs under __ip_vs_mutex and
+ * always returns skb->len.
+ */
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	int skip = cb->args[0];
+	int count = 0;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy) != 0)
+		goto unlock;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto unlock;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		count++;
+		if (count <= skip)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			/* not dumped: do not count this destination */
+			count--;
+			break;
+		}
+	}
+
+	cb->args[0] = count;
+	ip_vs_service_put(svc);
+
+unlock:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+/*
+ * Fill @udest from the nested IPVS_CMD_ATTR_DEST attribute @nla.
+ *
+ * Address and port are always required. If @full_entry is non-zero,
+ * forwarding method, weight and both thresholds are required as well; only
+ * the IP_VS_CONN_F_FWD_MASK bits of the forwarding method are kept.
+ *
+ * On success every member of @udest that was not supplied is zero.
+ *
+ * Returns 0 on success or -EINVAL if @nla is NULL, cannot be parsed or lacks
+ * a required attribute.
+ */
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	/*
+	 * Start from an all-zero descriptor: the policy only limits the
+	 * maximum length of the address payload, so nla_memcpy() may copy
+	 * fewer bytes than requested, and the remaining members are not set
+	 * at all unless a full entry was requested.
+	 */
+	*udest = (struct ip_vs_dest_user){ .port = 0 };
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+/*
+ * Describe one sync daemon in a nested IPVS_CMD_ATTR_DAEMON attribute
+ * appended to @skb: its @state, multicast interface name @mcast_ifn and
+ * @syncid.
+ *
+ * Returns 0 on success or -EMSGSIZE if something did not fit, in which case
+ * the partially written nest is cancelled.
+ */
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nest = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+
+	if (nest == NULL)
+		return -EMSGSIZE;
+
+	/* The NLA_PUT_*() macros jump to nla_put_failure on error */
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+/*
+ * Add one complete IPVS_CMD_NEW_DAEMON message describing a sync daemon to
+ * the dump buffer @skb. The message is flagged NLM_F_MULTI and addressed
+ * using the pid and sequence number of the request stored in @cb.
+ *
+ * Returns the result of genlmsg_end() on success, or -EMSGSIZE if the
+ * header or the daemon description did not fit; a message that was already
+ * started is cancelled in that case.
+ */
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *msg_hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
+				    cb->nlh->nlmsg_seq, &ip_vs_genl_family,
+				    NLM_F_MULTI, IPVS_CMD_NEW_DAEMON);
+
+	if (msg_hdr == NULL)
+		return -EMSGSIZE;
+
+	if (!ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		return genlmsg_end(skb, msg_hdr);
+
+	genlmsg_cancel(skb, msg_hdr);
+	return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback for the sync daemons.
+ *
+ * Emits one message for the master daemon and one for the backup daemon,
+ * each only if the corresponding bit is set in ip_vs_sync_state.
+ * cb->args[0] and cb->args[1] record that the master respectively backup
+ * entry has been dumped already, so later invocations skip it. Runs under
+ * __ip_vs_mutex and always returns skb->len.
+ */
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto unlock;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto unlock;
+
+		cb->args[1] = 1;
+	}
+
+unlock:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+/*
+ * Start a sync daemon from the already parsed daemon attributes @attrs.
+ * State, multicast interface name and sync id are all required.
+ *
+ * Returns -EINVAL if one of them is missing, otherwise the result of
+ * start_sync_thread().
+ */
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	struct nlattr *nla_state = attrs[IPVS_DAEMON_ATTR_STATE];
+	struct nlattr *nla_ifn = attrs[IPVS_DAEMON_ATTR_MCAST_IFN];
+	struct nlattr *nla_syncid = attrs[IPVS_DAEMON_ATTR_SYNC_ID];
+
+	if (!nla_state || !nla_ifn || !nla_syncid)
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(nla_state),
+				 nla_data(nla_ifn),
+				 nla_get_u32(nla_syncid));
+}
+
+/*
+ * Stop the sync daemon selected by the state attribute in the already
+ * parsed daemon attributes @attrs.
+ *
+ * Returns -EINVAL if the state attribute is missing, otherwise the result
+ * of stop_sync_thread().
+ */
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	struct nlattr *nla_state = attrs[IPVS_DAEMON_ATTR_STATE];
+
+	if (nla_state == NULL)
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(nla_state));
+}
+
+/*
+ * Update the timeouts from the top-level command attributes @attrs.
+ *
+ * The current timeouts are read first, so a timeout whose attribute is
+ * absent is passed to ip_vs_set_timeout() with its current value.
+ *
+ * Returns the result of ip_vs_set_timeout().
+ */
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct nlattr *nla_tcp = attrs[IPVS_CMD_ATTR_TIMEOUT_TCP];
+	struct nlattr *nla_tcp_fin = attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN];
+	struct nlattr *nla_udp = attrs[IPVS_CMD_ATTR_TIMEOUT_UDP];
+	struct ip_vs_timeout_user timeouts;
+
+	__ip_vs_get_timeouts(&timeouts);
+
+	if (nla_tcp)
+		timeouts.tcp_timeout = nla_get_u32(nla_tcp);
+	if (nla_tcp_fin)
+		timeouts.tcp_fin_timeout = nla_get_u32(nla_tcp_fin);
+	if (nla_udp)
+		timeouts.udp_timeout = nla_get_u32(nla_udp);
+
+	return ip_vs_set_timeout(&timeouts);
+}
+
+/*
+ * doit handler shared by all modifying commands.
+ *
+ * FLUSH, SET_CONFIG, NEW_DAEMON / DEL_DAEMON and a ZERO request without a
+ * service attribute are handled first, as they do not refer to a particular
+ * service. Every other command needs a service attribute, and the
+ * destination commands need a destination attribute in addition. The whole
+ * operation runs under __ip_vs_mutex.
+ *
+ * Returns 0 or a negative errno: -EINVAL for a missing or malformed daemon
+ * attribute or an unhandled command, the parse error for a bad service or
+ * destination attribute, -ESRCH if the service does not exist (for
+ * anything but NEW_SERVICE), -EEXIST if NEW_SERVICE names an existing
+ * service, or the result of the underlying ip_vs_*() operation.
+ */
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user usvc;
+	struct ip_vs_dest_user udest;
+	int cmd = info->genlhdr->cmd;
+	int need_full_svc, need_full_dest;
+	int ret = 0;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Commands that do not operate on a particular service */
+	switch (cmd) {
+	case IPVS_CMD_FLUSH:
+		ret = ip_vs_flush();
+		goto out;
+	case IPVS_CMD_SET_CONFIG:
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	case IPVS_CMD_NEW_DAEMON:
+	case IPVS_CMD_DEL_DAEMON:
+	{
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy))
+			ret = -EINVAL;
+		else if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	}
+	case IPVS_CMD_ZERO:
+		/* without a service attribute, zero everything */
+		if (!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+			ret = ip_vs_zero_all();
+			goto out;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	need_full_svc = (cmd == IPVS_CMD_NEW_SERVICE ||
+			 cmd == IPVS_CMD_SET_SERVICE);
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by fwmark or <protocol, addr, port> */
+	if (usvc.fwmark != 0)
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+	else
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if (svc == NULL && cmd != IPVS_CMD_NEW_SERVICE) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		need_full_dest = (cmd != IPVS_CMD_DEL_DEST);
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc != NULL)
+			ret = -EEXIST;
+		else
+			ret = ip_vs_add_service(&usvc, &svc);
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+/*
+ * doit handler for IPVS_CMD_GET_SERVICE, IPVS_CMD_GET_INFO and
+ * IPVS_CMD_GET_CONFIG: build a single reply message and unicast it to the
+ * requester. The reply carries the corresponding NEW_SERVICE, SET_INFO or
+ * SET_CONFIG command number.
+ *
+ * Returns the result of genlmsg_unicast() once the reply has been built.
+ * Errors: -EINVAL for any other command, -ENOMEM if no reply buffer could
+ * be allocated, -EMSGSIZE if the reply did not fit, -ESRCH if the requested
+ * service does not exist, or the error from parsing the service attribute.
+ */
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	cmd = info->genlhdr->cmd;
+
+	/* Nothing is held yet, so the early failures return directly */
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	/*
+	 * Take the mutex before the first jump to one of the labels below:
+	 * every path through them ends in mutex_unlock().
+	 */
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		/* The NLA_PUT_U32() macros jump to nla_put_failure on error */
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	/* msg is not freed here after it has been passed to genlmsg_unicast() */
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+/*
+ * Operations of the IPVS genetlink family, registered one by one in
+ * ip_vs_genl_register(). Every command requires GENL_ADMIN_PERM.
+ *
+ * All modifying commands share ip_vs_genl_set_cmd() as their doit handler.
+ * GET_SERVICE, GET_CONFIG and GET_INFO are answered by
+ * ip_vs_genl_get_cmd(); GET_SERVICE, GET_DEST and GET_DAEMON provide dump
+ * callbacks, and the latter two have no doit handler.
+ */
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+/*
+ * Register the IPVS genetlink family and then every operation listed in
+ * ip_vs_genl_ops.
+ *
+ * Returns 0 on success. If the family cannot be registered its error is
+ * returned; if an operation cannot be registered the family is
+ * unregistered again and that error is returned.
+ */
+int ip_vs_genl_register(void)
+{
+	int err, i;
+
+	err = genl_register_family(&ip_vs_genl_family);
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		err = genl_register_ops(&ip_vs_genl_family, ip_vs_genl_ops + i);
+		if (err) {
+			genl_unregister_family(&ip_vs_genl_family);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Unregister the IPVS genetlink family registered by ip_vs_genl_register(). */
+void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
 
 int __init ip_vs_control_init(void)
 {
@@ -2334,6 +3204,13 @@ int __init ip_vs_control_init(void)
 		return ret;
 	}
 
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
@@ -2368,6 +3245,7 @@ void ip_vs_control_cleanup(void)
 	unregister_sysctl_table(sysctl_header);
 	proc_net_remove(&init_net, "ip_vs_stats");
 	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface
  2008-08-14  4:12           ` Simon Horman
@ 2008-08-14  9:34             ` Julius Volz
  0 siblings, 0 replies; 23+ messages in thread
From: Julius Volz @ 2008-08-14  9:34 UTC (permalink / raw)
  To: Simon Horman; +Cc: julius.volz, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 6:12 AM, Simon Horman <horms@verge.net.au> wrote:
> I tried to stress this code a bit more and nothing fell off.

Good, thanks!

> I'm pretty happy with them, though Sven's comment about error checking
> needs attention.

Yes, I just posted a fixed version.

Julius

-- 
Google Switzerland GmbH

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14  9:32       ` Julius Volz
@ 2008-08-14  9:52         ` Simon Horman
  2008-08-14 10:04         ` Sven Wegener
  1 sibling, 0 replies; 23+ messages in thread
From: Simon Horman @ 2008-08-14  9:52 UTC (permalink / raw)
  To: Julius Volz; +Cc: Sven Wegener, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 11:32:02AM +0200, Julius Volz wrote:
> Hi Sven,
> 
> On Wed, Aug 13, 2008 at 11:51:06PM +0200, Sven Wegener wrote:
> > On Fri, 8 Aug 2008, Julius Volz wrote:
> > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > +{
> > > +	struct sk_buff *msg;
> > > +	void *reply;
> > > +	int ret, cmd, reply_cmd;
> > > +
> > > +	mutex_lock(&__ip_vs_mutex);
> > > +
> > > +	cmd = info->genlhdr->cmd;
> > > +
> > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > +	if (!msg) {
> > > +		ret = -ENOMEM;
> > > +		goto out_err;
> > 
> > Here you want out...
> > 
> > > +	}
> > > +
> > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > +	else {
> > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > +		ret = -EINVAL;
> > > +		goto out;
> > 
> > > ...and here you want out_err, to not leak msg.
> 
> Ouch, thanks! Fixed this and locked the mutex later. I also removed the
> "if (msg)" from out_err, as it becomes unneeded now. Here's the updated
> patch:
> 
> -------------
> Add the implementation of the new Generic Netlink interface to IPVS and
> keep the old set/getsockopt interface for userspace backwards
> compatibility.
> 
> Signed-off-by: Julius Volz <juliusv@google.com>

Acked-by: Simon Horman <horms@verge.net.au>

(ditto for the other patch in the series)

>  1 files changed, 878 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
> index 6379705..63a1cbb 100644
> --- a/net/ipv4/ipvs/ip_vs_ctl.c
> +++ b/net/ipv4/ipvs/ip_vs_ctl.c
> @@ -37,6 +37,7 @@
>  #include <net/ip.h>
>  #include <net/route.h>
>  #include <net/sock.h>
> +#include <net/genetlink.h>
>  
>  #include <asm/uaccess.h>
>  
> @@ -2320,6 +2321,875 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
>  	.owner		= THIS_MODULE,
>  };
>  
> +/*
> + * Generic Netlink interface
> + */
> +
> +/* IPVS genetlink family */
> +static struct genl_family ip_vs_genl_family = {
> +	.id		= GENL_ID_GENERATE,
> +	.hdrsize	= 0,
> +	.name		= IPVS_GENL_NAME,
> +	.version	= IPVS_GENL_VERSION,
> +	.maxattr	= IPVS_CMD_MAX,
> +};
> +
> +/* Policy used for first-level command attributes */
> +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
> +	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
> +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
> +	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
> +	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_IFNAME_MAXLEN },
> +	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
> +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
> +	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_SCHEDNAME_MAXLEN },
> +	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
> +					    .len = sizeof(struct ip_vs_flags) },
> +	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
> +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
> +	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
> +				 struct ip_vs_stats *stats)
> +{
> +	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
> +	if (!nl_stats)
> +		return -EMSGSIZE;
> +
> +	spin_lock_bh(&stats->lock);
> +
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
> +
> +	spin_unlock_bh(&stats->lock);
> +
> +	nla_nest_end(skb, nl_stats);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	spin_unlock_bh(&stats->lock);
> +	nla_nest_cancel(skb, nl_stats);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_fill_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc)
> +{
> +	struct nlattr *nl_service;
> +	struct ip_vs_flags flags = { .flags = svc->flags,
> +				     .mask = ~0 };
> +
> +	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
> +	if (!nl_service)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
> +
> +	if (svc->fwmark) {
> +		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
> +	} else {
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
> +		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
> +	}
> +
> +	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
> +	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_service);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_service);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc,
> +				   struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_SERVICE);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_service(skb, svc) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_services(struct sk_buff *skb,
> +				    struct netlink_callback *cb)
> +{
> +	int idx = 0, i;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +	cb->args[0] = idx;
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
> +				    struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
> +	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
> +
> +	/* Parse mandatory identifying service fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
> +		return -EINVAL;
> +
> +	nla_af		= attrs[IPVS_SVC_ATTR_AF];
> +	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
> +	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
> +	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
> +
> +	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
> +		return -EINVAL;
> +
> +	/* For now, only support IPv4 */
> +	if (nla_get_u16(nla_af) != AF_INET)
> +		return -EAFNOSUPPORT;
> +
> +	if (nla_fwmark) {
> +		usvc->protocol = IPPROTO_TCP;
> +		usvc->fwmark = nla_get_u32(nla_fwmark);
> +	} else {
> +		usvc->protocol = nla_get_u16(nla_protocol);
> +		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
> +		usvc->port = nla_get_u16(nla_port);
> +		usvc->fwmark = 0;
> +	}
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
> +			      *nla_netmask;
> +		struct ip_vs_flags flags;
> +		struct ip_vs_service *svc;
> +
> +		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
> +		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
> +		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
> +		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
> +
> +		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
> +			return -EINVAL;
> +
> +		nla_memcpy(&flags, nla_flags, sizeof(flags));
> +
> +		/* prefill flags from service if it already exists */
> +		if (usvc->fwmark)
> +			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
> +		else
> +			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
> +						  usvc->port);
> +		if (svc) {
> +			usvc->flags = svc->flags;
> +			ip_vs_service_put(svc);
> +		} else
> +			usvc->flags = 0;
> +
> +		/* set new flags from userland */
> +		usvc->flags = (usvc->flags & ~flags.mask) |
> +			      (flags.flags & flags.mask);
> +
> +		strlcpy(usvc->sched_name, nla_data(nla_sched),
> +			sizeof(usvc->sched_name));
> +		usvc->timeout = nla_get_u32(nla_timeout);
> +		usvc->netmask = nla_get_u32(nla_netmask);
> +	}
> +
> +	return 0;
> +}
> +
> +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
> +{
> +	struct ip_vs_service_user usvc;
> +	int ret;
> +
> +	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	if (usvc.fwmark)
> +		return __ip_vs_svc_fwm_get(usvc.fwmark);
> +	else
> +		return __ip_vs_service_get(usvc.protocol, usvc.addr,
> +					   usvc.port);
> +}
> +
> +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
> +{
> +	struct nlattr *nl_dest;
> +
> +	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
> +	if (!nl_dest)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
> +	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
> +
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
> +		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
> +		    atomic_read(&dest->activeconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
> +		    atomic_read(&dest->inactconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
> +		    atomic_read(&dest->persistconns));
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_dest);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_dest);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
> +				struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DEST);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_dest(skb, dest) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dests(struct sk_buff *skb,
> +				 struct netlink_callback *cb)
> +{
> +	int idx = 0;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +	struct ip_vs_dest *dest;
> +	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	/* Try to find the service for which to dump destinations */
> +	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
> +			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
> +		goto out_err;
> +
> +	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
> +	if (IS_ERR(svc) || svc == NULL)
> +		goto out_err;
> +
> +	/* Dump the destinations */
> +	list_for_each_entry(dest, &svc->destinations, n_list) {
> +		if (++idx <= start)
> +			continue;
> +		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
> +			idx--;
> +			goto nla_put_failure;
> +		}
> +	}
> +
> +nla_put_failure:
> +	cb->args[0] = idx;
> +	ip_vs_service_put(svc);
> +
> +out_err:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
> +				 struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
> +	struct nlattr *nla_addr, *nla_port;
> +
> +	/* Parse mandatory identifying destination fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
> +		return -EINVAL;
> +
> +	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
> +
> +	if (!(nla_addr && nla_port))
> +		return -EINVAL;
> +
> +	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
> +	udest->port = nla_get_u16(nla_port);
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
> +			      *nla_l_thresh;
> +
> +		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
> +		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
> +		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
> +		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
> +
> +		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
> +			return -EINVAL;
> +
> +		udest->conn_flags = nla_get_u32(nla_fwd)
> +				    & IP_VS_CONN_F_FWD_MASK;
> +		udest->weight = nla_get_u32(nla_weight);
> +		udest->u_threshold = nla_get_u32(nla_u_thresh);
> +		udest->l_threshold = nla_get_u32(nla_l_thresh);
> +	}
> +
> +	return 0;
> +}
> +
> +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid)
> +{
> +	struct nlattr *nl_daemon;
> +
> +	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
> +	if (!nl_daemon)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
> +	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
> +
> +	nla_nest_end(skb, nl_daemon);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_daemon);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid,
> +				  struct netlink_callback *cb)
> +{
> +	void *hdr;
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DAEMON);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
> +				   struct netlink_callback *cb)
> +{
> +	mutex_lock(&__ip_vs_mutex);
> +	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
> +					   ip_vs_master_mcast_ifn,
> +					   ip_vs_master_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[0] = 1;
> +	}
> +
> +	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
> +					   ip_vs_backup_mcast_ifn,
> +					   ip_vs_backup_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[1] = 1;
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_new_daemon(struct nlattr **attrs)
> +{
> +	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
> +	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
> +	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
> +		return -EINVAL;
> +
> +	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
> +				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
> +				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
> +}
> +
> +static int ip_vs_genl_del_daemon(struct nlattr **attrs)
> +{
> +	if (!attrs[IPVS_DAEMON_ATTR_STATE])
> +		return -EINVAL;
> +
> +	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
> +}
> +
> +static int ip_vs_genl_set_config(struct nlattr **attrs)
> +{
> +	struct ip_vs_timeout_user t;
> +
> +	__ip_vs_get_timeouts(&t);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
> +		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
> +		t.tcp_fin_timeout =
> +			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
> +		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
> +
> +	return ip_vs_set_timeout(&t);
> +}
> +
> +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct ip_vs_service *svc = NULL;
> +	struct ip_vs_service_user usvc;
> +	struct ip_vs_dest_user udest;
> +	int ret = 0, cmd;
> +	int need_full_svc = 0, need_full_dest = 0;
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	if (cmd == IPVS_CMD_FLUSH) {
> +		ret = ip_vs_flush();
> +		goto out;
> +	} else if (cmd == IPVS_CMD_SET_CONFIG) {
> +		ret = ip_vs_genl_set_config(info->attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
> +		   cmd == IPVS_CMD_DEL_DAEMON) {
> +
> +		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
> +
> +		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
> +		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
> +				     info->attrs[IPVS_CMD_ATTR_DAEMON],
> +				     ip_vs_daemon_policy)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (cmd == IPVS_CMD_NEW_DAEMON)
> +			ret = ip_vs_genl_new_daemon(daemon_attrs);
> +		else
> +			ret = ip_vs_genl_del_daemon(daemon_attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_ZERO &&
> +		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
> +		ret = ip_vs_zero_all();
> +		goto out;
> +	}
> +
> +	/* All following commands require a service argument, so check if we
> +	 * received a valid one. We need a full service specification when
> +	 * adding / editing a service. Only identifying members otherwise. */
> +	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
> +		need_full_svc = 1;
> +
> +	ret = ip_vs_genl_parse_service(&usvc,
> +				       info->attrs[IPVS_CMD_ATTR_SERVICE],
> +				       need_full_svc);
> +	if (ret)
> +		goto out;
> +
> +	/* Lookup the exact service by <protocol, addr, port> or fwmark */
> +	if (usvc.fwmark == 0)
> +		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
> +	else
> +		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
> +
> +	/* Unless we're adding a new service, the service must already exist */
> +	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
> +		ret = -ESRCH;
> +		goto out;
> +	}
> +
> +	/* Destination commands require a valid destination argument. For
> +	 * adding / editing a destination, we need a full destination
> +	 * specification. */
> +	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
> +	    cmd == IPVS_CMD_DEL_DEST) {
> +		if (cmd != IPVS_CMD_DEL_DEST)
> +			need_full_dest = 1;
> +
> +		ret = ip_vs_genl_parse_dest(&udest,
> +					    info->attrs[IPVS_CMD_ATTR_DEST],
> +					    need_full_dest);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	switch (cmd) {
> +	case IPVS_CMD_NEW_SERVICE:
> +		if (svc == NULL)
> +			ret = ip_vs_add_service(&usvc, &svc);
> +		else
> +			ret = -EEXIST;
> +		break;
> +	case IPVS_CMD_SET_SERVICE:
> +		ret = ip_vs_edit_service(svc, &usvc);
> +		break;
> +	case IPVS_CMD_DEL_SERVICE:
> +		ret = ip_vs_del_service(svc);
> +		break;
> +	case IPVS_CMD_NEW_DEST:
> +		ret = ip_vs_add_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_SET_DEST:
> +		ret = ip_vs_edit_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_DEL_DEST:
> +		ret = ip_vs_del_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_ZERO:
> +		ret = ip_vs_zero_service(svc);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +out:
> +	if (svc)
> +		ip_vs_service_put(svc);
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct sk_buff *msg;
> +	void *reply;
> +	int ret, cmd, reply_cmd;
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	if (cmd == IPVS_CMD_GET_SERVICE)
> +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> +	else if (cmd == IPVS_CMD_GET_INFO)
> +		reply_cmd = IPVS_CMD_SET_INFO;
> +	else if (cmd == IPVS_CMD_GET_CONFIG)
> +		reply_cmd = IPVS_CMD_SET_CONFIG;
> +	else {
> +		IP_VS_ERR("unknown Generic Netlink command\n");
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!msg) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> +	if (reply == NULL)
> +		goto nla_put_failure;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	switch (cmd) {
> +	case IPVS_CMD_GET_SERVICE:
> +	{
> +		struct ip_vs_service *svc;
> +
> +		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
> +		if (IS_ERR(svc)) {
> +			ret = PTR_ERR(svc);
> +			goto out_err;
> +		} else if (svc) {
> +			ret = ip_vs_genl_fill_service(msg, svc);
> +			ip_vs_service_put(svc);
> +			if (ret)
> +				goto nla_put_failure;
> +		} else {
> +			ret = -ESRCH;
> +			goto out_err;
> +		}
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_CONFIG:
> +	{
> +		struct ip_vs_timeout_user t;
> +
> +		__ip_vs_get_timeouts(&t);
> +#ifdef CONFIG_IP_VS_PROTO_TCP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
> +			    t.tcp_fin_timeout);
> +#endif
> +#ifdef CONFIG_IP_VS_PROTO_UDP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
> +#endif
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_INFO:
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
> +			    IP_VS_CONN_TAB_SIZE);
> +		break;
> +	}
> +
> +	genlmsg_end(msg, reply);
> +	ret = genlmsg_unicast(msg, info->snd_pid);
> +	goto out;
> +
> +nla_put_failure:
> +	IP_VS_ERR("not enough space in Netlink message\n");
> +	ret = -EMSGSIZE;
> +
> +out_err:
> +	nlmsg_free(msg);
> +out:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +
> +static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
> +	{
> +		.cmd	= IPVS_CMD_NEW_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +		.dumpit	= ip_vs_genl_dump_services,
> +		.policy	= ip_vs_cmd_policy,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.dumpit	= ip_vs_genl_dump_dests,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.dumpit	= ip_vs_genl_dump_daemons,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_INFO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_ZERO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_FLUSH,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +};
> +
> +int ip_vs_genl_register(void)
> +{
> +	int ret, i;
> +
> +	ret = genl_register_family(&ip_vs_genl_family);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
> +		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
> +		if (ret)
> +			goto err_out;
> +	}
> +	return 0;
> +
> +err_out:
> +	genl_unregister_family(&ip_vs_genl_family);
> +	return ret;
> +}
> +
> +void ip_vs_genl_unregister(void)
> +{
> +	genl_unregister_family(&ip_vs_genl_family);
> +}
> +
> +/* End of Generic Netlink interface definitions */
> +
>  
>  int __init ip_vs_control_init(void)
>  {
> @@ -2334,6 +3204,13 @@ int __init ip_vs_control_init(void)
>  		return ret;
>  	}
>  
> +	ret = ip_vs_genl_register();
> +	if (ret) {
> +		IP_VS_ERR("cannot register Generic Netlink interface.\n");
> +		nf_unregister_sockopt(&ip_vs_sockopts);
> +		return ret;
> +	}
> +
>  	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
>  	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
>  
> @@ -2368,6 +3245,7 @@ void ip_vs_control_cleanup(void)
>  	unregister_sysctl_table(sysctl_header);
>  	proc_net_remove(&init_net, "ip_vs_stats");
>  	proc_net_remove(&init_net, "ip_vs");
> +	ip_vs_genl_unregister();
>  	nf_unregister_sockopt(&ip_vs_sockopts);
>  	LeaveFunction(2);
>  }
> -- 
> 1.5.4.5

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14  9:32       ` Julius Volz
  2008-08-14  9:52         ` Simon Horman
@ 2008-08-14 10:04         ` Sven Wegener
  2008-08-14 10:27           ` Julius Volz
  1 sibling, 1 reply; 23+ messages in thread
From: Sven Wegener @ 2008-08-14 10:04 UTC (permalink / raw)
  To: Julius Volz; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Thu, 14 Aug 2008, Julius Volz wrote:

> On Wed, Aug 13, 2008 at 11:51:06PM +0200, Sven Wegener wrote:
> > On Fri, 8 Aug 2008, Julius Volz wrote:
> > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > +{
> > > +	struct sk_buff *msg;
> > > +	void *reply;
> > > +	int ret, cmd, reply_cmd;
> > > +
> > > +	mutex_lock(&__ip_vs_mutex);
> > > +
> > > +	cmd = info->genlhdr->cmd;
> > > +
> > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > +	if (!msg) {
> > > +		ret = -ENOMEM;
> > > +		goto out_err;
> > 
> > Here you want out...
> > 
> > > +	}
> > > +
> > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > +	else {
> > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > +		ret = -EINVAL;
> > > +		goto out;
> > 
> > ..and here you want out_error, to not leak msg.
> 
> Ouch, thanks! Fixed this and locked the mutex later. I also removed the
> "if (msg)" from out_err, as it becomes unneeded now. Here's the updated
> patch:

You missed the static on the register and unregister functions down at the 
bottom. :) Also see my comments on your change for the above issue further 
down.

> -------------
> Add the implementation of the new Generic Netlink interface to IPVS and
> keep the old set/getsockopt interface for userspace backwards
> compatibility.
> 
> Signed-off-by: Julius Volz <juliusv@google.com>
> 
>  1 files changed, 878 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
> index 6379705..63a1cbb 100644
> --- a/net/ipv4/ipvs/ip_vs_ctl.c
> +++ b/net/ipv4/ipvs/ip_vs_ctl.c
> @@ -37,6 +37,7 @@
>  #include <net/ip.h>
>  #include <net/route.h>
>  #include <net/sock.h>
> +#include <net/genetlink.h>
>  
>  #include <asm/uaccess.h>
>  
> @@ -2320,6 +2321,875 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
>  	.owner		= THIS_MODULE,
>  };
>  
> +/*
> + * Generic Netlink interface
> + */
> +
> +/* IPVS genetlink family */
> +static struct genl_family ip_vs_genl_family = {
> +	.id		= GENL_ID_GENERATE,
> +	.hdrsize	= 0,
> +	.name		= IPVS_GENL_NAME,
> +	.version	= IPVS_GENL_VERSION,
> +	.maxattr	= IPVS_CMD_MAX,
> +};
> +
> +/* Policy used for first-level command attributes */
> +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
> +	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
> +	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
> +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
> +	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
> +	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_IFNAME_MAXLEN },
> +	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
> +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
> +	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
> +					    .len = IP_VS_SCHEDNAME_MAXLEN },
> +	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
> +					    .len = sizeof(struct ip_vs_flags) },
> +	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
> +	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
> +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
> +	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
> +					    .len = sizeof(union nf_inet_addr) },
> +	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
> +	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
> +	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
> +};
> +
> +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
> +				 struct ip_vs_stats *stats)
> +{
> +	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
> +	if (!nl_stats)
> +		return -EMSGSIZE;
> +
> +	spin_lock_bh(&stats->lock);
> +
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
> +	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
> +	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
> +
> +	spin_unlock_bh(&stats->lock);
> +
> +	nla_nest_end(skb, nl_stats);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	spin_unlock_bh(&stats->lock);
> +	nla_nest_cancel(skb, nl_stats);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_fill_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc)
> +{
> +	struct nlattr *nl_service;
> +	struct ip_vs_flags flags = { .flags = svc->flags,
> +				     .mask = ~0 };
> +
> +	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
> +	if (!nl_service)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
> +
> +	if (svc->fwmark) {
> +		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
> +	} else {
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
> +		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
> +		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
> +	}
> +
> +	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
> +	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
> +	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_service);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_service);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_service(struct sk_buff *skb,
> +				   struct ip_vs_service *svc,
> +				   struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_SERVICE);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_service(skb, svc) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_services(struct sk_buff *skb,
> +				    struct netlink_callback *cb)
> +{
> +	int idx = 0, i;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
> +		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
> +			if (++idx <= start)
> +				continue;
> +			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
> +				idx--;
> +				goto nla_put_failure;
> +			}
> +		}
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +	cb->args[0] = idx;
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
> +				    struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
> +	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
> +
> +	/* Parse mandatory identifying service fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
> +		return -EINVAL;
> +
> +	nla_af		= attrs[IPVS_SVC_ATTR_AF];
> +	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
> +	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
> +	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
> +
> +	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
> +		return -EINVAL;
> +
> +	/* For now, only support IPv4 */
> +	if (nla_get_u16(nla_af) != AF_INET)
> +		return -EAFNOSUPPORT;
> +
> +	if (nla_fwmark) {
> +		usvc->protocol = IPPROTO_TCP;
> +		usvc->fwmark = nla_get_u32(nla_fwmark);
> +	} else {
> +		usvc->protocol = nla_get_u16(nla_protocol);
> +		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
> +		usvc->port = nla_get_u16(nla_port);
> +		usvc->fwmark = 0;
> +	}
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
> +			      *nla_netmask;
> +		struct ip_vs_flags flags;
> +		struct ip_vs_service *svc;
> +
> +		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
> +		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
> +		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
> +		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
> +
> +		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
> +			return -EINVAL;
> +
> +		nla_memcpy(&flags, nla_flags, sizeof(flags));
> +
> +		/* prefill flags from service if it already exists */
> +		if (usvc->fwmark)
> +			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
> +		else
> +			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
> +						  usvc->port);
> +		if (svc) {
> +			usvc->flags = svc->flags;
> +			ip_vs_service_put(svc);
> +		} else
> +			usvc->flags = 0;
> +
> +		/* set new flags from userland */
> +		usvc->flags = (usvc->flags & ~flags.mask) |
> +			      (flags.flags & flags.mask);
> +
> +		strlcpy(usvc->sched_name, nla_data(nla_sched),
> +			sizeof(usvc->sched_name));
> +		usvc->timeout = nla_get_u32(nla_timeout);
> +		usvc->netmask = nla_get_u32(nla_netmask);
> +	}
> +
> +	return 0;
> +}
> +
> +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
> +{
> +	struct ip_vs_service_user usvc;
> +	int ret;
> +
> +	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	if (usvc.fwmark)
> +		return __ip_vs_svc_fwm_get(usvc.fwmark);
> +	else
> +		return __ip_vs_service_get(usvc.protocol, usvc.addr,
> +					   usvc.port);
> +}
> +
> +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
> +{
> +	struct nlattr *nl_dest;
> +
> +	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
> +	if (!nl_dest)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
> +	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
> +
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
> +		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
> +		    atomic_read(&dest->activeconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
> +		    atomic_read(&dest->inactconns));
> +	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
> +		    atomic_read(&dest->persistconns));
> +
> +	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
> +		goto nla_put_failure;
> +
> +	nla_nest_end(skb, nl_dest);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_dest);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
> +				struct netlink_callback *cb)
> +{
> +	void *hdr;
> +
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DEST);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_dest(skb, dest) < 0)
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_dests(struct sk_buff *skb,
> +				 struct netlink_callback *cb)
> +{
> +	int idx = 0;
> +	int start = cb->args[0];
> +	struct ip_vs_service *svc;
> +	struct ip_vs_dest *dest;
> +	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	/* Try to find the service for which to dump destinations */
> +	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
> +			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
> +		goto out_err;
> +
> +	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
> +	if (IS_ERR(svc) || svc == NULL)
> +		goto out_err;
> +
> +	/* Dump the destinations */
> +	list_for_each_entry(dest, &svc->destinations, n_list) {
> +		if (++idx <= start)
> +			continue;
> +		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
> +			idx--;
> +			goto nla_put_failure;
> +		}
> +	}
> +
> +nla_put_failure:
> +	cb->args[0] = idx;
> +	ip_vs_service_put(svc);
> +
> +out_err:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
> +				 struct nlattr *nla, int full_entry)
> +{
> +	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
> +	struct nlattr *nla_addr, *nla_port;
> +
> +	/* Parse mandatory identifying destination fields first */
> +	if (nla == NULL ||
> +	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
> +		return -EINVAL;
> +
> +	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
> +	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
> +
> +	if (!(nla_addr && nla_port))
> +		return -EINVAL;
> +
> +	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
> +	udest->port = nla_get_u16(nla_port);
> +
> +	/* If a full entry was requested, check for the additional fields */
> +	if (full_entry) {
> +		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
> +			      *nla_l_thresh;
> +
> +		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
> +		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
> +		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
> +		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
> +
> +		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
> +			return -EINVAL;
> +
> +		udest->conn_flags = nla_get_u32(nla_fwd)
> +				    & IP_VS_CONN_F_FWD_MASK;
> +		udest->weight = nla_get_u32(nla_weight);
> +		udest->u_threshold = nla_get_u32(nla_u_thresh);
> +		udest->l_threshold = nla_get_u32(nla_l_thresh);
> +	}
> +
> +	return 0;
> +}
> +
> +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid)
> +{
> +	struct nlattr *nl_daemon;
> +
> +	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
> +	if (!nl_daemon)
> +		return -EMSGSIZE;
> +
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
> +	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
> +	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
> +
> +	nla_nest_end(skb, nl_daemon);
> +
> +	return 0;
> +
> +nla_put_failure:
> +	nla_nest_cancel(skb, nl_daemon);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
> +				  const char *mcast_ifn, __be32 syncid,
> +				  struct netlink_callback *cb)
> +{
> +	void *hdr;
> +	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
> +			  &ip_vs_genl_family, NLM_F_MULTI,
> +			  IPVS_CMD_NEW_DAEMON);
> +	if (!hdr)
> +		return -EMSGSIZE;
> +
> +	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
> +		goto nla_put_failure;
> +
> +	return genlmsg_end(skb, hdr);
> +
> +nla_put_failure:
> +	genlmsg_cancel(skb, hdr);
> +	return -EMSGSIZE;
> +}
> +
> +static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
> +				   struct netlink_callback *cb)
> +{
> +	mutex_lock(&__ip_vs_mutex);
> +	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
> +					   ip_vs_master_mcast_ifn,
> +					   ip_vs_master_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[0] = 1;
> +	}
> +
> +	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
> +		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
> +					   ip_vs_backup_mcast_ifn,
> +					   ip_vs_backup_syncid, cb) < 0)
> +			goto nla_put_failure;
> +
> +		cb->args[1] = 1;
> +	}
> +
> +nla_put_failure:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return skb->len;
> +}
> +
> +static int ip_vs_genl_new_daemon(struct nlattr **attrs)
> +{
> +	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
> +	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
> +	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
> +		return -EINVAL;
> +
> +	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
> +				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
> +				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
> +}
> +
> +static int ip_vs_genl_del_daemon(struct nlattr **attrs)
> +{
> +	if (!attrs[IPVS_DAEMON_ATTR_STATE])
> +		return -EINVAL;
> +
> +	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
> +}
> +
> +static int ip_vs_genl_set_config(struct nlattr **attrs)
> +{
> +	struct ip_vs_timeout_user t;
> +
> +	__ip_vs_get_timeouts(&t);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
> +		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
> +		t.tcp_fin_timeout =
> +			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
> +
> +	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
> +		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
> +
> +	return ip_vs_set_timeout(&t);
> +}
> +
> +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct ip_vs_service *svc = NULL;
> +	struct ip_vs_service_user usvc;
> +	struct ip_vs_dest_user udest;
> +	int ret = 0, cmd;
> +	int need_full_svc = 0, need_full_dest = 0;
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	mutex_lock(&__ip_vs_mutex);
> +
> +	if (cmd == IPVS_CMD_FLUSH) {
> +		ret = ip_vs_flush();
> +		goto out;
> +	} else if (cmd == IPVS_CMD_SET_CONFIG) {
> +		ret = ip_vs_genl_set_config(info->attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
> +		   cmd == IPVS_CMD_DEL_DAEMON) {
> +
> +		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
> +
> +		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
> +		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
> +				     info->attrs[IPVS_CMD_ATTR_DAEMON],
> +				     ip_vs_daemon_policy)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		if (cmd == IPVS_CMD_NEW_DAEMON)
> +			ret = ip_vs_genl_new_daemon(daemon_attrs);
> +		else
> +			ret = ip_vs_genl_del_daemon(daemon_attrs);
> +		goto out;
> +	} else if (cmd == IPVS_CMD_ZERO &&
> +		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
> +		ret = ip_vs_zero_all();
> +		goto out;
> +	}
> +
> +	/* All following commands require a service argument, so check if we
> +	 * received a valid one. We need a full service specification when
> +	 * adding / editing a service. Only identifying members otherwise. */
> +	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
> +		need_full_svc = 1;
> +
> +	ret = ip_vs_genl_parse_service(&usvc,
> +				       info->attrs[IPVS_CMD_ATTR_SERVICE],
> +				       need_full_svc);
> +	if (ret)
> +		goto out;
> +
> +	/* Lookup the exact service by <protocol, addr, port> or fwmark */
> +	if (usvc.fwmark == 0)
> +		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
> +	else
> +		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
> +
> +	/* Unless we're adding a new service, the service must already exist */
> +	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
> +		ret = -ESRCH;
> +		goto out;
> +	}
> +
> +	/* Destination commands require a valid destination argument. For
> +	 * adding / editing a destination, we need a full destination
> +	 * specification. */
> +	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
> +	    cmd == IPVS_CMD_DEL_DEST) {
> +		if (cmd != IPVS_CMD_DEL_DEST)
> +			need_full_dest = 1;
> +
> +		ret = ip_vs_genl_parse_dest(&udest,
> +					    info->attrs[IPVS_CMD_ATTR_DEST],
> +					    need_full_dest);
> +		if (ret)
> +			goto out;
> +	}
> +
> +	switch (cmd) {
> +	case IPVS_CMD_NEW_SERVICE:
> +		if (svc == NULL)
> +			ret = ip_vs_add_service(&usvc, &svc);
> +		else
> +			ret = -EEXIST;
> +		break;
> +	case IPVS_CMD_SET_SERVICE:
> +		ret = ip_vs_edit_service(svc, &usvc);
> +		break;
> +	case IPVS_CMD_DEL_SERVICE:
> +		ret = ip_vs_del_service(svc);
> +		break;
> +	case IPVS_CMD_NEW_DEST:
> +		ret = ip_vs_add_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_SET_DEST:
> +		ret = ip_vs_edit_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_DEL_DEST:
> +		ret = ip_vs_del_dest(svc, &udest);
> +		break;
> +	case IPVS_CMD_ZERO:
> +		ret = ip_vs_zero_service(svc);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +out:
> +	if (svc)
> +		ip_vs_service_put(svc);
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> +{
> +	struct sk_buff *msg;
> +	void *reply;
> +	int ret, cmd, reply_cmd;
> +
> +	cmd = info->genlhdr->cmd;
> +
> +	if (cmd == IPVS_CMD_GET_SERVICE)
> +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> +	else if (cmd == IPVS_CMD_GET_INFO)
> +		reply_cmd = IPVS_CMD_SET_INFO;
> +	else if (cmd == IPVS_CMD_GET_CONFIG)
> +		reply_cmd = IPVS_CMD_SET_CONFIG;
> +	else {
> +		IP_VS_ERR("unknown Generic Netlink command\n");
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> +	if (!msg) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> +	if (reply == NULL)
> +		goto nla_put_failure;

These gotos now jump to the error path, which unlocks a mutex that has not been locked yet.

> +
> +	mutex_lock(&__ip_vs_mutex);

Is there a reason for not using mutex_lock_interruptible() like the sockopt 
interface does? I wondered about this in your earlier patch, but didn't 
really bother to ask.

> +
> +	switch (cmd) {
> +	case IPVS_CMD_GET_SERVICE:
> +	{
> +		struct ip_vs_service *svc;
> +
> +		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
> +		if (IS_ERR(svc)) {
> +			ret = PTR_ERR(svc);
> +			goto out_err;
> +		} else if (svc) {
> +			ret = ip_vs_genl_fill_service(msg, svc);
> +			ip_vs_service_put(svc);
> +			if (ret)
> +				goto nla_put_failure;
> +		} else {
> +			ret = -ESRCH;
> +			goto out_err;
> +		}
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_CONFIG:
> +	{
> +		struct ip_vs_timeout_user t;
> +
> +		__ip_vs_get_timeouts(&t);
> +#ifdef CONFIG_IP_VS_PROTO_TCP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
> +			    t.tcp_fin_timeout);
> +#endif
> +#ifdef CONFIG_IP_VS_PROTO_UDP
> +		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
> +#endif
> +
> +		break;
> +	}
> +
> +	case IPVS_CMD_GET_INFO:
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
> +		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
> +			    IP_VS_CONN_TAB_SIZE);
> +		break;
> +	}
> +
> +	genlmsg_end(msg, reply);
> +	ret = genlmsg_unicast(msg, info->snd_pid);
> +	goto out;
> +
> +nla_put_failure:
> +	IP_VS_ERR("not enough space in Netlink message\n");
> +	ret = -EMSGSIZE;
> +
> +out_err:
> +	nlmsg_free(msg);
> +out:
> +	mutex_unlock(&__ip_vs_mutex);
> +
> +	return ret;
> +}
> +
> +
> +static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
> +	{
> +		.cmd	= IPVS_CMD_NEW_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_SERVICE,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +		.dumpit	= ip_vs_genl_dump_services,
> +		.policy	= ip_vs_cmd_policy,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DEST,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.dumpit	= ip_vs_genl_dump_dests,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_NEW_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_DEL_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_DAEMON,
> +		.flags	= GENL_ADMIN_PERM,
> +		.dumpit	= ip_vs_genl_dump_daemons,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_SET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_CONFIG,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_GET_INFO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_get_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_ZERO,
> +		.flags	= GENL_ADMIN_PERM,
> +		.policy	= ip_vs_cmd_policy,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +	{
> +		.cmd	= IPVS_CMD_FLUSH,
> +		.flags	= GENL_ADMIN_PERM,
> +		.doit	= ip_vs_genl_set_cmd,
> +	},
> +};
> +
> +int ip_vs_genl_register(void)

static int __init

> +{
> +	int ret, i;
> +
> +	ret = genl_register_family(&ip_vs_genl_family);
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
> +		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
> +		if (ret)
> +			goto err_out;
> +	}
> +	return 0;
> +
> +err_out:
> +	genl_unregister_family(&ip_vs_genl_family);
> +	return ret;
> +}
> +
> +void ip_vs_genl_unregister(void)

static void

> +{
> +	genl_unregister_family(&ip_vs_genl_family);
> +}
> +
> +/* End of Generic Netlink interface definitions */
> +
>  
>  int __init ip_vs_control_init(void)
>  {
> @@ -2334,6 +3204,13 @@ int __init ip_vs_control_init(void)
>  		return ret;
>  	}
>  
> +	ret = ip_vs_genl_register();
> +	if (ret) {
> +		IP_VS_ERR("cannot register Generic Netlink interface.\n");
> +		nf_unregister_sockopt(&ip_vs_sockopts);
> +		return ret;
> +	}
> +
>  	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
>  	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
>  
> @@ -2368,6 +3245,7 @@ void ip_vs_control_cleanup(void)
>  	unregister_sysctl_table(sysctl_header);
>  	proc_net_remove(&init_net, "ip_vs_stats");
>  	proc_net_remove(&init_net, "ip_vs");
> +	ip_vs_genl_unregister();
>  	nf_unregister_sockopt(&ip_vs_sockopts);
>  	LeaveFunction(2);
>  }
> 

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14 10:04         ` Sven Wegener
@ 2008-08-14 10:27           ` Julius Volz
  2008-08-14 12:08             ` Julius Volz
  0 siblings, 1 reply; 23+ messages in thread
From: Julius Volz @ 2008-08-14 10:27 UTC (permalink / raw)
  To: Sven Wegener; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 12:04:50PM +0200, Sven Wegener wrote:
> On Thu, 14 Aug 2008, Julius Volz wrote:
> 
> > On Wed, Aug 13, 2008 at 11:51:06PM +0200, Sven Wegener wrote:
> > > On Fri, 8 Aug 2008, Julius Volz wrote:
> > > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > > +{
> > > > +	struct sk_buff *msg;
> > > > +	void *reply;
> > > > +	int ret, cmd, reply_cmd;
> > > > +
> > > > +	mutex_lock(&__ip_vs_mutex);
> > > > +
> > > > +	cmd = info->genlhdr->cmd;
> > > > +
> > > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > > +	if (!msg) {
> > > > +		ret = -ENOMEM;
> > > > +		goto out_err;
> > > 
> > > Here you want out...
> > > 
> > > > +	}
> > > > +
> > > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > > +	else {
> > > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > > +		ret = -EINVAL;
> > > > +		goto out;
> > > 
> > > ..and here you want out_error, to not leak msg.
> > 
> > Ouch, thanks! Fixed this and locked the mutex later. I also removed the
> > "if (msg)" from out_err, as it becomes unneeded now. Here's the updated
> > patch:
> 
> You missed the static on the register and unregister functions down at the 
> bottom. :) Also see my comments to your change regarding the above issue 
> down here.

Hrm, yes!

> > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct sk_buff *msg;
> > +	void *reply;
> > +	int ret, cmd, reply_cmd;
> > +
> > +	cmd = info->genlhdr->cmd;
> > +
> > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > +	else if (cmd == IPVS_CMD_GET_INFO)
> > +		reply_cmd = IPVS_CMD_SET_INFO;
> > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > +	else {
> > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > +		ret = -EINVAL;
> > +		goto out;
> > +	}
> > +
> > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > +	if (!msg) {
> > +		ret = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> > +	if (reply == NULL)
> > +		goto nla_put_failure;
> 
> These gotos now unlock a not locked mutex down in the error path.

What did they put into my water supply :-/ Thanks!

> 
> > +
> > +	mutex_lock(&__ip_vs_mutex);
> 
> Is there a reason not using mutex_lock_interruptible() like the sockopt 
> interface does? I wondered in your earlier patch, but didn't really 
> bother.

I'm not an expert here. I saw mutex_lock_interruptible() being used in the
old sockopt interface, which returns -ERESTARTSYS on interruption. Can I
do something similar from a genetlink function, or what is the right way?
I was unsure, so I stuck to mutex_lock()...

Julius

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14 10:27           ` Julius Volz
@ 2008-08-14 12:08             ` Julius Volz
  2008-08-14 13:32               ` Sven Wegener
  0 siblings, 1 reply; 23+ messages in thread
From: Julius Volz @ 2008-08-14 12:08 UTC (permalink / raw)
  To: Sven Wegener; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 12:27:19PM +0200, Julius Volz wrote:
> On Thu, Aug 14, 2008 at 12:04:50PM +0200, Sven Wegener wrote:
> > On Thu, 14 Aug 2008, Julius Volz wrote:
> > 
> > > On Wed, Aug 13, 2008 at 11:51:06PM +0200, Sven Wegener wrote:
> > > > On Fri, 8 Aug 2008, Julius Volz wrote:
> > > > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > > > +{
> > > > > +	struct sk_buff *msg;
> > > > > +	void *reply;
> > > > > +	int ret, cmd, reply_cmd;
> > > > > +
> > > > > +	mutex_lock(&__ip_vs_mutex);
> > > > > +
> > > > > +	cmd = info->genlhdr->cmd;
> > > > > +
> > > > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > > > +	if (!msg) {
> > > > > +		ret = -ENOMEM;
> > > > > +		goto out_err;
> > > > 
> > > > Here you want out...
> > > > 
> > > > > +	}
> > > > > +
> > > > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > > > +	else {
> > > > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > > > +		ret = -EINVAL;
> > > > > +		goto out;
> > > > 
> > > > > ..and here you want out_err, to not leak msg.
> > > 
> > > Ouch, thanks! Fixed this and locked the mutex later. I also removed the
> > > "if (msg)" from out_err, as it becomes unneeded now. Here's the updated
> > > patch:
> > 
> > You missed the static on the register and unregister functions down at the 
> > bottom. :) Also see my comments to your change regarding the above issue 
> > down here.
> 
> Hrm, yes!
> 
> > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > +{
> > > +	struct sk_buff *msg;
> > > +	void *reply;
> > > +	int ret, cmd, reply_cmd;
> > > +
> > > +	cmd = info->genlhdr->cmd;
> > > +
> > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > +	else {
> > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > +		ret = -EINVAL;
> > > +		goto out;
> > > +	}
> > > +
> > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > +	if (!msg) {
> > > +		ret = -ENOMEM;
> > > +		goto out;
> > > +	}
> > > +
> > > +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> > > +	if (reply == NULL)
> > > +		goto nla_put_failure;
> > 
> > These gotos now unlock a not locked mutex down in the error path.
> 
> What did they put into my water supply :-/ Thanks!

Ok, fixed this up. I did not move the mutex_lock() all the way down, so
that the code looks a bit nicer (nla_put_failure assumes a locked mutex).
There should not be much concurrency anyway, since this mutex only
serializes the userspace interface, which is mainly used by ipvsadm.

Julius

--------
Add the implementation of the new Generic Netlink interface to IPVS and
keep the old set/getsockopt interface for userspace backwards
compatibility.

Signed-off-by: Julius Volz <juliusv@google.com>

 1 files changed, 875 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 6379705..d1dbd8b 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include <asm/uaccess.h>
 
@@ -2320,6 +2321,872 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_MAX,
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = ~0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af		= attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/* For now, only support IPv4 */
+	if (nla_get_u16(nla_af) != AF_INET)
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+						  usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+
+		strlcpy(usvc->sched_name, nla_data(nla_sched),
+			sizeof(usvc->sched_name));
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+					   usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user usvc;
+	struct ip_vs_dest_user udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	cmd = info->genlhdr->cmd;
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+static int __init ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+static void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
 
 int __init ip_vs_control_init(void)
 {
@@ -2334,6 +3201,13 @@ int __init ip_vs_control_init(void)
 		return ret;
 	}
 
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
@@ -2368,6 +3242,7 @@ void ip_vs_control_cleanup(void)
 	unregister_sysctl_table(sysctl_header);
 	proc_net_remove(&init_net, "ip_vs_stats");
 	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
-- 
1.5.4.5


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14 12:08             ` Julius Volz
@ 2008-08-14 13:32               ` Sven Wegener
  2008-08-14 14:22                 ` Julius Volz
  0 siblings, 1 reply; 23+ messages in thread
From: Sven Wegener @ 2008-08-14 13:32 UTC (permalink / raw)
  To: Julius Volz; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Thu, 14 Aug 2008, Julius Volz wrote:

> On Thu, Aug 14, 2008 at 12:27:19PM +0200, Julius Volz wrote:
> > On Thu, Aug 14, 2008 at 12:04:50PM +0200, Sven Wegener wrote:
> > > On Thu, 14 Aug 2008, Julius Volz wrote:
> > > 
> > > > On Wed, Aug 13, 2008 at 11:51:06PM +0200, Sven Wegener wrote:
> > > > > On Fri, 8 Aug 2008, Julius Volz wrote:
> > > > > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > > > > +{
> > > > > > +	struct sk_buff *msg;
> > > > > > +	void *reply;
> > > > > > +	int ret, cmd, reply_cmd;
> > > > > > +
> > > > > > +	mutex_lock(&__ip_vs_mutex);
> > > > > > +
> > > > > > +	cmd = info->genlhdr->cmd;
> > > > > > +
> > > > > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > > > > +	if (!msg) {
> > > > > > +		ret = -ENOMEM;
> > > > > > +		goto out_err;
> > > > > 
> > > > > Here you want out...
> > > > > 
> > > > > > +	}
> > > > > > +
> > > > > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > > > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > > > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > > > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > > > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > > > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > > > > +	else {
> > > > > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > > > > +		ret = -EINVAL;
> > > > > > +		goto out;
> > > > > 
> > > > > > ..and here you want out_err, to not leak msg.
> > > > 
> > > > Ouch, thanks! Fixed this and locked the mutex later. I also removed the
> > > > "if (msg)" from out_err, as it becomes unneeded now. Here's the updated
> > > > patch:
> > > 
> > > You missed the static on the register and unregister functions down at the 
> > > bottom. :) Also see my comments to your change regarding the above issue 
> > > down here.
> > 
> > Hrm, yes!
> > 
> > > > +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
> > > > +{
> > > > +	struct sk_buff *msg;
> > > > +	void *reply;
> > > > +	int ret, cmd, reply_cmd;
> > > > +
> > > > +	cmd = info->genlhdr->cmd;
> > > > +
> > > > +	if (cmd == IPVS_CMD_GET_SERVICE)
> > > > +		reply_cmd = IPVS_CMD_NEW_SERVICE;
> > > > +	else if (cmd == IPVS_CMD_GET_INFO)
> > > > +		reply_cmd = IPVS_CMD_SET_INFO;
> > > > +	else if (cmd == IPVS_CMD_GET_CONFIG)
> > > > +		reply_cmd = IPVS_CMD_SET_CONFIG;
> > > > +	else {
> > > > +		IP_VS_ERR("unknown Generic Netlink command\n");
> > > > +		ret = -EINVAL;
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > > > +	if (!msg) {
> > > > +		ret = -ENOMEM;
> > > > +		goto out;
> > > > +	}
> > > > +
> > > > +	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
> > > > +	if (reply == NULL)
> > > > +		goto nla_put_failure;
> > > 
> > > These gotos now unlock a not locked mutex down in the error path.
> > 
> > What did they put into my water supply :-/ Thanks!
> 
> Ok, fixed this up. I did not move the mutex_lock() all the way down, so
> that the code looks a bit nicer (nla_put_failure assumes a locked mutex).
> There should not be much concurrency anyway, since this mutex only
> serializes the userspace interface, which is mainly used by ipvsadm.

True, it was just a hint for an optimization. Looks good to me.

Acked-by: Sven Wegener <sven.wegener@stealer.net>

Should we get this into 2.6.27? It's a new interface, currently unused, so 
the chance of breaking anything is marginal.

Sven

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14 13:32               ` Sven Wegener
@ 2008-08-14 14:22                 ` Julius Volz
  2008-08-14 22:53                   ` Simon Horman
  0 siblings, 1 reply; 23+ messages in thread
From: Julius Volz @ 2008-08-14 14:22 UTC (permalink / raw)
  To: Sven Wegener; +Cc: netdev, lvs-devel, horms, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 3:32 PM, Sven Wegener <sven.wegener@stealer.net> wrote:
> On Thu, 14 Aug 2008, Julius Volz wrote:
>> Ok, fixed this up. The mutex is not completely moved down to make the
>> code look a bit nicer (nla_put_failure assumes locked mutex). There
>> should not be much concurrency anyways since this mutex only locks the
>> userspace interface, which is mainly used by ipvsadm.
>
> True, it was just a hint for an optimization. Looks good to me.
>
> Acked-by: Sven Wegener <sven.wegener@stealer.net>

Thanks for the help with this!

> Should we get this into 2.6.27? It's a new interface, currently unused, so
> the chance of breaking anything is marginal.

Yeah, it shouldn't break anything existing and that would be great!

Julius

-- 
Google Switzerland GmbH

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCHv3 2/2] IPVS: Add genetlink interface implementation
  2008-08-14 14:22                 ` Julius Volz
@ 2008-08-14 22:53                   ` Simon Horman
  0 siblings, 0 replies; 23+ messages in thread
From: Simon Horman @ 2008-08-14 22:53 UTC (permalink / raw)
  To: Julius Volz; +Cc: Sven Wegener, netdev, lvs-devel, kaber, davem, tgraf, vbusam

On Thu, Aug 14, 2008 at 04:22:33PM +0200, Julius Volz wrote:
> On Thu, Aug 14, 2008 at 3:32 PM, Sven Wegener <sven.wegener@stealer.net> wrote:
> > On Thu, 14 Aug 2008, Julius Volz wrote:
> >> Ok, fixed this up. The mutex is not completely moved down to make the
> >> code look a bit nicer (nla_put_failure assumes locked mutex). There
> >> should not be much concurrency anyways since this mutex only locks the
> >> userspace interface, which is mainly used by ipvsadm.
> >
> > True, it was just a hint for an optimization. Looks good to me.
> >
> > Acked-by: Sven Wegener <sven.wegener@stealer.net>
> 
> Thanks for the help with this!
> 
> > Should we get this into 2.6.27? It's a new interface, currently unused, so
> > the chance of breaking anything is marginal.
> 
> Yeah, it shouldn't break anything existing and that would be great!

Fine by me.

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2008-08-14 22:53 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-07 14:43 [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Julius Volz
2008-08-07 14:43 ` [PATCHv3 1/2] IPVS: Add genetlink interface definitions to ip_vs.h Julius Volz
2008-08-07 14:43 ` [PATCHv3 2/2] IPVS: Add genetlink interface implementation Julius Volz
2008-08-08 11:29   ` Julius Volz
2008-08-13 21:51     ` Sven Wegener
2008-08-13 21:53       ` Sven Wegener
2008-08-14  9:32       ` Julius Volz
2008-08-14  9:52         ` Simon Horman
2008-08-14 10:04         ` Sven Wegener
2008-08-14 10:27           ` Julius Volz
2008-08-14 12:08             ` Julius Volz
2008-08-14 13:32               ` Sven Wegener
2008-08-14 14:22                 ` Julius Volz
2008-08-14 22:53                   ` Simon Horman
2008-08-14  5:39     ` Sven Wegener
2008-08-08  2:26 ` [PATCHv3 0/2] IPVS: Add Generic Netlink configuration interface Simon Horman
2008-08-08 12:06   ` Julius Volz
2008-08-09 14:23     ` Simon Horman
2008-08-09 19:57       ` Julius Volz
2008-08-13 16:00       ` Julius Volz
2008-08-13 23:09         ` Simon Horman
2008-08-14  4:12           ` Simon Horman
2008-08-14  9:34             ` Julius Volz

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).