netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jon Maloy <jon.maloy@ericsson.com>
To: <davem@davemloft.net>, <netdev@vger.kernel.org>
Cc: <parthasarathy.bhuvaragan@ericsson.com>, <ying.xue@windriver.com>,
	<tipc-discussion@lists.sourceforge.net>
Subject: [net-next v2 12/18] tipc: introduce group anycast messaging
Date: Fri, 13 Oct 2017 11:04:28 +0200	[thread overview]
Message-ID: <1507885474-11213-13-git-send-email-jon.maloy@ericsson.com> (raw)
In-Reply-To: <1507885474-11213-1-git-send-email-jon.maloy@ericsson.com>

In this commit, we make it possible to send connectionless unicast
messages to any member corresponding to the given member identity,
when there is more than one such member. The sender must use a
TIPC_ADDR_NAME address to achieve this effect.

We also perform load balancing between the destinations, i.e., we
primarily select one which has advertised sufficient send window
to not cause a block/EAGAIN delay, if any. This mechanism is
overlayed on the always present round-robin selection.

Anycast messages are subject to the same start synchronization
and flow control mechanism as group broadcast messages.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
---
 net/tipc/group.c      |  7 +++++
 net/tipc/group.h      |  3 ++
 net/tipc/name_table.c | 41 +++++++++++++++++++++++++
 net/tipc/name_table.h |  3 ++
 net/tipc/socket.c     | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 138 insertions(+)

diff --git a/net/tipc/group.c b/net/tipc/group.c
index 18440be..16aaaa9 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -116,6 +116,13 @@ static bool tipc_group_is_receiver(struct tipc_member *m)
 	return m && m->state >= MBR_JOINED;
 }
 
+u32 tipc_group_exclude(struct tipc_group *grp)
+{
+	if (!grp->loopback)
+		return grp->portid;
+	return 0;
+}
+
 int tipc_group_size(struct tipc_group *grp)
 {
 	return grp->member_cnt;
diff --git a/net/tipc/group.h b/net/tipc/group.h
index 8f77290..e432066 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -49,6 +49,7 @@ void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
 struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
 void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
 		     int *scope);
+u32 tipc_group_exclude(struct tipc_group *grp);
 void tipc_group_filter_msg(struct tipc_group *grp,
 			   struct sk_buff_head *inputq,
 			   struct sk_buff_head *xmitq);
@@ -68,5 +69,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			       u32 port, struct sk_buff_head *xmitq);
 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
 void tipc_group_update_member(struct tipc_member *m, int len);
+struct tipc_member *tipc_group_find_sender(struct tipc_group *grp,
+					   u32 node, u32 port);
 int tipc_group_size(struct tipc_group *grp);
 #endif
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 114d72b..2856e19 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -597,6 +597,47 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
 	return ref;
 }
 
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+			 struct list_head *dsts, int *dstcnt, u32 exclude,
+			 bool all)
+{
+	u32 self = tipc_own_addr(net);
+	struct publication *publ;
+	struct name_info *info;
+	struct name_seq *seq;
+	struct sub_seq *sseq;
+
+	if (!tipc_in_scope(domain, self))
+		return false;
+
+	*dstcnt = 0;
+	rcu_read_lock();
+	seq = nametbl_find_seq(net, type);
+	if (unlikely(!seq))
+		goto exit;
+	spin_lock_bh(&seq->lock);
+	sseq = nameseq_find_subseq(seq, instance);
+	if (likely(sseq)) {
+		info = sseq->info;
+		list_for_each_entry(publ, &info->zone_list, zone_list) {
+			if (!tipc_in_scope(domain, publ->node))
+				continue;
+			if (publ->ref == exclude && publ->node == self)
+				continue;
+			tipc_dest_push(dsts, publ->node, publ->ref);
+			(*dstcnt)++;
+			if (all)
+				continue;
+			list_move_tail(&publ->zone_list, &info->zone_list);
+			break;
+		}
+	}
+	spin_unlock_bh(&seq->lock);
+exit:
+	rcu_read_unlock();
+	return !list_empty(dsts);
+}
+
 int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
 			      u32 limit, struct list_head *dports)
 {
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 97646b1..71926e4 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -107,6 +107,9 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
 void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
 				   u32 upper, u32 domain,
 				   struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+			 struct list_head *dsts, int *dstcnt, u32 exclude,
+			 bool all);
 struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 					 u32 upper, u32 scope, u32 port_ref,
 					 u32 key);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e71c8d2..66165e1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -905,6 +905,88 @@ static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
 }
 
 /**
+ * tipc_send_group_anycast - send message to any member with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
+				   int dlen, long timeout)
+{
+	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	struct sock *sk = sock->sk;
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct list_head *cong_links = &tsk->cong_links;
+	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+	struct tipc_group *grp = tsk->group;
+	struct tipc_member *first = NULL;
+	struct tipc_member *mbr = NULL;
+	struct net *net = sock_net(sk);
+	u32 node, port, exclude;
+	u32 type, inst, domain;
+	struct list_head dsts;
+	int lookups = 0;
+	int dstcnt, rc;
+	bool cong;
+
+	INIT_LIST_HEAD(&dsts);
+
+	type = dest->addr.name.name.type;
+	inst = dest->addr.name.name.instance;
+	domain = addr_domain(net, dest->scope);
+	exclude = tipc_group_exclude(grp);
+
+	while (++lookups < 4) {
+		first = NULL;
+
+		/* Look for a non-congested destination member, if any */
+		while (1) {
+			if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+						 &dstcnt, exclude, false))
+				return -EHOSTUNREACH;
+			tipc_dest_pop(&dsts, &node, &port);
+			cong = tipc_group_cong(grp, node, port, blks, &mbr);
+			if (!cong)
+				break;
+			if (mbr == first)
+				break;
+			if (!first)
+				first = mbr;
+		}
+
+		/* Start over if destination was not in member list */
+		if (unlikely(!mbr))
+			continue;
+
+		if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
+			break;
+
+		/* Block or return if destination link or member is congested */
+		rc = tipc_wait_for_cond(sock, &timeout,
+					!tipc_dest_find(cong_links, node, 0) &&
+					!tipc_group_cong(grp, node, port,
+							 blks, &mbr));
+		if (unlikely(rc))
+			return rc;
+
+		/* Send, unless destination disappeared while waiting */
+		if (likely(mbr))
+			break;
+	}
+
+	if (unlikely(lookups >= 4))
+		return -EHOSTUNREACH;
+
+	rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
+
+	return rc ? rc : dlen;
+}
+
+/**
  * tipc_send_group_bcast - send message to all members in communication group
  * @sk: socket structure
  * @m: message to send
@@ -1127,6 +1209,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 	if (grp) {
 		if (!dest)
 			return tipc_send_group_bcast(sock, m, dlen, timeout);
+		if (dest->addrtype == TIPC_ADDR_NAME)
+			return tipc_send_group_anycast(sock, m, dlen, timeout);
 		if (dest->addrtype == TIPC_ADDR_ID)
 			return tipc_send_group_unicast(sock, m, dlen, timeout);
 		return -EINVAL;
-- 
2.1.4

  parent reply	other threads:[~2017-10-13  9:04 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-13  9:04 [net-next v2 00/18] tipc: Introduce Communcation Group feature Jon Maloy
2017-10-13  9:04 ` [net-next v2 01/18] tipc: add ability to order and receive topology events in driver Jon Maloy
2017-10-13  9:04 ` [net-next v2 02/18] tipc: improve address sanity check in tipc_connect() Jon Maloy
2017-10-13  9:04 ` [net-next v2 03/18] tipc: add ability to obtain node availability status from other files Jon Maloy
2017-10-13  9:04 ` [net-next v2 04/18] tipc: refactor function filter_rcv() Jon Maloy
2017-10-13  9:04 ` [net-next v2 05/18] tipc: add new function for sending multiple small messages Jon Maloy
2017-10-13  9:04 ` [net-next v2 06/18] tipc: improve destination linked list Jon Maloy
2017-10-13  9:04 ` [net-next v2 07/18] tipc: introduce communication groups Jon Maloy
2017-10-13  9:04 ` [net-next v2 08/18] tipc: add second source address to recvmsg()/recvfrom() Jon Maloy
2017-10-13  9:04 ` [net-next v2 09/18] tipc: receive group membership events via member socket Jon Maloy
2017-10-13  9:04 ` [net-next v2 10/18] tipc: introduce flow control for group broadcast messages Jon Maloy
2017-10-13  9:04 ` [net-next v2 11/18] tipc: introduce group unicast messaging Jon Maloy
2017-10-13  9:04 ` Jon Maloy [this message]
2017-10-13  9:04 ` [net-next v2 13/18] tipc: introduce group multicast messaging Jon Maloy
2017-10-13  9:04 ` [net-next v2 14/18] tipc: guarantee group unicast doesn't bypass group broadcast Jon Maloy
2017-10-13  9:04 ` [net-next v2 15/18] tipc: guarantee that group broadcast doesn't bypass group unicast Jon Maloy
2017-10-13  9:04 ` [net-next v2 16/18] tipc: guarantee delivery of UP event before first broadcast Jon Maloy
2017-10-13  9:04 ` [net-next v2 17/18] tipc: guarantee delivery of last broadcast before DOWN event Jon Maloy
2017-10-13  9:04 ` [net-next v2 18/18] tipc: add multipoint-to-point flow control Jon Maloy
2017-10-13 15:51 ` [net-next v2 00/18] tipc: Introduce Communcation Group feature David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1507885474-11213-13-git-send-email-jon.maloy@ericsson.com \
    --to=jon.maloy@ericsson.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=parthasarathy.bhuvaragan@ericsson.com \
    --cc=tipc-discussion@lists.sourceforge.net \
    --cc=ying.xue@windriver.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).