Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next] sfc: declare module version (same as ethtool drvinfo version)
From: David Miller @ 2017-01-03 15:54 UTC (permalink / raw)
  To: ecree; +Cc: linux-net-drivers, bkenward, netdev
In-Reply-To: <3453b0f7-a522-e332-590a-e04d4d4b50a5@solarflare.com>

From: Edward Cree <ecree@solarflare.com>
Date: Tue, 3 Jan 2017 15:46:00 +0000

> Signed-off-by: Edward Cree <ecree@solarflare.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] sfc-falcon: declare module version (same as ethtool drvinfo version)
From: David Miller @ 2017-01-03 15:54 UTC (permalink / raw)
  To: ecree; +Cc: linux-net-drivers, bkenward, netdev
In-Reply-To: <f718c632-3a59-b841-1836-0a6f04553489@solarflare.com>

From: Edward Cree <ecree@solarflare.com>
Date: Tue, 3 Jan 2017 15:46:15 +0000

> Signed-off-by: Edward Cree <ecree@solarflare.com>

Applied.

^ permalink raw reply

* [net-next 0/3] tipc: improve interaction socket-link
From: Jon Maloy @ 2017-01-03 15:55 UTC (permalink / raw)
  To: davem; +Cc: Jon Maloy, netdev, tipc-discussion

We fix a very real starvation problem that may occur when a link
encounters send buffer congestion. At the same time we make the 
interaction between the socket and link layer simpler and more 
consistent.

Jon Maloy (3):
  tipc: unify tipc_wait_for_sndpkt() and tipc_wait_for_sndmsg()
    functions
  tipc: modify struct tipc_plist to be more versatile
  tipc: reduce risk of user starvation during link congestion

 net/tipc/bcast.c      |   6 +-
 net/tipc/link.c       |  75 ++++-----
 net/tipc/msg.h        |   2 -
 net/tipc/name_table.c | 100 +++++++----
 net/tipc/name_table.h |  21 +--
 net/tipc/node.c       |  15 +-
 net/tipc/socket.c     | 449 ++++++++++++++++++++++----------------------------
 7 files changed, 319 insertions(+), 349 deletions(-)

-- 
2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot

^ permalink raw reply

* [net-next 1/3] tipc: unify tipc_wait_for_sndpkt() and tipc_wait_for_sndmsg() functions
From: Jon Maloy @ 2017-01-03 15:55 UTC (permalink / raw)
  To: davem; +Cc: Jon Maloy, netdev, tipc-discussion
In-Reply-To: <1483458911-32549-1-git-send-email-jon.maloy@ericsson.com>

The functions tipc_wait_for_sndpkt() and tipc_wait_for_sndmsg() are very
similar. The latter function is also called from two locations, and
there will be more in the coming commits, which will all need to test on
different conditions.

Instead of making yet another duplicate of the function, we now
introduce a new macro tipc_wait_for_cond() where the wakeup condition
can be stated as an argument to the call. This macro replaces all
current and future uses of the two functions, which can now be
eliminated.

Acked-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
---
 net/tipc/socket.c | 108 +++++++++++++++++++++++++-----------------------------
 1 file changed, 49 insertions(+), 59 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 800caaa..f27462e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -110,7 +110,6 @@ static void tipc_write_space(struct sock *sk);
 static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
-static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
 static void tipc_sk_timeout(unsigned long data);
 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 			   struct tipc_name_seq const *seq);
@@ -334,6 +333,49 @@ static int tipc_set_sk_state(struct sock *sk, int state)
 	return res;
 }
 
+static int tipc_sk_sock_err(struct socket *sock, long *timeout)
+{
+	struct sock *sk = sock->sk;
+	int err = sock_error(sk);
+	int typ = sock->type;
+
+	if (err)
+		return err;
+	if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) {
+		if (sk->sk_state == TIPC_DISCONNECTING)
+			return -EPIPE;
+		else if (!tipc_sk_connected(sk))
+			return -ENOTCONN;
+	}
+	if (!*timeout)
+		return -EAGAIN;
+	if (signal_pending(current))
+		return sock_intr_errno(*timeout);
+
+	return 0;
+}
+
+#define tipc_wait_for_cond(sock_, timeout_, condition_)			\
+({								        \
+	int rc_ = 0;							\
+	int done_ = 0;							\
+									\
+	while (!(condition_) && !done_) {				\
+		struct sock *sk_ = sock->sk;				\
+		DEFINE_WAIT_FUNC(wait_, woken_wake_function);		\
+									\
+		rc_ = tipc_sk_sock_err(sock_, timeout_);		\
+		if (rc_)						\
+			break;						\
+		prepare_to_wait(sk_sleep(sk_), &wait_,			\
+				TASK_INTERRUPTIBLE);			\
+		done_ = sk_wait_event(sk_, timeout_,			\
+				      (condition_), &wait_);		\
+		remove_wait_queue(sk_sleep(sk_), &wait_);		\
+	}								\
+	rc_;								\
+})
+
 /**
  * tipc_sk_create - create a TIPC socket
  * @net: network namespace (must be default network)
@@ -721,7 +763,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
 
 		if (rc == -ELINKCONG) {
 			tsk->link_cong = 1;
-			rc = tipc_wait_for_sndmsg(sock, &timeo);
+			rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
 			if (!rc)
 				continue;
 		}
@@ -830,31 +872,6 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
 	kfree_skb(skb);
 }
 
-static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct sock *sk = sock->sk;
-	struct tipc_sock *tsk = tipc_sk(sk);
-	int done;
-
-	do {
-		int err = sock_error(sk);
-		if (err)
-			return err;
-		if (sk->sk_shutdown & SEND_SHUTDOWN)
-			return -EPIPE;
-		if (!*timeo_p)
-			return -EAGAIN;
-		if (signal_pending(current))
-			return sock_intr_errno(*timeo_p);
-
-		add_wait_queue(sk_sleep(sk), &wait);
-		done = sk_wait_event(sk, timeo_p, !tsk->link_cong, &wait);
-		remove_wait_queue(sk_sleep(sk), &wait);
-	} while (!done);
-	return 0;
-}
-
 /**
  * tipc_sendmsg - send message in connectionless manner
  * @sock: socket structure
@@ -970,7 +987,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 		}
 		if (rc == -ELINKCONG) {
 			tsk->link_cong = 1;
-			rc = tipc_wait_for_sndmsg(sock, &timeo);
+			rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
 			if (!rc)
 				continue;
 		}
@@ -985,36 +1002,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 	return rc;
 }
 
-static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
-{
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-	struct sock *sk = sock->sk;
-	struct tipc_sock *tsk = tipc_sk(sk);
-	int done;
-
-	do {
-		int err = sock_error(sk);
-		if (err)
-			return err;
-		if (sk->sk_state == TIPC_DISCONNECTING)
-			return -EPIPE;
-		else if (!tipc_sk_connected(sk))
-			return -ENOTCONN;
-		if (!*timeo_p)
-			return -EAGAIN;
-		if (signal_pending(current))
-			return sock_intr_errno(*timeo_p);
-
-		add_wait_queue(sk_sleep(sk), &wait);
-		done = sk_wait_event(sk, timeo_p,
-				     (!tsk->link_cong &&
-				      !tsk_conn_cong(tsk)) ||
-				      !tipc_sk_connected(sk), &wait);
-		remove_wait_queue(sk_sleep(sk), &wait);
-	} while (!done);
-	return 0;
-}
-
 /**
  * tipc_send_stream - send stream-oriented data
  * @sock: socket structure
@@ -1109,7 +1096,10 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 
 			tsk->link_cong = 1;
 		}
-		rc = tipc_wait_for_sndpkt(sock, &timeo);
+		rc = tipc_wait_for_cond(sock, &timeo,
+					(!tsk->link_cong &&
+					 !tsk_conn_cong(tsk) &&
+					 tipc_sk_connected(sk)));
 	} while (!rc);
 
 	__skb_queue_purge(&pktchain);
-- 
2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot

^ permalink raw reply related

* [net-next 2/3] tipc: modify struct tipc_plist to be more versatile
From: Jon Maloy @ 2017-01-03 15:55 UTC (permalink / raw)
  To: davem; +Cc: Jon Maloy, netdev, tipc-discussion
In-Reply-To: <1483458911-32549-1-git-send-email-jon.maloy@ericsson.com>

During multicast reception we currently use a simple linked list with
push/pop semantics to store port numbers.

We now see a need for a more generic list for storing values of type
u32. We therefore make some modifications to this list, while replacing
the prefix 'tipc_plist_' with 'u32_'. We also add a couple of new
functions which will be used in the next commits.

Acked-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
---
 net/tipc/name_table.c | 100 ++++++++++++++++++++++++++++++++++++--------------
 net/tipc/name_table.h |  21 ++++-------
 net/tipc/socket.c     |   8 ++--
 3 files changed, 83 insertions(+), 46 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index e190460..5a86df1 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -608,7 +608,7 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance,
  * Returns non-zero if any off-node ports overlap
  */
 int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
-			      u32 limit, struct tipc_plist *dports)
+			      u32 limit, struct list_head *dports)
 {
 	struct name_seq *seq;
 	struct sub_seq *sseq;
@@ -633,7 +633,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
 		info = sseq->info;
 		list_for_each_entry(publ, &info->node_list, node_list) {
 			if (publ->scope <= limit)
-				tipc_plist_push(dports, publ->ref);
+				u32_push(dports, publ->ref);
 		}
 
 		if (info->cluster_list_size != info->node_list_size)
@@ -1022,40 +1022,84 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-void tipc_plist_push(struct tipc_plist *pl, u32 port)
+struct u32_item {
+	struct list_head list;
+	u32 value;
+};
+
+bool u32_find(struct list_head *l, u32 value)
 {
-	struct tipc_plist *nl;
+	struct u32_item *item;
 
-	if (likely(!pl->port)) {
-		pl->port = port;
-		return;
+	list_for_each_entry(item, l, list) {
+		if (item->value == value)
+			return true;
 	}
-	if (pl->port == port)
-		return;
-	list_for_each_entry(nl, &pl->list, list) {
-		if (nl->port == port)
-			return;
+	return false;
+}
+
+bool u32_push(struct list_head *l, u32 value)
+{
+	struct u32_item *item;
+
+	list_for_each_entry(item, l, list) {
+		if (item->value == value)
+			return false;
+	}
+	item = kmalloc(sizeof(*item), GFP_ATOMIC);
+	if (unlikely(!item))
+		return false;
+
+	item->value = value;
+	list_add(&item->list, l);
+	return true;
+}
+
+u32 u32_pop(struct list_head *l)
+{
+	struct u32_item *item;
+	u32 value = 0;
+
+	if (list_empty(l))
+		return 0;
+	item = list_first_entry(l, typeof(*item), list);
+	value = item->value;
+	list_del(&item->list);
+	kfree(item);
+	return value;
+}
+
+bool u32_del(struct list_head *l, u32 value)
+{
+	struct u32_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, l, list) {
+		if (item->value != value)
+			continue;
+		list_del(&item->list);
+		kfree(item);
+		return true;
 	}
-	nl = kmalloc(sizeof(*nl), GFP_ATOMIC);
-	if (nl) {
-		nl->port = port;
-		list_add(&nl->list, &pl->list);
+	return false;
+}
+
+void u32_list_purge(struct list_head *l)
+{
+	struct u32_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, l, list) {
+		list_del(&item->list);
+		kfree(item);
 	}
 }
 
-u32 tipc_plist_pop(struct tipc_plist *pl)
+int u32_list_len(struct list_head *l)
 {
-	struct tipc_plist *nl;
-	u32 port = 0;
+	struct u32_item *item;
+	int i = 0;
 
-	if (likely(list_empty(&pl->list))) {
-		port = pl->port;
-		pl->port = 0;
-		return port;
+	list_for_each_entry(item, l, list) {
+		i++;
 	}
-	nl = list_first_entry(&pl->list, typeof(*nl), list);
-	port = nl->port;
-	list_del(&nl->list);
-	kfree(nl);
-	return port;
+	return i;
 }
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 1524a73..c89bb3f 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -99,7 +99,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
 u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
 int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
-			      u32 limit, struct tipc_plist *dports);
+			      u32 limit, struct list_head *dports);
 struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
 					 u32 upper, u32 scope, u32 port_ref,
 					 u32 key);
@@ -116,18 +116,11 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
 int tipc_nametbl_init(struct net *net);
 void tipc_nametbl_stop(struct net *net);
 
-struct tipc_plist {
-	struct list_head list;
-	u32 port;
-};
-
-static inline void tipc_plist_init(struct tipc_plist *pl)
-{
-	INIT_LIST_HEAD(&pl->list);
-	pl->port = 0;
-}
-
-void tipc_plist_push(struct tipc_plist *pl, u32 port);
-u32 tipc_plist_pop(struct tipc_plist *pl);
+bool u32_push(struct list_head *l, u32 value);
+u32 u32_pop(struct list_head *l);
+bool u32_find(struct list_head *l, u32 value);
+bool u32_del(struct list_head *l, u32 value);
+void u32_list_purge(struct list_head *l);
+int u32_list_len(struct list_head *l);
 
 #endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index f27462e..fae6a55 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -788,7 +788,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		       struct sk_buff_head *inputq)
 {
 	struct tipc_msg *msg;
-	struct tipc_plist dports;
+	struct list_head dports;
 	u32 portid;
 	u32 scope = TIPC_CLUSTER_SCOPE;
 	struct sk_buff_head tmpq;
@@ -796,7 +796,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 	struct sk_buff *skb, *_skb;
 
 	__skb_queue_head_init(&tmpq);
-	tipc_plist_init(&dports);
+	INIT_LIST_HEAD(&dports);
 
 	skb = tipc_skb_peek(arrvq, &inputq->lock);
 	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
@@ -810,8 +810,8 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		tipc_nametbl_mc_translate(net,
 					  msg_nametype(msg), msg_namelower(msg),
 					  msg_nameupper(msg), scope, &dports);
-		portid = tipc_plist_pop(&dports);
-		for (; portid; portid = tipc_plist_pop(&dports)) {
+		portid = u32_pop(&dports);
+		for (; portid; portid = u32_pop(&dports)) {
 			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
 			if (_skb) {
 				msg_set_destport(buf_msg(_skb), portid);
-- 
2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot

^ permalink raw reply related

* [net-next 3/3] tipc: reduce risk of user starvation during link congestion
From: Jon Maloy @ 2017-01-03 15:55 UTC (permalink / raw)
  To: davem; +Cc: Jon Maloy, netdev, tipc-discussion
In-Reply-To: <1483458911-32549-1-git-send-email-jon.maloy@ericsson.com>

The socket code currently handles link congestion by either blocking
and trying to send again when the congestion has abated, or just
returning to the user with -EAGAIN and letting them retry later.

This mechanism is prone to starvation, because the wakeup algorithm is
non-atomic. Between the moment the link issues a wakeup signal and the
moment the socket wakes up and re-attempts sending, other senders may
have come in and occupied the free buffer space in the link. This in turn
may lead to a socket having to make many send attempts before it is
successful. In extremely loaded systems we have observed latency times
of several seconds before a low-priority socket is able to send out a
message.

In this commit, we simplify this mechanism and reduce the risk of the
described scenario happening. When an attempt is made to send a message
via a congested link, we now let it be added to the link's backlog queue
anyway, thus permitting an oversubscription of one message per source
socket. We still create a wakeup item and return an error code, hence
instructing the sender to block or stop sending. Only when enough space
has been freed up in the link's backlog queue do we issue a wakeup event
that allows the sender to continue with the next message, if any.

The fact that a socket now can consider a message sent even when the
link returns a congestion code means that the sending socket code can
be simplified. Also, since this is a good opportunity to get rid of the
obsolete 'mtu change' condition in the three socket send functions, we
now choose to refactor those functions completely.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
---
 net/tipc/bcast.c  |   6 +-
 net/tipc/link.c   |  75 +++++-------
 net/tipc/msg.h    |   2 -
 net/tipc/node.c   |  15 +--
 net/tipc/socket.c | 347 ++++++++++++++++++++++++------------------------------
 5 files changed, 194 insertions(+), 251 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index aa1babb..c35fad3 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -174,7 +174,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq)
  *                    and to identified node local sockets
  * @net: the applicable net namespace
  * @list: chain of buffers containing message
- * Consumes the buffer chain, except when returning -ELINKCONG
+ * Consumes the buffer chain.
  * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
  */
 int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
@@ -197,7 +197,7 @@ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
 	tipc_bcast_unlock(net);
 
 	/* Don't send to local node if adding to link failed */
-	if (unlikely(rc)) {
+	if (unlikely(rc && (rc != -ELINKCONG))) {
 		__skb_queue_purge(&rcvq);
 		return rc;
 	}
@@ -206,7 +206,7 @@ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list)
 	tipc_bcbase_xmit(net, &xmitq);
 	tipc_sk_mcast_rcv(net, &rcvq, &inputq);
 	__skb_queue_purge(list);
-	return 0;
+	return rc;
 }
 
 /* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bda89bf..b758ca8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -776,60 +776,47 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 
 /**
  * link_schedule_user - schedule a message sender for wakeup after congestion
- * @link: congested link
- * @list: message that was attempted sent
+ * @l: congested link
+ * @hdr: header of message that is being sent
  * Create pseudo msg to send back to user when congestion abates
- * Does not consume buffer list
  */
-static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
+static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
 {
-	struct tipc_msg *msg = buf_msg(skb_peek(list));
-	int imp = msg_importance(msg);
-	u32 oport = msg_origport(msg);
-	u32 addr = tipc_own_addr(link->net);
+	u32 dnode = tipc_own_addr(l->net);
+	u32 dport = msg_origport(hdr);
 	struct sk_buff *skb;
 
-	/* This really cannot happen...  */
-	if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) {
-		pr_warn("%s<%s>, send queue full", link_rst_msg, link->name);
-		return -ENOBUFS;
-	}
-	/* Non-blocking sender: */
-	if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending)
-		return -ELINKCONG;
-
 	/* Create and schedule wakeup pseudo message */
 	skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
-			      addr, addr, oport, 0, 0);
+			      dnode, l->addr, dport, 0, 0);
 	if (!skb)
 		return -ENOBUFS;
-	TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list);
-	TIPC_SKB_CB(skb)->chain_imp = imp;
-	skb_queue_tail(&link->wakeupq, skb);
-	link->stats.link_congs++;
+	msg_set_dest_droppable(buf_msg(skb), true);
+	TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
+	skb_queue_tail(&l->wakeupq, skb);
+	l->stats.link_congs++;
 	return -ELINKCONG;
 }
 
 /**
  * link_prepare_wakeup - prepare users for wakeup after congestion
- * @link: congested link
- * Move a number of waiting users, as permitted by available space in
- * the send queue, from link wait queue to node wait queue for wakeup
+ * @l: congested link
+ * Wake up a number of waiting users, as permitted by available space
+ * in the send queue
  */
 void link_prepare_wakeup(struct tipc_link *l)
 {
-	int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,};
-	int imp, lim;
 	struct sk_buff *skb, *tmp;
+	int imp, i = 0;
 
 	skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
 		imp = TIPC_SKB_CB(skb)->chain_imp;
-		lim = l->backlog[imp].limit;
-		pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
-		if ((pnd[imp] + l->backlog[imp].len) >= lim)
+		if (l->backlog[imp].len < l->backlog[imp].limit) {
+			skb_unlink(skb, &l->wakeupq);
+			skb_queue_tail(l->inputq, skb);
+		} else if (i++ > 10) {
 			break;
-		skb_unlink(skb, &l->wakeupq);
-		skb_queue_tail(l->inputq, skb);
+		}
 	}
 }
 
@@ -869,8 +856,7 @@ void tipc_link_reset(struct tipc_link *l)
  * @list: chain of buffers containing message
  * @xmitq: returned list of packets to be sent by caller
  *
- * Consumes the buffer chain, except when returning -ELINKCONG,
- * since the caller then may want to make more send attempts.
+ * Consumes the buffer chain.
  * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
  * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
  */
@@ -879,7 +865,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 {
 	struct tipc_msg *hdr = buf_msg(skb_peek(list));
 	unsigned int maxwin = l->window;
-	unsigned int i, imp = msg_importance(hdr);
+	int imp = msg_importance(hdr);
 	unsigned int mtu = l->mtu;
 	u16 ack = l->rcv_nxt - 1;
 	u16 seqno = l->snd_nxt;
@@ -888,19 +874,22 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 	struct sk_buff_head *backlogq = &l->backlogq;
 	struct sk_buff *skb, *_skb, *bskb;
 	int pkt_cnt = skb_queue_len(list);
+	int rc = 0;
 
-	/* Match msg importance against this and all higher backlog limits: */
-	if (!skb_queue_empty(backlogq)) {
-		for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
-			if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
-				return link_schedule_user(l, list);
-		}
-	}
 	if (unlikely(msg_size(hdr) > mtu)) {
 		skb_queue_purge(list);
 		return -EMSGSIZE;
 	}
 
+	/* Allow oversubscription of one data msg per source at congestion */
+	if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
+		if (imp == TIPC_SYSTEM_IMPORTANCE) {
+			pr_warn("%s<%s>, link overflow", link_rst_msg, l->name);
+			return -ENOBUFS;
+		}
+		rc = link_schedule_user(l, hdr);
+	}
+
 	if (pkt_cnt > 1) {
 		l->stats.sent_fragmented++;
 		l->stats.sent_fragments += pkt_cnt;
@@ -946,7 +935,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
 		skb_queue_splice_tail_init(list, backlogq);
 	}
 	l->snd_nxt = seqno;
-	return 0;
+	return rc;
 }
 
 void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 8d40861..850ae0e 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -98,8 +98,6 @@ struct tipc_skb_cb {
 	u32 bytes_read;
 	struct sk_buff *tail;
 	bool validated;
-	bool wakeup_pending;
-	u16 chain_sz;
 	u16 chain_imp;
 	u16 ackers;
 };
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9d2f4c2..2883f6a 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1167,7 +1167,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
  * @list: chain of buffers containing message
  * @dnode: address of destination node
  * @selector: a number used for deterministic link selection
- * Consumes the buffer chain, except when returning -ELINKCONG
+ * Consumes the buffer chain.
  * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF
  */
 int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
@@ -1206,10 +1206,10 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
 	spin_unlock_bh(&le->lock);
 	tipc_node_read_unlock(n);
 
-	if (likely(rc == 0))
-		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
-	else if (rc == -ENOBUFS)
+	if (unlikely(rc == -ENOBUFS))
 		tipc_node_link_down(n, bearer_id, false);
+	else
+		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
 
 	tipc_node_put(n);
 
@@ -1221,20 +1221,15 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
  * messages, which will not be rejected
  * The only exception is datagram messages rerouted after secondary
  * lookup, which are rare and safe to dispose of anyway.
- * TODO: Return real return value, and let callers use
- * tipc_wait_for_sendpkt() where applicable
  */
 int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
 		       u32 selector)
 {
 	struct sk_buff_head head;
-	int rc;
 
 	skb_queue_head_init(&head);
 	__skb_queue_tail(&head, skb);
-	rc = tipc_node_xmit(net, &head, dnode, selector);
-	if (rc == -ELINKCONG)
-		kfree_skb(skb);
+	tipc_node_xmit(net, &head, dnode, selector);
 	return 0;
 }
 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index fae6a55..d2f3539 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -67,12 +67,14 @@ enum {
  * @max_pkt: maximum packet size "hint" used when building messages sent by port
  * @portid: unique port identity in TIPC socket hash table
  * @phdr: preformatted message header used when sending messages
+ * @cong_links: list of congested links
  * @publications: list of publications for port
+ * @blocking_link: address of the congested link we are currently sleeping on
  * @pub_count: total # of publications port has made during its lifetime
  * @probing_state:
  * @conn_timeout: the time we can wait for an unresponded setup request
  * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
- * @link_cong: non-zero if owner must sleep because of link congestion
+ * @cong_link_cnt: number of congested links
  * @sent_unacked: # messages sent by socket, and not yet acked by peer
  * @rcv_unacked: # messages read by user, but not yet acked back to peer
  * @peer: 'connected' peer for dgram/rdm
@@ -87,13 +89,13 @@ struct tipc_sock {
 	u32 max_pkt;
 	u32 portid;
 	struct tipc_msg phdr;
-	struct list_head sock_list;
+	struct list_head cong_links;
 	struct list_head publications;
 	u32 pub_count;
 	uint conn_timeout;
 	atomic_t dupl_rcvcnt;
 	bool probe_unacked;
-	bool link_cong;
+	u16 cong_link_cnt;
 	u16 snt_unacked;
 	u16 snd_win;
 	u16 peer_caps;
@@ -118,8 +120,7 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
 static int tipc_sk_insert(struct tipc_sock *tsk);
 static void tipc_sk_remove(struct tipc_sock *tsk);
-static int __tipc_send_stream(struct socket *sock, struct msghdr *m,
-			      size_t dsz);
+static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 
 static const struct proto_ops packet_ops;
@@ -424,6 +425,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	tsk = tipc_sk(sk);
 	tsk->max_pkt = MAX_PKT_DEFAULT;
 	INIT_LIST_HEAD(&tsk->publications);
+	INIT_LIST_HEAD(&tsk->cong_links);
 	msg = &tsk->phdr;
 	tn = net_generic(sock_net(sk), tipc_net_id);
 	tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
@@ -474,9 +476,14 @@ static void __tipc_shutdown(struct socket *sock, int error)
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
+	long timeout = CONN_TIMEOUT_DEFAULT;
 	u32 dnode = tsk_peer_node(tsk);
 	struct sk_buff *skb;
 
+	/* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */
+	tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt &&
+					    !tsk_conn_cong(tsk)));
+
 	/* Reject all unreceived messages, except on an active connection
 	 * (which disconnects locally & sends a 'FIN+' to peer).
 	 */
@@ -547,7 +554,8 @@ static int tipc_release(struct socket *sock)
 
 	/* Reject any messages that accumulated in backlog queue */
 	release_sock(sk);
-
+	u32_list_purge(&tsk->cong_links);
+	tsk->cong_link_cnt = 0;
 	call_rcu(&tsk->rcu, tipc_sk_callback);
 	sock->sk = NULL;
 
@@ -690,7 +698,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 
 	switch (sk->sk_state) {
 	case TIPC_ESTABLISHED:
-		if (!tsk->link_cong && !tsk_conn_cong(tsk))
+		if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
 			mask |= POLLOUT;
 		/* fall thru' */
 	case TIPC_LISTEN:
@@ -699,7 +707,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 			mask |= (POLLIN | POLLRDNORM);
 		break;
 	case TIPC_OPEN:
-		if (!tsk->link_cong)
+		if (!tsk->cong_link_cnt)
 			mask |= POLLOUT;
 		if (tipc_sk_type_connectionless(sk) &&
 		    (!skb_queue_empty(&sk->sk_receive_queue)))
@@ -718,63 +726,48 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
  * @sock: socket structure
  * @seq: destination address
  * @msg: message to send
- * @dsz: total length of message data
- * @timeo: timeout to wait for wakeup
+ * @dlen: length of data to send
+ * @timeout: timeout to wait for wakeup
  *
  * Called from function tipc_sendmsg(), which has done all sanity checks
  * Returns the number of bytes sent on success, or errno
  */
 static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
-			  struct msghdr *msg, size_t dsz, long timeo)
+			  struct msghdr *msg, size_t dlen, long timeout)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
-	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head pktchain;
-	struct iov_iter save = msg->msg_iter;
-	uint mtu;
+	int mtu = tipc_bcast_get_mtu(net);
+	struct sk_buff_head pkts;
 	int rc;
 
-	if (!timeo && tsk->link_cong)
-		return -ELINKCONG;
-
-	msg_set_type(mhdr, TIPC_MCAST_MSG);
-	msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
-	msg_set_destport(mhdr, 0);
-	msg_set_destnode(mhdr, 0);
-	msg_set_nametype(mhdr, seq->type);
-	msg_set_namelower(mhdr, seq->lower);
-	msg_set_nameupper(mhdr, seq->upper);
-	msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
-
-	skb_queue_head_init(&pktchain);
+	rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
+	if (unlikely(rc))
+		return rc;
 
-new_mtu:
-	mtu = tipc_bcast_get_mtu(net);
-	rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
-	if (unlikely(rc < 0))
+	msg_set_type(hdr, TIPC_MCAST_MSG);
+	msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
+	msg_set_destport(hdr, 0);
+	msg_set_destnode(hdr, 0);
+	msg_set_nametype(hdr, seq->type);
+	msg_set_namelower(hdr, seq->lower);
+	msg_set_nameupper(hdr, seq->upper);
+	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
+
+	skb_queue_head_init(&pkts);
+	rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts);
+	if (unlikely(rc != dlen))
 		return rc;
 
-	do {
-		rc = tipc_bcast_xmit(net, &pktchain);
-		if (likely(!rc))
-			return dsz;
-
-		if (rc == -ELINKCONG) {
-			tsk->link_cong = 1;
-			rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
-			if (!rc)
-				continue;
-		}
-		__skb_queue_purge(&pktchain);
-		if (rc == -EMSGSIZE) {
-			msg->msg_iter = save;
-			goto new_mtu;
-		}
-		break;
-	} while (1);
-	return rc;
+	rc = tipc_bcast_xmit(net, &pkts);
+	if (unlikely(rc == -ELINKCONG)) {
+		tsk->cong_link_cnt = 1;
+		rc = 0;
+	}
+
+	return rc ? rc : dlen;
 }
 
 /**
@@ -898,35 +891,38 @@ static int tipc_sendmsg(struct socket *sock,
 	return ret;
 }
 
-static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
+static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
 {
-	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 	struct sock *sk = sock->sk;
-	struct tipc_sock *tsk = tipc_sk(sk);
 	struct net *net = sock_net(sk);
-	struct tipc_msg *mhdr = &tsk->phdr;
-	u32 dnode, dport;
-	struct sk_buff_head pktchain;
-	bool is_connectionless = tipc_sk_type_connectionless(sk);
-	struct sk_buff *skb;
+	struct tipc_sock *tsk = tipc_sk(sk);
+	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+	struct list_head *clinks = &tsk->cong_links;
+	bool syn = !tipc_sk_type_connectionless(sk);
+	struct tipc_msg *hdr = &tsk->phdr;
 	struct tipc_name_seq *seq;
-	struct iov_iter save;
-	u32 mtu;
-	long timeo;
-	int rc;
+	struct sk_buff_head pkts;
+	u32 type, inst, domain;
+	u32 dnode, dport;
+	int mtu, rc;
 
-	if (dsz > TIPC_MAX_USER_MSG_SIZE)
+	if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
 		return -EMSGSIZE;
+
 	if (unlikely(!dest)) {
-		if (is_connectionless && tsk->peer.family == AF_TIPC)
-			dest = &tsk->peer;
-		else
+		dest = &tsk->peer;
+		if (!syn || dest->family != AF_TIPC)
 			return -EDESTADDRREQ;
-	} else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
-		   dest->family != AF_TIPC) {
-		return -EINVAL;
 	}
-	if (!is_connectionless) {
+
+	if (unlikely(m->msg_namelen < sizeof(*dest)))
+		return -EINVAL;
+
+	if (unlikely(dest->family != AF_TIPC))
+		return -EINVAL;
+
+	if (unlikely(syn)) {
 		if (sk->sk_state == TIPC_LISTEN)
 			return -EPIPE;
 		if (sk->sk_state != TIPC_OPEN)
@@ -938,72 +934,62 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
 			tsk->conn_instance = dest->addr.name.name.instance;
 		}
 	}
-	seq = &dest->addr.nameseq;
-	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
 
-	if (dest->addrtype == TIPC_ADDR_MCAST) {
-		return tipc_sendmcast(sock, seq, m, dsz, timeo);
-	} else if (dest->addrtype == TIPC_ADDR_NAME) {
-		u32 type = dest->addr.name.name.type;
-		u32 inst = dest->addr.name.name.instance;
-		u32 domain = dest->addr.name.domain;
+	seq = &dest->addr.nameseq;
+	if (dest->addrtype == TIPC_ADDR_MCAST)
+		return tipc_sendmcast(sock, seq, m, dlen, timeout);
 
+	if (dest->addrtype == TIPC_ADDR_NAME) {
+		type = dest->addr.name.name.type;
+		inst = dest->addr.name.name.instance;
+		domain = dest->addr.name.domain;
 		dnode = domain;
-		msg_set_type(mhdr, TIPC_NAMED_MSG);
-		msg_set_hdr_sz(mhdr, NAMED_H_SIZE);
-		msg_set_nametype(mhdr, type);
-		msg_set_nameinst(mhdr, inst);
-		msg_set_lookup_scope(mhdr, tipc_addr_scope(domain));
+		msg_set_type(hdr, TIPC_NAMED_MSG);
+		msg_set_hdr_sz(hdr, NAMED_H_SIZE);
+		msg_set_nametype(hdr, type);
+		msg_set_nameinst(hdr, inst);
+		msg_set_lookup_scope(hdr, tipc_addr_scope(domain));
 		dport = tipc_nametbl_translate(net, type, inst, &dnode);
-		msg_set_destnode(mhdr, dnode);
-		msg_set_destport(mhdr, dport);
+		msg_set_destnode(hdr, dnode);
+		msg_set_destport(hdr, dport);
 		if (unlikely(!dport && !dnode))
 			return -EHOSTUNREACH;
+
 	} else if (dest->addrtype == TIPC_ADDR_ID) {
 		dnode = dest->addr.id.node;
-		msg_set_type(mhdr, TIPC_DIRECT_MSG);
-		msg_set_lookup_scope(mhdr, 0);
-		msg_set_destnode(mhdr, dnode);
-		msg_set_destport(mhdr, dest->addr.id.ref);
-		msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
+		msg_set_type(hdr, TIPC_DIRECT_MSG);
+		msg_set_lookup_scope(hdr, 0);
+		msg_set_destnode(hdr, dnode);
+		msg_set_destport(hdr, dest->addr.id.ref);
+		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
 	}
 
-	skb_queue_head_init(&pktchain);
-	save = m->msg_iter;
-new_mtu:
+	/* Block or return if destination link is congested */
+	rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
+	if (unlikely(rc))
+		return rc;
+
+	skb_queue_head_init(&pkts);
 	mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-	rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
-	if (rc < 0)
+	rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+	if (unlikely(rc != dlen))
 		return rc;
 
-	do {
-		skb = skb_peek(&pktchain);
-		TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-		rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
-		if (likely(!rc)) {
-			if (!is_connectionless)
-				tipc_set_sk_state(sk, TIPC_CONNECTING);
-			return dsz;
-		}
-		if (rc == -ELINKCONG) {
-			tsk->link_cong = 1;
-			rc = tipc_wait_for_cond(sock, &timeo, !tsk->link_cong);
-			if (!rc)
-				continue;
-		}
-		__skb_queue_purge(&pktchain);
-		if (rc == -EMSGSIZE) {
-			m->msg_iter = save;
-			goto new_mtu;
-		}
-		break;
-	} while (1);
+	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+	if (unlikely(rc == -ELINKCONG)) {
+		u32_push(clinks, dnode);
+		tsk->cong_link_cnt++;
+		rc = 0;
+	}
 
-	return rc;
+	if (unlikely(syn && !rc))
+		tipc_set_sk_state(sk, TIPC_CONNECTING);
+
+	return rc ? rc : dlen;
 }
 
 /**
- * tipc_send_stream - send stream-oriented data
+ * tipc_sendstream - send stream-oriented data
  * @sock: socket structure
  * @m: data to send
  * @dsz: total length of data to be transmitted
@@ -1013,97 +999,69 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
  * Returns the number of bytes sent on success (or partial success),
  * or errno if no data sent
  */
-static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
+static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz)
 {
 	struct sock *sk = sock->sk;
 	int ret;
 
 	lock_sock(sk);
-	ret = __tipc_send_stream(sock, m, dsz);
+	ret = __tipc_sendstream(sock, m, dsz);
 	release_sock(sk);
 
 	return ret;
 }
 
-static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
+static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 {
 	struct sock *sk = sock->sk;
-	struct net *net = sock_net(sk);
-	struct tipc_sock *tsk = tipc_sk(sk);
-	struct tipc_msg *mhdr = &tsk->phdr;
-	struct sk_buff_head pktchain;
 	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
-	u32 portid = tsk->portid;
-	int rc = -EINVAL;
-	long timeo;
-	u32 dnode;
-	uint mtu, send, sent = 0;
-	struct iov_iter save;
-	int hlen = MIN_H_SIZE;
-
-	/* Handle implied connection establishment */
-	if (unlikely(dest)) {
-		rc = __tipc_sendmsg(sock, m, dsz);
-		hlen = msg_hdr_sz(mhdr);
-		if (dsz && (dsz == rc))
-			tsk->snt_unacked = tsk_inc(tsk, dsz + hlen);
-		return rc;
-	}
-	if (dsz > (uint)INT_MAX)
-		return -EMSGSIZE;
-
-	if (unlikely(!tipc_sk_connected(sk))) {
-		if (sk->sk_state == TIPC_DISCONNECTING)
-			return -EPIPE;
-		else
-			return -ENOTCONN;
-	}
+	long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+	struct tipc_sock *tsk = tipc_sk(sk);
+	struct tipc_msg *hdr = &tsk->phdr;
+	struct net *net = sock_net(sk);
+	struct sk_buff_head pkts;
+	u32 dnode = tsk_peer_node(tsk);
+	int send, sent = 0;
+	int rc = 0;
 
-	timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
-	if (!timeo && tsk->link_cong)
-		return -ELINKCONG;
+	skb_queue_head_init(&pkts);
 
-	dnode = tsk_peer_node(tsk);
-	skb_queue_head_init(&pktchain);
+	if (unlikely(dlen > INT_MAX))
+		return -EMSGSIZE;
 
-next:
-	save = m->msg_iter;
-	mtu = tsk->max_pkt;
-	send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-	rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
-	if (unlikely(rc < 0))
+	/* Handle implicit connection setup */
+	if (unlikely(dest)) {
+		rc = __tipc_sendmsg(sock, m, dlen);
+		if (dlen && (dlen == rc))
+			tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr));
 		return rc;
+	}
 
 	do {
-		if (likely(!tsk_conn_cong(tsk))) {
-			rc = tipc_node_xmit(net, &pktchain, dnode, portid);
-			if (likely(!rc)) {
-				tsk->snt_unacked += tsk_inc(tsk, send + hlen);
-				sent += send;
-				if (sent == dsz)
-					return dsz;
-				goto next;
-			}
-			if (rc == -EMSGSIZE) {
-				__skb_queue_purge(&pktchain);
-				tsk->max_pkt = tipc_node_get_mtu(net, dnode,
-								 portid);
-				m->msg_iter = save;
-				goto next;
-			}
-			if (rc != -ELINKCONG)
-				break;
-
-			tsk->link_cong = 1;
-		}
-		rc = tipc_wait_for_cond(sock, &timeo,
-					(!tsk->link_cong &&
+		rc = tipc_wait_for_cond(sock, &timeout,
+					(!tsk->cong_link_cnt &&
 					 !tsk_conn_cong(tsk) &&
 					 tipc_sk_connected(sk)));
-	} while (!rc);
+		if (unlikely(rc))
+			break;
+
+		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
+		rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts);
+		if (unlikely(rc != send))
+			break;
+
+		rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+		if (unlikely(rc == -ELINKCONG)) {
+			tsk->cong_link_cnt = 1;
+			rc = 0;
+		}
+		if (likely(!rc)) {
+			tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE);
+			sent += send;
+		}
+	} while (sent < dlen && !rc);
 
-	__skb_queue_purge(&pktchain);
-	return sent ? sent : rc;
+	return rc ? rc : sent;
 }
 
 /**
@@ -1121,7 +1079,7 @@ static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz)
 	if (dsz > TIPC_MAX_USER_MSG_SIZE)
 		return -EMSGSIZE;
 
-	return tipc_send_stream(sock, m, dsz);
+	return tipc_sendstream(sock, m, dsz);
 }
 
 /* tipc_sk_finish_conn - complete the setup of a connection
@@ -1688,6 +1646,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
 	unsigned int limit = rcvbuf_limit(sk, skb);
 	int err = TIPC_OK;
 	int usr = msg_user(hdr);
+	u32 onode;
 
 	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
 		tipc_sk_proto_rcv(tsk, skb, xmitq);
@@ -1695,8 +1654,10 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
 	}
 
 	if (unlikely(usr == SOCK_WAKEUP)) {
+		onode = msg_orignode(hdr);
 		kfree_skb(skb);
-		tsk->link_cong = 0;
+		u32_del(&tsk->cong_links, onode);
+		tsk->cong_link_cnt--;
 		sk->sk_write_space(sk);
 		return false;
 	}
@@ -2104,7 +2065,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 		struct msghdr m = {NULL,};
 
 		tsk_advance_rx_queue(sk);
-		__tipc_send_stream(new_sock, &m, 0);
+		__tipc_sendstream(new_sock, &m, 0);
 	} else {
 		__skb_dequeue(&sk->sk_receive_queue);
 		__skb_queue_head(&new_sk->sk_receive_queue, buf);
@@ -2565,7 +2526,7 @@ static const struct proto_ops stream_ops = {
 	.shutdown	= tipc_shutdown,
 	.setsockopt	= tipc_setsockopt,
 	.getsockopt	= tipc_getsockopt,
-	.sendmsg	= tipc_send_stream,
+	.sendmsg	= tipc_sendstream,
 	.recvmsg	= tipc_recv_stream,
 	.mmap		= sock_no_mmap,
 	.sendpage	= sock_no_sendpage
-- 
2.7.4


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot

^ permalink raw reply related

* Re: [PATCH] uapi: use wildcards to list files
From: David Miller @ 2017-01-03 15:56 UTC (permalink / raw)
  To: nicolas.dichtel
  Cc: linux-arch, linux-nfs, arnd, alsa-devel, linux-rdma, netdev,
	linux-mmc, linux-kernel, dri-devel, linux-spi, linux-raid,
	airlied, netfilter-devel, linux-fbdev, xen-devel, fcoe-devel,
	linux-mtd, linux-media
In-Reply-To: <1483454144-10519-1-git-send-email-nicolas.dichtel@6wind.com>

From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue,  3 Jan 2017 15:35:44 +0100

> Regularly, when a new header is created in include/uapi/, the developer
> forgets to add it in the corresponding Kbuild file. This error is usually
> detected after the release is out.
> 
> In fact, all headers under include/uapi/ should be exported, so let's
> use wildcards.
> 
> After this patch, the following files, which were not exported, are now
> exported:
 ...
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>

Acked-by: David S. Miller <davem@davemloft.net>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

^ permalink raw reply

* Re: [PATCH v3 net-next 1/2] af_packet: TX_RING support for TPACKET_V3
From: Willem de Bruijn @ 2017-01-03 15:57 UTC (permalink / raw)
  To: Sowmini Varadhan
  Cc: Network Development, Daniel Borkmann, Willem de Bruijn,
	David Miller
In-Reply-To: <ad0b245b49761228a99b8865693c4a60550caf61.1483452545.git.sowmini.varadhan@oracle.com>

On Tue, Jan 3, 2017 at 9:31 AM, Sowmini Varadhan
<sowmini.varadhan@oracle.com> wrote:
> Although TPACKET_V3 Rx has some benefits over TPACKET_V2 Rx, *_v3
> does not currently have TX_RING support. As a result an application
> that wants the best perf for Tx and Rx (e.g. to handle request/response
> transactions) ends up needing 2 sockets, one with *_v2 for Tx and
> another with *_v3 for Rx.
>
> This patch enables TPACKET_V2 compatible Tx features in TPACKET_V3
> so that an application can use a single descriptor to get the benefits
> of _v3 RX_RING and _v2 TX_RING. An application may do a block-send by
> first filling up multiple frames in the Tx ring and then triggering a
> transmit. This patch only supports fixed size Tx frames for TPACKET_V3,
> and requires that tp_next_offset must be zero.
>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>

Acked-by: Willem de Bruijn <willemb@google.com>

^ permalink raw reply

* Re: [PATCH v3 net-next 2/2] tools: test case for TPACKET_V3/TX_RING support
From: Willem de Bruijn @ 2017-01-03 15:57 UTC (permalink / raw)
  To: Sowmini Varadhan
  Cc: Network Development, Daniel Borkmann, Willem de Bruijn,
	David Miller
In-Reply-To: <dcf7fe19d3248b4f523b7f7d22937c61d92a152f.1483452545.git.sowmini.varadhan@oracle.com>

On Tue, Jan 3, 2017 at 9:31 AM, Sowmini Varadhan
<sowmini.varadhan@oracle.com> wrote:
> Add a test case and sample code for (TPACKET_V3, PACKET_TX_RING)
>
> Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>

Acked-by: Willem de Bruijn <willemb@google.com>

Thanks!

^ permalink raw reply

* Re: [PATCH net] benet: stricter vxlan offloading check in be_features_check
From: David Miller @ 2017-01-03 15:59 UTC (permalink / raw)
  To: sd
  Cc: netdev, sathya.perla, ajit.khaparde, sriharsha.basavapatna,
	somnath.kotur
In-Reply-To: <59e720bde70fb5226313c62d89b62cbbef25b3e2.1483455910.git.sd@queasysnail.net>

From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue,  3 Jan 2017 16:26:04 +0100

> When VXLAN offloading is enabled, be_features_check() tries to check if
> an encapsulated packet is indeed a VXLAN packet. The check is not strict
> enough, and considers any UDP-encapsulated ethernet frame with a 8-byte
> tunnel header as being VXLAN. Unfortunately, both GENEVE and VXLAN-GPE
> have a 8-byte header, so they get through this check.
> 
> Force the UDP destination port to be the one that has been offloaded to
> hardware.
> 
> Without this, GENEVE-encapsulated packets can end up having an incorrect
> checksum when both a GENEVE and a VXLAN (offloaded) tunnel are
> configured.
> 
> This is similar to commit a547224dceed ("mlx4e: Do not attempt to
> offload VXLAN ports that are unrecognized").
> 
> Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH v3 net-next 0/2] TPACKET_V3 TX_RING support
From: David Miller @ 2017-01-03 16:01 UTC (permalink / raw)
  To: sowmini.varadhan; +Cc: netdev, daniel, willemb
In-Reply-To: <cover.1483452545.git.sowmini.varadhan@oracle.com>

From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue,  3 Jan 2017 06:31:46 -0800

> This patch series allows an application to use a single PF_PACKET
> descriptor and leverage the best implementations of TX_RING
> and RX_RING that exist today.
> 
> Patch 1 adds the kernel/Documentation changes for TX_RING
> support and patch2 adds the associated test case in selftests.
> 
> Changes since v2: additional sanity checks for setsockopt
> input for TX_RING/TPACKET_V3. Refactored psock_tpacket.c
> test code to avoid code duplication from V2.

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH] Ipvlan should return an error when an address is already in use.
From: David Miller @ 2017-01-03 15:55 UTC (permalink / raw)
  To: aconole; +Cc: kjlx, maheshb, netdev
In-Reply-To: <f7td1g49m8n.fsf@redhat.com>

From: Aaron Conole <aconole@redhat.com>
Date: Tue, 03 Jan 2017 10:50:00 -0500

>> @@ -489,7 +490,12 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
>>  	   Notifier will trigger FIB update, so that
>>  	   listeners of netlink will know about new ifaddr */
>>  	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
>> -	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
>> +	ret = blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
> 
> Why are you doing this assignment if you aren't using the result?
> 
>> +	ret = notifier_to_errno(ret);
>> +	if (ret) {
>> +		__inet_del_ifa(in_dev, ifap, 1, NULL, portid);
>> +		return ret;
>> +	}

'ret' assignment is being used, via notifier_to_errno().

^ permalink raw reply

* Re: [PATCH] drop_monitor: consider inserted data in genlmsg_end
From: Neil Horman @ 2017-01-03 16:04 UTC (permalink / raw)
  To: David Miller; +Cc: wr0112358, netdev, linux-kernel
In-Reply-To: <20170103.095419.261470619535526723.davem@davemloft.net>

On Tue, Jan 03, 2017 at 09:54:19AM -0500, David Miller wrote:
> From: Reiter Wolfgang <wr0112358@gmail.com>
> Date: Tue,  3 Jan 2017 01:39:10 +0100
> 
> > Final nlmsg_len field update must reflect inserted net_dm_drop_point
> > data.
> > 
> > This patch depends on previous patch:
> > "drop_monitor: add missing call to genlmsg_end"
> > 
> > Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com>
> 
> I don't understand why the current code doesn't work properly.
> 
> All over the tree, the pattern is:
> 
> 	x = genlmsg_put(skb, ...);
> 	...
> 	genlmsg_end(skb, x);
> 
> And that is exactly what the code is doing right now.
> 

Because reset_per_cpu_data should close the use of the established skb
that was being written to.  Without this patch we add the END tlv to the skb
that is just getting started for use in the drop monitor, rather than for the
skb that is getting returned for use in sending up to user space listeners.

Or am I missing something?

^ permalink raw reply

* Re: [RFC PATCH 2/4] page_pool: basic implementation of page_pool
From: Vlastimil Babka @ 2017-01-03 16:07 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, linux-mm, Alexander Duyck
  Cc: willemdebruijn.kernel, netdev, john.fastabend, Saeed Mahameed,
	bjorn.topel, Alexei Starovoitov, Tariq Toukan
In-Reply-To: <20161220132817.18788.64726.stgit@firesoul>

On 12/20/2016 02:28 PM, Jesper Dangaard Brouer wrote:
> The focus in this patch is getting the API around page_pool figured out.
>
> The internal data structures for returning page_pool pages are not optimal.
> This implementation uses ptr_ring for recycling, which is known not to scale
> in case of multiple remote CPUs releasing/returning pages.

Just a few very quick impressions...

> A bulking interface into the page allocator is also left for later. (This
> requires cooperation with Mel Gorman, who just sent me some PoC patches for this).
> ---
>  include/linux/mm.h             |    6 +
>  include/linux/mm_types.h       |   11 +
>  include/linux/page-flags.h     |   13 +
>  include/linux/page_pool.h      |  158 +++++++++++++++
>  include/linux/skbuff.h         |    2
>  include/trace/events/mmflags.h |    3
>  mm/Makefile                    |    3
>  mm/page_alloc.c                |   10 +
>  mm/page_pool.c                 |  423 ++++++++++++++++++++++++++++++++++++++++
>  mm/slub.c                      |    4
>  10 files changed, 627 insertions(+), 6 deletions(-)
>  create mode 100644 include/linux/page_pool.h
>  create mode 100644 mm/page_pool.c
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 4424784ac374..11b4d8fb280b 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -23,6 +23,7 @@
>  #include <linux/page_ext.h>
>  #include <linux/err.h>
>  #include <linux/page_ref.h>
> +#include <linux/page_pool.h>
>
>  struct mempolicy;
>  struct anon_vma;
> @@ -765,6 +766,11 @@ static inline void put_page(struct page *page)
>  {
>  	page = compound_head(page);
>
> +	if (PagePool(page)) {
> +		page_pool_put_page(page);
> +		return;
> +	}

Can't say I'm thrilled about a new page flag and a test in put_page(). I don't 
know the full life cycle here, but isn't it that these pages will be 
specifically allocated and used in page pool aware drivers, so maybe they can be 
also specifically freed there without hooking to the generic page refcount 
mechanism?

> +
>  	if (put_page_testzero(page))
>  		__put_page(page);
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 08d947fc4c59..c74dea967f99 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -47,6 +47,12 @@ struct page {
>  	unsigned long flags;		/* Atomic flags, some possibly
>  					 * updated asynchronously */
>  	union {
> +		/* DISCUSS: Considered moving page_pool pointer here,
> +		 * but I'm unsure if 'mapping' is needed for userspace
> +		 * mapping the page, as this is a use-case the
> +		 * page_pool need to support in the future. (Basically
> +		 * mapping a NIC RX ring into userspace).

I think so, but might be wrong here. In any case mapping usually goes with 
index, and you put dma_addr in union with index below...

> +		 */
>  		struct address_space *mapping;	/* If low bit clear, points to
>  						 * inode address_space, or NULL.
>  						 * If page mapped as anonymous
> @@ -63,6 +69,7 @@ struct page {
>  	union {
>  		pgoff_t index;		/* Our offset within mapping. */
>  		void *freelist;		/* sl[aou]b first free object */
> +		dma_addr_t dma_addr;    /* used by page_pool */
>  		/* page_deferred_list().prev	-- second tail page */
>  	};
>
> @@ -117,6 +124,8 @@ struct page {
>  	 * avoid collision and false-positive PageTail().
>  	 */
>  	union {
> +		/* XXX: Idea reuse lru list, in page_pool to align with PCP */
> +
>  		struct list_head lru;	/* Pageout list, eg. active_list
>  					 * protected by zone_lru_lock !
>  					 * Can be used as a generic list
> @@ -189,6 +198,8 @@ struct page {
>  #endif
>  #endif
>  		struct kmem_cache *slab_cache;	/* SL[AU]B: Pointer to slab */
> +		/* XXX: Sure page_pool will have no users of "private"? */
> +		struct page_pool *pool;
>  	};
>
>  #ifdef CONFIG_MEMCG

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH net-next] net/sched: cls_matchall: Fix error path
From: David Miller @ 2017-01-03 16:08 UTC (permalink / raw)
  To: yotamg; +Cc: jhs, eladr, jiri, netdev
In-Reply-To: <1483458422-13607-1-git-send-email-yotamg@mellanox.com>

From: Yotam Gigi <yotamg@mellanox.com>
Date: Tue,  3 Jan 2017 17:47:02 +0200

> Fix several error paths in matchall:
>  - Release reference to actions in case the hardware fails offloading
>    (relevant to skip_sw only)
>  - Fix error path in case tcf_exts initialization fails
> 
> Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier")
> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>

Nothing is checking the tcf_exts_init() return value for errors either,
and I think you should fix this alongside these release problems at the
same time.

Thanks.

^ permalink raw reply

* Re: [PATCH] drop_monitor: consider inserted data in genlmsg_end
From: David Miller @ 2017-01-03 16:10 UTC (permalink / raw)
  To: nhorman; +Cc: wr0112358, netdev, linux-kernel
In-Reply-To: <20170103160443.GC11735@hmsreliant.think-freely.org>

From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 3 Jan 2017 11:04:43 -0500

> On Tue, Jan 03, 2017 at 09:54:19AM -0500, David Miller wrote:
>> From: Reiter Wolfgang <wr0112358@gmail.com>
>> Date: Tue,  3 Jan 2017 01:39:10 +0100
>> 
>> > Final nlmsg_len field update must reflect inserted net_dm_drop_point
>> > data.
>> > 
>> > This patch depends on previous patch:
>> > "drop_monitor: add missing call to genlmsg_end"
>> > 
>> > Signed-off-by: Reiter Wolfgang <wr0112358@gmail.com>
>> 
>> I don't understand why the current code doesn't work properly.
>> 
>> All over the tree, the pattern is:
>> 
>> 	x = genlmsg_put(skb, ...);
>> 	...
>> 	genlmsg_end(skb, x);
>> 
>> And that is exactly what the code is doing right now.
>> 
> 
> Because reset_per_cpu_data should close the use of the established skb
> that was being written to.  Without this patch we add the END tlv to the skb
> that is just getting started for use in the drop monitor, rather than for the
> skb that is getting returned for use in sending up to user space listeners.
> 
> Or am I missing something?

That's the critical part I didn't see, thanks for explaining.

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH] staging: octeon: Call SET_NETDEV_DEV()
From: Greg KH @ 2017-01-03 16:11 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: devel, asbjorn, aaro.koskinen, netdev, nevola, linux-kernel,
	jarod, bhaktipriya96, David Miller, tremyfr
In-Reply-To: <748b758b-7a9c-d58e-2fa5-52b6fa031ae3@gmail.com>

On Tue, Dec 27, 2016 at 02:15:57PM -0800, Florian Fainelli wrote:
> On 12/20/2016 07:20 PM, David Miller wrote:
> > From: Florian Fainelli <f.fainelli@gmail.com>
> > Date: Tue, 20 Dec 2016 17:02:37 -0800
> > 
> >> On 12/14/2016 05:13 PM, Florian Fainelli wrote:
> >>> The Octeon driver calls into PHYLIB which now checks for
> >>> net_device->dev.parent, so make sure we do set it before calling into
> >>> any MDIO/PHYLIB related function.
> >>>
> >>> Fixes: ec988ad78ed6 ("phy: Don't increment MDIO bus refcount unless it's a different owner")
> >>> Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
> >>> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> >>
> >> Greg, David, since this is a fix for a regression introduced in the net
> >> tree, it may make sense that David take it via his tree.
> > 
> > Since the change in question is in Linus's tree, it's equally valid
> > for Greg to take it as well.
> 
> Sure, Greg, can you take this change? Thank you!

Will do so now, thanks,

greg k-h

^ permalink raw reply

* Re: [net-next 0/3] tipc: improve interaction socket-link
From: David Miller @ 2017-01-03 16:13 UTC (permalink / raw)
  To: jon.maloy; +Cc: netdev, tipc-discussion
In-Reply-To: <1483458911-32549-1-git-send-email-jon.maloy@ericsson.com>

From: Jon Maloy <jon.maloy@ericsson.com>
Date: Tue,  3 Jan 2017 10:55:08 -0500

> We fix a very real starvation problem that may occur when a link
> encounters send buffer congestion. At the same time we make the 
> interaction between the socket and link layer simpler and more 
> consistent.

Series applied, thanks Jon.

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot

^ permalink raw reply

* RE: [PATCH net-next] net/sched: cls_matchall: Fix error path
From: Yotam Gigi @ 2017-01-03 16:13 UTC (permalink / raw)
  To: David Miller
  Cc: jhs@mojatatu.com, Elad Raz, Jiri Pirko, netdev@vger.kernel.org
In-Reply-To: <20170103.110807.1415814934069793893.davem@davemloft.net>

>-----Original Message-----
>From: David Miller [mailto:davem@davemloft.net]
>Sent: Tuesday, January 03, 2017 6:08 PM
>To: Yotam Gigi <yotamg@mellanox.com>
>Cc: jhs@mojatatu.com; Elad Raz <eladr@mellanox.com>; Jiri Pirko
><jiri@mellanox.com>; netdev@vger.kernel.org
>Subject: Re: [PATCH net-next] net/sched: cls_matchall: Fix error path
>
>From: Yotam Gigi <yotamg@mellanox.com>
>Date: Tue,  3 Jan 2017 17:47:02 +0200
>
>> Fix several error paths in matchall:
>>  - Release reference to actions in case the hardware fails offloading
>>    (relevant to skip_sw only)
>>  - Fix error path in case tcf_exts initialization fails
>>
>> Fixes: bf3994d2ed31 ("net/sched: introduce Match-all classifier")
>> Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
>
>Nothing is checking the tcf_exts_init() return value for errors either,
>and I think you should fix this alongside these release problems at the
>same time.

Ok. Will send v2 soon.

Thanks!

>
>Thanks.

^ permalink raw reply

* [PATCH v2 net-next] net:mv88e6xxx: use g2 interrupt for 6097 chip
From: Volodymyr Bendiuga @ 2017-01-03 15:18 UTC (permalink / raw)
  To: andrew, vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga

From: Volodymyr Bendiuga <volodymyr.bendiuga@westermo.se>

This chip needs MV88E6XXX_FLAG_G2_INT

Signed-off-by: Volodymyr Bendiuga <volodymyr.bendiuga@westermo.se>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
 drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index af54bae..431e954 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -566,6 +566,7 @@ enum mv88e6xxx_cap {
 	(MV88E6XXX_FLAG_G1_ATU_FID |	\
 	 MV88E6XXX_FLAG_G1_VTU_FID |	\
 	 MV88E6XXX_FLAG_GLOBAL2 |	\
+ 	 MV88E6XXX_FLAG_G2_INT |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v2 net-next] net:mv88e6xxx: use g2 interrupt for 6097 chip
From: David Miller @ 2017-01-03 16:18 UTC (permalink / raw)
  To: volodymyr.bendiuga
  Cc: andrew, vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga
In-Reply-To: <1483456720-9929-1-git-send-email-volodymyr.bendiuga@gmail.com>

From: Volodymyr Bendiuga <volodymyr.bendiuga@gmail.com>
Date: Tue,  3 Jan 2017 16:18:40 +0100

> + 	 MV88E6XXX_FLAG_G2_INT |	\

Space before TAB character is still there on this line, right after
the "+".

^ permalink raw reply

* [PATCH ipsec] xfrm: trivial typos
From: Alexander Alemayhu @ 2017-01-03 16:13 UTC (permalink / raw)
  To: netdev; +Cc: steffen.klassert, Alexander Alemayhu

o s/descentant/descendant
o s/workarbound/workaround

Signed-off-by: Alexander Alemayhu <alexander@alemayhu.com>
---
 net/xfrm/xfrm_policy.c | 2 +-
 net/xfrm/xfrm_state.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 177e208e8ff5..99ad1af2927f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -330,7 +330,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
-/* Rule must be locked. Release descentant resources, announce
+/* Rule must be locked. Release descendant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 64e3c82eedf6..c5cf4d611aab 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -409,7 +409,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 			if (x->xflags & XFRM_SOFT_EXPIRE) {
 				/* enter hard expire without soft expire first?!
 				 * setting a new date could trigger this.
-				 * workarbound: fix x->curflt.add_time by below:
+				 * workaround: fix x->curflt.add_time by below:
 				 */
 				x->curlft.add_time = now - x->saved_tmo - 1;
 				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH v2 net-next] net:mv88e6xxx: use g2 interrupt for 6097 chip
From: Andrew Lunn @ 2017-01-03 16:28 UTC (permalink / raw)
  To: Volodymyr Bendiuga; +Cc: vivien.didelot, f.fainelli, netdev, volodymyr.bendiuga
In-Reply-To: <1483456720-9929-1-git-send-email-volodymyr.bendiuga@gmail.com>

On Tue, Jan 03, 2017 at 04:18:40PM +0100, Volodymyr Bendiuga wrote:
> From: Volodymyr Bendiuga <volodymyr.bendiuga@westermo.se>
> 
> This chip needs MV88E6XXX_FLAG_G2_INT
> 
> Signed-off-by: Volodymyr Bendiuga <volodymyr.bendiuga@westermo.se>
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> ---
>  drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
> index af54bae..431e954 100644
> --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
> +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
> @@ -566,6 +566,7 @@ enum mv88e6xxx_cap {
>  	(MV88E6XXX_FLAG_G1_ATU_FID |	\
>  	 MV88E6XXX_FLAG_G1_VTU_FID |	\
>  	 MV88E6XXX_FLAG_GLOBAL2 |	\
> + 	 MV88E6XXX_FLAG_G2_INT |	\

checkpatch is your friend:

$ ./scripts/checkpatch.pl volodymyr.bendiuga 
ERROR: code indent should use tabs where possible
#76: FILE: drivers/net/dsa/mv88e6xxx/mv88e6xxx.h:569:
+ ^I MV88E6XXX_FLAG_G2_INT |^I\$

WARNING: please, no space before tabs
#76: FILE: drivers/net/dsa/mv88e6xxx/mv88e6xxx.h:569:
+ ^I MV88E6XXX_FLAG_G2_INT |^I\$

WARNING: please, no spaces at the start of a line
#76: FILE: drivers/net/dsa/mv88e6xxx/mv88e6xxx.h:569:
+ ^I MV88E6XXX_FLAG_G2_INT |^I\$

total: 1 errors, 2 warnings, 0 checks, 7 lines checked

       Andrew

^ permalink raw reply

* Re: [PATCH v3 3/3] nfc: trf7970a: Prevent repeated polling from crashing the kernel
From: Mark Greer @ 2017-01-03 16:33 UTC (permalink / raw)
  To: Geoff Lansberry
  Cc: linux-wireless, Lauro Ramos Venancio, Aloisio Almeida Jr,
	Samuel Ortiz, robh+dt, mark.rutland, netdev, devicetree,
	linux-kernel, Justin Bronder, Jaret Cantu
In-Reply-To: <CAO7Z3WJa0goJ-VXc7dvyz8imZtqby6QsC0QNH+uRAE8LhxqU2w@mail.gmail.com>

[Please stop top-posting.  Bottom-post only to these lists.]

Hi Geoff & happy new year.

On Tue, Dec 27, 2016 at 09:18:32AM -0500, Geoff Lansberry wrote:
> Mark - I will split this off soon.

OK

> In the meantime - here is some more info about how we use it.
> 
> We do use NFC structures.  I did find an interesting clue in that
> there are certain bottles that cause neard to segfault; I'm not sure
> what is different about them.  We write a string, like
> "coppola_chardonnay_2015", to the bottles.

Off the top of my head, it could be the length of the text.
It would be useful to compare the data that works to the data
that doesn't work.  Can you install NXP's 'TagInfo' app on a
smartphone and scan tags with working & non-working data?
You can email the data from the app to yourself, edit out
the cruft, and share here.

> Come to think of it, I
> haven't done anything special to make that an ndef record, just
> assumed that it would happen by default, I'll look into this further.

If you wrote the data using neard, it will be NDEF formatted.
Since it is working this well, it is virtually guaranteed that
the data is NDEF formatted.

>   Also, I've been running neard with --plugin nfctype2. Just in case
> the problem was happening due to cycling through other tag types.   It
> didn't seem to make any difference, but I have not gone back to
> default.

Good to know, thanks.

Mark
--

^ permalink raw reply

* Re: [PATCH net 9/9] virtio-net: XDP support for small buffers
From: John Fastabend @ 2017-01-03 16:40 UTC (permalink / raw)
  To: Jason Wang, mst, virtualization, netdev, linux-kernel; +Cc: john.r.fastabend
In-Reply-To: <8c896c40-fd25-4b92-fe80-5be18c13dd48@redhat.com>

On 17-01-02 10:16 PM, Jason Wang wrote:
> 
> 
> On 2017年01月03日 06:43, John Fastabend wrote:
>> On 16-12-23 06:37 AM, Jason Wang wrote:
>>> Commit f600b6905015 ("virtio_net: Add XDP support") leaves the case of
>>> small receive buffers untouched. This will confuse users who want to
>>> set XDP but use small buffers. Rather than forbidding XDP in small buffer
>>> mode, let's make it work. XDP can then only work at skb->data since
>>> virtio-net creates skbs during refill; this is suboptimal and could
>>> be optimized in the future.
>>>
>>> Cc: John Fastabend <john.r.fastabend@intel.com>
>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>> ---
>>>   drivers/net/virtio_net.c | 112 ++++++++++++++++++++++++++++++++++++-----------
>>>   1 file changed, 87 insertions(+), 25 deletions(-)
>>>
>> Hi Jason,
>>
>> I was doing some more testing on this. What do you think about doing this
>> so that free_unused_bufs() handles the buffer free with dev_kfree_skb()
>> instead of put_page() in small receive mode? Seems more correct to me.
>>
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 783e842..27ff76c 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -1898,6 +1898,10 @@ static void free_receive_page_frags(struct virtnet_info
>> *vi)
>>
>>   static bool is_xdp_queue(struct virtnet_info *vi, int q)
>>   {
>> +       /* For small receive mode always use kfree_skb variants */
>> +       if (!vi->mergeable_rx_bufs)
>> +               return false;
>> +
>>          if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
>>                  return false;
>>          else if (q < vi->curr_queue_pairs)
>>
>>
>> Patch is untested; I just spotted this while doing code review.
>>
>> Thanks,
>> John
> 
> We probably need a better name for this function.
> 
> Acked-by: Jason Wang <jasowang@redhat.com>
> 

How about is_xdp_raw_buffer_queue()?

I'll submit a proper patch today.
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox