Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 15/20] batman-adv: Prefix icmp_socket local static functions with batadv_
From: Antonio Quartulli @ 2012-06-25  6:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Antonio Quartulli
In-Reply-To: <1340607284-29950-1-git-send-email-ordex@autistici.org>

From: Sven Eckelmann <sven@narfation.org>

All non-static symbols of batman-adv were prefixed with batadv_ to avoid
collisions with other symbols of the kernel. Other symbols of batman-adv
should use the same prefix to keep the naming scheme consistent.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/icmp_socket.c |   66 ++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index a3e80b6..b285c31 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -26,18 +26,18 @@
 #include "originator.h"
 #include "hard-interface.h"
 
-static struct socket_client *socket_client_hash[256];
+static struct socket_client *batadv_socket_client_hash[256];
 
-static void bat_socket_add_packet(struct socket_client *socket_client,
-				  struct icmp_packet_rr *icmp_packet,
-				  size_t icmp_len);
+static void batadv_socket_add_packet(struct socket_client *socket_client,
+				     struct icmp_packet_rr *icmp_packet,
+				     size_t icmp_len);
 
 void batadv_socket_init(void)
 {
-	memset(socket_client_hash, 0, sizeof(socket_client_hash));
+	memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash));
 }
 
-static int bat_socket_open(struct inode *inode, struct file *file)
+static int batadv_socket_open(struct inode *inode, struct file *file)
 {
 	unsigned int i;
 	struct socket_client *socket_client;
@@ -49,14 +49,14 @@ static int bat_socket_open(struct inode *inode, struct file *file)
 	if (!socket_client)
 		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(socket_client_hash); i++) {
-		if (!socket_client_hash[i]) {
-			socket_client_hash[i] = socket_client;
+	for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) {
+		if (!batadv_socket_client_hash[i]) {
+			batadv_socket_client_hash[i] = socket_client;
 			break;
 		}
 	}
 
-	if (i == ARRAY_SIZE(socket_client_hash)) {
+	if (i == ARRAY_SIZE(batadv_socket_client_hash)) {
 		pr_err("Error - can't add another packet client: maximum number of clients reached\n");
 		kfree(socket_client);
 		return -EXFULL;
@@ -75,7 +75,7 @@ static int bat_socket_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int bat_socket_release(struct inode *inode, struct file *file)
+static int batadv_socket_release(struct inode *inode, struct file *file)
 {
 	struct socket_client *socket_client = file->private_data;
 	struct socket_packet *socket_packet;
@@ -92,7 +92,7 @@ static int bat_socket_release(struct inode *inode, struct file *file)
 		kfree(socket_packet);
 	}
 
-	socket_client_hash[socket_client->index] = NULL;
+	batadv_socket_client_hash[socket_client->index] = NULL;
 	spin_unlock_bh(&socket_client->lock);
 
 	kfree(socket_client);
@@ -101,8 +101,8 @@ static int bat_socket_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static ssize_t bat_socket_read(struct file *file, char __user *buf,
-			       size_t count, loff_t *ppos)
+static ssize_t batadv_socket_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos)
 {
 	struct socket_client *socket_client = file->private_data;
 	struct socket_packet *socket_packet;
@@ -144,8 +144,8 @@ static ssize_t bat_socket_read(struct file *file, char __user *buf,
 	return packet_len;
 }
 
-static ssize_t bat_socket_write(struct file *file, const char __user *buff,
-				size_t len, loff_t *off)
+static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
+				   size_t len, loff_t *off)
 {
 	struct socket_client *socket_client = file->private_data;
 	struct bat_priv *bat_priv = socket_client->bat_priv;
@@ -206,7 +206,8 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
 	if (icmp_packet->header.version != COMPAT_VERSION) {
 		icmp_packet->msg_type = PARAMETER_PROBLEM;
 		icmp_packet->header.version = COMPAT_VERSION;
-		bat_socket_add_packet(socket_client, icmp_packet, packet_len);
+		batadv_socket_add_packet(socket_client, icmp_packet,
+					 packet_len);
 		goto free_skb;
 	}
 
@@ -239,7 +240,7 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff,
 
 dst_unreach:
 	icmp_packet->msg_type = DESTINATION_UNREACHABLE;
-	bat_socket_add_packet(socket_client, icmp_packet, packet_len);
+	batadv_socket_add_packet(socket_client, icmp_packet, packet_len);
 free_skb:
 	kfree_skb(skb);
 out:
@@ -252,7 +253,7 @@ out:
 	return len;
 }
 
-static unsigned int bat_socket_poll(struct file *file, poll_table *wait)
+static unsigned int batadv_socket_poll(struct file *file, poll_table *wait)
 {
 	struct socket_client *socket_client = file->private_data;
 
@@ -264,13 +265,13 @@ static unsigned int bat_socket_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
-static const struct file_operations fops = {
+static const struct file_operations batadv_fops = {
 	.owner = THIS_MODULE,
-	.open = bat_socket_open,
-	.release = bat_socket_release,
-	.read = bat_socket_read,
-	.write = bat_socket_write,
-	.poll = bat_socket_poll,
+	.open = batadv_socket_open,
+	.release = batadv_socket_release,
+	.read = batadv_socket_read,
+	.write = batadv_socket_write,
+	.poll = batadv_socket_poll,
 	.llseek = no_llseek,
 };
 
@@ -282,7 +283,7 @@ int batadv_socket_setup(struct bat_priv *bat_priv)
 		goto err;
 
 	d = debugfs_create_file(ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR,
-				bat_priv->debug_dir, bat_priv, &fops);
+				bat_priv->debug_dir, bat_priv, &batadv_fops);
 	if (!d)
 		goto err;
 
@@ -292,9 +293,9 @@ err:
 	return -ENOMEM;
 }
 
-static void bat_socket_add_packet(struct socket_client *socket_client,
-				  struct icmp_packet_rr *icmp_packet,
-				  size_t icmp_len)
+static void batadv_socket_add_packet(struct socket_client *socket_client,
+				     struct icmp_packet_rr *icmp_packet,
+				     size_t icmp_len)
 {
 	struct socket_packet *socket_packet;
 
@@ -312,7 +313,7 @@ static void bat_socket_add_packet(struct socket_client *socket_client,
 	/* while waiting for the lock the socket_client could have been
 	 * deleted
 	 */
-	if (!socket_client_hash[icmp_packet->uid]) {
+	if (!batadv_socket_client_hash[icmp_packet->uid]) {
 		spin_unlock_bh(&socket_client->lock);
 		kfree(socket_packet);
 		return;
@@ -338,8 +339,9 @@ static void bat_socket_add_packet(struct socket_client *socket_client,
 void batadv_socket_receive_packet(struct icmp_packet_rr *icmp_packet,
 				  size_t icmp_len)
 {
-	struct socket_client *hash = socket_client_hash[icmp_packet->uid];
+	struct socket_client *hash;
 
+	hash = batadv_socket_client_hash[icmp_packet->uid];
 	if (hash)
-		bat_socket_add_packet(hash, icmp_packet, icmp_len);
+		batadv_socket_add_packet(hash, icmp_packet, icmp_len);
 }
-- 
1.7.9.4

^ permalink raw reply related

* [PATCH 16/20] batman-adv: Prefix originator local static functions with batadv_
From: Antonio Quartulli @ 2012-06-25  6:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Antonio Quartulli
In-Reply-To: <1340607284-29950-1-git-send-email-ordex@autistici.org>

From: Sven Eckelmann <sven@narfation.org>

All non-static symbols of batman-adv were prefixed with batadv_ to avoid
collisions with other symbols of the kernel. Other symbols of batman-adv
should use the same prefix to keep the naming scheme consistent.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/originator.c |   54 +++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 1cd640e..bf9ec39a 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,17 +28,17 @@
 #include "soft-interface.h"
 #include "bridge_loop_avoidance.h"
 
-static void purge_orig(struct work_struct *work);
+static void batadv_purge_orig(struct work_struct *work);
 
-static void start_purge_timer(struct bat_priv *bat_priv)
+static void batadv_start_purge_timer(struct bat_priv *bat_priv)
 {
-	INIT_DELAYED_WORK(&bat_priv->orig_work, purge_orig);
+	INIT_DELAYED_WORK(&bat_priv->orig_work, batadv_purge_orig);
 	queue_delayed_work(batadv_event_workqueue,
 			   &bat_priv->orig_work, msecs_to_jiffies(1000));
 }
 
 /* returns 1 if they are the same originator */
-static int compare_orig(const struct hlist_node *node, const void *data2)
+static int batadv_compare_orig(const struct hlist_node *node, const void *data2)
 {
 	const void *data1 = container_of(node, struct orig_node, hash_entry);
 
@@ -55,7 +55,7 @@ int batadv_originator_init(struct bat_priv *bat_priv)
 	if (!bat_priv->orig_hash)
 		goto err;
 
-	start_purge_timer(bat_priv);
+	batadv_start_purge_timer(bat_priv);
 	return 0;
 
 err:
@@ -110,7 +110,7 @@ out:
 	return neigh_node;
 }
 
-static void orig_node_free_rcu(struct rcu_head *rcu)
+static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 {
 	struct hlist_node *node, *node_tmp;
 	struct neigh_node *neigh_node, *tmp_neigh_node;
@@ -149,7 +149,7 @@ static void orig_node_free_rcu(struct rcu_head *rcu)
 void batadv_orig_node_free_ref(struct orig_node *orig_node)
 {
 	if (atomic_dec_and_test(&orig_node->refcount))
-		call_rcu(&orig_node->rcu, orig_node_free_rcu);
+		call_rcu(&orig_node->rcu, batadv_orig_node_free_rcu);
 }
 
 void batadv_originator_free(struct bat_priv *bat_priv)
@@ -248,7 +248,7 @@ struct orig_node *batadv_get_orig_node(struct bat_priv *bat_priv,
 	if (!orig_node->bcast_own_sum)
 		goto free_bcast_own;
 
-	hash_added = batadv_hash_add(bat_priv->orig_hash, compare_orig,
+	hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
 				     batadv_choose_orig, orig_node,
 				     &orig_node->hash_entry);
 	if (hash_added != 0)
@@ -264,9 +264,9 @@ free_orig_node:
 	return NULL;
 }
 
-static bool purge_orig_neighbors(struct bat_priv *bat_priv,
-				 struct orig_node *orig_node,
-				 struct neigh_node **best_neigh_node)
+static bool batadv_purge_orig_neighbors(struct bat_priv *bat_priv,
+					struct orig_node *orig_node,
+					struct neigh_node **best_neigh_node)
 {
 	struct hlist_node *node, *node_tmp;
 	struct neigh_node *neigh_node;
@@ -319,8 +319,8 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv,
 	return neigh_purged;
 }
 
-static bool purge_orig_node(struct bat_priv *bat_priv,
-			    struct orig_node *orig_node)
+static bool batadv_purge_orig_node(struct bat_priv *bat_priv,
+				   struct orig_node *orig_node)
 {
 	struct neigh_node *best_neigh_node;
 
@@ -331,8 +331,8 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
 			   jiffies_to_msecs(orig_node->last_seen));
 		return true;
 	} else {
-		if (purge_orig_neighbors(bat_priv, orig_node,
-					 &best_neigh_node))
+		if (batadv_purge_orig_neighbors(bat_priv, orig_node,
+						&best_neigh_node))
 			batadv_update_route(bat_priv, orig_node,
 					    best_neigh_node);
 	}
@@ -340,7 +340,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv,
 	return false;
 }
 
-static void _purge_orig(struct bat_priv *bat_priv)
+static void _batadv_purge_orig(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->orig_hash;
 	struct hlist_node *node, *node_tmp;
@@ -360,7 +360,7 @@ static void _purge_orig(struct bat_priv *bat_priv)
 		spin_lock_bh(list_lock);
 		hlist_for_each_entry_safe(orig_node, node, node_tmp,
 					  head, hash_entry) {
-			if (purge_orig_node(bat_priv, orig_node)) {
+			if (batadv_purge_orig_node(bat_priv, orig_node)) {
 				if (orig_node->gw_flags)
 					batadv_gw_node_delete(bat_priv,
 							      orig_node);
@@ -380,20 +380,20 @@ static void _purge_orig(struct bat_priv *bat_priv)
 	batadv_gw_election(bat_priv);
 }
 
-static void purge_orig(struct work_struct *work)
+static void batadv_purge_orig(struct work_struct *work)
 {
 	struct delayed_work *delayed_work =
 		container_of(work, struct delayed_work, work);
 	struct bat_priv *bat_priv =
 		container_of(delayed_work, struct bat_priv, orig_work);
 
-	_purge_orig(bat_priv);
-	start_purge_timer(bat_priv);
+	_batadv_purge_orig(bat_priv);
+	batadv_start_purge_timer(bat_priv);
 }
 
 void batadv_purge_orig_ref(struct bat_priv *bat_priv)
 {
-	_purge_orig(bat_priv);
+	_batadv_purge_orig(bat_priv);
 }
 
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
@@ -483,7 +483,7 @@ out:
 	return ret;
 }
 
-static int orig_node_add_if(struct orig_node *orig_node, int max_if_num)
+static int batadv_orig_node_add_if(struct orig_node *orig_node, int max_if_num)
 {
 	void *data_ptr;
 
@@ -528,7 +528,7 @@ int batadv_orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num)
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			spin_lock_bh(&orig_node->ogm_cnt_lock);
-			ret = orig_node_add_if(orig_node, max_if_num);
+			ret = batadv_orig_node_add_if(orig_node, max_if_num);
 			spin_unlock_bh(&orig_node->ogm_cnt_lock);
 
 			if (ret == -ENOMEM)
@@ -544,8 +544,8 @@ err:
 	return -ENOMEM;
 }
 
-static int orig_node_del_if(struct orig_node *orig_node,
-		     int max_if_num, int del_if_num)
+static int batadv_orig_node_del_if(struct orig_node *orig_node,
+				   int max_if_num, int del_if_num)
 {
 	void *data_ptr = NULL;
 	int chunk_size;
@@ -612,8 +612,8 @@ int batadv_orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num)
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) {
 			spin_lock_bh(&orig_node->ogm_cnt_lock);
-			ret = orig_node_del_if(orig_node, max_if_num,
-					hard_iface->if_num);
+			ret = batadv_orig_node_del_if(orig_node, max_if_num,
+						      hard_iface->if_num);
 			spin_unlock_bh(&orig_node->ogm_cnt_lock);
 
 			if (ret == -ENOMEM)
-- 
1.7.9.4

^ permalink raw reply related

* [PATCH 18/20] batman-adv: Prefix send local static functions with batadv_
From: Antonio Quartulli @ 2012-06-25  6:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Antonio Quartulli
In-Reply-To: <1340607284-29950-1-git-send-email-ordex@autistici.org>

From: Sven Eckelmann <sven@narfation.org>

All non-static symbols of batman-adv were prefixed with batadv_ to avoid
collisions with other symbols of the kernel. Other symbols of batman-adv
should use the same prefix to keep the naming scheme consistent.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/send.c |   30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 47c3a41..1842cbc 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -27,7 +27,7 @@
 #include "gateway_common.h"
 #include "originator.h"
 
-static void send_outstanding_bcast_packet(struct work_struct *work);
+static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
 
 /* send out an already prepared packet to the given address via the
  * specified batman interface
@@ -96,7 +96,7 @@ void batadv_schedule_bat_ogm(struct hard_iface *hard_iface)
 	bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface);
 }
 
-static void forw_packet_free(struct forw_packet *forw_packet)
+static void batadv_forw_packet_free(struct forw_packet *forw_packet)
 {
 	if (forw_packet->skb)
 		kfree_skb(forw_packet->skb);
@@ -105,9 +105,9 @@ static void forw_packet_free(struct forw_packet *forw_packet)
 	kfree(forw_packet);
 }
 
-static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
-				      struct forw_packet *forw_packet,
-				      unsigned long send_time)
+static void _batadv_add_bcast_packet_to_list(struct bat_priv *bat_priv,
+					     struct forw_packet *forw_packet,
+					     unsigned long send_time)
 {
 	INIT_HLIST_NODE(&forw_packet->list);
 
@@ -118,7 +118,7 @@ static void _add_bcast_packet_to_list(struct bat_priv *bat_priv,
 
 	/* start timer for this packet */
 	INIT_DELAYED_WORK(&forw_packet->delayed_work,
-			  send_outstanding_bcast_packet);
+			  batadv_send_outstanding_bcast_packet);
 	queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work,
 			   send_time);
 }
@@ -171,7 +171,7 @@ int batadv_add_bcast_packet_to_list(struct bat_priv *bat_priv,
 	/* how often did we send the bcast packet ? */
 	forw_packet->num_packets = 0;
 
-	_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
+	_batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
 	return NETDEV_TX_OK;
 
 packet_free:
@@ -184,7 +184,7 @@ out:
 	return NETDEV_TX_BUSY;
 }
 
-static void send_outstanding_bcast_packet(struct work_struct *work)
+static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 {
 	struct hard_iface *hard_iface;
 	struct delayed_work *delayed_work =
@@ -220,13 +220,13 @@ static void send_outstanding_bcast_packet(struct work_struct *work)
 
 	/* if we still have some more bcasts to send */
 	if (forw_packet->num_packets < 3) {
-		_add_bcast_packet_to_list(bat_priv, forw_packet,
-					  msecs_to_jiffies(5));
+		_batadv_add_bcast_packet_to_list(bat_priv, forw_packet,
+						 msecs_to_jiffies(5));
 		return;
 	}
 
 out:
-	forw_packet_free(forw_packet);
+	batadv_forw_packet_free(forw_packet);
 	atomic_inc(&bat_priv->bcast_queue_left);
 }
 
@@ -260,7 +260,7 @@ out:
 	if (!forw_packet->own)
 		atomic_inc(&bat_priv->batman_queue_left);
 
-	forw_packet_free(forw_packet);
+	batadv_forw_packet_free(forw_packet);
 }
 
 void batadv_purge_outstanding_packets(struct bat_priv *bat_priv,
@@ -292,7 +292,7 @@ void batadv_purge_outstanding_packets(struct bat_priv *bat_priv,
 
 		spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
 
-		/* send_outstanding_bcast_packet() will lock the list to
+		/* batadv_send_outstanding_bcast_packet() will lock the list to
 		 * delete the item from the list
 		 */
 		pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
@@ -300,7 +300,7 @@ void batadv_purge_outstanding_packets(struct bat_priv *bat_priv,
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
-			forw_packet_free(forw_packet);
+			batadv_forw_packet_free(forw_packet);
 		}
 	}
 	spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
@@ -327,7 +327,7 @@ void batadv_purge_outstanding_packets(struct bat_priv *bat_priv,
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
-			forw_packet_free(forw_packet);
+			batadv_forw_packet_free(forw_packet);
 		}
 	}
 	spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-- 
1.7.9.4

^ permalink raw reply related

* [PATCH 17/20] batman-adv: Prefix routing local static functions with batadv_
From: Antonio Quartulli @ 2012-06-25  6:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Antonio Quartulli
In-Reply-To: <1340607284-29950-1-git-send-email-ordex@autistici.org>

From: Sven Eckelmann <sven@narfation.org>

All non-static symbols of batman-adv were prefixed with batadv_ to avoid
collisions with other symbols of the kernel. Other symbols of batman-adv
should use the same prefix to keep the naming scheme consistent.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/routing.c |   65 ++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 57ff851..e7ee40d 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -29,8 +29,8 @@
 #include "unicast.h"
 #include "bridge_loop_avoidance.h"
 
-static int route_unicast_packet(struct sk_buff *skb,
-				struct hard_iface *recv_if);
+static int batadv_route_unicast_packet(struct sk_buff *skb,
+				       struct hard_iface *recv_if);
 
 void batadv_slide_own_bcast_window(struct hard_iface *hard_iface)
 {
@@ -61,9 +61,9 @@ void batadv_slide_own_bcast_window(struct hard_iface *hard_iface)
 	}
 }
 
-static void _update_route(struct bat_priv *bat_priv,
-			  struct orig_node *orig_node,
-			  struct neigh_node *neigh_node)
+static void _batadv_update_route(struct bat_priv *bat_priv,
+				 struct orig_node *orig_node,
+				 struct neigh_node *neigh_node)
 {
 	struct neigh_node *curr_router;
 
@@ -117,7 +117,7 @@ void batadv_update_route(struct bat_priv *bat_priv, struct orig_node *orig_node,
 	router = batadv_orig_node_get_router(orig_node);
 
 	if (router != neigh_node)
-		_update_route(bat_priv, orig_node, neigh_node);
+		_batadv_update_route(bat_priv, orig_node, neigh_node);
 
 out:
 	if (router)
@@ -276,8 +276,8 @@ bool batadv_check_management_packet(struct sk_buff *skb,
 	return true;
 }
 
-static int recv_my_icmp_packet(struct bat_priv *bat_priv,
-			       struct sk_buff *skb, size_t icmp_len)
+static int batadv_recv_my_icmp_packet(struct bat_priv *bat_priv,
+				      struct sk_buff *skb, size_t icmp_len)
 {
 	struct hard_iface *primary_if = NULL;
 	struct orig_node *orig_node = NULL;
@@ -331,8 +331,8 @@ out:
 	return ret;
 }
 
-static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv,
-				  struct sk_buff *skb)
+static int batadv_recv_icmp_ttl_exceeded(struct bat_priv *bat_priv,
+					 struct sk_buff *skb)
 {
 	struct hard_iface *primary_if = NULL;
 	struct orig_node *orig_node = NULL;
@@ -431,11 +431,11 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 
 	/* packet for me */
 	if (batadv_is_my_mac(icmp_packet->dst))
-		return recv_my_icmp_packet(bat_priv, skb, hdr_size);
+		return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size);
 
 	/* TTL exceeded */
 	if (icmp_packet->header.ttl < 2)
-		return recv_icmp_ttl_exceeded(bat_priv, skb);
+		return batadv_recv_icmp_ttl_exceeded(bat_priv, skb);
 
 	/* get routing information */
 	orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst);
@@ -473,8 +473,9 @@ out:
  * This method rotates the bonding list and increases the
  * returned router's refcount.
  */
-static struct neigh_node *find_bond_router(struct orig_node *primary_orig,
-					   const struct hard_iface *recv_if)
+static struct neigh_node *
+batadv_find_bond_router(struct orig_node *primary_orig,
+			const struct hard_iface *recv_if)
 {
 	struct neigh_node *tmp_neigh_node;
 	struct neigh_node *router = NULL, *first_candidate = NULL;
@@ -527,8 +528,9 @@ out:
  *
  * Increases the returned router's refcount
  */
-static struct neigh_node *find_ifalter_router(struct orig_node *primary_orig,
-					      const struct hard_iface *recv_if)
+static struct neigh_node *
+batadv_find_ifalter_router(struct orig_node *primary_orig,
+			   const struct hard_iface *recv_if)
 {
 	struct neigh_node *tmp_neigh_node;
 	struct neigh_node *router = NULL, *first_candidate = NULL;
@@ -614,7 +616,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if)
 				   "Routing TT_REQUEST to %pM [%c]\n",
 				   tt_query->dst,
 				   tt_flag);
-			return route_unicast_packet(skb, recv_if);
+			return batadv_route_unicast_packet(skb, recv_if);
 		}
 		break;
 	case TT_RESPONSE:
@@ -643,7 +645,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if)
 				   "Routing TT_RESPONSE to %pM [%c]\n",
 				   tt_query->dst,
 				   tt_flag);
-			return route_unicast_packet(skb, recv_if);
+			return batadv_route_unicast_packet(skb, recv_if);
 		}
 		break;
 	}
@@ -679,7 +681,7 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if)
 	roam_adv_packet = (struct roam_adv_packet *)skb->data;
 
 	if (!batadv_is_my_mac(roam_adv_packet->dst))
-		return route_unicast_packet(skb, recv_if);
+		return batadv_route_unicast_packet(skb, recv_if);
 
 	/* check if it is a backbone gateway. we don't accept
 	 * roaming advertisement from it, as it has the same
@@ -783,9 +785,9 @@ struct neigh_node *batadv_find_router(struct bat_priv *bat_priv,
 	batadv_neigh_node_free_ref(router);
 
 	if (bonding_enabled)
-		router = find_bond_router(primary_orig_node, recv_if);
+		router = batadv_find_bond_router(primary_orig_node, recv_if);
 	else
-		router = find_ifalter_router(primary_orig_node, recv_if);
+		router = batadv_find_ifalter_router(primary_orig_node, recv_if);
 
 return_router:
 	if (router && router->if_incoming->if_status != IF_ACTIVE)
@@ -801,7 +803,7 @@ err:
 	return NULL;
 }
 
-static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
+static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size)
 {
 	struct ethhdr *ethhdr;
 
@@ -826,7 +828,8 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size)
 	return 0;
 }
 
-static int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
+static int batadv_route_unicast_packet(struct sk_buff *skb,
+				       struct hard_iface *recv_if)
 {
 	struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface);
 	struct orig_node *orig_node = NULL;
@@ -911,8 +914,8 @@ out:
 	return ret;
 }
 
-static int check_unicast_ttvn(struct bat_priv *bat_priv,
-			       struct sk_buff *skb) {
+static int batadv_check_unicast_ttvn(struct bat_priv *bat_priv,
+				     struct sk_buff *skb) {
 	uint8_t curr_ttvn;
 	struct orig_node *orig_node;
 	struct ethhdr *ethhdr;
@@ -994,10 +997,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 	struct unicast_packet *unicast_packet;
 	int hdr_size = sizeof(*unicast_packet);
 
-	if (check_unicast_packet(skb, hdr_size) < 0)
+	if (batadv_check_unicast_packet(skb, hdr_size) < 0)
 		return NET_RX_DROP;
 
-	if (!check_unicast_ttvn(bat_priv, skb))
+	if (!batadv_check_unicast_ttvn(bat_priv, skb))
 		return NET_RX_DROP;
 
 	unicast_packet = (struct unicast_packet *)skb->data;
@@ -1009,7 +1012,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if)
 		return NET_RX_SUCCESS;
 	}
 
-	return route_unicast_packet(skb, recv_if);
+	return batadv_route_unicast_packet(skb, recv_if);
 }
 
 int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
@@ -1021,10 +1024,10 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
 	struct sk_buff *new_skb = NULL;
 	int ret;
 
-	if (check_unicast_packet(skb, hdr_size) < 0)
+	if (batadv_check_unicast_packet(skb, hdr_size) < 0)
 		return NET_RX_DROP;
 
-	if (!check_unicast_ttvn(bat_priv, skb))
+	if (!batadv_check_unicast_ttvn(bat_priv, skb))
 		return NET_RX_DROP;
 
 	unicast_packet = (struct unicast_frag_packet *)skb->data;
@@ -1046,7 +1049,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
 		return NET_RX_SUCCESS;
 	}
 
-	return route_unicast_packet(skb, recv_if);
+	return batadv_route_unicast_packet(skb, recv_if);
 }
 
 
-- 
1.7.9.4

^ permalink raw reply related

* [PATCH 19/20] batman-adv: Prefix soft-interface local static functions with batadv_
From: Antonio Quartulli @ 2012-06-25  6:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Antonio Quartulli
In-Reply-To: <1340607284-29950-1-git-send-email-ordex@autistici.org>

From: Sven Eckelmann <sven@narfation.org>

All non-static symbols of batman-adv were prefixed with batadv_ to avoid
collisions with other symbols of the kernel. Other symbols of batman-adv
should use the same prefix to keep the naming scheme consistent.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/soft-interface.c |   80 ++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index af676b8..2de1d74 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -37,23 +37,23 @@
 #include "bridge_loop_avoidance.h"
 
 
-static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
-static void bat_get_drvinfo(struct net_device *dev,
-			    struct ethtool_drvinfo *info);
-static u32 bat_get_msglevel(struct net_device *dev);
-static void bat_set_msglevel(struct net_device *dev, u32 value);
-static u32 bat_get_link(struct net_device *dev);
+static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
+static void batadv_get_drvinfo(struct net_device *dev,
+			       struct ethtool_drvinfo *info);
+static u32 batadv_get_msglevel(struct net_device *dev);
+static void batadv_set_msglevel(struct net_device *dev, u32 value);
+static u32 batadv_get_link(struct net_device *dev);
 static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data);
 static void batadv_get_ethtool_stats(struct net_device *dev,
 				     struct ethtool_stats *stats, u64 *data);
 static int batadv_get_sset_count(struct net_device *dev, int stringset);
 
-static const struct ethtool_ops bat_ethtool_ops = {
-	.get_settings = bat_get_settings,
-	.get_drvinfo = bat_get_drvinfo,
-	.get_msglevel = bat_get_msglevel,
-	.set_msglevel = bat_set_msglevel,
-	.get_link = bat_get_link,
+static const struct ethtool_ops batadv_ethtool_ops = {
+	.get_settings = batadv_get_settings,
+	.get_drvinfo = batadv_get_drvinfo,
+	.get_msglevel = batadv_get_msglevel,
+	.set_msglevel = batadv_set_msglevel,
+	.get_link = batadv_get_link,
 	.get_strings = batadv_get_strings,
 	.get_ethtool_stats = batadv_get_ethtool_stats,
 	.get_sset_count = batadv_get_sset_count,
@@ -78,25 +78,25 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
 	return 0;
 }
 
-static int interface_open(struct net_device *dev)
+static int batadv_interface_open(struct net_device *dev)
 {
 	netif_start_queue(dev);
 	return 0;
 }
 
-static int interface_release(struct net_device *dev)
+static int batadv_interface_release(struct net_device *dev)
 {
 	netif_stop_queue(dev);
 	return 0;
 }
 
-static struct net_device_stats *interface_stats(struct net_device *dev)
+static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
 {
 	struct bat_priv *bat_priv = netdev_priv(dev);
 	return &bat_priv->stats;
 }
 
-static int interface_set_mac_addr(struct net_device *dev, void *p)
+static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 {
 	struct bat_priv *bat_priv = netdev_priv(dev);
 	struct sockaddr *addr = p;
@@ -116,7 +116,7 @@ static int interface_set_mac_addr(struct net_device *dev, void *p)
 	return 0;
 }
 
-static int interface_change_mtu(struct net_device *dev, int new_mtu)
+static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
 {
 	/* check ranges */
 	if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
@@ -127,7 +127,8 @@ static int interface_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static int interface_tx(struct sk_buff *skb, struct net_device *soft_iface)
+static int batadv_interface_tx(struct sk_buff *skb,
+			       struct net_device *soft_iface)
 {
 	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
 	struct bat_priv *bat_priv = netdev_priv(soft_iface);
@@ -323,23 +324,23 @@ out:
 	return;
 }
 
-static const struct net_device_ops bat_netdev_ops = {
-	.ndo_open = interface_open,
-	.ndo_stop = interface_release,
-	.ndo_get_stats = interface_stats,
-	.ndo_set_mac_address = interface_set_mac_addr,
-	.ndo_change_mtu = interface_change_mtu,
-	.ndo_start_xmit = interface_tx,
+static const struct net_device_ops batadv_netdev_ops = {
+	.ndo_open = batadv_interface_open,
+	.ndo_stop = batadv_interface_release,
+	.ndo_get_stats = batadv_interface_stats,
+	.ndo_set_mac_address = batadv_interface_set_mac_addr,
+	.ndo_change_mtu = batadv_interface_change_mtu,
+	.ndo_start_xmit = batadv_interface_tx,
 	.ndo_validate_addr = eth_validate_addr
 };
 
-static void interface_setup(struct net_device *dev)
+static void batadv_interface_setup(struct net_device *dev)
 {
 	struct bat_priv *priv = netdev_priv(dev);
 
 	ether_setup(dev);
 
-	dev->netdev_ops = &bat_netdev_ops;
+	dev->netdev_ops = &batadv_netdev_ops;
 	dev->destructor = free_netdev;
 	dev->tx_queue_len = 0;
 
@@ -353,7 +354,7 @@ static void interface_setup(struct net_device *dev)
 	/* generate random address */
 	eth_hw_addr_random(dev);
 
-	SET_ETHTOOL_OPS(dev, &bat_ethtool_ops);
+	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
 
 	memset(priv, 0, sizeof(*priv));
 }
@@ -364,7 +365,8 @@ struct net_device *batadv_softif_create(const char *name)
 	struct bat_priv *bat_priv;
 	int ret;
 
-	soft_iface = alloc_netdev(sizeof(*bat_priv), name, interface_setup);
+	soft_iface = alloc_netdev(sizeof(*bat_priv), name,
+				  batadv_interface_setup);
 
 	if (!soft_iface)
 		goto out;
@@ -456,14 +458,14 @@ void batadv_softif_destroy(struct net_device *soft_iface)
 
 int batadv_softif_is_valid(const struct net_device *net_dev)
 {
-	if (net_dev->netdev_ops->ndo_start_xmit == interface_tx)
+	if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
 		return 1;
 
 	return 0;
 }
 
 /* ethtool */
-static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	cmd->supported = 0;
 	cmd->advertising = 0;
@@ -479,8 +481,8 @@ static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	return 0;
 }
 
-static void bat_get_drvinfo(struct net_device *dev,
-			    struct ethtool_drvinfo *info)
+static void batadv_get_drvinfo(struct net_device *dev,
+			       struct ethtool_drvinfo *info)
 {
 	strcpy(info->driver, "B.A.T.M.A.N. advanced");
 	strcpy(info->version, SOURCE_VERSION);
@@ -488,16 +490,16 @@ static void bat_get_drvinfo(struct net_device *dev,
 	strcpy(info->bus_info, "batman");
 }
 
-static u32 bat_get_msglevel(struct net_device *dev)
+static u32 batadv_get_msglevel(struct net_device *dev)
 {
 	return -EOPNOTSUPP;
 }
 
-static void bat_set_msglevel(struct net_device *dev, u32 value)
+static void batadv_set_msglevel(struct net_device *dev, u32 value)
 {
 }
 
-static u32 bat_get_link(struct net_device *dev)
+static u32 batadv_get_link(struct net_device *dev)
 {
 	return 1;
 }
@@ -508,7 +510,7 @@ static u32 bat_get_link(struct net_device *dev)
  */
 static const struct {
 	const char name[ETH_GSTRING_LEN];
-} bat_counters_strings[] = {
+} batadv_counters_strings[] = {
 	{ "forward" },
 	{ "forward_bytes" },
 	{ "mgmt_tx" },
@@ -527,8 +529,8 @@ static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
 			       uint8_t *data)
 {
 	if (stringset == ETH_SS_STATS)
-		memcpy(data, bat_counters_strings,
-		       sizeof(bat_counters_strings));
+		memcpy(data, batadv_counters_strings,
+		       sizeof(batadv_counters_strings));
 }
 
 static void batadv_get_ethtool_stats(struct net_device *dev,
-- 
1.7.9.4

^ permalink raw reply related

* [PATCH 20/20] batman-adv: Prefix translation-table local static functions with batadv_
From: Antonio Quartulli @ 2012-06-25  6:54 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Sven Eckelmann, Antonio Quartulli
In-Reply-To: <1340607284-29950-1-git-send-email-ordex@autistici.org>

From: Sven Eckelmann <sven@narfation.org>

All non-static symbols of batman-adv were prefixed with batadv_ to avoid
collisions with other symbols of the kernel. Other symbols of batman-adv
should use the same prefix to keep the naming scheme consistent.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/translation-table.c |  540 +++++++++++++++++++-----------------
 1 file changed, 282 insertions(+), 258 deletions(-)

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 2eff22f..f7a6152 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -29,13 +29,14 @@
 
 #include <linux/crc16.h>
 
-static void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
-			  struct orig_node *orig_node);
-static void tt_purge(struct work_struct *work);
-static void tt_global_del_orig_list(struct tt_global_entry *tt_global_entry);
+static void batadv_send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
+				 struct orig_node *orig_node);
+static void batadv_tt_purge(struct work_struct *work);
+static void
+batadv_tt_global_del_orig_list(struct tt_global_entry *tt_global_entry);
 
 /* returns 1 if they are the same mac addr */
-static int compare_tt(const struct hlist_node *node, const void *data2)
+static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
 {
 	const void *data1 = container_of(node, struct tt_common_entry,
 					 hash_entry);
@@ -43,15 +44,15 @@ static int compare_tt(const struct hlist_node *node, const void *data2)
 	return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0);
 }
 
-static void tt_start_timer(struct bat_priv *bat_priv)
+static void batadv_tt_start_timer(struct bat_priv *bat_priv)
 {
-	INIT_DELAYED_WORK(&bat_priv->tt_work, tt_purge);
+	INIT_DELAYED_WORK(&bat_priv->tt_work, batadv_tt_purge);
 	queue_delayed_work(batadv_event_workqueue, &bat_priv->tt_work,
 			   msecs_to_jiffies(5000));
 }
 
-static struct tt_common_entry *tt_hash_find(struct hashtable_t *hash,
-					    const void *data)
+static struct tt_common_entry *batadv_tt_hash_find(struct hashtable_t *hash,
+						   const void *data)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -80,26 +81,26 @@ static struct tt_common_entry *tt_hash_find(struct hashtable_t *hash,
 	return tt_common_entry_tmp;
 }
 
-static struct tt_local_entry *tt_local_hash_find(struct bat_priv *bat_priv,
-						 const void *data)
+static struct tt_local_entry *
+batadv_tt_local_hash_find(struct bat_priv *bat_priv, const void *data)
 {
 	struct tt_common_entry *tt_common_entry;
 	struct tt_local_entry *tt_local_entry = NULL;
 
-	tt_common_entry = tt_hash_find(bat_priv->tt_local_hash, data);
+	tt_common_entry = batadv_tt_hash_find(bat_priv->tt_local_hash, data);
 	if (tt_common_entry)
 		tt_local_entry = container_of(tt_common_entry,
 					      struct tt_local_entry, common);
 	return tt_local_entry;
 }
 
-static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv,
-						   const void *data)
+static struct tt_global_entry *
+batadv_tt_global_hash_find(struct bat_priv *bat_priv, const void *data)
 {
 	struct tt_common_entry *tt_common_entry;
 	struct tt_global_entry *tt_global_entry = NULL;
 
-	tt_common_entry = tt_hash_find(bat_priv->tt_global_hash, data);
+	tt_common_entry = batadv_tt_hash_find(bat_priv->tt_global_hash, data);
 	if (tt_common_entry)
 		tt_global_entry = container_of(tt_common_entry,
 					       struct tt_global_entry, common);
@@ -107,13 +108,14 @@ static struct tt_global_entry *tt_global_hash_find(struct bat_priv *bat_priv,
 
 }
 
-static void tt_local_entry_free_ref(struct tt_local_entry *tt_local_entry)
+static void
+batadv_tt_local_entry_free_ref(struct tt_local_entry *tt_local_entry)
 {
 	if (atomic_dec_and_test(&tt_local_entry->common.refcount))
 		kfree_rcu(tt_local_entry, common.rcu);
 }
 
-static void tt_global_entry_free_rcu(struct rcu_head *rcu)
+static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
 {
 	struct tt_common_entry *tt_common_entry;
 	struct tt_global_entry *tt_global_entry;
@@ -125,16 +127,17 @@ static void tt_global_entry_free_rcu(struct rcu_head *rcu)
 	kfree(tt_global_entry);
 }
 
-static void tt_global_entry_free_ref(struct tt_global_entry *tt_global_entry)
+static void
+batadv_tt_global_entry_free_ref(struct tt_global_entry *tt_global_entry)
 {
 	if (atomic_dec_and_test(&tt_global_entry->common.refcount)) {
-		tt_global_del_orig_list(tt_global_entry);
+		batadv_tt_global_del_orig_list(tt_global_entry);
 		call_rcu(&tt_global_entry->common.rcu,
-			 tt_global_entry_free_rcu);
+			 batadv_tt_global_entry_free_rcu);
 	}
 }
 
-static void tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
 {
 	struct tt_orig_list_entry *orig_entry;
 
@@ -144,13 +147,14 @@ static void tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
 	kfree(orig_entry);
 }
 
-static void tt_orig_list_entry_free_ref(struct tt_orig_list_entry *orig_entry)
+static void
+batadv_tt_orig_list_entry_free_ref(struct tt_orig_list_entry *orig_entry)
 {
-	call_rcu(&orig_entry->rcu, tt_orig_list_entry_free_rcu);
+	call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
 }
 
-static void tt_local_event(struct bat_priv *bat_priv, const uint8_t *addr,
-			   uint8_t flags)
+static void batadv_tt_local_event(struct bat_priv *bat_priv,
+				  const uint8_t *addr, uint8_t flags)
 {
 	struct tt_change_node *tt_change_node;
 
@@ -176,7 +180,7 @@ int batadv_tt_len(int changes_num)
 	return changes_num * sizeof(struct tt_change);
 }
 
-static int tt_local_init(struct bat_priv *bat_priv)
+static int batadv_tt_local_init(struct bat_priv *bat_priv)
 {
 	if (bat_priv->tt_local_hash)
 		return 0;
@@ -200,7 +204,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	struct tt_orig_list_entry *orig_entry;
 	int hash_added;
 
-	tt_local_entry = tt_local_hash_find(bat_priv, addr);
+	tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
 
 	if (tt_local_entry) {
 		tt_local_entry->last_seen = jiffies;
@@ -234,21 +238,21 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	 */
 	tt_local_entry->common.flags |= TT_CLIENT_NEW;
 
-	hash_added = batadv_hash_add(bat_priv->tt_local_hash, compare_tt,
+	hash_added = batadv_hash_add(bat_priv->tt_local_hash, batadv_compare_tt,
 				     batadv_choose_orig,
 				     &tt_local_entry->common,
 				     &tt_local_entry->common.hash_entry);
 
 	if (unlikely(hash_added != 0)) {
 		/* remove the reference for the hash */
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_local_entry_free_ref(tt_local_entry);
 		goto out;
 	}
 
-	tt_local_event(bat_priv, addr, tt_local_entry->common.flags);
+	batadv_tt_local_event(bat_priv, addr, tt_local_entry->common.flags);
 
 	/* remove address from global hash if present */
-	tt_global_entry = tt_global_hash_find(bat_priv, addr);
+	tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
 
 	/* Check whether it is a roaming! */
 	if (tt_global_entry) {
@@ -258,8 +262,9 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 		hlist_for_each_entry_rcu(orig_entry, node, head, list) {
 			orig_entry->orig_node->tt_poss_change = true;
 
-			send_roam_adv(bat_priv, tt_global_entry->common.addr,
-				      orig_entry->orig_node);
+			batadv_send_roam_adv(bat_priv,
+					     tt_global_entry->common.addr,
+					     orig_entry->orig_node);
 		}
 		rcu_read_unlock();
 		/* The global entry has to be marked as ROAMING and
@@ -270,14 +275,15 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	}
 out:
 	if (tt_local_entry)
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_local_entry_free_ref(tt_local_entry);
 	if (tt_global_entry)
-		tt_global_entry_free_ref(tt_global_entry);
+		batadv_tt_global_entry_free_ref(tt_global_entry);
 }
 
-static void tt_realloc_packet_buff(unsigned char **packet_buff,
-				   int *packet_buff_len, int min_packet_len,
-				   int new_packet_len)
+static void batadv_tt_realloc_packet_buff(unsigned char **packet_buff,
+					  int *packet_buff_len,
+					  int min_packet_len,
+					  int new_packet_len)
 {
 	unsigned char *new_buff;
 
@@ -292,9 +298,10 @@ static void tt_realloc_packet_buff(unsigned char **packet_buff,
 	}
 }
 
-static void tt_prepare_packet_buff(struct bat_priv *bat_priv,
-				   unsigned char **packet_buff,
-				   int *packet_buff_len, int min_packet_len)
+static void batadv_tt_prepare_packet_buff(struct bat_priv *bat_priv,
+					  unsigned char **packet_buff,
+					  int *packet_buff_len,
+					  int min_packet_len)
 {
 	struct hard_iface *primary_if;
 	int req_len;
@@ -310,23 +317,24 @@ static void tt_prepare_packet_buff(struct bat_priv *bat_priv,
 	if ((!primary_if) || (req_len > primary_if->soft_iface->mtu))
 		req_len = min_packet_len;
 
-	tt_realloc_packet_buff(packet_buff, packet_buff_len,
-			       min_packet_len, req_len);
+	batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
+				      min_packet_len, req_len);
 
 	if (primary_if)
 		batadv_hardif_free_ref(primary_if);
 }
 
-static int tt_changes_fill_buff(struct bat_priv *bat_priv,
-				unsigned char **packet_buff,
-				int *packet_buff_len, int min_packet_len)
+static int batadv_tt_changes_fill_buff(struct bat_priv *bat_priv,
+				       unsigned char **packet_buff,
+				       int *packet_buff_len,
+				       int min_packet_len)
 {
 	struct tt_change_node *entry, *safe;
 	int count = 0, tot_changes = 0, new_len;
 	unsigned char *tt_buff;
 
-	tt_prepare_packet_buff(bat_priv, packet_buff,
-			       packet_buff_len, min_packet_len);
+	batadv_tt_prepare_packet_buff(bat_priv, packet_buff,
+				      packet_buff_len, min_packet_len);
 
 	new_len = *packet_buff_len - min_packet_len;
 	tt_buff = *packet_buff + min_packet_len;
@@ -428,12 +436,12 @@ out:
 	return ret;
 }
 
-static void tt_local_set_pending(struct bat_priv *bat_priv,
-				 struct tt_local_entry *tt_local_entry,
-				 uint16_t flags, const char *message)
+static void batadv_tt_local_set_pending(struct bat_priv *bat_priv,
+					struct tt_local_entry *tt_local_entry,
+					uint16_t flags, const char *message)
 {
-	tt_local_event(bat_priv, tt_local_entry->common.addr,
-		       tt_local_entry->common.flags | flags);
+	batadv_tt_local_event(bat_priv, tt_local_entry->common.addr,
+			      tt_local_entry->common.flags | flags);
 
 	/* The local client has to be marked as "pending to be removed" but has
 	 * to be kept in the table in order to send it in a full table
@@ -451,18 +459,19 @@ void batadv_tt_local_remove(struct bat_priv *bat_priv, const uint8_t *addr,
 {
 	struct tt_local_entry *tt_local_entry = NULL;
 
-	tt_local_entry = tt_local_hash_find(bat_priv, addr);
+	tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
 	if (!tt_local_entry)
 		goto out;
 
-	tt_local_set_pending(bat_priv, tt_local_entry, TT_CLIENT_DEL |
-			     (roaming ? TT_CLIENT_ROAM : NO_FLAGS), message);
+	batadv_tt_local_set_pending(bat_priv, tt_local_entry, TT_CLIENT_DEL |
+				    (roaming ? TT_CLIENT_ROAM : NO_FLAGS),
+				    message);
 out:
 	if (tt_local_entry)
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_local_entry_free_ref(tt_local_entry);
 }
 
-static void tt_local_purge(struct bat_priv *bat_priv)
+static void batadv_tt_local_purge(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->tt_local_hash;
 	struct tt_local_entry *tt_local_entry;
@@ -493,15 +502,15 @@ static void tt_local_purge(struct bat_priv *bat_priv)
 						  TT_LOCAL_TIMEOUT))
 				continue;
 
-			tt_local_set_pending(bat_priv, tt_local_entry,
-					     TT_CLIENT_DEL, "timed out");
+			batadv_tt_local_set_pending(bat_priv, tt_local_entry,
+						    TT_CLIENT_DEL, "timed out");
 		}
 		spin_unlock_bh(list_lock);
 	}
 
 }
 
-static void tt_local_table_free(struct bat_priv *bat_priv)
+static void batadv_tt_local_table_free(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash;
 	spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -527,7 +536,7 @@ static void tt_local_table_free(struct bat_priv *bat_priv)
 			tt_local_entry = container_of(tt_common_entry,
 						      struct tt_local_entry,
 						      common);
-			tt_local_entry_free_ref(tt_local_entry);
+			batadv_tt_local_entry_free_ref(tt_local_entry);
 		}
 		spin_unlock_bh(list_lock);
 	}
@@ -537,7 +546,7 @@ static void tt_local_table_free(struct bat_priv *bat_priv)
 	bat_priv->tt_local_hash = NULL;
 }
 
-static int tt_global_init(struct bat_priv *bat_priv)
+static int batadv_tt_global_init(struct bat_priv *bat_priv)
 {
 	if (bat_priv->tt_global_hash)
 		return 0;
@@ -550,7 +559,7 @@ static int tt_global_init(struct bat_priv *bat_priv)
 	return 0;
 }
 
-static void tt_changes_list_free(struct bat_priv *bat_priv)
+static void batadv_tt_changes_list_free(struct bat_priv *bat_priv)
 {
 	struct tt_change_node *entry, *safe;
 
@@ -569,8 +578,8 @@ static void tt_changes_list_free(struct bat_priv *bat_priv)
 /* find out if an orig_node is already in the list of a tt_global_entry.
  * returns 1 if found, 0 otherwise
  */
-static bool tt_global_entry_has_orig(const struct tt_global_entry *entry,
-				     const struct orig_node *orig_node)
+static bool batadv_tt_global_entry_has_orig(const struct tt_global_entry *entry,
+					    const struct orig_node *orig_node)
 {
 	struct tt_orig_list_entry *tmp_orig_entry;
 	const struct hlist_head *head;
@@ -589,9 +598,9 @@ static bool tt_global_entry_has_orig(const struct tt_global_entry *entry,
 	return found;
 }
 
-static void tt_global_add_orig_entry(struct tt_global_entry *tt_global_entry,
-				     struct orig_node *orig_node,
-				     int ttvn)
+static void
+batadv_tt_global_add_orig_entry(struct tt_global_entry *tt_global_entry,
+				struct orig_node *orig_node, int ttvn)
 {
 	struct tt_orig_list_entry *orig_entry;
 
@@ -621,7 +630,7 @@ int batadv_tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
 	int hash_added;
 	struct tt_common_entry *common;
 
-	tt_global_entry = tt_global_hash_find(bat_priv, tt_addr);
+	tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr);
 
 	if (!tt_global_entry) {
 		tt_global_entry = kzalloc(sizeof(*tt_global_entry),
@@ -640,16 +649,18 @@ int batadv_tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
 		spin_lock_init(&tt_global_entry->list_lock);
 
 		hash_added = batadv_hash_add(bat_priv->tt_global_hash,
-					     compare_tt, batadv_choose_orig,
-					     common, &common->hash_entry);
+					     batadv_compare_tt,
+					     batadv_choose_orig, common,
+					     &common->hash_entry);
 
 		if (unlikely(hash_added != 0)) {
 			/* remove the reference for the hash */
-			tt_global_entry_free_ref(tt_global_entry);
+			batadv_tt_global_entry_free_ref(tt_global_entry);
 			goto out_remove;
 		}
 
-		tt_global_add_orig_entry(tt_global_entry, orig_node, ttvn);
+		batadv_tt_global_add_orig_entry(tt_global_entry, orig_node,
+						ttvn);
 	} else {
 		/* there is already a global entry, use this one. */
 
@@ -661,14 +672,15 @@ int batadv_tt_global_add(struct bat_priv *bat_priv, struct orig_node *orig_node,
 		 * new one.
 		 */
 		if (tt_global_entry->common.flags & TT_CLIENT_ROAM) {
-			tt_global_del_orig_list(tt_global_entry);
+			batadv_tt_global_del_orig_list(tt_global_entry);
 			tt_global_entry->common.flags &= ~TT_CLIENT_ROAM;
 			tt_global_entry->roam_at = 0;
 		}
 
-		if (!tt_global_entry_has_orig(tt_global_entry, orig_node))
-			tt_global_add_orig_entry(tt_global_entry, orig_node,
-						 ttvn);
+		if (!batadv_tt_global_entry_has_orig(tt_global_entry,
+						     orig_node))
+			batadv_tt_global_add_orig_entry(tt_global_entry,
+							orig_node, ttvn);
 	}
 
 	if (wifi)
@@ -685,15 +697,16 @@ out_remove:
 	ret = 1;
 out:
 	if (tt_global_entry)
-		tt_global_entry_free_ref(tt_global_entry);
+		batadv_tt_global_entry_free_ref(tt_global_entry);
 	return ret;
 }
 
 /* print all orig nodes who announce the address for this global entry.
  * it is assumed that the caller holds rcu_read_lock();
  */
-static void tt_global_print_entry(struct tt_global_entry *tt_global_entry,
-				  struct seq_file *seq)
+static void
+batadv_tt_global_print_entry(struct tt_global_entry *tt_global_entry,
+			     struct seq_file *seq)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -760,7 +773,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
 			tt_global_entry = container_of(tt_common_entry,
 						       struct tt_global_entry,
 						       common);
-			tt_global_print_entry(tt_global_entry, seq);
+			batadv_tt_global_print_entry(tt_global_entry, seq);
 		}
 		rcu_read_unlock();
 	}
@@ -771,7 +784,8 @@ out:
 }
 
 /* deletes the orig list of a tt_global_entry */
-static void tt_global_del_orig_list(struct tt_global_entry *tt_global_entry)
+static void
+batadv_tt_global_del_orig_list(struct tt_global_entry *tt_global_entry)
 {
 	struct hlist_head *head;
 	struct hlist_node *node, *safe;
@@ -781,16 +795,17 @@ static void tt_global_del_orig_list(struct tt_global_entry *tt_global_entry)
 	head = &tt_global_entry->orig_list;
 	hlist_for_each_entry_safe(orig_entry, node, safe, head, list) {
 		hlist_del_rcu(node);
-		tt_orig_list_entry_free_ref(orig_entry);
+		batadv_tt_orig_list_entry_free_ref(orig_entry);
 	}
 	spin_unlock_bh(&tt_global_entry->list_lock);
 
 }
 
-static void tt_global_del_orig_entry(struct bat_priv *bat_priv,
-				     struct tt_global_entry *tt_global_entry,
-				     struct orig_node *orig_node,
-				     const char *message)
+static void
+batadv_tt_global_del_orig_entry(struct bat_priv *bat_priv,
+				struct tt_global_entry *tt_global_entry,
+				struct orig_node *orig_node,
+				const char *message)
 {
 	struct hlist_head *head;
 	struct hlist_node *node, *safe;
@@ -805,22 +820,22 @@ static void tt_global_del_orig_entry(struct bat_priv *bat_priv,
 				   orig_node->orig,
 				   tt_global_entry->common.addr, message);
 			hlist_del_rcu(node);
-			tt_orig_list_entry_free_ref(orig_entry);
+			batadv_tt_orig_list_entry_free_ref(orig_entry);
 		}
 	}
 	spin_unlock_bh(&tt_global_entry->list_lock);
 }
 
-static void tt_global_del_struct(struct bat_priv *bat_priv,
-				 struct tt_global_entry *tt_global_entry,
-				 const char *message)
+static void batadv_tt_global_del_struct(struct bat_priv *bat_priv,
+					struct tt_global_entry *tt_global_entry,
+					const char *message)
 {
 	batadv_dbg(DBG_TT, bat_priv, "Deleting global tt entry %pM: %s\n",
 		   tt_global_entry->common.addr, message);
 
-	batadv_hash_remove(bat_priv->tt_global_hash, compare_tt,
+	batadv_hash_remove(bat_priv->tt_global_hash, batadv_compare_tt,
 			   batadv_choose_orig, tt_global_entry->common.addr);
-	tt_global_entry_free_ref(tt_global_entry);
+	batadv_tt_global_entry_free_ref(tt_global_entry);
 
 }
 
@@ -828,10 +843,10 @@ static void tt_global_del_struct(struct bat_priv *bat_priv,
  * within tt_global entry. If yes, we set the TT_CLIENT_ROAM flag and the timer,
  * otherwise we simply remove the originator scheduled for deletion.
  */
-static void tt_global_del_roaming(struct bat_priv *bat_priv,
-				  struct tt_global_entry *tt_global_entry,
-				  struct orig_node *orig_node,
-				  const char *message)
+static void
+batadv_tt_global_del_roaming(struct bat_priv *bat_priv,
+			     struct tt_global_entry *tt_global_entry,
+			     struct orig_node *orig_node, const char *message)
 {
 	bool last_entry = true;
 	struct hlist_head *head;
@@ -860,31 +875,31 @@ static void tt_global_del_roaming(struct bat_priv *bat_priv,
 		/* there is another entry, we can simply delete this
 		 * one and can still use the other one.
 		 */
-		tt_global_del_orig_entry(bat_priv, tt_global_entry,
-					 orig_node, message);
+		batadv_tt_global_del_orig_entry(bat_priv, tt_global_entry,
+						orig_node, message);
 }
 
 
 
-static void tt_global_del(struct bat_priv *bat_priv,
-			  struct orig_node *orig_node,
-			  const unsigned char *addr,
-			  const char *message, bool roaming)
+static void batadv_tt_global_del(struct bat_priv *bat_priv,
+				 struct orig_node *orig_node,
+				 const unsigned char *addr,
+				 const char *message, bool roaming)
 {
 	struct tt_global_entry *tt_global_entry = NULL;
-	struct tt_local_entry *tt_local_entry = NULL;
+	struct tt_local_entry *local_entry = NULL;
 
-	tt_global_entry = tt_global_hash_find(bat_priv, addr);
+	tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
 	if (!tt_global_entry)
 		goto out;
 
 	if (!roaming) {
-		tt_global_del_orig_entry(bat_priv, tt_global_entry, orig_node,
-					 message);
+		batadv_tt_global_del_orig_entry(bat_priv, tt_global_entry,
+						orig_node, message);
 
 		if (hlist_empty(&tt_global_entry->orig_list))
-			tt_global_del_struct(bat_priv, tt_global_entry,
-					     message);
+			batadv_tt_global_del_struct(bat_priv, tt_global_entry,
+						    message);
 
 		goto out;
 	}
@@ -902,29 +917,29 @@ static void tt_global_del(struct bat_priv *bat_priv,
 	 * 2) the client roamed to us => we can directly delete
 	 *    the global entry, since it is useless now.
 	 */
-	tt_local_entry = tt_local_hash_find(bat_priv,
-					    tt_global_entry->common.addr);
-	if (tt_local_entry) {
+	local_entry = batadv_tt_local_hash_find(bat_priv,
+						tt_global_entry->common.addr);
+	if (local_entry) {
 		/* local entry exists, case 2: client roamed to us. */
-		tt_global_del_orig_list(tt_global_entry);
-		tt_global_del_struct(bat_priv, tt_global_entry, message);
+		batadv_tt_global_del_orig_list(tt_global_entry);
+		batadv_tt_global_del_struct(bat_priv, tt_global_entry, message);
 	} else
 		/* no local entry exists, case 1: check for roaming */
-		tt_global_del_roaming(bat_priv, tt_global_entry, orig_node,
-				      message);
+		batadv_tt_global_del_roaming(bat_priv, tt_global_entry,
+					     orig_node, message);
 
 
 out:
 	if (tt_global_entry)
-		tt_global_entry_free_ref(tt_global_entry);
-	if (tt_local_entry)
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_global_entry_free_ref(tt_global_entry);
+	if (local_entry)
+		batadv_tt_local_entry_free_ref(local_entry);
 }
 
 void batadv_tt_global_del_orig(struct bat_priv *bat_priv,
 			       struct orig_node *orig_node, const char *message)
 {
-	struct tt_global_entry *tt_global_entry;
+	struct tt_global_entry *global_entry;
 	struct tt_common_entry *tt_common_entry;
 	uint32_t i;
 	struct hashtable_t *hash = bat_priv->tt_global_hash;
@@ -942,20 +957,19 @@ void batadv_tt_global_del_orig(struct bat_priv *bat_priv,
 		spin_lock_bh(list_lock);
 		hlist_for_each_entry_safe(tt_common_entry, node, safe,
 					  head, hash_entry) {
-			tt_global_entry = container_of(tt_common_entry,
-						       struct tt_global_entry,
-						       common);
+			global_entry = container_of(tt_common_entry,
+						    struct tt_global_entry,
+						    common);
 
-			tt_global_del_orig_entry(bat_priv, tt_global_entry,
-						 orig_node, message);
+			batadv_tt_global_del_orig_entry(bat_priv, global_entry,
+							orig_node, message);
 
-			if (hlist_empty(&tt_global_entry->orig_list)) {
+			if (hlist_empty(&global_entry->orig_list)) {
 				batadv_dbg(DBG_TT, bat_priv,
 					   "Deleting global tt entry %pM: %s\n",
-					   tt_global_entry->common.addr,
-					   message);
+					   global_entry->common.addr, message);
 				hlist_del_rcu(node);
-				tt_global_entry_free_ref(tt_global_entry);
+				batadv_tt_global_entry_free_ref(global_entry);
 			}
 		}
 		spin_unlock_bh(list_lock);
@@ -964,7 +978,7 @@ void batadv_tt_global_del_orig(struct bat_priv *bat_priv,
 	orig_node->tt_initialised = false;
 }
 
-static void tt_global_roam_purge(struct bat_priv *bat_priv)
+static void batadv_tt_global_roam_purge(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->tt_global_hash;
 	struct tt_common_entry *tt_common_entry;
@@ -995,14 +1009,14 @@ static void tt_global_roam_purge(struct bat_priv *bat_priv)
 				   tt_global_entry->common.addr);
 
 			hlist_del_rcu(node);
-			tt_global_entry_free_ref(tt_global_entry);
+			batadv_tt_global_entry_free_ref(tt_global_entry);
 		}
 		spin_unlock_bh(list_lock);
 	}
 
 }
 
-static void tt_global_table_free(struct bat_priv *bat_priv)
+static void batadv_tt_global_table_free(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash;
 	spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -1028,7 +1042,7 @@ static void tt_global_table_free(struct bat_priv *bat_priv)
 			tt_global_entry = container_of(tt_common_entry,
 						       struct tt_global_entry,
 						       common);
-			tt_global_entry_free_ref(tt_global_entry);
+			batadv_tt_global_entry_free_ref(tt_global_entry);
 		}
 		spin_unlock_bh(list_lock);
 	}
@@ -1038,8 +1052,8 @@ static void tt_global_table_free(struct bat_priv *bat_priv)
 	bat_priv->tt_global_hash = NULL;
 }
 
-static bool _is_ap_isolated(struct tt_local_entry *tt_local_entry,
-			    struct tt_global_entry *tt_global_entry)
+static bool _batadv_is_ap_isolated(struct tt_local_entry *tt_local_entry,
+				   struct tt_global_entry *tt_global_entry)
 {
 	bool ret = false;
 
@@ -1064,19 +1078,20 @@ struct orig_node *batadv_transtable_search(struct bat_priv *bat_priv,
 	int best_tq;
 
 	if (src && atomic_read(&bat_priv->ap_isolation)) {
-		tt_local_entry = tt_local_hash_find(bat_priv, src);
+		tt_local_entry = batadv_tt_local_hash_find(bat_priv, src);
 		if (!tt_local_entry)
 			goto out;
 	}
 
-	tt_global_entry = tt_global_hash_find(bat_priv, addr);
+	tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
 	if (!tt_global_entry)
 		goto out;
 
 	/* check whether the clients should not communicate due to AP
 	 * isolation
 	 */
-	if (tt_local_entry && _is_ap_isolated(tt_local_entry, tt_global_entry))
+	if (tt_local_entry &&
+	    _batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
 		goto out;
 
 	best_tq = 0;
@@ -1100,16 +1115,16 @@ struct orig_node *batadv_transtable_search(struct bat_priv *bat_priv,
 	rcu_read_unlock();
 out:
 	if (tt_global_entry)
-		tt_global_entry_free_ref(tt_global_entry);
+		batadv_tt_global_entry_free_ref(tt_global_entry);
 	if (tt_local_entry)
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_local_entry_free_ref(tt_local_entry);
 
 	return orig_node;
 }
 
 /* Calculates the checksum of the local table of a given orig_node */
-static uint16_t tt_global_crc(struct bat_priv *bat_priv,
-			      struct orig_node *orig_node)
+static uint16_t batadv_tt_global_crc(struct bat_priv *bat_priv,
+				     struct orig_node *orig_node)
 {
 	uint16_t total = 0, total_one;
 	struct hashtable_t *hash = bat_priv->tt_global_hash;
@@ -1140,8 +1155,8 @@ static uint16_t tt_global_crc(struct bat_priv *bat_priv,
 			/* find out if this global entry is announced by this
 			 * originator
 			 */
-			if (!tt_global_entry_has_orig(tt_global_entry,
-						      orig_node))
+			if (!batadv_tt_global_entry_has_orig(tt_global_entry,
+							     orig_node))
 				continue;
 
 			total_one = 0;
@@ -1190,7 +1205,7 @@ static uint16_t batadv_tt_local_crc(struct bat_priv *bat_priv)
 	return total;
 }
 
-static void tt_req_list_free(struct bat_priv *bat_priv)
+static void batadv_tt_req_list_free(struct bat_priv *bat_priv)
 {
 	struct tt_req_node *node, *safe;
 
@@ -1204,10 +1219,10 @@ static void tt_req_list_free(struct bat_priv *bat_priv)
 	spin_unlock_bh(&bat_priv->tt_req_list_lock);
 }
 
-static void tt_save_orig_buffer(struct bat_priv *bat_priv,
-				struct orig_node *orig_node,
-				const unsigned char *tt_buff,
-				uint8_t tt_num_changes)
+static void batadv_tt_save_orig_buffer(struct bat_priv *bat_priv,
+				       struct orig_node *orig_node,
+				       const unsigned char *tt_buff,
+				       uint8_t tt_num_changes)
 {
 	uint16_t tt_buff_len = batadv_tt_len(tt_num_changes);
 
@@ -1227,7 +1242,7 @@ static void tt_save_orig_buffer(struct bat_priv *bat_priv,
 	spin_unlock_bh(&orig_node->tt_buff_lock);
 }
 
-static void tt_req_purge(struct bat_priv *bat_priv)
+static void batadv_tt_req_purge(struct bat_priv *bat_priv)
 {
 	struct tt_req_node *node, *safe;
 
@@ -1244,8 +1259,8 @@ static void tt_req_purge(struct bat_priv *bat_priv)
 /* returns the pointer to the new tt_req_node struct if no request
  * has already been issued for this orig_node, NULL otherwise
  */
-static struct tt_req_node *new_tt_req_node(struct bat_priv *bat_priv,
-					  struct orig_node *orig_node)
+static struct tt_req_node *batadv_new_tt_req_node(struct bat_priv *bat_priv,
+						  struct orig_node *orig_node)
 {
 	struct tt_req_node *tt_req_node_tmp, *tt_req_node = NULL;
 
@@ -1271,7 +1286,8 @@ unlock:
 }
 
 /* data_ptr is useless here, but has to be kept to respect the prototype */
-static int tt_local_valid_entry(const void *entry_ptr, const void *data_ptr)
+static int batadv_tt_local_valid_entry(const void *entry_ptr,
+				       const void *data_ptr)
 {
 	const struct tt_common_entry *tt_common_entry = entry_ptr;
 
@@ -1280,7 +1296,8 @@ static int tt_local_valid_entry(const void *entry_ptr, const void *data_ptr)
 	return 1;
 }
 
-static int tt_global_valid_entry(const void *entry_ptr, const void *data_ptr)
+static int batadv_tt_global_valid(const void *entry_ptr,
+				  const void *data_ptr)
 {
 	const struct tt_common_entry *tt_common_entry = entry_ptr;
 	const struct tt_global_entry *tt_global_entry;
@@ -1292,15 +1309,15 @@ static int tt_global_valid_entry(const void *entry_ptr, const void *data_ptr)
 	tt_global_entry = container_of(tt_common_entry, struct tt_global_entry,
 				       common);
 
-	return tt_global_entry_has_orig(tt_global_entry, orig_node);
+	return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node);
 }
 
-static struct sk_buff *tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
-					      struct hashtable_t *hash,
-					      struct hard_iface *primary_if,
-					      int (*valid_cb)(const void *,
-							      const void *),
-					      void *cb_data)
+static struct sk_buff *
+batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
+			      struct hashtable_t *hash,
+			      struct hard_iface *primary_if,
+			      int (*valid_cb)(const void *, const void *),
+			      void *cb_data)
 {
 	struct tt_common_entry *tt_common_entry;
 	struct tt_query_packet *tt_response;
@@ -1361,9 +1378,10 @@ out:
 	return skb;
 }
 
-static int send_tt_request(struct bat_priv *bat_priv,
-			   struct orig_node *dst_orig_node,
-			   uint8_t ttvn, uint16_t tt_crc, bool full_table)
+static int batadv_send_tt_request(struct bat_priv *bat_priv,
+				  struct orig_node *dst_orig_node,
+				  uint8_t ttvn, uint16_t tt_crc,
+				  bool full_table)
 {
 	struct sk_buff *skb = NULL;
 	struct tt_query_packet *tt_request;
@@ -1379,7 +1397,7 @@ static int send_tt_request(struct bat_priv *bat_priv,
 	/* The new tt_req will be issued only if I'm not waiting for a
 	 * reply from the same orig_node yet
 	 */
-	tt_req_node = new_tt_req_node(bat_priv, dst_orig_node);
+	tt_req_node = batadv_new_tt_req_node(bat_priv, dst_orig_node);
 	if (!tt_req_node)
 		goto out;
 
@@ -1434,8 +1452,8 @@ out:
 	return ret;
 }
 
-static bool send_other_tt_response(struct bat_priv *bat_priv,
-				   struct tt_query_packet *tt_request)
+static bool batadv_send_other_tt_response(struct bat_priv *bat_priv,
+					  struct tt_query_packet *tt_request)
 {
 	struct orig_node *req_dst_orig_node = NULL, *res_dst_orig_node = NULL;
 	struct neigh_node *neigh_node = NULL;
@@ -1515,10 +1533,11 @@ static bool send_other_tt_response(struct bat_priv *bat_priv,
 						sizeof(struct tt_change);
 		ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
 
-		skb = tt_response_fill_table(tt_len, ttvn,
-					     bat_priv->tt_global_hash,
-					     primary_if, tt_global_valid_entry,
-					     req_dst_orig_node);
+		skb = batadv_tt_response_fill_table(tt_len, ttvn,
+						    bat_priv->tt_global_hash,
+						    primary_if,
+						    batadv_tt_global_valid,
+						    req_dst_orig_node);
 		if (!skb)
 			goto out;
 
@@ -1563,8 +1582,8 @@ out:
 	return ret;
 
 }
-static bool send_my_tt_response(struct bat_priv *bat_priv,
-				struct tt_query_packet *tt_request)
+static bool batadv_send_my_tt_response(struct bat_priv *bat_priv,
+				       struct tt_query_packet *tt_request)
 {
 	struct orig_node *orig_node = NULL;
 	struct neigh_node *neigh_node = NULL;
@@ -1635,10 +1654,11 @@ static bool send_my_tt_response(struct bat_priv *bat_priv,
 						sizeof(struct tt_change);
 		ttvn = (uint8_t)atomic_read(&bat_priv->ttvn);
 
-		skb = tt_response_fill_table(tt_len, ttvn,
-					     bat_priv->tt_local_hash,
-					     primary_if, tt_local_valid_entry,
-					     NULL);
+		skb = batadv_tt_response_fill_table(tt_len, ttvn,
+						    bat_priv->tt_local_hash,
+						    primary_if,
+						    batadv_tt_local_valid_entry,
+						    NULL);
 		if (!skb)
 			goto out;
 
@@ -1689,26 +1709,28 @@ bool batadv_send_tt_response(struct bat_priv *bat_priv,
 		if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src))
 			return true;
 
-		return send_my_tt_response(bat_priv, tt_request);
+		return batadv_send_my_tt_response(bat_priv, tt_request);
 	} else {
-		return send_other_tt_response(bat_priv, tt_request);
+		return batadv_send_other_tt_response(bat_priv, tt_request);
 	}
 }
 
-static void _tt_update_changes(struct bat_priv *bat_priv,
-			       struct orig_node *orig_node,
-			       struct tt_change *tt_change,
-			       uint16_t tt_num_changes, uint8_t ttvn)
+static void _batadv_tt_update_changes(struct bat_priv *bat_priv,
+				      struct orig_node *orig_node,
+				      struct tt_change *tt_change,
+				      uint16_t tt_num_changes, uint8_t ttvn)
 {
 	int i;
 	int is_wifi;
+	int roams;
 
 	for (i = 0; i < tt_num_changes; i++) {
 		if ((tt_change + i)->flags & TT_CLIENT_DEL) {
-			tt_global_del(bat_priv, orig_node,
-				      (tt_change + i)->addr,
-				      "tt removed by changes",
-				      (tt_change + i)->flags & TT_CLIENT_ROAM);
+			roams = (tt_change + i)->flags & TT_CLIENT_ROAM;
+			batadv_tt_global_del(bat_priv, orig_node,
+					     (tt_change + i)->addr,
+					      "tt removed by changes",
+					      roams);
 		} else {
 			is_wifi = (tt_change + i)->flags & TT_CLIENT_WIFI;
 			if (!batadv_tt_global_add(bat_priv, orig_node,
@@ -1726,8 +1748,8 @@ static void _tt_update_changes(struct bat_priv *bat_priv,
 	orig_node->tt_initialised = true;
 }
 
-static void tt_fill_gtable(struct bat_priv *bat_priv,
-			   struct tt_query_packet *tt_response)
+static void batadv_tt_fill_gtable(struct bat_priv *bat_priv,
+				  struct tt_query_packet *tt_response)
 {
 	struct orig_node *orig_node = NULL;
 
@@ -1738,9 +1760,10 @@ static void tt_fill_gtable(struct bat_priv *bat_priv,
 	/* Purge the old table first.. */
 	batadv_tt_global_del_orig(bat_priv, orig_node, "Received full table");
 
-	_tt_update_changes(bat_priv, orig_node,
-			   (struct tt_change *)(tt_response + 1),
-			   ntohs(tt_response->tt_data), tt_response->ttvn);
+	_batadv_tt_update_changes(bat_priv, orig_node,
+				  (struct tt_change *)(tt_response + 1),
+				  ntohs(tt_response->tt_data),
+				  tt_response->ttvn);
 
 	spin_lock_bh(&orig_node->tt_buff_lock);
 	kfree(orig_node->tt_buff);
@@ -1755,16 +1778,16 @@ out:
 		batadv_orig_node_free_ref(orig_node);
 }
 
-static void tt_update_changes(struct bat_priv *bat_priv,
-			      struct orig_node *orig_node,
-			      uint16_t tt_num_changes, uint8_t ttvn,
-			      struct tt_change *tt_change)
+static void batadv_tt_update_changes(struct bat_priv *bat_priv,
+				     struct orig_node *orig_node,
+				     uint16_t tt_num_changes, uint8_t ttvn,
+				     struct tt_change *tt_change)
 {
-	_tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes,
-			   ttvn);
+	_batadv_tt_update_changes(bat_priv, orig_node, tt_change,
+				  tt_num_changes, ttvn);
 
-	tt_save_orig_buffer(bat_priv, orig_node, (unsigned char *)tt_change,
-			    tt_num_changes);
+	batadv_tt_save_orig_buffer(bat_priv, orig_node,
+				   (unsigned char *)tt_change, tt_num_changes);
 	atomic_set(&orig_node->last_ttvn, ttvn);
 }
 
@@ -1773,7 +1796,7 @@ bool batadv_is_my_client(struct bat_priv *bat_priv, const uint8_t *addr)
 	struct tt_local_entry *tt_local_entry = NULL;
 	bool ret = false;
 
-	tt_local_entry = tt_local_hash_find(bat_priv, addr);
+	tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr);
 	if (!tt_local_entry)
 		goto out;
 	/* Check if the client has been logically deleted (but is kept for
@@ -1784,7 +1807,7 @@ bool batadv_is_my_client(struct bat_priv *bat_priv, const uint8_t *addr)
 	ret = true;
 out:
 	if (tt_local_entry)
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_local_entry_free_ref(tt_local_entry);
 	return ret;
 }
 
@@ -1809,12 +1832,12 @@ void batadv_handle_tt_response(struct bat_priv *bat_priv,
 		goto out;
 
 	if (tt_response->flags & TT_FULL_TABLE)
-		tt_fill_gtable(bat_priv, tt_response);
+		batadv_tt_fill_gtable(bat_priv, tt_response);
 	else
-		tt_update_changes(bat_priv, orig_node,
-				  ntohs(tt_response->tt_data),
-				  tt_response->ttvn,
-				  (struct tt_change *)(tt_response + 1));
+		batadv_tt_update_changes(bat_priv, orig_node,
+					 ntohs(tt_response->tt_data),
+					 tt_response->ttvn,
+					 (struct tt_change *)(tt_response + 1));
 
 	/* Delete the tt_req_node from pending tt_requests list */
 	spin_lock_bh(&bat_priv->tt_req_list_lock);
@@ -1827,7 +1850,7 @@ void batadv_handle_tt_response(struct bat_priv *bat_priv,
 	spin_unlock_bh(&bat_priv->tt_req_list_lock);
 
 	/* Recalculate the CRC for this orig_node and store it */
-	orig_node->tt_crc = tt_global_crc(bat_priv, orig_node);
+	orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node);
 	/* Roaming phase is over: tables are in sync again. I can
 	 * unset the flag
 	 */
@@ -1841,20 +1864,20 @@ int batadv_tt_init(struct bat_priv *bat_priv)
 {
 	int ret;
 
-	ret = tt_local_init(bat_priv);
+	ret = batadv_tt_local_init(bat_priv);
 	if (ret < 0)
 		return ret;
 
-	ret = tt_global_init(bat_priv);
+	ret = batadv_tt_global_init(bat_priv);
 	if (ret < 0)
 		return ret;
 
-	tt_start_timer(bat_priv);
+	batadv_tt_start_timer(bat_priv);
 
 	return 1;
 }
 
-static void tt_roam_list_free(struct bat_priv *bat_priv)
+static void batadv_tt_roam_list_free(struct bat_priv *bat_priv)
 {
 	struct tt_roam_node *node, *safe;
 
@@ -1868,7 +1891,7 @@ static void tt_roam_list_free(struct bat_priv *bat_priv)
 	spin_unlock_bh(&bat_priv->tt_roam_list_lock);
 }
 
-static void tt_roam_purge(struct bat_priv *bat_priv)
+static void batadv_tt_roam_purge(struct bat_priv *bat_priv)
 {
 	struct tt_roam_node *node, *safe;
 
@@ -1889,8 +1912,8 @@ static void tt_roam_purge(struct bat_priv *bat_priv)
  *
  * returns true if the ROAMING_ADV can be sent, false otherwise
  */
-static bool tt_check_roam_count(struct bat_priv *bat_priv,
-				uint8_t *client)
+static bool batadv_tt_check_roam_count(struct bat_priv *bat_priv,
+				       uint8_t *client)
 {
 	struct tt_roam_node *tt_roam_node;
 	bool ret = false;
@@ -1932,8 +1955,8 @@ unlock:
 	return ret;
 }
 
-static void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
-			  struct orig_node *orig_node)
+static void batadv_send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
+				 struct orig_node *orig_node)
 {
 	struct neigh_node *neigh_node = NULL;
 	struct sk_buff *skb = NULL;
@@ -1944,7 +1967,7 @@ static void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client,
 	/* before going on we have to check whether the client has
 	 * already roamed to us too many times
 	 */
-	if (!tt_check_roam_count(bat_priv, client))
+	if (!batadv_tt_check_roam_count(bat_priv, client))
 		goto out;
 
 	skb = dev_alloc_skb(sizeof(struct roam_adv_packet) + ETH_HLEN);
@@ -1988,30 +2011,30 @@ out:
 	return;
 }
 
-static void tt_purge(struct work_struct *work)
+static void batadv_tt_purge(struct work_struct *work)
 {
 	struct delayed_work *delayed_work =
 		container_of(work, struct delayed_work, work);
 	struct bat_priv *bat_priv =
 		container_of(delayed_work, struct bat_priv, tt_work);
 
-	tt_local_purge(bat_priv);
-	tt_global_roam_purge(bat_priv);
-	tt_req_purge(bat_priv);
-	tt_roam_purge(bat_priv);
+	batadv_tt_local_purge(bat_priv);
+	batadv_tt_global_roam_purge(bat_priv);
+	batadv_tt_req_purge(bat_priv);
+	batadv_tt_roam_purge(bat_priv);
 
-	tt_start_timer(bat_priv);
+	batadv_tt_start_timer(bat_priv);
 }
 
 void batadv_tt_free(struct bat_priv *bat_priv)
 {
 	cancel_delayed_work_sync(&bat_priv->tt_work);
 
-	tt_local_table_free(bat_priv);
-	tt_global_table_free(bat_priv);
-	tt_req_list_free(bat_priv);
-	tt_changes_list_free(bat_priv);
-	tt_roam_list_free(bat_priv);
+	batadv_tt_local_table_free(bat_priv);
+	batadv_tt_global_table_free(bat_priv);
+	batadv_tt_req_list_free(bat_priv);
+	batadv_tt_changes_list_free(bat_priv);
+	batadv_tt_roam_list_free(bat_priv);
 
 	kfree(bat_priv->tt_buff);
 }
@@ -2019,8 +2042,8 @@ void batadv_tt_free(struct bat_priv *bat_priv)
 /* This function will enable or disable the specified flags for all the entries
  * in the given hash table and returns the number of modified entries
  */
-static uint16_t tt_set_flags(struct hashtable_t *hash, uint16_t flags,
-			     bool enable)
+static uint16_t batadv_tt_set_flags(struct hashtable_t *hash, uint16_t flags,
+				    bool enable)
 {
 	uint32_t i;
 	uint16_t changed_num = 0;
@@ -2055,7 +2078,7 @@ out:
 }
 
 /* Purge out all the tt local entries marked with TT_CLIENT_PENDING */
-static void tt_local_purge_pending_clients(struct bat_priv *bat_priv)
+static void batadv_tt_local_purge_pending_clients(struct bat_priv *bat_priv)
 {
 	struct hashtable_t *hash = bat_priv->tt_local_hash;
 	struct tt_common_entry *tt_common_entry;
@@ -2087,28 +2110,28 @@ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv)
 			tt_local_entry = container_of(tt_common_entry,
 						      struct tt_local_entry,
 						      common);
-			tt_local_entry_free_ref(tt_local_entry);
+			batadv_tt_local_entry_free_ref(tt_local_entry);
 		}
 		spin_unlock_bh(list_lock);
 	}
 
 }
 
-static int tt_commit_changes(struct bat_priv *bat_priv,
-			     unsigned char **packet_buff, int *packet_buff_len,
-			     int packet_min_len)
+static int batadv_tt_commit_changes(struct bat_priv *bat_priv,
+				    unsigned char **packet_buff,
+				    int *packet_buff_len, int packet_min_len)
 {
 	uint16_t changed_num = 0;
 
 	if (atomic_read(&bat_priv->tt_local_changes) < 1)
 		return -ENOENT;
 
-	changed_num = tt_set_flags(bat_priv->tt_local_hash,
-				   TT_CLIENT_NEW, false);
+	changed_num = batadv_tt_set_flags(bat_priv->tt_local_hash,
+					  TT_CLIENT_NEW, false);
 
 	/* all reset entries have to be counted as local entries */
 	atomic_add(changed_num, &bat_priv->num_local_tt);
-	tt_local_purge_pending_clients(bat_priv);
+	batadv_tt_local_purge_pending_clients(bat_priv);
 	bat_priv->tt_crc = batadv_tt_local_crc(bat_priv);
 
 	/* Increment the TTVN only once per OGM interval */
@@ -2121,8 +2144,8 @@ static int tt_commit_changes(struct bat_priv *bat_priv,
 	/* reset the sending counter */
 	atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX);
 
-	return tt_changes_fill_buff(bat_priv, packet_buff,
-				    packet_buff_len, packet_min_len);
+	return batadv_tt_changes_fill_buff(bat_priv, packet_buff,
+					   packet_buff_len, packet_min_len);
 }
 
 /* when calling this function (hard_iface == primary_if) has to be true */
@@ -2133,14 +2156,15 @@ int batadv_tt_append_diff(struct bat_priv *bat_priv,
 	int tt_num_changes;
 
 	/* if at least one change happened */
-	tt_num_changes = tt_commit_changes(bat_priv, packet_buff,
-					   packet_buff_len, packet_min_len);
+	tt_num_changes = batadv_tt_commit_changes(bat_priv, packet_buff,
+						  packet_buff_len,
+						  packet_min_len);
 
 	/* if the changes have been sent often enough */
 	if ((tt_num_changes < 0) &&
 	    (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))) {
-		tt_realloc_packet_buff(packet_buff, packet_buff_len,
-				       packet_min_len, packet_min_len);
+		batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
+					      packet_min_len, packet_min_len);
 		tt_num_changes = 0;
 	}
 
@@ -2157,24 +2181,24 @@ bool batadv_is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src,
 	if (!atomic_read(&bat_priv->ap_isolation))
 		return false;
 
-	tt_local_entry = tt_local_hash_find(bat_priv, dst);
+	tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst);
 	if (!tt_local_entry)
 		goto out;
 
-	tt_global_entry = tt_global_hash_find(bat_priv, src);
+	tt_global_entry = batadv_tt_global_hash_find(bat_priv, src);
 	if (!tt_global_entry)
 		goto out;
 
-	if (_is_ap_isolated(tt_local_entry, tt_global_entry))
+	if (_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
 		goto out;
 
 	ret = false;
 
 out:
 	if (tt_global_entry)
-		tt_global_entry_free_ref(tt_global_entry);
+		batadv_tt_global_entry_free_ref(tt_global_entry);
 	if (tt_local_entry)
-		tt_local_entry_free_ref(tt_local_entry);
+		batadv_tt_local_entry_free_ref(tt_local_entry);
 	return ret;
 }
 
@@ -2204,14 +2228,14 @@ void batadv_tt_update_orig(struct bat_priv *bat_priv,
 			goto request_table;
 		}
 
-		tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn,
-				  (struct tt_change *)tt_buff);
+		batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
+					 ttvn, (struct tt_change *)tt_buff);
 
 		/* Even if we received the precomputed crc with the OGM, we
 		 * prefer to recompute it to spot any possible inconsistency
 		 * in the global table
 		 */
-		orig_node->tt_crc = tt_global_crc(bat_priv, orig_node);
+		orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node);
 
 		/* The ttvn alone is not enough to guarantee consistency
 		 * because a single value could represent different states
@@ -2240,8 +2264,8 @@ request_table:
 				   "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u crc: %u last_crc: %u num_changes: %u)\n",
 				   orig_node->orig, ttvn, orig_ttvn, tt_crc,
 				   orig_node->tt_crc, tt_num_changes);
-			send_tt_request(bat_priv, orig_node, ttvn, tt_crc,
-					full_table);
+			batadv_send_tt_request(bat_priv, orig_node, ttvn,
+					       tt_crc, full_table);
 			return;
 		}
 	}
@@ -2257,12 +2281,12 @@ bool batadv_tt_global_client_is_roaming(struct bat_priv *bat_priv,
 	struct tt_global_entry *tt_global_entry;
 	bool ret = false;
 
-	tt_global_entry = tt_global_hash_find(bat_priv, addr);
+	tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr);
 	if (!tt_global_entry)
 		goto out;
 
 	ret = tt_global_entry->common.flags & TT_CLIENT_ROAM;
-	tt_global_entry_free_ref(tt_global_entry);
+	batadv_tt_global_entry_free_ref(tt_global_entry);
 out:
 	return ret;
 }
-- 
1.7.9.4

^ permalink raw reply related

* [PATCH] net/ipv6/route.c: packets originating on device match lo
From: David McCullough @ 2012-06-25  6:50 UTC (permalink / raw)
  To: netdev; +Cc: david_mccullough


Hi all,

Seems like the special case of "iif lo" when using rules to control packets
is broken for IPv6 traffic.  The section from the ip man page is:

   iif NAME
          select  the incoming device to match.  If the interface is loop‐
          back, the rule only matches packets originating from this  host.
          This  means that you may create separate routing tables for for‐
          warded and local packets and, hence, completely segregate them.

If I run the following configuration:

    ifconfig eth0 192.168.0.1 netmask 255.255.255.0
    ip addr add fec0:0:0:2::1/64 dev eth0

    ip route add table 200 unreachable default
    ip -6 route add table 200 unreachable default

    ip rule add to 192.168.0.2 table 200 iif lo
    ip -6 rule add to fec0:0:0:2::2 table 200 iif lo

and then test with:

    # ping 192.168.0.2
    connect: Network is unreachable
    # ping6 fec0:0:0:2::2
    PING fec0:0:0:2::2(fec0:0:0:2::2) 56 data bytes
    64 bytes from fec0:0:0:2::2: icmp_seq=1 ttl=64 time=0.648 ms
    ...

The patch below is against linux-3.4.  I am not sure if its the best
solution but it appears to fix the case above:

    # ping6 fec0:0:0:2::2
    connect: Network is unreachable

and does not appear to affect forwarded packets is the few cases I have
tried.  Tested on linux-3.4 and iproute2-3.4.0, Feedback appreciated :-)

Thanks,
Davidm

Signed-off-by: David McCullough <david_mccullough@mcafee.com>

diff -p -u -r1.1.1.59 route.c
--- linux-3.4/net/ipv6/route.c	21 May 2012 23:15:01 -0000	1.1.1.59
+++ linux-3.4/net/ipv6/route.c	25 Jun 2012 05:43:50 -0000
@@ -931,6 +931,9 @@ struct dst_entry * ip6_route_output(stru
 {
 	int flags = 0;
 
+	if (fl6->flowi6_iif == 0)
+		fl6->flowi6_iif = net->loopback_dev->ifindex;
+		
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 		flags |= RT6_LOOKUP_F_IFACE;
 

-- 
David McCullough,      david_mccullough@mcafee.com,  Ph:+61 734352815
McAfee - SnapGear      http://www.mcafee.com         http://www.uCdot.org

^ permalink raw reply

* Re: [PATCH 1/1] ipheth: add support for iPad
From: Davide Gerhard @ 2012-06-25  7:03 UTC (permalink / raw)
  To: Jesper Juhl; +Cc: Greg Kroah-Hartman, linux-usb, netdev, linux-kernel
In-Reply-To: <alpine.LNX.2.00.1206250208140.30361@swampdragon.chaosbits.net>

=- Jesper Juhl wrote on Mon 25.Jun'12 at  2:09:22 +0200 -=

> On Mon, 25 Jun 2012, rainbow wrote:
> 
> > This adds support for the iPad to the ipheth driver.
> > (product id = 0x129a)
> > 
> > Signed-off-by: rainbow <rainbow@irh.it>
> 
> It is usually very much prefered that people use their real names in 
> Signed-off-by: lines.

Sorry.
I have resubmitted.

/davide

> 
> Please consider re-submitting using your real name.
> 
> -- 
> Jesper Juhl <jj@chaosbits.net>       http://www.chaosbits.net/
> Don't top-post http://www.catb.org/jargon/html/T/top-post.html
> Plain text mails only, please.
> 

-- 
"There is no likelihood man can ever tap the power of the atom." - Robert 
Millikan, Nobel Prize in Physics, 1923.

^ permalink raw reply

* [PATCH 1/1] ipheth: add support for iPad
From: Davide Gerhard @ 2012-06-25  7:04 UTC (permalink / raw)
  To: Greg Kroah-Hartman; +Cc: linux-usb, netdev, linux-kernel, Davide Gerhard

This adds support for the iPad to the ipheth driver.
(product id = 0x129a)

Signed-off-by: Davide Gerhard <rainbow@irh.it>
---
 drivers/net/usb/ipheth.c |    5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 964031e..9c98449 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -59,6 +59,7 @@
 #define USB_PRODUCT_IPHONE_3G   0x1292
 #define USB_PRODUCT_IPHONE_3GS  0x1294
 #define USB_PRODUCT_IPHONE_4	0x1297
+#define USB_PRODUCT_IPAD 0x129a
 #define USB_PRODUCT_IPHONE_4_VZW 0x129c
 #define USB_PRODUCT_IPHONE_4S	0x12a0
 
@@ -101,6 +102,10 @@ static struct usb_device_id ipheth_table[] = {
 		IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
 		IPHETH_USBINTF_PROTO) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(
+		USB_VENDOR_APPLE, USB_PRODUCT_IPAD,
+		IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
+		IPHETH_USBINTF_PROTO) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(
 		USB_VENDOR_APPLE, USB_PRODUCT_IPHONE_4_VZW,
 		IPHETH_USBINTF_CLASS, IPHETH_USBINTF_SUBCLASS,
 		IPHETH_USBINTF_PROTO) },
-- 
1.7.10.2

^ permalink raw reply related

* Re: [PATCH net] net: qmi_wwan: fix Oops while disconnecting
From: Ming Lei @ 2012-06-25  7:15 UTC (permalink / raw)
  To: Oliver Neukum
  Cc: Bjørn Mork, netdev, linux-usb, Marius Bjørnstad Kotsbak
In-Reply-To: <201206250815.39466.oliver@neukum.org>

On Mon, Jun 25, 2012 at 2:15 PM, Oliver Neukum <oliver@neukum.org> wrote:
> Am Montag, 25. Juni 2012, 05:37:20 schrieb Ming Lei:

>> The current problem is caused by the set to NULL without any
>> protection or sync mechanism on it, and it is really a bug.
>
> Minidrivers can test for NULL.
> That may not be enough and locking may be needed.

Any locking isn't needed if the set to NULL is put after
driver_info->unbind,  since ->unbind will call subdriver->disconnect,
which will hold the open/disconnect lock of wdm_mutex.

>
>> Also we didn't say the set to NULL will be cancelled, just delay
>> the clear until it is safe to do it, eg. after complete of unregister_netdev()
>> and driver_info->unbind, when the previous .open/.close has been
>> completed already or the later ones will notice the disconnection
>> early and won't touch usb_get_intfdata() any more.
>
> We can move to after unregister_netdev()
> I am unhappy with it going after unbind.
>

Could you let us know the reason? I think it may let the
patch not necessary.

>> I don't see any races caused by just removing usb_set_intfdata(, NULL)
>> or moving it after driver_info->unbind simply in usbnet_disconnect.
>
> They are not caused. They are harder to detect.
>
>> In fact, suppose that .open/.close and .disconnect are run on different CPUs,
>> there are no any guarantee that .open/.close can always see the clear action
>> immediately. Also, the clear of intfdata may not be observed in .manage_power
>> since usb_set_intfdata(, NULL) may be completed after the lock wdm_mutex
>> operation.
>
> Sure, it is a debugging aid. It has the drawback that minidrivers have
> to be able to deal with intfdata being NULL. That is not hard to do.

The check isn't needed if the set to NULL is put after  driver_info->unbind
in usbnet_disconnect.

I think we can add the comment on it to avoid the unnecessary check.

Thanks,
-- 
Ming Lei

^ permalink raw reply

* Re: [PATCH net] net: qmi_wwan: fix Oops while disconnecting
From: Bjørn Mork @ 2012-06-25  7:24 UTC (permalink / raw)
  To: Ming Lei
  Cc: Oliver Neukum, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, Marius Bjørnstad Kotsbak
In-Reply-To: <CACVXFVN3wJ3NWxSGj-yWCgtDE_sgJT5CZYHwYUWk1MxkphcsTg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

[I don't have anything more to add to the generic usbnet discussion, but
 want to comment on the qmi_wwan issues]

Ming Lei <tom.leiming-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:

> Also, the clear of intfdata may not be observed in .manage_power
> since usb_set_intfdata(, NULL) may be completed after the lock wdm_mutex
> operation.

True, but irrelevant.  The pointer is either valid or NULL.  We don't
need to care about synchronizing the exact time it is set to NULL.

The locking in cdc-wdm will ensure that the pointer is valid while it is
in use by .manage_power, because usbnet_disconnect is prevented from
continuing with free_netdev() while any caller of .manage_power is
running.

> So it is only the sync mechanism that  works on the race even the check is
> added in the patch.  Putting usb_set_intfdata(, NULL) after driver_info->unbind
> should be OK, and it is a general solution for the problem.

There is no problem wrt qmi_wwan and intfdata as long as the NULL test
is added to .manage_power.

Bjørn
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] netfilter: ipvs: fix dst leak in __ip_vs_addr_is_local_v6
From: Eric Dumazet @ 2012-06-25  7:58 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: netfilter-devel, netdev, Wensong Zhang, Simon Horman,
	Julian Anastasov, David Miller

From: Eric Dumazet <edumazet@google.com>

After call to ip6_route_output() we must release dst or we leak it.

Also should test dst->error, as ip6_route_output() never returns NULL.

Use boolean while we are at it.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/netfilter/ipvs/ip_vs_ctl.c |   14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index dd811b8..d43e3c1 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -76,19 +76,19 @@ static void __ip_vs_del_service(struct ip_vs_service *svc);
 
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(struct net *net,
-				    const struct in6_addr *addr)
+static bool __ip_vs_addr_is_local_v6(struct net *net,
+				     const struct in6_addr *addr)
 {
-	struct rt6_info *rt;
 	struct flowi6 fl6 = {
 		.daddr = *addr,
 	};
+	struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
+	bool is_local;
 
-	rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
-	if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
-		return 1;
+	is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
 
-	return 0;
+	dst_release(dst);
+	return is_local;
 }
 #endif
 



^ permalink raw reply related

* Re: [PATCH net] net: qmi_wwan: fix Oops while disconnecting
From: Ming Lei @ 2012-06-25  8:08 UTC (permalink / raw)
  To: Bjørn Mork
  Cc: Oliver Neukum, netdev, linux-usb, Marius Bjørnstad Kotsbak
In-Reply-To: <87hatzhn0k.fsf@nemi.mork.no>

On Mon, Jun 25, 2012 at 3:24 PM, Bjørn Mork <bjorn@mork.no> wrote:

>
> True, but irrelevant.  The pointer is either valid or NULL.  We don't
> need to care about synchronizing the exact time it is set to NULL.
>
> The locking in cdc-wdm will ensure that the pointer is valid while it is
> in use by .manage_power, because usbnet_disconnect is prevented from
> continuing with free_netdev() while any caller of .manage_power is
> running.

What I mean is that the situation is just what moving the set to NULL
is doing.

>> So it is only the sync mechanism that  works on the race even the check is
>> added in the patch.  Putting usb_set_intfdata(, NULL) after driver_info->unbind
>> should be OK, and it is a general solution for the problem.
>
> There is no problem wrt qmi_wwan and intfdata as long as the NULL test
> is added to .manage_power.

It depends on the ARCH or compiler.

Considered there is not any locking/memory barrier between the set to NULL
and read the pointer, also no ACCESS_ONCE on read or store the pointer,
reading in .manage_power may see a invalid pointer if the CPU doesn't
support Store Atomicity or the compiler does a byte-at-a-time optimization
on the store[1].

So why not take the correct way in theory? also it is a general solution,
and we can document its usage.

[1], Paul mentioned it in the previous discussion
http://lkml.org/lkml/2012/6/6/280

Thanks,
-- 
Ming Lei

^ permalink raw reply

* Re: [net-next RFC V3 PATCH 4/6] tuntap: multiqueue support
From: Michael S. Tsirkin @ 2012-06-25  8:25 UTC (permalink / raw)
  To: Jason Wang
  Cc: habanero, netdev, linux-kernel, krkumar2, tahm, akong, davem,
	shemminger, mashirle
In-Reply-To: <20120625061018.6765.76633.stgit@amd-6168-8-1.englab.nay.redhat.com>

On Mon, Jun 25, 2012 at 02:10:18PM +0800, Jason Wang wrote:
> This patch adds multiqueue support for tap device. This is done by abstracting
> each queue as a file/socket and allowing multiple sockets to be attached to the
> tuntap device (an array of tun_file were stored in the tun_struct). Userspace
> could write and read from those files to do the parallel packet
> sending/receiving.
> 
> Unlike the previous single queue implementation, the socket and device were
> loosely coupled, each of them were allowed to go away first. In order to let the
> tx path lockless, netif_tx_loch_bh() is replaced by RCU/NETIF_F_LLTX to
> synchronize between data path and system call.

Don't use LLTX/RCU. It's not worth it.
Use something like netif_set_real_num_tx_queues.

> 
> The tx queue selecting is first based on the recorded rxq index of an skb, it
> there's no such one, then choosing based on rx hashing (skb_get_rxhash()).
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Interestingly macvtap switched to hashing first:
ef0002b577b52941fb147128f30bd1ecfdd3ff6d
(the commit log is corrupted but see what it
does in the patch).
Any idea why?

> ---
>  drivers/net/tun.c |  371 +++++++++++++++++++++++++++++++++--------------------
>  1 files changed, 232 insertions(+), 139 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 8233b0a..5c26757 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -107,6 +107,8 @@ struct tap_filter {
>  	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
>  };
>  
> +#define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)

Why the limit? I am guessing you copied this from macvtap?
This is problematic for a number of reasons:
	- will not play well with migration
	- will not work well for a large guest

Yes, macvtap needs to be fixed too.

I am guessing what it is trying to prevent is queueing
up a huge number of packets?
So just divide the default tx queue limit by the # of queues.

And by the way, for MQ applications maybe we can finally
ignore tx queue altogether and limit the total number
of bytes queued?
To avoid regressions we can make it large like 64M/# queues.
Could be a separate patch I think, and for a single queue
might need a compatible mode though I am not sure.

> +
>  struct tun_file {
>  	struct sock sk;
>  	struct socket socket;
> @@ -114,16 +116,18 @@ struct tun_file {
>  	int vnet_hdr_sz;
>  	struct tap_filter txflt;
>  	atomic_t count;
> -	struct tun_struct *tun;
> +	struct tun_struct __rcu *tun;
>  	struct net *net;
>  	struct fasync_struct *fasync;
>  	unsigned int flags;
> +	u16 queue_index;
>  };
>  
>  struct tun_sock;
>  
>  struct tun_struct {
> -	struct tun_file		*tfile;
> +	struct tun_file		*tfiles[MAX_TAP_QUEUES];
> +	unsigned int            numqueues;
>  	unsigned int 		flags;
>  	uid_t			owner;
>  	gid_t			group;
> @@ -138,80 +142,159 @@ struct tun_struct {
>  #endif
>  };
>  
> -static int tun_attach(struct tun_struct *tun, struct file *file)
> +static DEFINE_SPINLOCK(tun_lock);
> +
> +/*
> + * tun_get_queue(): calculate the queue index
> + *     - if skbs comes from mq nics, we can just borrow
> + *     - if not, calculate from the hash
> + */
> +static struct tun_file *tun_get_queue(struct net_device *dev,
> +				      struct sk_buff *skb)
>  {
> -	struct tun_file *tfile = file->private_data;
> -	int err;
> +	struct tun_struct *tun = netdev_priv(dev);
> +	struct tun_file *tfile = NULL;
> +	int numqueues = tun->numqueues;
> +	__u32 rxq;
>  
> -	ASSERT_RTNL();
> +	BUG_ON(!rcu_read_lock_held());
>  
> -	netif_tx_lock_bh(tun->dev);
> +	if (!numqueues)
> +		goto out;
>  
> -	err = -EINVAL;
> -	if (tfile->tun)
> +	if (numqueues == 1) {
> +		tfile = rcu_dereference(tun->tfiles[0]);

Instead of hacks like this, you can ask for an MQ
flag to be set in SETIFF. Then you won't need to
handle attach/detach at random times.
And most of the scary num_queues checks can go away.
You can then also ask userspace about the max # of queues
to expect if you want to save some memory.


>  		goto out;
> +	}
>  
> -	err = -EBUSY;
> -	if (tun->tfile)
> +	if (likely(skb_rx_queue_recorded(skb))) {
> +		rxq = skb_get_rx_queue(skb);
> +
> +		while (unlikely(rxq >= numqueues))
> +			rxq -= numqueues;
> +
> +		tfile = rcu_dereference(tun->tfiles[rxq]);
>  		goto out;
> +	}
>  
> -	err = 0;
> -	tfile->tun = tun;
> -	tun->tfile = tfile;
> -	netif_carrier_on(tun->dev);
> -	dev_hold(tun->dev);
> -	sock_hold(&tfile->sk);
> -	atomic_inc(&tfile->count);
> +	/* Check if we can use flow to select a queue */
> +	rxq = skb_get_rxhash(skb);
> +	if (rxq) {
> +		u32 idx = ((u64)rxq * numqueues) >> 32;

This completely confuses me. What's the logic here?
How do we even know it's in range?

> +		tfile = rcu_dereference(tun->tfiles[idx]);
> +		goto out;
> +	}
>  
> +	tfile = rcu_dereference(tun->tfiles[0]);
>  out:
> -	netif_tx_unlock_bh(tun->dev);
> -	return err;
> +	return tfile;
>  }
>  
> -static void __tun_detach(struct tun_struct *tun)
> +static int tun_detach(struct tun_file *tfile, bool clean)
>  {
> -	struct tun_file *tfile = tun->tfile;
> -	/* Detach from net device */
> -	netif_tx_lock_bh(tun->dev);
> -	netif_carrier_off(tun->dev);
> -	tun->tfile = NULL;
> -	netif_tx_unlock_bh(tun->dev);
> -
> -	/* Drop read queue */
> -	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
> -
> -	/* Drop the extra count on the net device */
> -	dev_put(tun->dev);
> -}
> +	struct tun_struct *tun;
> +	struct net_device *dev = NULL;
> +	bool destroy = false;
>  
> -static void tun_detach(struct tun_struct *tun)
> -{
> -	rtnl_lock();
> -	__tun_detach(tun);
> -	rtnl_unlock();
> -}
> +	spin_lock(&tun_lock);
>  
> -static struct tun_struct *__tun_get(struct tun_file *tfile)
> -{
> -	struct tun_struct *tun = NULL;
> +	tun = rcu_dereference_protected(tfile->tun,
> +					lockdep_is_held(&tun_lock));
> +	if (tun) {
> +		u16 index = tfile->queue_index;
> +		BUG_ON(index >= tun->numqueues);
> +		dev = tun->dev;
> +
> +		rcu_assign_pointer(tun->tfiles[index],
> +				   tun->tfiles[tun->numqueues - 1]);
> +		tun->tfiles[index]->queue_index = index;
> +		rcu_assign_pointer(tfile->tun, NULL);
> +		--tun->numqueues;
> +		sock_put(&tfile->sk);
>  
> -	if (atomic_inc_not_zero(&tfile->count))
> -		tun = tfile->tun;
> +		if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST))
> +			destroy = true;

Please don't use flags like that. Use dedicated labels and goto there on error.


> +	}
>  
> -	return tun;
> +	spin_unlock(&tun_lock);
> +
> +	synchronize_rcu();
> +	if (clean)
> +		sock_put(&tfile->sk);
> +
> +	if (destroy) {
> +		rtnl_lock();
> +		if (dev->reg_state == NETREG_REGISTERED)
> +			unregister_netdevice(dev);
> +		rtnl_unlock();
> +	}
> +
> +	return 0;
>  }
>  
> -static struct tun_struct *tun_get(struct file *file)
> +static void tun_detach_all(struct net_device *dev)
>  {
> -	return __tun_get(file->private_data);
> +	struct tun_struct *tun = netdev_priv(dev);
> +	struct tun_file *tfile, *tfile_list[MAX_TAP_QUEUES];
> +	int i, j = 0;
> +
> +	spin_lock(&tun_lock);
> +
> +	for (i = 0; i < MAX_TAP_QUEUES && tun->numqueues; i++) {
> +		tfile = rcu_dereference_protected(tun->tfiles[i],
> +						lockdep_is_held(&tun_lock));
> +		BUG_ON(!tfile);
> +		wake_up_all(&tfile->wq.wait);
> +		tfile_list[j++] = tfile;
> +		rcu_assign_pointer(tfile->tun, NULL);
> +		--tun->numqueues;
> +	}
> +	BUG_ON(tun->numqueues != 0);
> +	/* guarantee that any future tun_attach will fail */
> +	tun->numqueues = MAX_TAP_QUEUES;
> +	spin_unlock(&tun_lock);
> +
> +	synchronize_rcu();
> +	for (--j; j >= 0; j--)
> +		sock_put(&tfile_list[j]->sk);
>  }
>  
> -static void tun_put(struct tun_struct *tun)
> +static int tun_attach(struct tun_struct *tun, struct file *file)
>  {
> -	struct tun_file *tfile = tun->tfile;
> +	struct tun_file *tfile = file->private_data;
> +	int err;
> +
> +	ASSERT_RTNL();
> +
> +	spin_lock(&tun_lock);
>  
> -	if (atomic_dec_and_test(&tfile->count))
> -		tun_detach(tfile->tun);
> +	err = -EINVAL;
> +	if (rcu_dereference_protected(tfile->tun, lockdep_is_held(&tun_lock)))
> +		goto out;
> +
> +	err = -EBUSY;
> +	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
> +		goto out;
> +
> +	if (tun->numqueues == MAX_TAP_QUEUES)
> +		goto out;
> +
> +	err = 0;
> +	tfile->queue_index = tun->numqueues;
> +	rcu_assign_pointer(tfile->tun, tun);
> +	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
> +	sock_hold(&tfile->sk);
> +	tun->numqueues++;
> +
> +	if (tun->numqueues == 1)
> +		netif_carrier_on(tun->dev);
> +
> +	/* device is allowed to go away first, so no need to hold extra
> +	 * refcnt. */
> +
> +out:
> +	spin_unlock(&tun_lock);
> +	return err;
>  }
>  
>  /* TAP filtering */
> @@ -331,16 +414,7 @@ static const struct ethtool_ops tun_ethtool_ops;
>  /* Net device detach from fd. */
>  static void tun_net_uninit(struct net_device *dev)
>  {
> -	struct tun_struct *tun = netdev_priv(dev);
> -	struct tun_file *tfile = tun->tfile;
> -
> -	/* Inform the methods they need to stop using the dev.
> -	 */
> -	if (tfile) {
> -		wake_up_all(&tfile->wq.wait);
> -		if (atomic_dec_and_test(&tfile->count))
> -			__tun_detach(tun);
> -	}
> +	tun_detach_all(dev);
>  }
>  
>  /* Net device open. */
> @@ -360,10 +434,10 @@ static int tun_net_close(struct net_device *dev)
>  /* Net device start xmit */
>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
> -	struct tun_struct *tun = netdev_priv(dev);
> -	struct tun_file *tfile = tun->tfile;
> +	struct tun_file *tfile = NULL;
>  
> -	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
> +	rcu_read_lock();
> +	tfile = tun_get_queue(dev, skb);
>  
>  	/* Drop packet if interface is not attached */
>  	if (!tfile)
> @@ -381,7 +455,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  
>  	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
>  	    >= dev->tx_queue_len) {
> -		if (!(tun->flags & TUN_ONE_QUEUE)) {
> +		if (!(tfile->flags & TUN_ONE_QUEUE) &&

Which patch moved flags from tun to tfile?

> +		    !(tfile->flags & TUN_TAP_MQ)) {
>  			/* Normal queueing mode. */
>  			/* Packet scheduler handles dropping of further packets. */
>  			netif_stop_queue(dev);
> @@ -390,7 +465,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  			 * error is more appropriate. */
>  			dev->stats.tx_fifo_errors++;
>  		} else {
> -			/* Single queue mode.
> +			/* Single queue mode or multi queue mode.
>  			 * Driver handles dropping of all packets itself. */

Please don't do this. Stop the queue on overrun as appropriate.
ONE_QUEUE is a legacy hack.

BTW we really should stop queue before we start dropping packets,
but that can be a separate patch.

>  			goto drop;
>  		}
> @@ -408,9 +483,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
>  	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
>  				   POLLRDNORM | POLLRDBAND);
> +	rcu_read_unlock();
>  	return NETDEV_TX_OK;
>  
>  drop:
> +	rcu_read_unlock();
>  	dev->stats.tx_dropped++;
>  	kfree_skb(skb);
>  	return NETDEV_TX_OK;
> @@ -527,16 +604,22 @@ static void tun_net_init(struct net_device *dev)
>  static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
>  {
>  	struct tun_file *tfile = file->private_data;
> -	struct tun_struct *tun = __tun_get(tfile);
> +	struct tun_struct *tun = NULL;
>  	struct sock *sk;
>  	unsigned int mask = 0;
>  
> -	if (!tun)
> +	if (!tfile)
>  		return POLLERR;
>  
> -	sk = tfile->socket.sk;
> +	rcu_read_lock();
> +	tun = rcu_dereference(tfile->tun);
> +	if (!tun) {
> +		rcu_read_unlock();
> +		return POLLERR;
> +	}
> +	rcu_read_unlock();
>  
> -	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
> +	sk = &tfile->sk;
>  
>  	poll_wait(file, &tfile->wq.wait, wait);
>  
> @@ -548,10 +631,12 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
>  	     sock_writeable(sk)))
>  		mask |= POLLOUT | POLLWRNORM;
>  
> -	if (tun->dev->reg_state != NETREG_REGISTERED)
> +	rcu_read_lock();
> +	tun = rcu_dereference(tfile->tun);
> +	if (!tun || tun->dev->reg_state != NETREG_REGISTERED)
>  		mask = POLLERR;
> +	rcu_read_unlock();
>  
> -	tun_put(tun);
>  	return mask;
>  }
>  
> @@ -708,9 +793,12 @@ static ssize_t tun_get_user(struct tun_file *tfile,
>  		skb_shinfo(skb)->gso_segs = 0;
>  	}
>  
> -	tun = __tun_get(tfile);
> -	if (!tun)
> +	rcu_read_lock();
> +	tun = rcu_dereference(tfile->tun);
> +	if (!tun) {
> +		rcu_read_unlock();
>  		return -EBADFD;
> +	}
>  
>  	switch (tfile->flags & TUN_TYPE_MASK) {
>  	case TUN_TUN_DEV:
> @@ -720,26 +808,30 @@ static ssize_t tun_get_user(struct tun_file *tfile,
>  		skb->protocol = eth_type_trans(skb, tun->dev);
>  		break;
>  	}
> -
> -	netif_rx_ni(skb);
>  	tun->dev->stats.rx_packets++;
>  	tun->dev->stats.rx_bytes += len;
> -	tun_put(tun);
> +	rcu_read_unlock();
> +
> +	netif_rx_ni(skb);
> +
>  	return count;
>  
>  err_free:
>  	count = -EINVAL;
>  	kfree_skb(skb);
>  err:
> -	tun = __tun_get(tfile);
> -	if (!tun)
> +	rcu_read_lock();
> +	tun = rcu_dereference(tfile->tun);
> +	if (!tun) {
> +		rcu_read_unlock();
>  		return -EBADFD;
> +	}
>  
>  	if (drop)
>  		tun->dev->stats.rx_dropped++;
>  	if (error)
>  		tun->dev->stats.rx_frame_errors++;
> -	tun_put(tun);
> +	rcu_read_unlock();
>  	return count;
>  }
>  
> @@ -833,12 +925,13 @@ static ssize_t tun_put_user(struct tun_file *tfile,
>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
>  	total += skb->len;
>  
> -	tun = __tun_get(tfile);
> +	rcu_read_lock();
> +	tun = rcu_dereference(tfile->tun);
>  	if (tun) {
>  		tun->dev->stats.tx_packets++;
>  		tun->dev->stats.tx_bytes += len;
> -		tun_put(tun);
>  	}
> +	rcu_read_unlock();
>  
>  	return total;
>  }
> @@ -869,28 +962,31 @@ static ssize_t tun_do_read(struct tun_file *tfile,
>  				break;
>  			}
>  
> -			tun = __tun_get(tfile);
> +			rcu_read_lock();
> +			tun = rcu_dereference(tfile->tun);
>  			if (!tun) {
> -				ret = -EIO;
> +				ret = -EBADFD;

BADFD is for when you get passed something like -1 fd.
Here fd is OK, it's just in a bad state so you can not do IO.


> +				rcu_read_unlock();
>  				break;
>  			}
>  			if (tun->dev->reg_state != NETREG_REGISTERED) {
>  				ret = -EIO;
> -				tun_put(tun);
> +				rcu_read_unlock();
>  				break;
>  			}
> -			tun_put(tun);
> +			rcu_read_unlock();
>  
>  			/* Nothing to read, let's sleep */
>  			schedule();
>  			continue;
>  		}
>  
> -		tun = __tun_get(tfile);
> +		rcu_read_lock();
> +		tun = rcu_dereference(tfile->tun);
>  		if (tun) {
>  			netif_wake_queue(tun->dev);
> -			tun_put(tun);
>  		}
> +		rcu_read_unlock();
>  
>  		ret = tun_put_user(tfile, skb, iv, len);
>  		kfree_skb(skb);
> @@ -1038,6 +1134,9 @@ static int tun_flags(struct tun_struct *tun)
>  	if (tun->flags & TUN_VNET_HDR)
>  		flags |= IFF_VNET_HDR;
>  
> +	if (tun->flags & TUN_TAP_MQ)
> +		flags |= IFF_MULTI_QUEUE;
> +
>  	return flags;
>  }
>  
> @@ -1097,8 +1196,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  		err = tun_attach(tun, file);
>  		if (err < 0)
>  			return err;
> -	}
> -	else {
> +	} else {
>  		char *name;
>  		unsigned long flags = 0;
>  
> @@ -1142,6 +1240,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
>  			TUN_USER_FEATURES;
>  		dev->features = dev->hw_features;
> +		if (ifr->ifr_flags & IFF_MULTI_QUEUE)
> +			dev->features |= NETIF_F_LLTX;
>  
>  		err = register_netdevice(tun->dev);
>  		if (err < 0)
> @@ -1154,7 +1254,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		err = tun_attach(tun, file);
>  		if (err < 0)
> -			goto failed;
> +			goto err_free_dev;
>  	}
>  
>  	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
> @@ -1174,6 +1274,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  	else
>  		tun->flags &= ~TUN_VNET_HDR;
>  
> +	if (ifr->ifr_flags & IFF_MULTI_QUEUE)
> +		tun->flags |= TUN_TAP_MQ;
> +	else
> +		tun->flags &= ~TUN_TAP_MQ;
> +
>  	/* Cache flags from tun device */
>  	tfile->flags = tun->flags;
>  	/* Make sure persistent devices do not get stuck in
> @@ -1187,7 +1292,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  err_free_dev:
>  	free_netdev(dev);
> -failed:
>  	return err;
>  }
>  
> @@ -1264,38 +1368,40 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  				(unsigned int __user*)argp);
>  	}
>  
> -	rtnl_lock();
> -
> -	tun = __tun_get(tfile);
> -	if (cmd == TUNSETIFF && !tun) {
> +	ret = 0;
> +	if (cmd == TUNSETIFF) {
> +		rtnl_lock();
>  		ifr.ifr_name[IFNAMSIZ-1] = '\0';
> -
>  		ret = tun_set_iff(tfile->net, file, &ifr);
> -
> +		rtnl_unlock();
>  		if (ret)
> -			goto unlock;
> -
> +			return ret;
>  		if (copy_to_user(argp, &ifr, ifreq_len))
> -			ret = -EFAULT;
> -		goto unlock;
> +			return -EFAULT;
> +		return ret;
>  	}
>  
> +	rtnl_lock();
> +
> +	rcu_read_lock();
> +
>  	ret = -EBADFD;
> +	tun = rcu_dereference(tfile->tun);
>  	if (!tun)
>  		goto unlock;
> +	else
> +		ret = 0;
>  
> -	tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd);
> -
> -	ret = 0;
>  	switch (cmd) {
>  	case TUNGETIFF:
>  		ret = tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
> +		rcu_read_unlock();
>  		if (ret)
> -			break;
> +			goto out;
>  
>  		if (copy_to_user(argp, &ifr, ifreq_len))
>  			ret = -EFAULT;
> -		break;
> +		goto out;
>  
>  	case TUNSETNOCSUM:
>  		/* Disable/Enable checksum */
> @@ -1357,9 +1463,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  		/* Get hw address */
>  		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
>  		ifr.ifr_hwaddr.sa_family = tun->dev->type;
> +		rcu_read_unlock();
>  		if (copy_to_user(argp, &ifr, ifreq_len))
>  			ret = -EFAULT;
> -		break;
> +		goto out;
>  
>  	case SIOCSIFHWADDR:
>  		/* Set hw address */
> @@ -1375,9 +1482,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  	}
>  
>  unlock:
> +	rcu_read_unlock();
> +out:
>  	rtnl_unlock();
> -	if (tun)
> -		tun_put(tun);
>  	return ret;
>  }
>  
> @@ -1517,6 +1624,11 @@ out:
>  	return ret;
>  }
>  
> +static void tun_sock_destruct(struct sock *sk)
> +{
> +	skb_queue_purge(&sk->sk_receive_queue);
> +}
> +
>  static int tun_chr_open(struct inode *inode, struct file * file)
>  {
>  	struct net *net = current->nsproxy->net_ns;
> @@ -1540,6 +1652,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
>  	sock_init_data(&tfile->socket, &tfile->sk);
>  
>  	tfile->sk.sk_write_space = tun_sock_write_space;
> +	tfile->sk.sk_destruct = tun_sock_destruct;
>  	tfile->sk.sk_sndbuf = INT_MAX;
>  	file->private_data = tfile;
>  
> @@ -1549,31 +1662,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
>  static int tun_chr_close(struct inode *inode, struct file *file)
>  {
>  	struct tun_file *tfile = file->private_data;
> -	struct tun_struct *tun;
> -
> -	tun = __tun_get(tfile);
> -	if (tun) {
> -		struct net_device *dev = tun->dev;
> -
> -		tun_debug(KERN_INFO, tun, "tun_chr_close\n");
> -
> -		__tun_detach(tun);
> -
> -		/* If desirable, unregister the netdevice. */
> -		if (!(tun->flags & TUN_PERSIST)) {
> -			rtnl_lock();
> -			if (dev->reg_state == NETREG_REGISTERED)
> -				unregister_netdevice(dev);
> -			rtnl_unlock();
> -		}
>  
> -		/* drop the reference that netdevice holds */
> -		sock_put(&tfile->sk);
> -
> -	}
> -
> -	/* drop the reference that file holds */
> -	sock_put(&tfile->sk);
> +	tun_detach(tfile, true);
>  
>  	return 0;
>  }
> @@ -1700,14 +1790,17 @@ static void tun_cleanup(void)
>   * holding a reference to the file for as long as the socket is in use. */
>  struct socket *tun_get_socket(struct file *file)
>  {
> -	struct tun_struct *tun;
> +	struct tun_struct *tun = NULL;
>  	struct tun_file *tfile = file->private_data;
>  	if (file->f_op != &tun_fops)
>  		return ERR_PTR(-EINVAL);
> -	tun = tun_get(file);
> -	if (!tun)
> +	rcu_read_lock();
> +	tun = rcu_dereference(tfile->tun);
> +	if (!tun) {
> +		rcu_read_unlock();
>  		return ERR_PTR(-EBADFD);
> -	tun_put(tun);
> +	}
> +	rcu_read_unlock();
>  	return &tfile->socket;
>  }
>  EXPORT_SYMBOL_GPL(tun_get_socket);

^ permalink raw reply

* Re: [PATCH net] net: qmi_wwan: fix Oops while disconnecting
From: Bjørn Mork @ 2012-06-25  8:27 UTC (permalink / raw)
  To: Ming Lei
  Cc: Oliver Neukum, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, Marius Bjørnstad Kotsbak
In-Reply-To: <CACVXFVOPjFSS6Sv6AUCSAs4nywR045QhjYAbN8g6U3adsUbujw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Ming Lei <tom.leiming-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> writes:
> On Mon, Jun 25, 2012 at 3:24 PM, Bjørn Mork <bjorn-yOkvZcmFvRU@public.gmane.org> wrote:
>
>> There is no problem wrt qmi_wwan and intfdata as long as the NULL test
>> is added to .manage_power.
>
> It depends on the ARCH or compiler.
>
> Considered there is not any locking/memory barrier between the set to NULL
> and read the pointer, also no ACCESS_ONCE on read or store the pointer,
> reading in .manage_power may see a invalid pointer if the CPU doesn't
> support Store Atomicity or the compiler does a byte-at-a-time optimization
> on the store[1].
>
> So why not take the correct way in theory? also it is a general solution,
> and we can document its usage.
>
>
> [1], Paul mentioned it in the previous discussion
> http://lkml.org/lkml/2012/6/6/280

I don't believe that is valid for pointers, which hopefully either have
native word size or are protected by the compiler.

In any case, I'm not prepared to deal with the situation that changing a
pointer from valid to NULL can cause intermediate invalid pointers.  If
such an architecture/compiler exists, then I believe the sanest thing to
do is to let it fail randomly.


Bjørn
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [net-next RFC V3 PATCH 1/6] tuntap: move socket to tun_file
From: Michael S. Tsirkin @ 2012-06-25  8:27 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, linux-kernel, krkumar2, tahm, akong, davem, shemminger,
	mashirle
In-Reply-To: <20120625060945.6765.98618.stgit@amd-6168-8-1.englab.nay.redhat.com>

On Mon, Jun 25, 2012 at 02:09:45PM +0800, Jason Wang wrote:
> This patch moves socket structure from tun_device and to tun_file in order to
> let it possbile for multiple sockets to be attached to tun/tap device. The
> reference between tap device and socket was setup during TUNSETIFF as
> usual.
> 
> After this patch, we can go further towards multiqueue tun/tap support by
> storing an array of pointers of tun_file in tun_device.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>

I think this changes visible userspace
behaviour for persistent devices.

Specifically, with this patch, TUNSETSNDBUF and TUNATTACHFILTER won't
be effective if you close and reopen the device, right?

It's possible that no application uses either of these
ioctls on persistent tun devices at the moment,
but seems safer to avoid changing such behaviour.


> ---
>  drivers/net/tun.c |  352 +++++++++++++++++++++++++++--------------------------
>  1 files changed, 181 insertions(+), 171 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 987aeef..1f27789 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -108,9 +108,16 @@ struct tap_filter {
>  };
>  
>  struct tun_file {
> +	struct sock sk;
> +	struct socket socket;
> +	struct socket_wq wq;
> +	int vnet_hdr_sz;
> +	struct tap_filter txflt;
>  	atomic_t count;
>  	struct tun_struct *tun;
>  	struct net *net;
> +	struct fasync_struct *fasync;
> +	unsigned int flags;
>  };
>  
>  struct tun_sock;
> @@ -125,29 +132,12 @@ struct tun_struct {
>  	netdev_features_t	set_features;
>  #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
>  			  NETIF_F_TSO6|NETIF_F_UFO)
> -	struct fasync_struct	*fasync;
> -
> -	struct tap_filter       txflt;
> -	struct socket		socket;
> -	struct socket_wq	wq;
> -
> -	int			vnet_hdr_sz;
>  
>  #ifdef TUN_DEBUG
>  	int debug;
>  #endif
>  };
>  
> -struct tun_sock {
> -	struct sock		sk;
> -	struct tun_struct	*tun;
> -};
> -
> -static inline struct tun_sock *tun_sk(struct sock *sk)
> -{
> -	return container_of(sk, struct tun_sock, sk);
> -}
> -
>  static int tun_attach(struct tun_struct *tun, struct file *file)
>  {
>  	struct tun_file *tfile = file->private_data;
> @@ -168,10 +158,9 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>  	err = 0;
>  	tfile->tun = tun;
>  	tun->tfile = tfile;
> -	tun->socket.file = file;
>  	netif_carrier_on(tun->dev);
>  	dev_hold(tun->dev);
> -	sock_hold(tun->socket.sk);
> +	sock_hold(&tfile->sk);
>  	atomic_inc(&tfile->count);
>  
>  out:
> @@ -181,15 +170,15 @@ out:
>  
>  static void __tun_detach(struct tun_struct *tun)
>  {
> +	struct tun_file *tfile = tun->tfile;
>  	/* Detach from net device */
>  	netif_tx_lock_bh(tun->dev);
>  	netif_carrier_off(tun->dev);
>  	tun->tfile = NULL;
> -	tun->socket.file = NULL;
>  	netif_tx_unlock_bh(tun->dev);
>  
>  	/* Drop read queue */
> -	skb_queue_purge(&tun->socket.sk->sk_receive_queue);
> +	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
>  
>  	/* Drop the extra count on the net device */
>  	dev_put(tun->dev);
> @@ -348,19 +337,12 @@ static void tun_net_uninit(struct net_device *dev)
>  	/* Inform the methods they need to stop using the dev.
>  	 */
>  	if (tfile) {
> -		wake_up_all(&tun->wq.wait);
> +		wake_up_all(&tfile->wq.wait);
>  		if (atomic_dec_and_test(&tfile->count))
>  			__tun_detach(tun);
>  	}
>  }
>  
> -static void tun_free_netdev(struct net_device *dev)
> -{
> -	struct tun_struct *tun = netdev_priv(dev);
> -
> -	sk_release_kernel(tun->socket.sk);
> -}
> -
>  /* Net device open. */
>  static int tun_net_open(struct net_device *dev)
>  {
> @@ -379,24 +361,26 @@ static int tun_net_close(struct net_device *dev)
>  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
>  	struct tun_struct *tun = netdev_priv(dev);
> +	struct tun_file *tfile = tun->tfile;
>  
>  	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
>  
>  	/* Drop packet if interface is not attached */
> -	if (!tun->tfile)
> +	if (!tfile)
>  		goto drop;
>  
>  	/* Drop if the filter does not like it.
>  	 * This is a noop if the filter is disabled.
>  	 * Filter can be enabled only for the TAP devices. */
> -	if (!check_filter(&tun->txflt, skb))
> +	if (!check_filter(&tfile->txflt, skb))
>  		goto drop;
>  
> -	if (tun->socket.sk->sk_filter &&
> -	    sk_filter(tun->socket.sk, skb))
> +	if (tfile->socket.sk->sk_filter &&
> +	    sk_filter(tfile->socket.sk, skb))
>  		goto drop;
>  
> -	if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) {
> +	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
> +	    >= dev->tx_queue_len) {
>  		if (!(tun->flags & TUN_ONE_QUEUE)) {
>  			/* Normal queueing mode. */
>  			/* Packet scheduler handles dropping of further packets. */
> @@ -417,12 +401,12 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  	skb_orphan(skb);
>  
>  	/* Enqueue packet */
> -	skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb);
> +	skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
>  
>  	/* Notify and wake up reader process */
> -	if (tun->flags & TUN_FASYNC)
> -		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
> -	wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
> +	if (tfile->flags & TUN_FASYNC)
> +		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
> +	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
>  				   POLLRDNORM | POLLRDBAND);
>  	return NETDEV_TX_OK;
>  
> @@ -550,11 +534,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
>  	if (!tun)
>  		return POLLERR;
>  
> -	sk = tun->socket.sk;
> +	sk = tfile->socket.sk;
>  
>  	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
>  
> -	poll_wait(file, &tun->wq.wait, wait);
> +	poll_wait(file, &tfile->wq.wait, wait);
>  
>  	if (!skb_queue_empty(&sk->sk_receive_queue))
>  		mask |= POLLIN | POLLRDNORM;
> @@ -573,11 +557,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
>  
>  /* prepad is the amount to reserve at front.  len is length after that.
>   * linear is a hint as to how much to copy (usually headers). */
> -static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
> +static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
>  				     size_t prepad, size_t len,
>  				     size_t linear, int noblock)
>  {
> -	struct sock *sk = tun->socket.sk;
> +	struct sock *sk = tfile->socket.sk;
>  	struct sk_buff *skb;
>  	int err;
>  
> @@ -601,7 +585,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
>  }
>  
>  /* Get packet from user space buffer */
> -static ssize_t tun_get_user(struct tun_struct *tun,
> +static ssize_t tun_get_user(struct tun_file *tfile,
>  			    const struct iovec *iv, size_t count,
>  			    int noblock)
>  {
> @@ -610,8 +594,10 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  	size_t len = count, align = NET_SKB_PAD;
>  	struct virtio_net_hdr gso = { 0 };
>  	int offset = 0;
> +	struct tun_struct *tun = NULL;
> +	bool drop = false, error = false;
>  
> -	if (!(tun->flags & TUN_NO_PI)) {
> +	if (!(tfile->flags & TUN_NO_PI)) {
>  		if ((len -= sizeof(pi)) > count)
>  			return -EINVAL;
>  
> @@ -620,8 +606,9 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  		offset += sizeof(pi);
>  	}
>  
> -	if (tun->flags & TUN_VNET_HDR) {
> -		if ((len -= tun->vnet_hdr_sz) > count)
> +	if (tfile->flags & TUN_VNET_HDR) {
> +		len -= tfile->vnet_hdr_sz;
> +		if (len > count)
>  			return -EINVAL;
>  
>  		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
> @@ -633,41 +620,43 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  
>  		if (gso.hdr_len > len)
>  			return -EINVAL;
> -		offset += tun->vnet_hdr_sz;
> +		offset += tfile->vnet_hdr_sz;
>  	}
>  
> -	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
> +	if ((tfile->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
>  		align += NET_IP_ALIGN;
>  		if (unlikely(len < ETH_HLEN ||
>  			     (gso.hdr_len && gso.hdr_len < ETH_HLEN)))
>  			return -EINVAL;
>  	}
>  
> -	skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
> +	skb = tun_alloc_skb(tfile, align, len, gso.hdr_len, noblock);
> +
>  	if (IS_ERR(skb)) {
>  		if (PTR_ERR(skb) != -EAGAIN)
> -			tun->dev->stats.rx_dropped++;
> -		return PTR_ERR(skb);
> +			drop = true;
> +		count = PTR_ERR(skb);
> +		goto err;
>  	}
>  
>  	if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
> -		tun->dev->stats.rx_dropped++;
> +		drop = true;
>  		kfree_skb(skb);
> -		return -EFAULT;
> +		count = -EFAULT;
> +		goto err;
>  	}
>  
>  	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
>  		if (!skb_partial_csum_set(skb, gso.csum_start,
>  					  gso.csum_offset)) {
> -			tun->dev->stats.rx_frame_errors++;
> -			kfree_skb(skb);
> -			return -EINVAL;
> +			error = true;
> +			goto err_free;
>  		}
>  	}
>  
> -	switch (tun->flags & TUN_TYPE_MASK) {
> +	switch (tfile->flags & TUN_TYPE_MASK) {
>  	case TUN_TUN_DEV:
> -		if (tun->flags & TUN_NO_PI) {
> +		if (tfile->flags & TUN_NO_PI) {
>  			switch (skb->data[0] & 0xf0) {
>  			case 0x40:
>  				pi.proto = htons(ETH_P_IP);
> @@ -676,18 +665,15 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  				pi.proto = htons(ETH_P_IPV6);
>  				break;
>  			default:
> -				tun->dev->stats.rx_dropped++;
> -				kfree_skb(skb);
> -				return -EINVAL;
> +				drop = true;
> +				goto err_free;
>  			}
>  		}
>  
>  		skb_reset_mac_header(skb);
>  		skb->protocol = pi.proto;
> -		skb->dev = tun->dev;
>  		break;
>  	case TUN_TAP_DEV:
> -		skb->protocol = eth_type_trans(skb, tun->dev);
>  		break;
>  	}
>  
> @@ -704,9 +690,8 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
>  			break;
>  		default:
> -			tun->dev->stats.rx_frame_errors++;
> -			kfree_skb(skb);
> -			return -EINVAL;
> +			error = true;
> +			goto err_free;
>  		}
>  
>  		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
> @@ -714,9 +699,8 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  
>  		skb_shinfo(skb)->gso_size = gso.gso_size;
>  		if (skb_shinfo(skb)->gso_size == 0) {
> -			tun->dev->stats.rx_frame_errors++;
> -			kfree_skb(skb);
> -			return -EINVAL;
> +			error = true;
> +			goto err_free;
>  		}
>  
>  		/* Header must be checked, and gso_segs computed. */
> @@ -724,11 +708,38 @@ static ssize_t tun_get_user(struct tun_struct *tun,
>  		skb_shinfo(skb)->gso_segs = 0;
>  	}
>  
> -	netif_rx_ni(skb);
> +	tun = __tun_get(tfile);
> +	if (!tun)
> +		return -EBADFD;
>  
> +	switch (tfile->flags & TUN_TYPE_MASK) {
> +	case TUN_TUN_DEV:
> +		skb->dev = tun->dev;
> +		break;
> +	case TUN_TAP_DEV:
> +		skb->protocol = eth_type_trans(skb, tun->dev);
> +		break;
> +	}
> +
> +	netif_rx_ni(skb);
>  	tun->dev->stats.rx_packets++;
>  	tun->dev->stats.rx_bytes += len;
> +	tun_put(tun);
> +	return count;
> +
> +err_free:
> +	count = -EINVAL;
> +	kfree_skb(skb);
> +err:
> +	tun = __tun_get(tfile);
> +	if (!tun)
> +		return -EBADFD;
>  
> +	if (drop)
> +		tun->dev->stats.rx_dropped++;
> +	if (error)
> +		tun->dev->stats.rx_frame_errors++;
> +	tun_put(tun);
>  	return count;
>  }
>  
> @@ -736,30 +747,25 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
>  			      unsigned long count, loff_t pos)
>  {
>  	struct file *file = iocb->ki_filp;
> -	struct tun_struct *tun = tun_get(file);
> +	struct tun_file *tfile = file->private_data;
>  	ssize_t result;
>  
> -	if (!tun)
> -		return -EBADFD;
> -
> -	tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
> -
> -	result = tun_get_user(tun, iv, iov_length(iv, count),
> +	result = tun_get_user(tfile, iv, iov_length(iv, count),
>  			      file->f_flags & O_NONBLOCK);
>  
> -	tun_put(tun);
>  	return result;
>  }
>  
>  /* Put packet to the user space buffer */
> -static ssize_t tun_put_user(struct tun_struct *tun,
> +static ssize_t tun_put_user(struct tun_file *tfile,
>  			    struct sk_buff *skb,
>  			    const struct iovec *iv, int len)
>  {
> +	struct tun_struct *tun = NULL;
>  	struct tun_pi pi = { 0, skb->protocol };
>  	ssize_t total = 0;
>  
> -	if (!(tun->flags & TUN_NO_PI)) {
> +	if (!(tfile->flags & TUN_NO_PI)) {
>  		if ((len -= sizeof(pi)) < 0)
>  			return -EINVAL;
>  
> @@ -773,9 +779,10 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>  		total += sizeof(pi);
>  	}
>  
> -	if (tun->flags & TUN_VNET_HDR) {
> +	if (tfile->flags & TUN_VNET_HDR) {
>  		struct virtio_net_hdr gso = { 0 }; /* no info leak */
> -		if ((len -= tun->vnet_hdr_sz) < 0)
> +		len -= tfile->vnet_hdr_sz;
> +		if (len < 0)
>  			return -EINVAL;
>  
>  		if (skb_is_gso(skb)) {
> @@ -818,7 +825,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>  		if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total,
>  					       sizeof(gso))))
>  			return -EFAULT;
> -		total += tun->vnet_hdr_sz;
> +		total += tfile->vnet_hdr_sz;
>  	}
>  
>  	len = min_t(int, skb->len, len);
> @@ -826,29 +833,33 @@ static ssize_t tun_put_user(struct tun_struct *tun,
>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
>  	total += skb->len;
>  
> -	tun->dev->stats.tx_packets++;
> -	tun->dev->stats.tx_bytes += len;
> +	tun = __tun_get(tfile);
> +	if (tun) {
> +		tun->dev->stats.tx_packets++;
> +		tun->dev->stats.tx_bytes += len;
> +		tun_put(tun);
> +	}
>  
>  	return total;
>  }
>  
> -static ssize_t tun_do_read(struct tun_struct *tun,
> +static ssize_t tun_do_read(struct tun_file *tfile,
>  			   struct kiocb *iocb, const struct iovec *iv,
>  			   ssize_t len, int noblock)
>  {
>  	DECLARE_WAITQUEUE(wait, current);
>  	struct sk_buff *skb;
>  	ssize_t ret = 0;
> -
> -	tun_debug(KERN_INFO, tun, "tun_chr_read\n");
> +	struct tun_struct *tun = NULL;
>  
>  	if (unlikely(!noblock))
> -		add_wait_queue(&tun->wq.wait, &wait);
> +		add_wait_queue(&tfile->wq.wait, &wait);
>  	while (len) {
>  		current->state = TASK_INTERRUPTIBLE;
>  
> +		skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue);
>  		/* Read frames from the queue */
> -		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> +		if (!skb) {
>  			if (noblock) {
>  				ret = -EAGAIN;
>  				break;
> @@ -857,25 +868,38 @@ static ssize_t tun_do_read(struct tun_struct *tun,
>  				ret = -ERESTARTSYS;
>  				break;
>  			}
> +
> +			tun = __tun_get(tfile);
> +			if (!tun) {
> +				ret = -EIO;
> +				break;
> +			}
>  			if (tun->dev->reg_state != NETREG_REGISTERED) {
>  				ret = -EIO;
> +				tun_put(tun);
>  				break;
>  			}
> +			tun_put(tun);
>  
>  			/* Nothing to read, let's sleep */
>  			schedule();
>  			continue;
>  		}
> -		netif_wake_queue(tun->dev);
>  
> -		ret = tun_put_user(tun, skb, iv, len);
> +		tun = __tun_get(tfile);
> +		if (tun) {
> +			netif_wake_queue(tun->dev);
> +			tun_put(tun);
> +		}
> +
> +		ret = tun_put_user(tfile, skb, iv, len);
>  		kfree_skb(skb);
>  		break;
>  	}
>  
>  	current->state = TASK_RUNNING;
>  	if (unlikely(!noblock))
> -		remove_wait_queue(&tun->wq.wait, &wait);
> +		remove_wait_queue(&tfile->wq.wait, &wait);
>  
>  	return ret;
>  }
> @@ -885,21 +909,17 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
>  {
>  	struct file *file = iocb->ki_filp;
>  	struct tun_file *tfile = file->private_data;
> -	struct tun_struct *tun = __tun_get(tfile);
>  	ssize_t len, ret;
>  
> -	if (!tun)
> -		return -EBADFD;
>  	len = iov_length(iv, count);
>  	if (len < 0) {
>  		ret = -EINVAL;
>  		goto out;
>  	}
>  
> -	ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
> +	ret = tun_do_read(tfile, iocb, iv, len, file->f_flags & O_NONBLOCK);
>  	ret = min_t(ssize_t, ret, len);
>  out:
> -	tun_put(tun);
>  	return ret;
>  }
>  
> @@ -911,7 +931,7 @@ static void tun_setup(struct net_device *dev)
>  	tun->group = -1;
>  
>  	dev->ethtool_ops = &tun_ethtool_ops;
> -	dev->destructor = tun_free_netdev;
> +	dev->destructor = free_netdev;
>  }
>  
>  /* Trivial set of netlink ops to allow deleting tun or tap
> @@ -931,7 +951,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
>  
>  static void tun_sock_write_space(struct sock *sk)
>  {
> -	struct tun_struct *tun;
> +	struct tun_file *tfile = NULL;
>  	wait_queue_head_t *wqueue;
>  
>  	if (!sock_writeable(sk))
> @@ -945,37 +965,38 @@ static void tun_sock_write_space(struct sock *sk)
>  		wake_up_interruptible_sync_poll(wqueue, POLLOUT |
>  						POLLWRNORM | POLLWRBAND);
>  
> -	tun = tun_sk(sk)->tun;
> -	kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
> -}
> -
> -static void tun_sock_destruct(struct sock *sk)
> -{
> -	free_netdev(tun_sk(sk)->tun->dev);
> +	tfile = container_of(sk, struct tun_file, sk);
> +	kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
>  }
>  
>  static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
>  		       struct msghdr *m, size_t total_len)
>  {
> -	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> -	return tun_get_user(tun, m->msg_iov, total_len,
> -			    m->msg_flags & MSG_DONTWAIT);
> +	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
> +	ssize_t result;
> +
> +	result = tun_get_user(tfile, m->msg_iov, total_len,
> +			      m->msg_flags & MSG_DONTWAIT);
> +	return result;
>  }
>  
>  static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
>  		       struct msghdr *m, size_t total_len,
>  		       int flags)
>  {
> -	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
>  	int ret;
> +
>  	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
>  		return -EINVAL;
> -	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> +
> +	ret = tun_do_read(tfile, iocb, m->msg_iov, total_len,
>  			  flags & MSG_DONTWAIT);
>  	if (ret > total_len) {
>  		m->msg_flags |= MSG_TRUNC;
>  		ret = flags & MSG_TRUNC ? ret : total_len;
>  	}
> +
>  	return ret;
>  }
>  
> @@ -996,7 +1017,7 @@ static const struct proto_ops tun_socket_ops = {
>  static struct proto tun_proto = {
>  	.name		= "tun",
>  	.owner		= THIS_MODULE,
> -	.obj_size	= sizeof(struct tun_sock),
> +	.obj_size	= sizeof(struct tun_file),
>  };
>  
>  static int tun_flags(struct tun_struct *tun)
> @@ -1047,8 +1068,8 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
>  
>  static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  {
> -	struct sock *sk;
>  	struct tun_struct *tun;
> +	struct tun_file *tfile = file->private_data;
>  	struct net_device *dev;
>  	int err;
>  
> @@ -1069,7 +1090,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  		     (tun->group != -1 && !in_egroup_p(tun->group))) &&
>  		    !capable(CAP_NET_ADMIN))
>  			return -EPERM;
> -		err = security_tun_dev_attach(tun->socket.sk);
> +		err = security_tun_dev_attach(tfile->socket.sk);
>  		if (err < 0)
>  			return err;
>  
> @@ -1113,25 +1134,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  		tun = netdev_priv(dev);
>  		tun->dev = dev;
>  		tun->flags = flags;
> -		tun->txflt.count = 0;
> -		tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
>  
> -		err = -ENOMEM;
> -		sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
> -		if (!sk)
> -			goto err_free_dev;
> -
> -		sk_change_net(sk, net);
> -		tun->socket.wq = &tun->wq;
> -		init_waitqueue_head(&tun->wq.wait);
> -		tun->socket.ops = &tun_socket_ops;
> -		sock_init_data(&tun->socket, sk);
> -		sk->sk_write_space = tun_sock_write_space;
> -		sk->sk_sndbuf = INT_MAX;
> -
> -		tun_sk(sk)->tun = tun;
> -
> -		security_tun_dev_post_create(sk);
> +		security_tun_dev_post_create(&tfile->sk);
>  
>  		tun_net_init(dev);
>  
> @@ -1141,15 +1145,13 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  
>  		err = register_netdevice(tun->dev);
>  		if (err < 0)
> -			goto err_free_sk;
> +			goto err_free_dev;
>  
>  		if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
>  		    device_create_file(&tun->dev->dev, &dev_attr_owner) ||
>  		    device_create_file(&tun->dev->dev, &dev_attr_group))
>  			pr_err("Failed to create tun sysfs files\n");
>  
> -		sk->sk_destruct = tun_sock_destruct;
> -
>  		err = tun_attach(tun, file);
>  		if (err < 0)
>  			goto failed;
> @@ -1172,6 +1174,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  	else
>  		tun->flags &= ~TUN_VNET_HDR;
>  
> +	/* Cache flags from tun device */
> +	tfile->flags = tun->flags;
>  	/* Make sure persistent devices do not get stuck in
>  	 * xoff state.
>  	 */
> @@ -1181,11 +1185,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  	strcpy(ifr->ifr_name, tun->dev->name);
>  	return 0;
>  
> - err_free_sk:
> -	tun_free_netdev(dev);
> - err_free_dev:
> +err_free_dev:
>  	free_netdev(dev);
> - failed:
> +failed:
>  	return err;
>  }
>  
> @@ -1357,9 +1359,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  	case TUNSETTXFILTER:
>  		/* Can be set only for TAPs */
>  		ret = -EINVAL;
> -		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
> +		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
>  			break;
> -		ret = update_filter(&tun->txflt, (void __user *)arg);
> +		ret = update_filter(&tfile->txflt, (void __user *)arg);
>  		break;
>  
>  	case SIOCGIFHWADDR:
> @@ -1379,7 +1381,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  		break;
>  
>  	case TUNGETSNDBUF:
> -		sndbuf = tun->socket.sk->sk_sndbuf;
> +		sndbuf = tfile->socket.sk->sk_sndbuf;
>  		if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
>  			ret = -EFAULT;
>  		break;
> @@ -1390,11 +1392,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  			break;
>  		}
>  
> -		tun->socket.sk->sk_sndbuf = sndbuf;
> +		tfile->socket.sk->sk_sndbuf = sndbuf;
>  		break;
>  
>  	case TUNGETVNETHDRSZ:
> -		vnet_hdr_sz = tun->vnet_hdr_sz;
> +		vnet_hdr_sz = tfile->vnet_hdr_sz;
>  		if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
>  			ret = -EFAULT;
>  		break;
> @@ -1409,27 +1411,27 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
>  			break;
>  		}
>  
> -		tun->vnet_hdr_sz = vnet_hdr_sz;
> +		tfile->vnet_hdr_sz = vnet_hdr_sz;
>  		break;
>  
>  	case TUNATTACHFILTER:
>  		/* Can be set only for TAPs */
>  		ret = -EINVAL;
> -		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
> +		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
>  			break;
>  		ret = -EFAULT;
>  		if (copy_from_user(&fprog, argp, sizeof(fprog)))
>  			break;
>  
> -		ret = sk_attach_filter(&fprog, tun->socket.sk);
> +		ret = sk_attach_filter(&fprog, tfile->socket.sk);
>  		break;
>  
>  	case TUNDETACHFILTER:
>  		/* Can be set only for TAPs */
>  		ret = -EINVAL;
> -		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
> +		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
>  			break;
> -		ret = sk_detach_filter(tun->socket.sk);
> +		ret = sk_detach_filter(tfile->socket.sk);
>  		break;
>  
>  	default:
> @@ -1481,43 +1483,50 @@ static long tun_chr_compat_ioctl(struct file *file,
>  
>  static int tun_chr_fasync(int fd, struct file *file, int on)
>  {
> -	struct tun_struct *tun = tun_get(file);
> -	int ret;
> -
> -	if (!tun)
> -		return -EBADFD;
> -
> -	tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on);
> +	struct tun_file *tfile = file->private_data;
> +	int ret = fasync_helper(fd, file, on, &tfile->fasync);
>  
> -	if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
> +	if (ret < 0)
>  		goto out;
>  
>  	if (on) {
>  		ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
>  		if (ret)
>  			goto out;
> -		tun->flags |= TUN_FASYNC;
> +		tfile->flags |= TUN_FASYNC;
>  	} else
> -		tun->flags &= ~TUN_FASYNC;
> +		tfile->flags &= ~TUN_FASYNC;
>  	ret = 0;
>  out:
> -	tun_put(tun);
>  	return ret;
>  }
>  
>  static int tun_chr_open(struct inode *inode, struct file * file)
>  {
> +	struct net *net = current->nsproxy->net_ns;
>  	struct tun_file *tfile;
>  
>  	DBG1(KERN_INFO, "tunX: tun_chr_open\n");
>  
> -	tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
> +	tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
> +					&tun_proto);
>  	if (!tfile)
>  		return -ENOMEM;
> -	atomic_set(&tfile->count, 0);
> +
>  	tfile->tun = NULL;
> -	tfile->net = get_net(current->nsproxy->net_ns);
> +	tfile->net = net;
> +	tfile->txflt.count = 0;
> +	tfile->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
> +	tfile->socket.wq = &tfile->wq;
> +	init_waitqueue_head(&tfile->wq.wait);
> +	tfile->socket.file = file;
> +	tfile->socket.ops = &tun_socket_ops;
> +	sock_init_data(&tfile->socket, &tfile->sk);
> +
> +	tfile->sk.sk_write_space = tun_sock_write_space;
> +	tfile->sk.sk_sndbuf = INT_MAX;
>  	file->private_data = tfile;
> +
>  	return 0;
>  }
>  
> @@ -1541,14 +1550,14 @@ static int tun_chr_close(struct inode *inode, struct file *file)
>  				unregister_netdevice(dev);
>  			rtnl_unlock();
>  		}
> -	}
>  
> -	tun = tfile->tun;
> -	if (tun)
> -		sock_put(tun->socket.sk);
> +		/* drop the reference that netdevice holds */
> +		sock_put(&tfile->sk);
>  
> -	put_net(tfile->net);
> -	kfree(tfile);
> +	}
> +
> +	/* drop the reference that file holds */
> +	sock_put(&tfile->sk);
>  
>  	return 0;
>  }
> @@ -1676,13 +1685,14 @@ static void tun_cleanup(void)
>  struct socket *tun_get_socket(struct file *file)
>  {
>  	struct tun_struct *tun;
> +	struct tun_file *tfile = file->private_data;
>  	if (file->f_op != &tun_fops)
>  		return ERR_PTR(-EINVAL);
>  	tun = tun_get(file);
>  	if (!tun)
>  		return ERR_PTR(-EBADFD);
>  	tun_put(tun);
> -	return &tun->socket;
> +	return &tfile->socket;
>  }
>  EXPORT_SYMBOL_GPL(tun_get_socket);
>  

^ permalink raw reply

* Re: [net-next RFC V3 PATCH 4/6] tuntap: multiqueue support
From: Michael S. Tsirkin @ 2012-06-25  8:41 UTC (permalink / raw)
  To: Jason Wang
  Cc: habanero, netdev, linux-kernel, krkumar2, tahm, akong, davem,
	shemminger, mashirle
In-Reply-To: <20120625082553.GC18229@redhat.com>

On Mon, Jun 25, 2012 at 11:25:53AM +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 25, 2012 at 02:10:18PM +0800, Jason Wang wrote:
> > This patch adds multiqueue support for tap device. This is done by abstracting
> > each queue as a file/socket and allowing multiple sockets to be attached to the
> > tuntap device (an array of tun_file were stored in the tun_struct). Userspace
> > could write and read from those files to do the parallel packet
> > sending/receiving.
> > 
> > Unlike the previous single queue implementation, the socket and device were
> > loosely coupled, each of them were allowed to go away first. In order to let the
> > tx path lockless, netif_tx_loch_bh() is replaced by RCU/NETIF_F_LLTX to
> > synchronize between data path and system call.
> 
> Don't use LLTX/RCU. It's not worth it.

Or maybe we should use LLTX. Need to think about it.
But if yes I'd like a separate patch moving tun to LLTX
and move it always to LLTX. Don't play with LLTX at runtime.

-- 
MST

^ permalink raw reply

* RE: [PATCH 4/4] net/mlx4_en: Use atomic counter to decide when queue is full
From: David Laight @ 2012-06-25  9:00 UTC (permalink / raw)
  To: David Miller, yevgenyp; +Cc: netdev
In-Reply-To: <20120622.172353.1092394608024377078.davem@davemloft.net>

 
> > The Transmit and transmit completion flows execute from different
contexts,
> > which are not synchronized. Hence naive reading the of consumer
index might
> > give wrong value by the time it is being used, That could lead to a
state of transmit timeout.
> > Fix that by using atomic variable to maintain that index.
> > 
> > Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
> 
> I'm not convinced.  There is only one place that actually changes
> the counter.
> 
> So it seems more like you have a missing memory barrier somewhere.

Or just keep the two ring indexes - instead of keeping the
number of 'active' entries as well.
Then you don't have a variable which the tx setup and
tx completion routines both update.

	David

^ permalink raw reply

* [net-next RFC V4 PATCH 0/4] Multiqueue virtio-net
From: Jason Wang @ 2012-06-25  9:16 UTC (permalink / raw)
  To: krkumar2, habanero, rusty, mst, netdev, mashirle, linux-kernel,
	virtualization, edumazet, tahm, jwhan, akong, davem
  Cc: kvm

Hello All:

This series is an update version of multiqueue virtio-net driver based on
Krishna Kumar's work to let virtio-net use multiple rx/tx queues to do the
packets reception and transmission. Please review and comments.

Test Environment:
- Intel(R) Xeon(R) CPU E5620 @ 2.40GHz, 8 cores 2 numa nodes
- Two directed connected 82599

Test Summary:

- Highlights: huge improvements on TCP_RR test
- Lowlights: regression on small packet transmission, higher cpu utilization
             than single queue, need further optimization

Analysis of the performance result:

- I count the number of packets sending/receiving during the test, and
  multiqueue show much more ability in terms of packets per second.

- For the tx regression, multiqueue send about 1-2 times of more packets
  compared to single queue, and the packets size were much smaller than single
  queue does. I suspect tcp does less batching in multiqueue, so I hack the
  tcp_write_xmit() to forece more batching, multiqueue works as well as
  singlequeue for both small transmission and throughput

- I didn't pack the accelerate RFS with virtio-net in this sereis as it still
  need further shaping, for the one that interested in this please see:
  http://www.mail-archive.com/kvm@vger.kernel.org/msg64111.html

Detail result:

Test results: smp = 2 pin vhosts and vcpus in the same node - 1 sq 2 mq(q=2)
- TCP_MAERTS (Guest to external Host):

sessions size throughput1 throughput2   norm1 norm2:
1 64 424.91 401.49 94% 13.20 12.35 93%
2 64 1211.06 878.31 72% 24.35 15.80 64%
4 64 1292.46 1081.78 83% 26.46 20.14 76%
8 64 1355.57 826.06 60% 27.88 15.32 54%
1 256 1489.37 1406.51 94% 46.93 43.72 93%
2 256 4936.19 2688.46 54% 100.24 46.39 46%
4 256 5251.10 2900.08 55% 107.98 50.47 46%
8 256 5270.11 3562.10 67% 108.26 66.19 61%
1 512 1877.57 2697.64 143% 57.29 83.02 144%
2 512 9183.49 5056.33 55% 205.26 86.43 42%
4 512 9349.59 8918.77 95% 212.78 176.99 83%
8 512 9318.29 8947.69 96% 216.55 176.34 81%
1 1024 4746.47 4669.10 98% 143.87 140.63 97%
2 1024 7406.49 4738.58 63% 245.16 131.04 53%
4 1024 8955.70 9337.82 104% 254.49 208.66 81%
8 1024 9384.48 9378.41 99% 260.24 197.77 75%
1 2048 8947.29 8955.98 100% 338.39 338.60 100%
2 2048 9259.04 9383.77 101% 326.02 224.92 68%
4 2048 9043.04 9325.38 103% 305.09 214.77 70%
8 2048 9358.04 8413.21 89% 291.07 189.74 65%
1 4096 8882.55 8885.40 100% 329.83 326.42 98%
2 4096 9247.07 9387.49 101% 345.55 240.51 69%
4 4096 9365.30 9341.82 99% 333.40 211.97 63%
8 4096 9397.64 8472.35 90% 312.21 182.51 58%
1 16384 8843.82 8725.50 98% 315.06 312.29 99%
2 16384 7978.70 9176.16 115% 280.34 317.62 113%
4 16384 8906.91 9251.15 103% 312.85 226.13 72%
8 16384 9385.36 9401.39 100% 341.53 207.53 60%

- TCP_RR

sessions size trans1 trans2   norm1 norm2
50 1 63021.83 93003.57 147% 2360.36 1972.08 83%
100 1 54599.87 93724.43 171% 2042.64 2119.50 103%
250 1 86546.75 119738.71 138% 2414.80 2508.14 103%
50 64 59785.19 97761.24 163% 2264.59 2136.85 94%
100 64 56028.20 103233.18 184% 2063.65 2260.91 109%
250 64 99116.82 113948.86 114% 2452.77 2393.88 97%
50 128 58025.96 84971.26 146% 2284.48 1907.75 83%
100 128 66302.76 83110.60 125% 2579.87 1881.60 72%
250 128 109262.23 137571.13 125% 2603.96 2911.55 111%
50 256 59766.57 77370.93 129% 2393.53 2018.02 84%
100 256 60862.02 80737.50 132% 2426.71 2024.51 83%
250 256 90076.66 141780.44 157% 2453.06 3056.27 124%

- TCP_STREAM (External Host to Guest)

sessions size throughput1 throughput2   norm1 norm2
1 64 373.53 575.24 154% 15.37 33.44 217%
2 64 1314.17 1087.49 82% 54.21 32.97 60%
4 64 3352.93 2404.70 71% 136.35 53.72 39%
8 64 6866.17 6587.17 95% 252.06 147.03 58%
1 256 677.37 767.65 113% 26.59 29.80 112%
2 256 4386.83 4550.40 103% 164.17 174.81 106%
4 256 8446.27 8814.95 104% 307.02 186.95 60%
8 256 6529.46 9370.44 143% 237.52 207.77 87%
1 512 2542.48 1371.31 53% 102.14 51.68 50%
2 512 9183.10 9354.81 101% 319.85 333.62 104%
4 512 8488.12 9230.58 108% 303.14 258.55 85%
8 512 8115.66 9393.33 115% 293.83 219.82 74%
1 1024 2653.16 2507.78 94% 100.88 96.56 95%
2 1024 6490.12 5033.35 77% 316.74 102.74 32%
4 1024 7582.51 9183.60 121% 298.87 210.48 70%
8 1024 8304.13 9341.48 112% 298.06 242.32 81%
1 2048 2495.06 2508.12 100% 99.60 102.96 103%
2 2048 8968.12 7554.57 84% 324.57 175.56 54%
4 2048 8627.85 9192.90 106% 310.91 205.98 66%
8 2048 7781.72 9422.10 121% 284.31 208.77 73%
1 4096 6067.46 4489.04 73% 233.63 171.01 73%
2 4096 9029.43 8684.09 96% 323.17 191.19 59%
4 4096 8284.30 9253.33 111% 306.93 268.67 87%
8 4096 7789.39 9388.32 120% 283.97 238.28 83%
1 16384 8660.90 9313.87 107% 318.88 315.61 98%
2 16384 8646.37 8871.30 102% 318.81 318.88 100%
4 16384 8386.02 9397.13 112% 306.28 205.44 67%
8 16384 8006.27 9404.98 117% 288.41 224.83 77%

Test results: smp = 4 no pinning - 1 sq 2 mq(q=4)

- TCP_MAERTS - Guest to External Host

sessions size throughput1 throughput2   norm1 norm2
1 64 513.13 331.27 64% 15.86 10.39 65%
2 64 1325.72 543.38 40% 26.54 12.59 47%
4 64 2735.18 1168.01 42% 37.61 12.63 33%
8 64 3084.53 2278.38 73% 41.20 27.01 65%
1 256 1313.25 996.64 75% 41.23 28.59 69%
2 256 3425.64 3337.68 97% 71.57 61.69 86%
4 256 8174.33 4252.54 52% 126.63 49.68 39%
8 256 9365.96 8411.91 89% 145.00 101.64 70%
1 512 2363.07 2370.35 100% 73.00 73.52 100%
2 512 6047.23 3636.29 60% 135.43 61.02 45%
4 512 9330.76 7165.99 76% 184.18 95.50 51%
8 512 8178.20 9221.16 112% 162.52 122.44 75%
1 1024 3196.63 4016.30 125% 98.41 120.75 122%
2 1024 4940.41 9296.51 188% 140.03 191.95 137%
4 1024 5696.43 9018.76 158% 147.30 150.01 101%
8 1024 9355.07 9342.43 99% 216.90 140.48 64%
1 2048 4248.99 5189.24 122% 131.95 157.01 118%
2 2048 9021.22 9262.81 102% 242.37 198.21 81%
4 2048 8357.81 9241.94 110% 225.88 180.61 79%
8 2048 8024.56 9327.27 116% 205.75 145.76 70%
1 4096 9270.51 8199.32 88% 326.88 269.53 82%
2 4096 9151.10 9348.33 102% 257.92 214.46 83%
4 4096 9243.34 9294.30 100% 281.20 164.35 58%
8 4096 9020.35 9339.32 103% 249.87 143.54 57%
1 16384 9357.69 9355.69 99% 319.15 322.94 101%
2 16384 9319.39 9076.63 97% 319.26 215.64 67%
4 16384 9352.99 9183.41 98% 308.98 145.26 47%
8 16384 9384.67 9353.54 99% 322.49 155.78 48%

- TCP_RR
sessions size throughput1 throughput2   norm1 norm2
50 1 59677.80 71761.59 120% 2020.92 1585.19 78%
100 1 55324.42 81302.10 146% 1889.49 1439.22 76%
250 1 73973.48 155031.82 209% 2495.73 2195.91 87%
50 64 53093.57 67893.59 127% 1879.41 1266.19 67%
100 64 59186.33 60128.69 101% 2044.43 1084.57 53%
250 64 64159.90 137389.38 214% 2186.02 1994.61 91%
50 128 54323.45 51615.38 95% 1924.99 908.08 47%
100 128 57543.49 69999.21 121% 2049.99 1266.95 61%
250 128 71541.81 123915.35 173% 2448.38 1845.62 75%
50 256 57184.12 68314.09 119% 2204.47 1393.02 63%
100 256 51983.40 61931.00 119% 1897.89 1216.24 64%
250 256 66542.32 119435.53 179% 2267.97 1887.71 83%

- TCP_STREAM - External Host to Guest
sessions size throughput1 throughput2   norm1 norm2
1 64 589.10 634.81 107% 27.01 34.51 127%
2 64 1805.57 1442.49 79% 70.44 39.59 56%
4 64 3066.91 3869.26 126% 121.89 84.96 69%
8 64 6602.37 7626.61 115% 211.07 116.27 55%
1 256 775.25 2092.37 269% 26.38 115.47 437%
2 256 6213.63 3527.24 56% 191.89 84.18 43%
4 256 7333.13 9190.87 125% 230.74 218.05 94%
8 256 6776.23 9396.71 138% 216.14 153.14 70%
1 512 4308.25 3536.38 82% 160.93 140.38 87%
2 512 7159.09 9212.97 128% 240.80 201.42 83%
4 512 7095.49 9241.70 130% 229.77 193.26 84%
8 512 6935.59 9398.09 135% 221.37 152.36 68%
1 1024 5566.68 6155.74 110% 250.52 245.05 97%
2 1024 7303.83 9212.72 126% 299.33 229.91 76%
4 1024 7179.31 9334.93 130% 233.62 192.51 82%
8 1024 7080.91 9396.46 132% 226.37 173.81 76%
1 2048 7477.66 2671.34 35% 250.25 96.85 38%
2 2048 6743.70 8986.82 133% 295.51 230.07 77%
4 2048 7186.19 9239.56 128% 228.13 219.78 96%
8 2048 6883.19 9375.34 136% 221.11 164.59 74%
1 4096 7582.83 5040.55 66% 291.98 178.36 61%
2 4096 7617.98 9345.37 122% 245.74 197.78 80%
4 4096 7157.87 9383.81 131% 226.80 188.65 83%
8 4096 5916.83 9401.45 158% 189.64 147.61 77%
1 16384 8417.28 5351.11 63% 309.57 301.47 97%
2 16384 8426.42 9303.45 110% 302.13 197.14 65%
4 16384 5695.26 8678.73 152% 180.68 192.30 106%
8 16384 6761.33 9374.67 138% 214.50 160.25 74%

Changes from V3:

- Rebase to the net-next
- Let queue 2 to be the control virtqueue to obey the spec
- Prodives irq affinity
- Choose txq based on processor id

References:

- V3: http://lwn.net/Articles/467283/

---

Jason Wang (3):
      virtio_ring: move queue_index to vring_virtqueue
      virtio: introduce a method to get the irq of a specific virtqueue
      virtio_net: multiqueue support

Krishna Kumar (1):
      virtio_net: Introduce VIRTIO_NET_F_MULTIQUEUE


 drivers/lguest/lguest_device.c |    8 
 drivers/net/virtio_net.c       |  695 +++++++++++++++++++++++++++++-----------
 drivers/s390/kvm/kvm_virtio.c  |    6 
 drivers/virtio/virtio_mmio.c   |   13 +
 drivers/virtio/virtio_pci.c    |   24 +
 drivers/virtio/virtio_ring.c   |   17 +
 include/linux/virtio.h         |    4 
 include/linux/virtio_config.h  |    4 
 include/linux/virtio_net.h     |    3 
 9 files changed, 572 insertions(+), 202 deletions(-)

-- 
Jason Wang

^ permalink raw reply

* [net-next RFC V4 PATCH 1/4] virtio_net: Introduce VIRTIO_NET_F_MULTIQUEUE
From: Jason Wang @ 2012-06-25  9:17 UTC (permalink / raw)
  To: krkumar2, habanero, rusty, mst, netdev, mashirle, linux-kernel,
	virtualization, edumazet, tahm, jwhan, akong, davem
  Cc: kvm
In-Reply-To: <20120625090829.7263.65026.stgit@amd-6168-8-1.englab.nay.redhat.com>

From: Krishna Kumar <krkumar2@in.ibm.com>

Introduce VIRTIO_NET_F_MULTIQUEUE.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/linux/virtio_net.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 2470f54..1bc7e30 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -51,6 +51,7 @@
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support */
 #define VIRTIO_NET_F_GUEST_ANNOUNCE 21	/* Guest can announce device on the
 					 * network */
+#define VIRTIO_NET_F_MULTIQUEUE	22	/* Device supports multiple TXQ/RXQ */
 
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */

^ permalink raw reply related

* [net-next RFC V4 PATCH 2/4] virtio_ring: move queue_index to vring_virtqueue
From: Jason Wang @ 2012-06-25  9:17 UTC (permalink / raw)
  To: krkumar2, habanero, rusty, mst, netdev, mashirle, linux-kernel,
	virtualization, edumazet, tahm, jwhan, akong, davem
  Cc: kvm
In-Reply-To: <20120625090829.7263.65026.stgit@amd-6168-8-1.englab.nay.redhat.com>

Instead of storing the queue index in virtio infos, this patch moves them to
vring_virtqueue and introduces helpers to set and get the value. This would
simplify the management and tracing.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/virtio/virtio_mmio.c |    5 +----
 drivers/virtio/virtio_pci.c  |   12 +++++-------
 drivers/virtio/virtio_ring.c |   17 +++++++++++++++++
 include/linux/virtio.h       |    4 ++++
 4 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 453db0c..f5432b6 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -131,9 +131,6 @@ struct virtio_mmio_vq_info {
 	/* the number of entries in the queue */
 	unsigned int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -324,7 +321,6 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 		err = -ENOMEM;
 		goto error_kmalloc;
 	}
-	info->queue_index = index;
 
 	/* Allocate pages for the queue - start with a queue as big as
 	 * possible (limited by maximum size allowed by device), drop down
@@ -363,6 +359,7 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
 		goto error_new_virtqueue;
 	}
 
+	virtqueue_set_queue_index(vq, index);
 	vq->priv = info;
 	info->vq = vq;
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2e03d41..adb24f2 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -79,9 +79,6 @@ struct virtio_pci_vq_info
 	/* the number of entries in the queue */
 	int num;
 
-	/* the index of the queue */
-	int queue_index;
-
 	/* the virtual address of the ring queue */
 	void *queue;
 
@@ -202,11 +199,11 @@ static void vp_reset(struct virtio_device *vdev)
 static void vp_notify(struct virtqueue *vq)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
-	struct virtio_pci_vq_info *info = vq->priv;
 
 	/* we write the queue's selector into the notification register to
 	 * signal the other end */
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+	iowrite16(virtqueue_get_queue_index(vq),
+		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -402,7 +399,6 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 	if (!info)
 		return ERR_PTR(-ENOMEM);
 
-	info->queue_index = index;
 	info->num = num;
 	info->msix_vector = msix_vec;
 
@@ -425,6 +421,7 @@ static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 		goto out_activate_queue;
 	}
 
+	virtqueue_set_queue_index(vq, index);
 	vq->priv = info;
 	info->vq = vq;
 
@@ -467,7 +464,8 @@ static void vp_del_vq(struct virtqueue *vq)
 	list_del(&info->node);
 	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
-	iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(virtqueue_get_queue_index(vq),
+		vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
 	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 5aa43c3..9c5aeea 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -106,6 +106,9 @@ struct vring_virtqueue
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	void (*notify)(struct virtqueue *vq);
 
+	/* Index of the queue */
+	int queue_index;
+
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
@@ -171,6 +174,20 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
 	return head;
 }
 
+void virtqueue_set_queue_index(struct virtqueue *_vq, int queue_index)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	vq->queue_index = queue_index;
+}
+EXPORT_SYMBOL_GPL(virtqueue_set_queue_index);
+
+int virtqueue_get_queue_index(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	return vq->queue_index;
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_queue_index);
+
 /**
  * virtqueue_add_buf - expose buffer to other end
  * @vq: the struct virtqueue we're talking about.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 8efd28a..0d8ed46 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -50,6 +50,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
 
+void virtqueue_set_queue_index(struct virtqueue *vq, int queue_index);
+
+int virtqueue_get_queue_index(struct virtqueue *vq);
+
 /**
  * virtio_device - representation of a device using virtio
  * @index: unique position on the virtio bus

^ permalink raw reply related

* [net-next RFC V4 PATCH 3/4] virtio: introduce a method to get the irq of a specific virtqueue
From: Jason Wang @ 2012-06-25  9:41 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, jwhan, mashirle, krkumar2, edumazet,
	davem, rusty, linux-kernel, virtualization, netdev
  Cc: qemu-devel, kvm
In-Reply-To: <20120625090829.7263.65026.stgit@amd-6168-8-1.englab.nay.redhat.com>

Device specific irq optimizations such as irq affinity may be used by virtio
drivers. So this patch introduce a new method to get the irq of a specific
virtqueue.

After this patch, virtio device drivers could query the irq and do device
specific optimizations. First user would be virtio-net.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/lguest/lguest_device.c |    8 ++++++++
 drivers/s390/kvm/kvm_virtio.c  |    6 ++++++
 drivers/virtio/virtio_mmio.c   |    8 ++++++++
 drivers/virtio/virtio_pci.c    |   12 ++++++++++++
 include/linux/virtio_config.h  |    4 ++++
 5 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 9e8388e..bcd080f 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -392,6 +392,13 @@ static const char *lg_bus_name(struct virtio_device *vdev)
 	return "";
 }
 
+static int lg_get_vq_irq(struct virtio_device *vdev, struct virtqueue *vq)
+{
+	struct lguest_vq_info *lvq = vq->priv;
+
+	return lvq->config.irq;
+}
+
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -404,6 +411,7 @@ static struct virtio_config_ops lguest_config_ops = {
 	.find_vqs = lg_find_vqs,
 	.del_vqs = lg_del_vqs,
 	.bus_name = lg_bus_name,
+	.get_vq_irq = lg_get_vq_irq,
 };
 
 /*
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index d74e9ae..a897de2 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -268,6 +268,11 @@ static const char *kvm_bus_name(struct virtio_device *vdev)
 	return "";
 }
 
+static int kvm_get_vq_irq(struct virtio_device *vdev, struct virtqueue *vq)
+{
+	return 0x2603;
+}
+
 /*
  * The config ops structure as defined by virtio config
  */
@@ -282,6 +287,7 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.find_vqs = kvm_find_vqs,
 	.del_vqs = kvm_del_vqs,
 	.bus_name = kvm_bus_name,
+	.get_vq_irq = kvm_get_vq_irq,
 };
 
 /*
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index f5432b6..2ba37ed 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -411,6 +411,13 @@ static const char *vm_bus_name(struct virtio_device *vdev)
 	return vm_dev->pdev->name;
 }
 
+static int vm_get_vq_irq(struct virtio_device *vdev, struct virtqueue *vq)
+{
+	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+	return platform_get_irq(vm_dev->pdev, 0);
+}
+
 static struct virtio_config_ops virtio_mmio_config_ops = {
 	.get		= vm_get,
 	.set		= vm_set,
@@ -422,6 +429,7 @@ static struct virtio_config_ops virtio_mmio_config_ops = {
 	.get_features	= vm_get_features,
 	.finalize_features = vm_finalize_features,
 	.bus_name	= vm_bus_name,
+	.get_vq_irq     = vm_get_vq_irq,
 };
 
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index adb24f2..c062227 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -607,6 +607,17 @@ static const char *vp_bus_name(struct virtio_device *vdev)
 	return pci_name(vp_dev->pci_dev);
 }
 
+static int vp_get_vq_irq(struct virtio_device *vdev, struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+
+	if (vp_dev->intx_enabled)
+		return vp_dev->pci_dev->irq;
+	else
+		return vp_dev->msix_entries[info->msix_vector].vector;
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
@@ -618,6 +629,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
 	.bus_name	= vp_bus_name,
+	.get_vq_irq     = vp_get_vq_irq,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index fc457f4..acd6930 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -98,6 +98,9 @@
  *	vdev: the virtio_device
  *      This returns a pointer to the bus name a la pci_name from which
  *      the caller can then copy.
+ * @get_vq_irq: get the irq numer of the specific virt queue.
+ *      vdev: the virtio_device
+ *      vq: the virtqueue
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -116,6 +119,7 @@ struct virtio_config_ops {
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
+	int (*get_vq_irq)(struct virtio_device *vdev, struct virtqueue *vq);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
-- 
1.7.1

^ permalink raw reply related

* [net-next RFC V4 PATCH 4/4] virtio_net: multiqueue support
From: Jason Wang @ 2012-06-25  9:41 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, jwhan, mashirle, krkumar2, edumazet,
	davem, rusty, linux-kernel, virtualization, netdev
  Cc: qemu-devel, kvm
In-Reply-To: <20120625090829.7263.65026.stgit@amd-6168-8-1.englab.nay.redhat.com>

This addes multiqueue support to virtio_net driver. This feature is negotiated
through VIRTIO_NET_F_MULTIQUEUE.

The driver expects the number of rx/tx queue paris is equal to the number of
vcpus. To maximize the performance under this per-cpu rx/tx queue pairs, some
optimization were introduced:

- Txq selection is based on the processor id in order to avoid contending a lock
 whose owner may exits to host.
- Since the txq/txq were per-cpu, affinity hint were set to the cpu that owns
 the queue pairs.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c   |  695 ++++++++++++++++++++++++++++++++------------
 include/linux/virtio_net.h |    2 +
 2 files changed, 506 insertions(+), 191 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f18149a..5c719de 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -26,6 +26,7 @@
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
+#include <linux/interrupt.h>
 
 static int napi_weight = 128;
 module_param(napi_weight, int, 0444);
@@ -51,43 +52,73 @@ struct virtnet_stats {
 	u64 rx_packets;
 };
 
-struct virtnet_info {
-	struct virtio_device *vdev;
-	struct virtqueue *rvq, *svq, *cvq;
-	struct net_device *dev;
+/* Internal representation of a send virtqueue */
+struct send_queue {
+	/* Virtqueue associated with this send _queue */
+	struct virtqueue *vq;
+
+	/* TX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+	cpumask_var_t affinity_mask;
+};
+
+/* Internal representation of a receive virtqueue */
+struct receive_queue {
+	/* Virtqueue associated with this receive_queue */
+	struct virtqueue *vq;
+
+	/* Back pointer to the virtnet_info */
+	struct virtnet_info *vi;
+
 	struct napi_struct napi;
-	unsigned int status;
 
 	/* Number of input buffers, and max we've ever had. */
 	unsigned int num, max;
 
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Chain pages by the private ptr. */
+	struct page *pages;
+
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+	cpumask_var_t affinity_mask;
+};
+
+struct virtnet_info {
+	int num_queue_pairs;		/* # of RX/TX vq pairs */
+
+	struct send_queue **sq;
+	struct receive_queue **rq;
+	struct virtqueue *cvq;
+
+	struct virtio_device *vdev;
+	struct net_device *dev;
+	unsigned int status;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
 	/* Host will merge rx buffers for big packets (shake it! shake it!) */
 	bool mergeable_rx_bufs;
 
+	/* Has control virtqueue */
+	bool has_cvq;
+
 	/* enable config space updates */
 	bool config_enable;
 
 	/* Active statistics */
 	struct virtnet_stats __percpu *stats;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
-
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
 	/* Lock for config space updates */
 	struct mutex config_lock;
-
-	/* Chain pages by the private ptr. */
-	struct page *pages;
-
-	/* fragments + linear part + virtio header */
-	struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
-	struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
 };
 
 struct skb_vnet_hdr {
@@ -108,6 +139,22 @@ struct padded_vnet_hdr {
 	char padding[6];
 };
 
+static inline int txq_get_qnum(struct virtnet_info *vi, struct virtqueue *vq)
+{
+	int ret = virtqueue_get_queue_index(vq);
+
+	/* skip ctrl vq */
+	if (vi->has_cvq)
+		return (ret - 1) / 2;
+	else
+		return ret / 2;
+}
+
+static inline int rxq_get_qnum(struct virtnet_info *vi, struct virtqueue *vq)
+{
+	return virtqueue_get_queue_index(vq) / 2;
+}
+
 static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
 {
 	return (struct skb_vnet_hdr *)skb->cb;
@@ -117,22 +164,22 @@ static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
  * private is used to chain pages for big packets, put the whole
  * most recent used list in the beginning for reuse
  */
-static void give_pages(struct virtnet_info *vi, struct page *page)
+static void give_pages(struct receive_queue *rq, struct page *page)
 {
 	struct page *end;
 
 	/* Find end of list, sew whole thing into vi->pages. */
 	for (end = page; end->private; end = (struct page *)end->private);
-	end->private = (unsigned long)vi->pages;
-	vi->pages = page;
+	end->private = (unsigned long)rq->pages;
+	rq->pages = page;
 }
 
-static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 {
-	struct page *p = vi->pages;
+	struct page *p = rq->pages;
 
 	if (p) {
-		vi->pages = (struct page *)p->private;
+		rq->pages = (struct page *)p->private;
 		/* clear private here, it is used to chain pages */
 		p->private = 0;
 	} else
@@ -140,15 +187,15 @@ static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
 	return p;
 }
 
-static void skb_xmit_done(struct virtqueue *svq)
+static void skb_xmit_done(struct virtqueue *vq)
 {
-	struct virtnet_info *vi = svq->vdev->priv;
+	struct virtnet_info *vi = vq->vdev->priv;
 
 	/* Suppress further interrupts. */
-	virtqueue_disable_cb(svq);
+	virtqueue_disable_cb(vq);
 
 	/* We were probably waiting for more output buffers. */
-	netif_wake_queue(vi->dev);
+	netif_wake_subqueue(vi->dev, txq_get_qnum(vi, vq));
 }
 
 static void set_skb_frag(struct sk_buff *skb, struct page *page,
@@ -167,9 +214,10 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
 }
 
 /* Called from bottom half context */
-static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+static struct sk_buff *page_to_skb(struct receive_queue *rq,
 				   struct page *page, unsigned int len)
 {
+	struct virtnet_info *vi = rq->vi;
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
 	unsigned int copy, hdr_len, offset;
@@ -225,12 +273,12 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	}
 
 	if (page)
-		give_pages(vi, page);
+		give_pages(rq, page);
 
 	return skb;
 }
 
-static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	struct page *page;
@@ -244,7 +292,7 @@ static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 			skb->dev->stats.rx_length_errors++;
 			return -EINVAL;
 		}
-		page = virtqueue_get_buf(vi->rvq, &len);
+		page = virtqueue_get_buf(rq->vq, &len);
 		if (!page) {
 			pr_debug("%s: rx error: %d buffers missing\n",
 				 skb->dev->name, hdr->mhdr.num_buffers);
@@ -257,13 +305,14 @@ static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
 
 		set_skb_frag(skb, page, 0, &len);
 
-		--vi->num;
+		--rq->num;
 	}
 	return 0;
 }
 
-static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
+static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
 {
+	struct net_device *dev = rq->vi->dev;
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 	struct sk_buff *skb;
@@ -274,7 +323,7 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 		pr_debug("%s: short packet %i\n", dev->name, len);
 		dev->stats.rx_length_errors++;
 		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(vi, buf);
+			give_pages(rq, buf);
 		else
 			dev_kfree_skb(buf);
 		return;
@@ -286,14 +335,14 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
 		skb_trim(skb, len);
 	} else {
 		page = buf;
-		skb = page_to_skb(vi, page, len);
+		skb = page_to_skb(rq, page, len);
 		if (unlikely(!skb)) {
 			dev->stats.rx_dropped++;
-			give_pages(vi, page);
+			give_pages(rq, page);
 			return;
 		}
 		if (vi->mergeable_rx_bufs)
-			if (receive_mergeable(vi, skb)) {
+			if (receive_mergeable(rq, skb)) {
 				dev_kfree_skb(skb);
 				return;
 			}
@@ -363,90 +412,91 @@ frame_err:
 	dev_kfree_skb(skb);
 }
 
-static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 {
 	struct sk_buff *skb;
 	struct skb_vnet_hdr *hdr;
 	int err;
 
-	skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp);
+	skb = __netdev_alloc_skb_ip_align(rq->vi->dev, MAX_PACKET_LEN, gfp);
 	if (unlikely(!skb))
 		return -ENOMEM;
 
 	skb_put(skb, MAX_PACKET_LEN);
 
 	hdr = skb_vnet_hdr(skb);
-	sg_set_buf(vi->rx_sg, &hdr->hdr, sizeof hdr->hdr);
+	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-	skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
+	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
+
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 2, skb, gfp);
 
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
 	if (err < 0)
 		dev_kfree_skb(skb);
 
 	return err;
 }
 
-static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 {
 	struct page *first, *list = NULL;
 	char *p;
 	int i, err, offset;
 
-	/* page in vi->rx_sg[MAX_SKB_FRAGS + 1] is list tail */
+	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
-		first = get_a_page(vi, gfp);
+		first = get_a_page(rq, gfp);
 		if (!first) {
 			if (list)
-				give_pages(vi, list);
+				give_pages(rq, list);
 			return -ENOMEM;
 		}
-		sg_set_buf(&vi->rx_sg[i], page_address(first), PAGE_SIZE);
+		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);
 
 		/* chain new page in list head to match sg */
 		first->private = (unsigned long)list;
 		list = first;
 	}
 
-	first = get_a_page(vi, gfp);
+	first = get_a_page(rq, gfp);
 	if (!first) {
-		give_pages(vi, list);
+		give_pages(rq, list);
 		return -ENOMEM;
 	}
 	p = page_address(first);
 
-	/* vi->rx_sg[0], vi->rx_sg[1] share the same page */
-	/* a separated vi->rx_sg[0] for virtio_net_hdr only due to QEMU bug */
-	sg_set_buf(&vi->rx_sg[0], p, sizeof(struct virtio_net_hdr));
+	/* rq->sg[0], rq->sg[1] share the same page */
+	/* a separated rq->sg[0] for virtio_net_hdr only due to QEMU bug */
+	sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));
 
-	/* vi->rx_sg[1] for data packet, from offset */
+	/* rq->sg[1] for data packet, from offset */
 	offset = sizeof(struct padded_vnet_hdr);
-	sg_set_buf(&vi->rx_sg[1], p + offset, PAGE_SIZE - offset);
+	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);
 
 	/* chain first in list head */
 	first->private = (unsigned long)list;
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, MAX_SKB_FRAGS + 2,
 				first, gfp);
 	if (err < 0)
-		give_pages(vi, first);
+		give_pages(rq, first);
 
 	return err;
 }
 
-static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
 {
 	struct page *page;
 	int err;
 
-	page = get_a_page(vi, gfp);
+	page = get_a_page(rq, gfp);
 	if (!page)
 		return -ENOMEM;
 
-	sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
+	sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
 
-	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+	err = virtqueue_add_buf(rq->vq, rq->sg, 0, 1, page, gfp);
 	if (err < 0)
-		give_pages(vi, page);
+		give_pages(rq, page);
 
 	return err;
 }
@@ -458,97 +508,104 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
  * before we're receiving packets, or from refill_work which is
  * careful to disable receiving (using napi_disable).
  */
-static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
 {
+	struct virtnet_info *vi = rq->vi;
 	int err;
 	bool oom;
 
 	do {
 		if (vi->mergeable_rx_bufs)
-			err = add_recvbuf_mergeable(vi, gfp);
+			err = add_recvbuf_mergeable(rq, gfp);
 		else if (vi->big_packets)
-			err = add_recvbuf_big(vi, gfp);
+			err = add_recvbuf_big(rq, gfp);
 		else
-			err = add_recvbuf_small(vi, gfp);
+			err = add_recvbuf_small(rq, gfp);
 
 		oom = err == -ENOMEM;
 		if (err < 0)
 			break;
-		++vi->num;
+		++rq->num;
 	} while (err > 0);
-	if (unlikely(vi->num > vi->max))
-		vi->max = vi->num;
-	virtqueue_kick(vi->rvq);
+	if (unlikely(rq->num > rq->max))
+		rq->max = rq->num;
+	virtqueue_kick(rq->vq);
 	return !oom;
 }
 
-static void skb_recv_done(struct virtqueue *rvq)
+static void skb_recv_done(struct virtqueue *vq)
 {
-	struct virtnet_info *vi = rvq->vdev->priv;
+	struct virtnet_info *vi = vq->vdev->priv;
+	struct napi_struct *napi = &vi->rq[rxq_get_qnum(vi, vq)]->napi;
+
 	/* Schedule NAPI, Suppress further interrupts if successful. */
-	if (napi_schedule_prep(&vi->napi)) {
-		virtqueue_disable_cb(rvq);
-		__napi_schedule(&vi->napi);
+	if (napi_schedule_prep(napi)) {
+		virtqueue_disable_cb(vq);
+		__napi_schedule(napi);
 	}
 }
 
-static void virtnet_napi_enable(struct virtnet_info *vi)
+static void virtnet_napi_enable(struct receive_queue *rq)
 {
-	napi_enable(&vi->napi);
+	napi_enable(&rq->napi);
 
 	/* If all buffers were filled by other side before we napi_enabled, we
 	 * won't get another interrupt, so process any outstanding packets
 	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
 	 * We synchronize against interrupts via NAPI_STATE_SCHED */
-	if (napi_schedule_prep(&vi->napi)) {
-		virtqueue_disable_cb(vi->rvq);
+	if (napi_schedule_prep(&rq->napi)) {
+		virtqueue_disable_cb(rq->vq);
 		local_bh_disable();
-		__napi_schedule(&vi->napi);
+		__napi_schedule(&rq->napi);
 		local_bh_enable();
 	}
 }
 
 static void refill_work(struct work_struct *work)
 {
-	struct virtnet_info *vi;
+	struct napi_struct *napi;
+	struct receive_queue *rq;
 	bool still_empty;
 
-	vi = container_of(work, struct virtnet_info, refill.work);
-	napi_disable(&vi->napi);
-	still_empty = !try_fill_recv(vi, GFP_KERNEL);
-	virtnet_napi_enable(vi);
+	rq = container_of(work, struct receive_queue, refill.work);
+	napi = &rq->napi;
+
+	napi_disable(napi);
+	still_empty = !try_fill_recv(rq, GFP_KERNEL);
+	virtnet_napi_enable(rq);
 
 	/* In theory, this can happen: if we don't get any buffers in
 	 * we will *never* try to fill again. */
 	if (still_empty)
-		queue_delayed_work(system_nrt_wq, &vi->refill, HZ/2);
+		queue_delayed_work(system_nrt_wq, &rq->refill, HZ/2);
 }
 
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
-	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
+	struct receive_queue *rq = container_of(napi, struct receive_queue,
+						napi);
 	void *buf;
 	unsigned int len, received = 0;
 
 again:
 	while (received < budget &&
-	       (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
-		receive_buf(vi->dev, buf, len);
-		--vi->num;
+	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+		receive_buf(rq, buf, len);
+		--rq->num;
 		received++;
 	}
 
-	if (vi->num < vi->max / 2) {
-		if (!try_fill_recv(vi, GFP_ATOMIC))
-			queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+	if (rq->num < rq->max / 2) {
+		if (!try_fill_recv(rq, GFP_ATOMIC))
+			queue_delayed_work(system_nrt_wq, &rq->refill, 0);
 	}
 
 	/* Out of packets? */
 	if (received < budget) {
 		napi_complete(napi);
-		if (unlikely(!virtqueue_enable_cb(vi->rvq)) &&
+		if (unlikely(!virtqueue_enable_cb(rq->vq)) &&
 		    napi_schedule_prep(napi)) {
-			virtqueue_disable_cb(vi->rvq);
+			virtqueue_disable_cb(rq->vq);
 			__napi_schedule(napi);
 			goto again;
 		}
@@ -557,13 +614,14 @@ again:
 	return received;
 }
 
-static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
+static unsigned int free_old_xmit_skbs(struct virtnet_info *vi,
+				       struct virtqueue *vq)
 {
 	struct sk_buff *skb;
 	unsigned int len, tot_sgs = 0;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
-	while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
+	while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 
 		u64_stats_update_begin(&stats->tx_syncp);
@@ -577,7 +635,8 @@ static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
 	return tot_sgs;
 }
 
-static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
+static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb,
+		    struct virtqueue *vq, struct scatterlist *sg)
 {
 	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
@@ -615,44 +674,47 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
 
 	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
+		sg_set_buf(sg, &hdr->mhdr, sizeof hdr->mhdr);
 	else
-		sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr);
+		sg_set_buf(sg, &hdr->hdr, sizeof hdr->hdr);
 
-	hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
-	return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
+	hdr->num_sg = skb_to_sgvec(skb, sg + 1, 0, skb->len) + 1;
+	return virtqueue_add_buf(vq, sg, hdr->num_sg,
 				 0, skb, GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	int qnum = skb_get_queue_mapping(skb);
+	struct virtqueue *vq = vi->sq[qnum]->vq;
 	int capacity;
 
 	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(vi);
+	free_old_xmit_skbs(vi, vq);
 
 	/* Try to transmit */
-	capacity = xmit_skb(vi, skb);
+	capacity = xmit_skb(vi, skb, vq, vi->sq[qnum]->sg);
 
 	/* This can happen with OOM and indirect buffers. */
 	if (unlikely(capacity < 0)) {
 		if (likely(capacity == -ENOMEM)) {
 			if (net_ratelimit())
 				dev_warn(&dev->dev,
-					 "TX queue failure: out of memory\n");
+					"TXQ (%d) failure: out of memory\n",
+					qnum);
 		} else {
 			dev->stats.tx_fifo_errors++;
 			if (net_ratelimit())
 				dev_warn(&dev->dev,
-					 "Unexpected TX queue failure: %d\n",
-					 capacity);
+					"Unexpected TXQ (%d) failure: %d\n",
+					qnum, capacity);
 		}
 		dev->stats.tx_dropped++;
 		kfree_skb(skb);
 		return NETDEV_TX_OK;
 	}
-	virtqueue_kick(vi->svq);
+	virtqueue_kick(vq);
 
 	/* Don't wait up for transmitted skbs to be freed. */
 	skb_orphan(skb);
@@ -661,13 +723,13 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (capacity < 2+MAX_SKB_FRAGS) {
-		netif_stop_queue(dev);
-		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
+		netif_stop_subqueue(dev, qnum);
+		if (unlikely(!virtqueue_enable_cb_delayed(vq))) {
 			/* More just got used, free them then recheck. */
-			capacity += free_old_xmit_skbs(vi);
+			capacity += free_old_xmit_skbs(vi, vq);
 			if (capacity >= 2+MAX_SKB_FRAGS) {
-				netif_start_queue(dev);
-				virtqueue_disable_cb(vi->svq);
+				netif_start_subqueue(dev, qnum);
+				virtqueue_disable_cb(vq);
 			}
 		}
 	}
@@ -700,7 +762,8 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
 	unsigned int start;
 
 	for_each_possible_cpu(cpu) {
-		struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
+		struct virtnet_stats __percpu *stats
+			= per_cpu_ptr(vi->stats, cpu);
 		u64 tpackets, tbytes, rpackets, rbytes;
 
 		do {
@@ -734,20 +797,31 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
 static void virtnet_netpoll(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
 
-	napi_schedule(&vi->napi);
+	for (i = 0; i < vi->num_queue_pairs; i++)
+		napi_schedule(&vi->rq[i]->napi);
 }
 #endif
 
+static void free_stats(struct virtnet_info *vi)
+{
+	free_percpu(vi->stats);
+}
+
 static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
 
-	/* Make sure we have some buffers: if oom use wq. */
-	if (!try_fill_recv(vi, GFP_KERNEL))
-		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		/* Make sure we have some buffers: if oom use wq. */
+		if (!try_fill_recv(vi->rq[i], GFP_KERNEL))
+			queue_delayed_work(system_nrt_wq,
+					   &vi->rq[i]->refill, 0);
+		virtnet_napi_enable(vi->rq[i]);
+	}
 
-	virtnet_napi_enable(vi);
 	return 0;
 }
 
@@ -809,10 +883,13 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
 static int virtnet_close(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
 
 	/* Make sure refill_work doesn't re-enable napi! */
-	cancel_delayed_work_sync(&vi->refill);
-	napi_disable(&vi->napi);
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		cancel_delayed_work_sync(&vi->rq[i]->refill);
+		napi_disable(&vi->rq[i]->napi);
+	}
 
 	return 0;
 }
@@ -924,11 +1001,10 @@ static void virtnet_get_ringparam(struct net_device *dev,
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 
-	ring->rx_max_pending = virtqueue_get_vring_size(vi->rvq);
-	ring->tx_max_pending = virtqueue_get_vring_size(vi->svq);
+	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0]->vq);
+	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0]->vq);
 	ring->rx_pending = ring->rx_max_pending;
 	ring->tx_pending = ring->tx_max_pending;
-
 }
 
 
@@ -961,6 +1037,19 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+/* To avoid contending a lock hold by a vcpu who would exit to host, select the
+ * txq based on the processor id.
+ */
+static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
+{
+	int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
+		  smp_processor_id();
+
+	while (unlikely(txq >= dev->real_num_tx_queues))
+		txq -= dev->real_num_tx_queues;
+	return txq;
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open            = virtnet_open,
 	.ndo_stop   	     = virtnet_close,
@@ -972,6 +1061,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_get_stats64     = virtnet_stats,
 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
+	.ndo_select_queue     = virtnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = virtnet_netpoll,
 #endif
@@ -1007,10 +1097,10 @@ static void virtnet_config_changed_work(struct work_struct *work)
 
 	if (vi->status & VIRTIO_NET_S_LINK_UP) {
 		netif_carrier_on(vi->dev);
-		netif_wake_queue(vi->dev);
+		netif_tx_wake_all_queues(vi->dev);
 	} else {
 		netif_carrier_off(vi->dev);
-		netif_stop_queue(vi->dev);
+		netif_tx_stop_all_queues(vi->dev);
 	}
 done:
 	mutex_unlock(&vi->config_lock);
@@ -1023,41 +1113,252 @@ static void virtnet_config_changed(struct virtio_device *vdev)
 	queue_work(system_nrt_wq, &vi->config_work);
 }
 
-static int init_vqs(struct virtnet_info *vi)
+static void free_receive_bufs(struct virtnet_info *vi)
+{
+	int i;
+
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		while (vi->rq[i]->pages)
+			__free_pages(get_a_page(vi->rq[i], GFP_KERNEL), 0);
+	}
+}
+
+/* Free memory allocated for send and receive queues */
+static void free_rq_sq(struct virtnet_info *vi)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-	const char *names[] = { "input", "output", "control" };
-	int nvqs, err;
+	int i;
 
-	/* We expect two virtqueues, receive then send,
-	 * and optionally control. */
-	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+	free_stats(vi);
 
-	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
-	if (err)
-		return err;
+	if (vi->rq) {
+		for (i = 0; i < vi->num_queue_pairs; i++)
+			kfree(vi->rq[i]);
+		kfree(vi->rq);
+	}
+
+	if (vi->sq) {
+		for (i = 0; i < vi->num_queue_pairs; i++)
+			kfree(vi->sq[i]);
+		kfree(vi->sq);
+	}
+}
+
+static void free_unused_bufs(struct virtnet_info *vi)
+{
+	void *buf;
+	int i;
+
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		struct virtqueue *vq = vi->sq[i]->vq;
+
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
+			dev_kfree_skb(buf);
+	}
+
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		struct virtqueue *vq = vi->rq[i]->vq;
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+			if (vi->mergeable_rx_bufs || vi->big_packets)
+				give_pages(vi->rq[i], buf);
+			else
+				dev_kfree_skb(buf);
+			--vi->rq[i]->num;
+		}
+		BUG_ON(vi->rq[i]->num != 0);
+	}
+}
+
+static int virtnet_find_vqs(struct virtnet_info *vi)
+{
+	vq_callback_t **callbacks;
+	struct virtqueue **vqs;
+	int ret = -ENOMEM;
+	int i, total_vqs;
+	char **names;
+
+	/*
+	 * We expect 1 RX virtqueue followed by 1 TX virtqueue, followd by
+	 * possible control virtqueue and followed by the same
+	 * 'vi->num_queue_pairs-1' more times
+	 */
+	total_vqs = vi->num_queue_pairs * 2 +
+		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
+
+	/* Allocate space for find_vqs parameters */
+	vqs = kmalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
+	callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
+	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
+	if (!vqs || !callbacks || !names)
+		goto err;
+
+	/* Parameters for control virtqueue, if any */
+	if (vi->has_cvq) {
+		callbacks[2] = NULL;
+		names[2] = "control";
+	}
+
+	/* Allocate/initialize parameters for send/receive virtqueues */
+	for (i = 0; i < vi->num_queue_pairs * 2; i += 2) {
+		int j = (i == 0 ? i : i + vi->has_cvq);
+		callbacks[j] = skb_recv_done;
+		callbacks[j + 1] = skb_xmit_done;
+		names[j] = kasprintf(GFP_KERNEL, "input.%d", i / 2);
+		names[j + 1] = kasprintf(GFP_KERNEL, "output.%d", i / 2);
+	}
 
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
+					 (const char **)names);
+	if (ret)
+		goto err;
+
+	if (vi->has_cvq)
 		vi->cvq = vqs[2];
 
-		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
-			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
+	for (i = 0; i < vi->num_queue_pairs * 2; i += 2) {
+		int j = i == 0 ? i : i + vi->has_cvq;
+		vi->rq[i / 2]->vq = vqs[j];
+		vi->sq[i / 2]->vq = vqs[j + 1];
 	}
+
+err:
+	if (ret && names)
+		for (i = 0; i < vi->num_queue_pairs * 2; i++)
+			kfree(names[i]);
+
+	kfree(names);
+	kfree(callbacks);
+	kfree(vqs);
+
+	return ret;
+}
+
+static int allocate_queues(struct virtnet_info *vi)
+{
+	int ret = -ENOMEM;
+	int i;
+
+	vi->rq = kcalloc(vi->num_queue_pairs, sizeof(*vi->rq), GFP_KERNEL);
+	vi->sq = kcalloc(vi->num_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
+	if (!vi->sq || !vi->rq)
+		goto err;
+
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		vi->rq[i] = kzalloc(sizeof(*vi->rq[i]), GFP_KERNEL);
+		vi->sq[i] = kzalloc(sizeof(*vi->sq[i]), GFP_KERNEL);
+		if (!vi->rq[i] || !vi->sq[i])
+			goto err;
+	}
+
+	ret = 0;
+
+	/* setup initial receive and send queue parameters */
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		vi->rq[i]->vi = vi;
+		vi->rq[i]->pages = NULL;
+		INIT_DELAYED_WORK(&vi->rq[i]->refill, refill_work);
+		netif_napi_add(vi->dev, &vi->rq[i]->napi, virtnet_poll,
+			       napi_weight);
+
+		sg_init_table(vi->rq[i]->sg, ARRAY_SIZE(vi->rq[i]->sg));
+		sg_init_table(vi->sq[i]->sg, ARRAY_SIZE(vi->sq[i]->sg));
+	}
+
+err:
+	if (ret)
+		free_rq_sq(vi);
+
+	return ret;
+}
+
+static int virtnet_setup_vqs(struct virtnet_info *vi)
+{
+	int ret;
+
+	/* Allocate send & receive queues */
+	ret = allocate_queues(vi);
+	if (!ret) {
+		ret = virtnet_find_vqs(vi);
+		if (ret)
+			free_rq_sq(vi);
+	}
+
+	return ret;
+}
+
+static void _virtnet_free_affinity(struct virtnet_info *vi, int i)
+{
+	struct virtio_device *vdev = vi->vdev;
+	int irq;
+
+	while (i) {
+		i--;
+		irq = vdev->config->get_vq_irq(vdev, vi->rq[i]->vq);
+		irq_set_affinity_hint(irq, NULL);
+		free_cpumask_var(vi->rq[i]->affinity_mask);
+		irq = vdev->config->get_vq_irq(vdev, vi->sq[i]->vq);
+		irq_set_affinity_hint(irq, NULL);
+		free_cpumask_var(vi->sq[i]->affinity_mask);
+	}
+}
+
+static int virtnet_init_affinity(struct virtnet_info *vi)
+{
+	struct virtio_device *vdev = vi->vdev;
+	int i, irq;
+
+	for (i = 0; i < vi->num_queue_pairs; i++) {
+		if (!alloc_cpumask_var(&vi->rq[i]->affinity_mask, GFP_KERNEL))
+			goto err_out;
+		if (!alloc_cpumask_var(&vi->sq[i]->affinity_mask, GFP_KERNEL)) {
+			free_cpumask_var(vi->rq[i]->affinity_mask);
+			goto err_out;
+		}
+		cpumask_set_cpu(i, vi->rq[i]->affinity_mask);
+		cpumask_set_cpu(i, vi->sq[i]->affinity_mask);
+
+		irq = vdev->config->get_vq_irq(vdev, vi->rq[i]->vq);
+		if (irq_set_affinity_hint(irq, vi->rq[i]->affinity_mask))
+			goto err_out;
+		irq = vdev->config->get_vq_irq(vdev, vi->sq[i]->vq);
+		if (irq_set_affinity_hint(irq, vi->sq[i]->affinity_mask)) {
+			irq = vdev->config->get_vq_irq(vdev, vi->rq[i]->vq);
+			irq_set_affinity_hint(irq, NULL);
+			goto err_out;
+		}
+	}
+
 	return 0;
+err_out:
+	_virtnet_free_affinity(vi, i);
+	return -ENOMEM;
+}
+
+static void virtnet_free_affinity(struct virtnet_info *vi)
+{
+	_virtnet_free_affinity(vi, vi->num_queue_pairs);
 }
 
 static int virtnet_probe(struct virtio_device *vdev)
 {
-	int err;
+	int i, err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
+	u16 num_queues, num_queue_pairs;
+
+	/* Find if host supports multiqueue virtio_net device */
+	err = virtio_config_val(vdev, VIRTIO_NET_F_MULTIQUEUE,
+				offsetof(struct virtio_net_config,
+				num_queues), &num_queues);
+
+	/* We need atleast 2 queue's */
+	if (err || num_queues < 2)
+		num_queues = 2;
+
+	num_queue_pairs = num_queues / 2;
 
 	/* Allocate ourselves a network device with room for our info */
-	dev = alloc_etherdev(sizeof(struct virtnet_info));
+	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), num_queue_pairs);
 	if (!dev)
 		return -ENOMEM;
 
@@ -1103,22 +1404,18 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	/* Set up our device-specific information */
 	vi = netdev_priv(dev);
-	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
 	vi->dev = dev;
 	vi->vdev = vdev;
 	vdev->priv = vi;
-	vi->pages = NULL;
 	vi->stats = alloc_percpu(struct virtnet_stats);
 	err = -ENOMEM;
 	if (vi->stats == NULL)
-		goto free;
+		goto free_netdev;
 
-	INIT_DELAYED_WORK(&vi->refill, refill_work);
 	mutex_init(&vi->config_lock);
 	vi->config_enable = true;
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
-	sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
-	sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
+	vi->num_queue_pairs = num_queue_pairs;
 
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -1129,9 +1426,17 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	err = init_vqs(vi);
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
+		vi->has_cvq = true;
+
+	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
+	err = virtnet_setup_vqs(vi);
 	if (err)
-		goto free_stats;
+		goto free_netdev;
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) &&
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+		dev->features |= NETIF_F_HW_VLAN_FILTER;
 
 	err = register_netdev(dev);
 	if (err) {
@@ -1140,12 +1445,21 @@ static int virtnet_probe(struct virtio_device *vdev)
 	}
 
 	/* Last of all, set up some receive buffers. */
-	try_fill_recv(vi, GFP_KERNEL);
+	for (i = 0; i < num_queue_pairs; i++) {
+		try_fill_recv(vi->rq[i], GFP_KERNEL);
+
+		/* If we didn't even get one input buffer, we're useless. */
+		if (vi->rq[i]->num == 0) {
+			free_unused_bufs(vi);
+			err = -ENOMEM;
+			goto free_recv_bufs;
+		}
+	}
 
-	/* If we didn't even get one input buffer, we're useless. */
-	if (vi->num == 0) {
-		err = -ENOMEM;
-		goto unregister;
+	err = virtnet_init_affinity(vi);
+	if (err) {
+		pr_debug("virtio_net: fail to initialize irq affinity\n");
+		goto free_recv_bufs;
 	}
 
 	/* Assume link up if device can't report link status,
@@ -1158,42 +1472,26 @@ static int virtnet_probe(struct virtio_device *vdev)
 		netif_carrier_on(dev);
 	}
 
-	pr_debug("virtnet: registered device %s\n", dev->name);
+	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
+		 dev->name, num_queue_pairs);
+
 	return 0;
 
-unregister:
+free_recv_bufs:
+	free_receive_bufs(vi);
 	unregister_netdev(dev);
 free_vqs:
+	for (i = 0; i < num_queue_pairs; i++)
+		cancel_delayed_work_sync(&vi->rq[i]->refill);
 	vdev->config->del_vqs(vdev);
-free_stats:
-	free_percpu(vi->stats);
-free:
+
+free_netdev:
+	free_rq_sq(vi);
+
 	free_netdev(dev);
 	return err;
 }
 
-static void free_unused_bufs(struct virtnet_info *vi)
-{
-	void *buf;
-	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->svq);
-		if (!buf)
-			break;
-		dev_kfree_skb(buf);
-	}
-	while (1) {
-		buf = virtqueue_detach_unused_buf(vi->rvq);
-		if (!buf)
-			break;
-		if (vi->mergeable_rx_bufs || vi->big_packets)
-			give_pages(vi, buf);
-		else
-			dev_kfree_skb(buf);
-		--vi->num;
-	}
-	BUG_ON(vi->num != 0);
-}
-
 static void remove_vq_common(struct virtnet_info *vi)
 {
 	vi->vdev->config->reset(vi->vdev);
@@ -1203,14 +1501,17 @@ static void remove_vq_common(struct virtnet_info *vi)
 
 	vi->vdev->config->del_vqs(vi->vdev);
 
-	while (vi->pages)
-		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+	free_receive_bufs(vi);
 }
 
 static void __devexit virtnet_remove(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
 
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
+
 	/* Prevent config work handler from accessing the device. */
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = false;
@@ -1220,6 +1521,11 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
 
 	remove_vq_common(vi);
 
+	virtnet_free_affinity(vi);
+
+	/* Free memory for send and receive queues */
+	free_rq_sq(vi);
+
 	flush_work(&vi->config_work);
 
 	free_percpu(vi->stats);
@@ -1230,6 +1536,7 @@ static void __devexit virtnet_remove(struct virtio_device *vdev)
 static int virtnet_freeze(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
+	int i;
 
 	/* Prevent config work handler from accessing the device */
 	mutex_lock(&vi->config_lock);
@@ -1237,10 +1544,13 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	mutex_unlock(&vi->config_lock);
 
 	netif_device_detach(vi->dev);
-	cancel_delayed_work_sync(&vi->refill);
+	for (i = 0; i < vi->num_queue_pairs; i++)
+		cancel_delayed_work_sync(&vi->rq[i]->refill);
 
 	if (netif_running(vi->dev))
-		napi_disable(&vi->napi);
+		for (i = 0; i < vi->num_queue_pairs; i++)
+			napi_disable(&vi->rq[i]->napi);
+
 
 	remove_vq_common(vi);
 
@@ -1252,19 +1562,22 @@ static int virtnet_freeze(struct virtio_device *vdev)
 static int virtnet_restore(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
-	int err;
+	int err, i;
 
-	err = init_vqs(vi);
+	err = virtnet_setup_vqs(vi);
 	if (err)
 		return err;
 
 	if (netif_running(vi->dev))
-		virtnet_napi_enable(vi);
+		for (i = 0; i < vi->num_queue_pairs; i++)
+			virtnet_napi_enable(vi->rq[i]);
 
 	netif_device_attach(vi->dev);
 
-	if (!try_fill_recv(vi, GFP_KERNEL))
-		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+	for (i = 0; i < vi->num_queue_pairs; i++)
+		if (!try_fill_recv(vi->rq[i], GFP_KERNEL))
+			queue_delayed_work(system_nrt_wq,
+					   &vi->rq[i]->refill, 0);
 
 	mutex_lock(&vi->config_lock);
 	vi->config_enable = true;
@@ -1287,7 +1600,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
 	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
-	VIRTIO_NET_F_GUEST_ANNOUNCE,
+	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MULTIQUEUE,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 1bc7e30..60f09ff 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -61,6 +61,8 @@ struct virtio_net_config {
 	__u8 mac[6];
 	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
 	__u16 status;
+	/* Total number of RX/TX queues */
+	__u16 num_queues;
 } __attribute__((packed));
 
 /* This is the first element of the scatter-gather list.  If you don't
-- 
1.7.1

^ permalink raw reply related

* Re: [net-next RFC V4 PATCH 0/4] Multiqueue virtio-net
From: Michael S. Tsirkin @ 2012-06-25 10:07 UTC (permalink / raw)
  To: Jason Wang
  Cc: krkumar2, habanero, kvm, netdev, mashirle, linux-kernel,
	virtualization, edumazet, tahm, jwhan, davem
In-Reply-To: <20120625090829.7263.65026.stgit@amd-6168-8-1.englab.nay.redhat.com>

On Mon, Jun 25, 2012 at 05:16:48PM +0800, Jason Wang wrote:
> Hello All:
> 
> This series is an update version of multiqueue virtio-net driver based on
> Krishna Kumar's work to let virtio-net use multiple rx/tx queues to do the
> packets reception and transmission. Please review and comments.
> 
> Test Environment:
> - Intel(R) Xeon(R) CPU E5620 @ 2.40GHz, 8 cores 2 numa nodes
> - Two directed connected 82599
> 
> Test Summary:
> 
> - Highlights: huge improvements on TCP_RR test
> - Lowlights: regression on small packet transmission, higher cpu utilization
>              than single queue, need further optimization

Didn't review yet, reacting this this paragraph:

To avoid regressions, it seems reasonable to make
the device use a single queue by default for now.
Add a way to switch multiqueue on/off using ethtool.

This way guest admin can tune the device for the
workload manually until we manage to imlement some
self-tuning heuristics.

-- 
MST

^ permalink raw reply

* RE: [PATCH 4/4] net/mlx4_en: Use atomic counter to decide when queue is full
From: Eric Dumazet @ 2012-06-25 10:12 UTC (permalink / raw)
  To: David Laight; +Cc: David Miller, yevgenyp, netdev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B6F61@saturn3.aculab.com>

On Mon, 2012-06-25 at 10:00 +0100, David Laight wrote:
> > > The Transmit and transmit completion flows execute from different
> contexts,
> > > which are not synchronized. Hence naive reading the of consumer
> index might
> > > give wrong value by the time it is being used, That could lead to a
> state of transmit timeout.
> > > Fix that by using atomic variable to maintain that index.
> > > 
> > > Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
> > 
> > I'm not convinced.  There is only one place that actually changes
> > the counter.
> > 
> > So it seems more like you have a missing memory barrier somewhere.
> 
> Or just keep the two ring indexes - instead of keeping the
> number of 'active' entries as well.
> Then you don't have a variable which the tx setup and
> tx completion routines both update.

This is what was implied by David.

Using a producer/consumer index and appropriate memory barriers.

start_xmit() and tx completion can be truly lockless and atomicless in
their fast path.

There are many drivers doing that correctly.

tg3 driver is a good example.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox