Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 03/16] batman-adv: add bat_orig_print API function
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Each routing protocol has its own metric and private
variables, therefore it is useful to introduce a new API
for originator information printing.

This API needs to be implemented by each protocol in order
to provide its specific originator table output.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/bat_iv_ogm.c | 65 ++++++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/originator.c | 66 +++++++--------------------------------------
 net/batman-adv/types.h      |  3 +++
 3 files changed, 78 insertions(+), 56 deletions(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 0b1343d..4aabd55 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1411,6 +1411,70 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
 	return NET_RX_SUCCESS;
 }
 
+/**
+ * batadv_iv_ogm_orig_print - print the originator table
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: debugfs table seq_file struct
+ */
+static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
+				     struct seq_file *seq)
+{
+	struct batadv_neigh_node *neigh_node, *neigh_node_tmp;
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	int last_seen_msecs, last_seen_secs;
+	struct batadv_orig_node *orig_node;
+	unsigned long last_seen_jiffies;
+	struct hlist_head *head;
+	int batman_count = 0;
+	uint32_t i;
+
+	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
+		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
+		   "Nexthop", "outgoingIF", "Potential nexthops");
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+			neigh_node = batadv_orig_node_get_router(orig_node);
+			if (!neigh_node)
+				continue;
+
+			if (neigh_node->bat_iv.tq_avg == 0)
+				goto next;
+
+			last_seen_jiffies = jiffies - orig_node->last_seen;
+			last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
+			last_seen_secs = last_seen_msecs / 1000;
+			last_seen_msecs = last_seen_msecs % 1000;
+
+			seq_printf(seq, "%pM %4i.%03is   (%3i) %pM [%10s]:",
+				   orig_node->orig, last_seen_secs,
+				   last_seen_msecs, neigh_node->bat_iv.tq_avg,
+				   neigh_node->addr,
+				   neigh_node->if_incoming->net_dev->name);
+
+			hlist_for_each_entry_rcu(neigh_node_tmp,
+						 &orig_node->neigh_list, list) {
+				seq_printf(seq, " %pM (%3i)",
+					   neigh_node_tmp->addr,
+					   neigh_node_tmp->bat_iv.tq_avg);
+			}
+
+			seq_puts(seq, "\n");
+			batman_count++;
+
+next:
+			batadv_neigh_node_free_ref(neigh_node);
+		}
+		rcu_read_unlock();
+	}
+
+	if (batman_count == 0)
+		seq_puts(seq, "No batman nodes in range ...\n");
+}
+
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
 	.bat_iface_enable = batadv_iv_ogm_iface_enable,
@@ -1419,6 +1483,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
 	.bat_ogm_schedule = batadv_iv_ogm_schedule,
 	.bat_ogm_emit = batadv_iv_ogm_emit,
+	.bat_orig_print = batadv_iv_ogm_orig_print,
 };
 
 int __init batadv_iv_init(void)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index aa14094..8d1b16e 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -513,73 +513,27 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
-	struct batadv_hashtable *hash = bat_priv->orig_hash;
-	struct hlist_head *head;
 	struct batadv_hard_iface *primary_if;
-	struct batadv_orig_node *orig_node;
-	struct batadv_neigh_node *neigh_node, *neigh_node_tmp;
-	int batman_count = 0;
-	int last_seen_secs;
-	int last_seen_msecs;
-	unsigned long last_seen_jiffies;
-	uint32_t i;
 
 	primary_if = batadv_seq_print_text_primary_if_get(seq);
 	if (!primary_if)
-		goto out;
+		return 0;
 
-	seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
+	seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
 		   BATADV_SOURCE_VERSION, primary_if->net_dev->name,
-		   primary_if->net_dev->dev_addr, net_dev->name);
-	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
-		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
-		   "Nexthop", "outgoingIF", "Potential nexthops");
-
-	for (i = 0; i < hash->size; i++) {
-		head = &hash->table[i];
-
-		rcu_read_lock();
-		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			neigh_node = batadv_orig_node_get_router(orig_node);
-			if (!neigh_node)
-				continue;
-
-			if (neigh_node->bat_iv.tq_avg == 0)
-				goto next;
-
-			last_seen_jiffies = jiffies - orig_node->last_seen;
-			last_seen_msecs = jiffies_to_msecs(last_seen_jiffies);
-			last_seen_secs = last_seen_msecs / 1000;
-			last_seen_msecs = last_seen_msecs % 1000;
-
-			seq_printf(seq, "%pM %4i.%03is   (%3i) %pM [%10s]:",
-				   orig_node->orig, last_seen_secs,
-				   last_seen_msecs, neigh_node->bat_iv.tq_avg,
-				   neigh_node->addr,
-				   neigh_node->if_incoming->net_dev->name);
-
-			hlist_for_each_entry_rcu(neigh_node_tmp,
-						 &orig_node->neigh_list, list) {
-				seq_printf(seq, " %pM (%3i)",
-					   neigh_node_tmp->addr,
-					   neigh_node_tmp->bat_iv.tq_avg);
-			}
+		   primary_if->net_dev->dev_addr, net_dev->name,
+		   bat_priv->bat_algo_ops->name);
 
-			seq_puts(seq, "\n");
-			batman_count++;
+	batadv_hardif_free_ref(primary_if);
 
-next:
-			batadv_neigh_node_free_ref(neigh_node);
-		}
-		rcu_read_unlock();
+	if (!bat_priv->bat_algo_ops->bat_orig_print) {
+		seq_puts(seq,
+			 "No printing function for this routing protocol\n");
+		return 0;
 	}
 
-	if (batman_count == 0)
-		seq_puts(seq, "No batman nodes in range ...\n");
+	bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq);
 
-out:
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 	return 0;
 }
 
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 97bde51..72fd617 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -992,6 +992,7 @@ struct batadv_forw_packet {
  * @bat_primary_iface_set: called when primary interface is selected / changed
  * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
  * @bat_ogm_emit: send scheduled OGM
+ * @bat_orig_print: print the originator table (optional)
  */
 struct batadv_algo_ops {
 	struct hlist_node list;
@@ -1002,6 +1003,8 @@ struct batadv_algo_ops {
 	void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
 	void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
 	void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
+	/* orig_node handling API */
+	void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq);
 };
 
 /**
-- 
1.8.4

^ permalink raw reply related

* [PATCH 05/16] batman-adv: add bat_neigh_is_equiv_or_better API function
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Each routing protocol has its own metric semantic and
therefore is the protocol itself the only component able to
compare two metrics to check their "similarity".

This new API allows each routing protocol to implement its
own logic and make the external code protocol agnostic.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/bat_iv_ogm.c | 18 ++++++++++++++++++
 net/batman-adv/main.c       |  3 ++-
 net/batman-adv/main.h       |  6 ++++++
 net/batman-adv/types.h      |  4 ++++
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index b288d90..4376fe7 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1494,6 +1494,23 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
 	return tq1 - tq2;
 }
 
+/**
+ * batadv_iv_ogm_neigh_is_eob - check if neigh1 is equally good or better than
+ *  neigh2 from the metric prospective
+ * @neigh1: the first neighbor object of the comparison
+ * @neigh2: the second neighbor object of the comparison
+ *
+ * Returns true if the metric via neigh1 is equally good or better than the
+ * metric via neigh2, false otherwise.
+ */
+static bool batadv_iv_ogm_neigh_is_eob(struct batadv_neigh_node *neigh1,
+				       struct batadv_neigh_node *neigh2)
+{
+	int diff = batadv_iv_ogm_neigh_cmp(neigh1, neigh2);
+
+	return diff > -BATADV_TQ_SIMILARITY_THRESHOLD;
+}
+
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
 	.bat_iface_enable = batadv_iv_ogm_iface_enable,
@@ -1503,6 +1520,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.bat_ogm_schedule = batadv_iv_ogm_schedule,
 	.bat_ogm_emit = batadv_iv_ogm_emit,
 	.bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
+	.bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob,
 	.bat_orig_print = batadv_iv_ogm_orig_print,
 };
 
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 1f2f1ac..c51a5e5 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -502,7 +502,8 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
 	    !bat_algo_ops->bat_primary_iface_set ||
 	    !bat_algo_ops->bat_ogm_schedule ||
 	    !bat_algo_ops->bat_ogm_emit ||
-	    !bat_algo_ops->bat_neigh_cmp) {
+	    !bat_algo_ops->bat_neigh_cmp ||
+	    !bat_algo_ops->bat_neigh_is_equiv_or_better) {
 		pr_info("Routing algo '%s' does not implement required ops\n",
 			bat_algo_ops->name);
 		ret = -EINVAL;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d7dfafe..773301a 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -86,6 +86,12 @@
 /* numbers of originator to contact for any PUT/GET DHT operation */
 #define BATADV_DAT_CANDIDATES_NUM 3
 
+/**
+ * BATADV_TQ_SIMILARITY_THRESHOLD - TQ points that a secondary metric can differ
+ *  at most from the primary one in order to be still considered acceptable
+ */
+#define BATADV_TQ_SIMILARITY_THRESHOLD 50
+
 /* how much worse secondary interfaces may be to be considered as bonding
  * candidates
  */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 7a00932..d0e64d2 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -993,6 +993,8 @@ struct batadv_forw_packet {
  * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
  * @bat_ogm_emit: send scheduled OGM
  * @bat_neigh_cmp: compare the metrics of two neighbors
+ * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or
+ *  better than neigh2 from the metric prospective
  * @bat_orig_print: print the originator table (optional)
  */
 struct batadv_algo_ops {
@@ -1006,6 +1008,8 @@ struct batadv_algo_ops {
 	void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
 	int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
 			     struct batadv_neigh_node *neigh2);
+	bool (*bat_neigh_is_equiv_or_better)(struct batadv_neigh_node *neigh1,
+					     struct batadv_neigh_node *neigh2);
 	/* orig_node handling API */
 	void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq);
 };
-- 
1.8.4

^ permalink raw reply related

* [PATCH 04/16] batman-adv: add bat_neigh_cmp API function
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

This new API allows to compare the two neighbours based on
the metric avoiding the user to deal with any routing
algorithm specific detail

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/bat_iv_ogm.c | 20 ++++++++++++++++++++
 net/batman-adv/main.c       |  3 ++-
 net/batman-adv/types.h      |  3 +++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 4aabd55..b288d90 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1475,6 +1475,25 @@ next:
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
 
+/**
+ * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @neigh2: the second neighbor object of the comparison
+ *
+ * Returns a value less, equal to or greater than 0 if the metric via neigh1 is
+ * lower, the same as or higher than the metric via neigh2
+ */
+static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1,
+				   struct batadv_neigh_node *neigh2)
+{
+	uint8_t tq1, tq2;
+
+	tq1 = neigh1->bat_iv.tq_avg;
+	tq2 = neigh2->bat_iv.tq_avg;
+
+	return tq1 - tq2;
+}
+
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
 	.bat_iface_enable = batadv_iv_ogm_iface_enable,
@@ -1483,6 +1502,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.bat_primary_iface_set = batadv_iv_ogm_primary_iface_set,
 	.bat_ogm_schedule = batadv_iv_ogm_schedule,
 	.bat_ogm_emit = batadv_iv_ogm_emit,
+	.bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
 	.bat_orig_print = batadv_iv_ogm_orig_print,
 };
 
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3159a14..1f2f1ac 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -501,7 +501,8 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops)
 	    !bat_algo_ops->bat_iface_update_mac ||
 	    !bat_algo_ops->bat_primary_iface_set ||
 	    !bat_algo_ops->bat_ogm_schedule ||
-	    !bat_algo_ops->bat_ogm_emit) {
+	    !bat_algo_ops->bat_ogm_emit ||
+	    !bat_algo_ops->bat_neigh_cmp) {
 		pr_info("Routing algo '%s' does not implement required ops\n",
 			bat_algo_ops->name);
 		ret = -EINVAL;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 72fd617..7a00932 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -992,6 +992,7 @@ struct batadv_forw_packet {
  * @bat_primary_iface_set: called when primary interface is selected / changed
  * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue
  * @bat_ogm_emit: send scheduled OGM
+ * @bat_neigh_cmp: compare the metrics of two neighbors
  * @bat_orig_print: print the originator table (optional)
  */
 struct batadv_algo_ops {
@@ -1003,6 +1004,8 @@ struct batadv_algo_ops {
 	void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface);
 	void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface);
 	void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet);
+	int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1,
+			     struct batadv_neigh_node *neigh2);
 	/* orig_node handling API */
 	void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq);
 };
-- 
1.8.4

^ permalink raw reply related

* [PATCH 06/16] batman-adv: adapt bonding to use the new API functions
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/bat_iv_ogm.c |  2 +-
 net/batman-adv/routing.c    | 42 +++++++++++++++++++++++++++++-------------
 net/batman-adv/routing.h    |  3 ++-
 3 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 4376fe7..7cdc394 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -853,7 +853,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
 		neigh_node->last_ttl = batadv_ogm_packet->header.ttl;
 	}
 
-	batadv_bonding_candidate_add(orig_node, neigh_node);
+	batadv_bonding_candidate_add(bat_priv, orig_node, neigh_node);
 
 	/* if this neighbor already is our next hop there is nothing
 	 * to change
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 5b78a71..71fba14 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -115,11 +115,21 @@ out:
 	return;
 }
 
-void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
+/**
+ * batadv_bonding_candidate_add - consider a new link for bonding mode towards
+ *  the given originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: the target node
+ * @neigh_node: the neighbor representing the new link to consider for bonding
+ *  mode
+ */
+void batadv_bonding_candidate_add(struct batadv_priv *bat_priv,
+				  struct batadv_orig_node *orig_node,
 				  struct batadv_neigh_node *neigh_node)
 {
+	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
 	struct batadv_neigh_node *tmp_neigh_node, *router = NULL;
-	uint8_t interference_candidate = 0, tq;
+	uint8_t interference_candidate = 0;
 
 	spin_lock_bh(&orig_node->neigh_list_lock);
 
@@ -134,8 +144,7 @@ void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
 
 
 	/* ... and is good enough to be considered */
-	tq = router->bat_iv.tq_avg - BATADV_BONDING_TQ_THRESHOLD;
-	if (neigh_node->bat_iv.tq_avg < tq)
+	if (bao->bat_neigh_is_equiv_or_better(neigh_node, router))
 		goto candidate_del;
 
 	/* check if we have another candidate with the same mac address or
@@ -481,18 +490,25 @@ out:
 	return router;
 }
 
-/* Interface Alternating: Use the best of the
- * remaining candidates which are not using
- * this interface.
+/**
+ * batadv_find_ifalter_router - find the best of the remaining candidates which
+ *  are not using this interface
+ * @bat_priv: the bat priv with all the soft interface information
+ * @primary_orig: the destination
+ * @recv_if: the interface that the router returned by this function has to not
+ *  use
  *
- * Increases the returned router's refcount
+ * Returns the best candidate towards primary_orig that is not using recv_if.
+ * Increases the returned neighbor's refcount
  */
 static struct batadv_neigh_node *
-batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
+batadv_find_ifalter_router(struct batadv_priv *bat_priv,
+			   struct batadv_orig_node *primary_orig,
 			   const struct batadv_hard_iface *recv_if)
 {
-	struct batadv_neigh_node *tmp_neigh_node;
 	struct batadv_neigh_node *router = NULL, *first_candidate = NULL;
+	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
+	struct batadv_neigh_node *tmp_neigh_node;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tmp_neigh_node, &primary_orig->bond_list,
@@ -504,8 +520,7 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
 		if (tmp_neigh_node->if_incoming == recv_if)
 			continue;
 
-		if (router &&
-		    tmp_neigh_node->bat_iv.tq_avg <= router->bat_iv.tq_avg)
+		if (router && bao->bat_neigh_cmp(tmp_neigh_node, router))
 			continue;
 
 		if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
@@ -639,7 +654,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
 	if (bonding_enabled)
 		router = batadv_find_bond_router(primary_orig_node, recv_if);
 	else
-		router = batadv_find_ifalter_router(primary_orig_node, recv_if);
+		router = batadv_find_ifalter_router(bat_priv, primary_orig_node,
+						    recv_if);
 
 return_router:
 	if (router && router->if_incoming->if_status != BATADV_IF_ACTIVE)
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index 55d637a..19544dd 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -48,7 +48,8 @@ batadv_find_router(struct batadv_priv *bat_priv,
 		   const struct batadv_hard_iface *recv_if);
 void batadv_bonding_candidate_del(struct batadv_orig_node *orig_node,
 				  struct batadv_neigh_node *neigh_node);
-void batadv_bonding_candidate_add(struct batadv_orig_node *orig_node,
+void batadv_bonding_candidate_add(struct batadv_priv *bat_priv,
+				  struct batadv_orig_node *orig_node,
 				  struct batadv_neigh_node *neigh_node);
 void batadv_bonding_save_primary(const struct batadv_orig_node *orig_node,
 				 struct batadv_orig_node *orig_neigh_node,
-- 
1.8.4

^ permalink raw reply related

* [PATCH 07/16] batman-adv: adapt the neighbor purging routine to use the new API functions
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/originator.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8d1b16e..9cee053 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -378,16 +378,16 @@ free_orig_node:
 static bool
 batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 			    struct batadv_orig_node *orig_node,
-			    struct batadv_neigh_node **best_neigh_node)
+			    struct batadv_neigh_node **best_neigh)
 {
+	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
 	struct hlist_node *node_tmp;
 	struct batadv_neigh_node *neigh_node;
 	bool neigh_purged = false;
 	unsigned long last_seen;
 	struct batadv_hard_iface *if_incoming;
-	uint8_t best_metric = 0;
 
-	*best_neigh_node = NULL;
+	*best_neigh = NULL;
 
 	spin_lock_bh(&orig_node->neigh_list_lock);
 
@@ -420,11 +420,12 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
 			batadv_bonding_candidate_del(orig_node, neigh_node);
 			batadv_neigh_node_free_ref(neigh_node);
 		} else {
-			if ((!*best_neigh_node) ||
-			    (neigh_node->bat_iv.tq_avg > best_metric)) {
-				*best_neigh_node = neigh_node;
-				best_metric = neigh_node->bat_iv.tq_avg;
-			}
+			/* store the best_neighbour if this is the first
+			 * iteration or if a better neighbor has been found
+			 */
+			if (!*best_neigh ||
+			    bao->bat_neigh_cmp(neigh_node, *best_neigh) > 0)
+				*best_neigh = neigh_node;
 		}
 	}
 
-- 
1.8.4

^ permalink raw reply related

* [PATCH 08/16] batman-adv: provide orig_node routing API
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Some operations executed on an orig_node depends on the
current routing algorithm being used. To easily make this
mechanism routing algorithm agnostic add a orig_node
specific API that each algorithm can populate with its own
routines.

Such routines are then invoked by the code when needed,
without knowing which routing algorithm is currently in use

With this patch 3 API functions are added:
- orig_free (to free routing depending internal structs)
- orig_add_if (to change the inner state of an orig_node
  when a new hard interface is added)
- orig_del_if (to change the inner state of an orig_node
  when an hard interface is removed)

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/bat_iv_ogm.c | 126 ++++++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/originator.c | 102 +++++------------------------------
 net/batman-adv/types.h      |  11 ++++
 3 files changed, 151 insertions(+), 88 deletions(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 7cdc394..a2b480a 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -88,6 +88,129 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
 }
 
 /**
+ * batadv_iv_ogm_orig_free - free the private resources allocated for this
+ *  orig_node
+ * @orig_node: the orig_node for which the resources have to be free'd
+ */
+static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node)
+{
+	kfree(orig_node->bat_iv.bcast_own);
+	kfree(orig_node->bat_iv.bcast_own_sum);
+}
+
+/**
+ * batadv_iv_ogm_orig_add_if - change the private structures of the orig_node to
+ *  include the new hard-interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the current amount of interfaces
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node,
+				     int max_if_num)
+{
+	void *data_ptr;
+	size_t data_size, old_size;
+	int ret = -ENOMEM;
+
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS;
+	old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
+	data_ptr = kmalloc(data_size, GFP_ATOMIC);
+	if (!data_ptr)
+		goto unlock;
+
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size);
+	kfree(orig_node->bat_iv.bcast_own);
+	orig_node->bat_iv.bcast_own = data_ptr;
+
+	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+	if (!data_ptr) {
+		kfree(orig_node->bat_iv.bcast_own);
+		goto unlock;
+	}
+
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
+	       (max_if_num - 1) * sizeof(uint8_t));
+	kfree(orig_node->bat_iv.bcast_own_sum);
+	orig_node->bat_iv.bcast_own_sum = data_ptr;
+
+	ret = 0;
+
+unlock:
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_orig_del_if - change the private structures of the orig_node to
+ *  exclude the removed interface
+ * @orig_node: the orig_node that has to be changed
+ * @max_if_num: the current amount of interfaces
+ * @del_if_num: the index of the interface being removed
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node,
+				     int max_if_num, int del_if_num)
+{
+	int chunk_size,  ret = -ENOMEM, if_offset;
+	void *data_ptr = NULL;
+
+	spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	/* last interface was removed */
+	if (max_if_num == 0)
+		goto free_bcast_own;
+
+	chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
+	data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
+	if (!data_ptr)
+		goto unlock;
+
+	/* copy first part */
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
+
+	/* copy second part */
+	memcpy((char *)data_ptr + del_if_num * chunk_size,
+	       orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size),
+	       (max_if_num - del_if_num) * chunk_size);
+
+free_bcast_own:
+	kfree(orig_node->bat_iv.bcast_own);
+	orig_node->bat_iv.bcast_own = data_ptr;
+
+	if (max_if_num == 0)
+		goto free_own_sum;
+
+	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
+	if (!data_ptr) {
+		kfree(orig_node->bat_iv.bcast_own);
+		goto unlock;
+	}
+
+	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
+	       del_if_num * sizeof(uint8_t));
+
+	if_offset = (del_if_num + 1) * sizeof(uint8_t);
+	memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t),
+	       orig_node->bat_iv.bcast_own_sum + if_offset,
+	       (max_if_num - del_if_num) * sizeof(uint8_t));
+
+free_own_sum:
+	kfree(orig_node->bat_iv.bcast_own_sum);
+	orig_node->bat_iv.bcast_own_sum = data_ptr;
+
+	ret = 0;
+unlock:
+	spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
+
+	return ret;
+}
+
+/**
  * batadv_iv_ogm_orig_get - retrieve or create (if does not exist) an originator
  * @bat_priv: the bat priv with all the soft interface information
  * @addr: mac address of the originator
@@ -1522,6 +1645,9 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.bat_neigh_cmp = batadv_iv_ogm_neigh_cmp,
 	.bat_neigh_is_equiv_or_better = batadv_iv_ogm_neigh_is_eob,
 	.bat_orig_print = batadv_iv_ogm_orig_print,
+	.bat_orig_free = batadv_iv_ogm_orig_free,
+	.bat_orig_add_if = batadv_iv_ogm_orig_add_if,
+	.bat_orig_del_if = batadv_iv_ogm_orig_del_if,
 };
 
 int __init batadv_iv_init(void)
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 9cee053..8ab1434 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -241,9 +241,10 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 	batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1,
 				  "originator timed out");
 
+	if (orig_node->bat_priv->bat_algo_ops->bat_orig_free)
+		orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node);
+
 	kfree(orig_node->tt_buff);
-	kfree(orig_node->bat_iv.bcast_own);
-	kfree(orig_node->bat_iv.bcast_own_sum);
 	kfree(orig_node);
 }
 
@@ -538,38 +539,11 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 	return 0;
 }
 
-static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node,
-				   int max_if_num)
-{
-	void *data_ptr;
-	size_t data_size, old_size;
-
-	data_size = max_if_num * sizeof(unsigned long) * BATADV_NUM_WORDS;
-	old_size = (max_if_num - 1) * sizeof(unsigned long) * BATADV_NUM_WORDS;
-	data_ptr = kmalloc(data_size, GFP_ATOMIC);
-	if (!data_ptr)
-		return -ENOMEM;
-
-	memcpy(data_ptr, orig_node->bat_iv.bcast_own, old_size);
-	kfree(orig_node->bat_iv.bcast_own);
-	orig_node->bat_iv.bcast_own = data_ptr;
-
-	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
-	if (!data_ptr)
-		return -ENOMEM;
-
-	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
-	       (max_if_num - 1) * sizeof(uint8_t));
-	kfree(orig_node->bat_iv.bcast_own_sum);
-	orig_node->bat_iv.bcast_own_sum = data_ptr;
-
-	return 0;
-}
-
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
+	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	struct hlist_head *head;
 	struct batadv_orig_node *orig_node;
@@ -584,10 +558,10 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-			ret = batadv_orig_node_add_if(orig_node, max_if_num);
-			spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
+			ret = 0;
+			if (bao->bat_orig_add_if)
+				ret = bao->bat_orig_add_if(orig_node,
+							   max_if_num);
 			if (ret == -ENOMEM)
 				goto err;
 		}
@@ -601,55 +575,6 @@ err:
 	return -ENOMEM;
 }
 
-static int batadv_orig_node_del_if(struct batadv_orig_node *orig_node,
-				   int max_if_num, int del_if_num)
-{
-	int chunk_size, if_offset;
-	void *data_ptr = NULL;
-
-	/* last interface was removed */
-	if (max_if_num == 0)
-		goto free_bcast_own;
-
-	chunk_size = sizeof(unsigned long) * BATADV_NUM_WORDS;
-	data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC);
-	if (!data_ptr)
-		return -ENOMEM;
-
-	/* copy first part */
-	memcpy(data_ptr, orig_node->bat_iv.bcast_own, del_if_num * chunk_size);
-
-	/* copy second part */
-	memcpy((char *)data_ptr + del_if_num * chunk_size,
-	       orig_node->bat_iv.bcast_own + ((del_if_num + 1) * chunk_size),
-	       (max_if_num - del_if_num) * chunk_size);
-
-free_bcast_own:
-	kfree(orig_node->bat_iv.bcast_own);
-	orig_node->bat_iv.bcast_own = data_ptr;
-
-	if (max_if_num == 0)
-		goto free_own_sum;
-
-	data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC);
-	if (!data_ptr)
-		return -ENOMEM;
-
-	memcpy(data_ptr, orig_node->bat_iv.bcast_own_sum,
-	       del_if_num * sizeof(uint8_t));
-
-	if_offset = (del_if_num + 1) * sizeof(uint8_t);
-	memcpy((char *)data_ptr + del_if_num * sizeof(uint8_t),
-	       orig_node->bat_iv.bcast_own_sum + if_offset,
-	       (max_if_num - del_if_num) * sizeof(uint8_t));
-
-free_own_sum:
-	kfree(orig_node->bat_iv.bcast_own_sum);
-	orig_node->bat_iv.bcast_own_sum = data_ptr;
-
-	return 0;
-}
-
 int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num)
 {
@@ -658,6 +583,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 	struct hlist_head *head;
 	struct batadv_hard_iface *hard_iface_tmp;
 	struct batadv_orig_node *orig_node;
+	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
 	uint32_t i;
 	int ret;
 
@@ -669,11 +595,11 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface,
 
 		rcu_read_lock();
 		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
-			spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-			ret = batadv_orig_node_del_if(orig_node, max_if_num,
-						      hard_iface->if_num);
-			spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock);
-
+			ret = 0;
+			if (bao->bat_orig_del_if)
+				ret = bao->bat_orig_del_if(orig_node,
+							   max_if_num,
+							   hard_iface->if_num);
 			if (ret == -ENOMEM)
 				goto err;
 		}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index d0e64d2..672a813 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -996,6 +996,12 @@ struct batadv_forw_packet {
  * @bat_neigh_is_equiv_or_better: check if neigh1 is equally good or
  *  better than neigh2 from the metric prospective
  * @bat_orig_print: print the originator table (optional)
+ * @bat_orig_free: free the resources allocated by the routing algorithm for an
+ *  orig_node object
+ * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to
+ *  the orig_node due to a new hard-interface being added into the mesh
+ * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to
+ *  the orig_node due to an hard-interface being removed from the mesh
  */
 struct batadv_algo_ops {
 	struct hlist_node list;
@@ -1012,6 +1018,11 @@ struct batadv_algo_ops {
 					     struct batadv_neigh_node *neigh2);
 	/* orig_node handling API */
 	void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq);
+	void (*bat_orig_free)(struct batadv_orig_node *orig_node);
+	int (*bat_orig_add_if)(struct batadv_orig_node *orig_node,
+			       int max_if_num);
+	int (*bat_orig_del_if)(struct batadv_orig_node *orig_node,
+			       int max_if_num, int del_if_num);
 };
 
 /**
-- 
1.8.4

^ permalink raw reply related

* [PATCH 09/16] batman-adv: adapt the TT component to use the new API functions
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
---
 net/batman-adv/translation-table.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 1d5a4f5..4c28251 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1280,18 +1280,20 @@ out:
 }
 
 /* batadv_transtable_best_orig - Get best originator list entry from tt entry
+ * @bat_priv: the bat priv with all the soft interface information
  * @tt_global_entry: global translation table entry to be analyzed
  *
  * This functon assumes the caller holds rcu_read_lock().
  * Returns best originator list entry or NULL on errors.
  */
 static struct batadv_tt_orig_list_entry *
-batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
+batadv_transtable_best_orig(struct batadv_priv *bat_priv,
+			    struct batadv_tt_global_entry *tt_global_entry)
 {
-	struct batadv_neigh_node *router = NULL;
+	struct batadv_neigh_node *router, *best_router = NULL;
+	struct batadv_algo_ops *bao = bat_priv->bat_algo_ops;
 	struct hlist_head *head;
 	struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL;
-	int best_tq = 0;
 
 	head = &tt_global_entry->orig_list;
 	hlist_for_each_entry_rcu(orig_entry, head, list) {
@@ -1299,26 +1301,37 @@ batadv_transtable_best_orig(struct batadv_tt_global_entry *tt_global_entry)
 		if (!router)
 			continue;
 
-		if (router->bat_iv.tq_avg > best_tq) {
-			best_entry = orig_entry;
-			best_tq = router->bat_iv.tq_avg;
+		if (best_router &&
+		    bao->bat_neigh_cmp(router, best_router) <= 0) {
+			batadv_neigh_node_free_ref(router);
+			continue;
 		}
 
-		batadv_neigh_node_free_ref(router);
+		/* release the refcount for the "old" best */
+		if (best_router)
+			batadv_neigh_node_free_ref(best_router);
+
+		best_entry = orig_entry;
+		best_router = router;
 	}
 
+	if (best_router)
+		batadv_neigh_node_free_ref(best_router);
+
 	return best_entry;
 }
 
 /* batadv_tt_global_print_entry - print all orig nodes who announce the address
  * for this global entry
+ * @bat_priv: the bat priv with all the soft interface information
  * @tt_global_entry: global translation table entry to be printed
  * @seq: debugfs table seq_file struct
  *
  * This functon assumes the caller holds rcu_read_lock().
  */
 static void
-batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
+batadv_tt_global_print_entry(struct batadv_priv *bat_priv,
+			     struct batadv_tt_global_entry *tt_global_entry,
 			     struct seq_file *seq)
 {
 	struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
@@ -1331,7 +1344,7 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
 	tt_common_entry = &tt_global_entry->common;
 	flags = tt_common_entry->flags;
 
-	best_entry = batadv_transtable_best_orig(tt_global_entry);
+	best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry);
 	if (best_entry) {
 		vlan = batadv_orig_node_vlan_get(best_entry->orig_node,
 						 tt_common_entry->vid);
@@ -1420,7 +1433,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
 			tt_global = container_of(tt_common_entry,
 						 struct batadv_tt_global_entry,
 						 common);
-			batadv_tt_global_print_entry(tt_global, seq);
+			batadv_tt_global_print_entry(bat_priv, tt_global, seq);
 		}
 		rcu_read_unlock();
 	}
@@ -1808,7 +1821,7 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
 		goto out;
 
 	rcu_read_lock();
-	best_entry = batadv_transtable_best_orig(tt_global_entry);
+	best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry);
 	/* found anything? */
 	if (best_entry)
 		orig_node = best_entry->orig_node;
-- 
1.8.4

^ permalink raw reply related

* Re: [Xen-devel] [PATCH net] xen-netback: add the scenario which now beyond the range time_after_eq().
From: Jan Beulich @ 2013-10-23 16:07 UTC (permalink / raw)
  To: David.Laight, wei.liu2, jianhai.luan
  Cc: david.vrabel, ian.campbell, xen-devel, annie.li, netdev
In-Reply-To: <52678286.6000802@oracle.com>

>>> jianhai luan <jianhai.luan@oracle.com> 10/23/13 10:02 AM >>>
>On 2013-10-18 19:24, Wei Liu wrote:
>> On Fri, Oct 18, 2013 at 09:40:33AM +0100, David Laight wrote:
>>>>> My understanding is this patch does not simply double the span, it is
>>>>> just stricter than the original one. Please check my previous comments,
>>>>> I paste it here.
>>>> No, the code (on a 32-bit arch) just _can't_ handle jiffies differences
>>>> beyond 2^32, no matter how cleverly you use the respective macros.
>>>> All arithmetic there is done modulo 2^32.
>>> I haven't followed this discussion very closely but it might be possible
>>> to arrange that the 'incorrect lack of credit' only occurs for a few
>>> seconds every time 'jiffies' wraps - instead of half of the time.
>>> Then you'd have to be extremely unlucky to hit the timing window.
>>>
>> As I understand it, this is the idea of this patch -- to narrow down the
>> timing window.
>Jan,  do you agree the idea or have better suggestion to me.

As said before - I disagree (reducing a timing window is never a solution,
only eliminating it is), and I pointed at the alternative (using 64-bit
calculations) before.

Jan

^ permalink raw reply

* [PATCH v5] net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)
From: Arvid Brodin @ 2013-10-23 16:09 UTC (permalink / raw)
  To: netdev@vger.kernel.org
  Cc: Arvid Brodin, David Miller, Stephen Hemminger, Joe Perches,
	Javier Boticario, balferreira@googlemail.com,
	Elías Molina Muñoz

High-availability Seamless Redundancy ("HSR") provides instant failover
redundancy for Ethernet networks. It requires a special network topology where
all nodes are connected in a ring (each node having two physical network
interfaces). It is suited for applications that demand high availability and
very short reaction time.

HSR acts on the Ethernet layer, using a registered Ethernet protocol type to
send special HSR frames in both directions over the ring. The driver creates
virtual network interfaces that can be used just like any ordinary Linux
network interface, for IP/TCP/UDP traffic etc. All nodes in the network ring
must be HSR capable.

This code is a "best effort" to comply with the HSR standard as described in
IEC 62439-3:2010 (HSRv0).

Signed-off-by: Arvid Brodin <arvid.brodin@xdin.com>
---

This is a patch against net-next (2013-10-23).

The code has been statically analysed using sparse (no problems detected).

kmemleak has been used to detect memory leaks (no known leaks exist).

The code passes checkpatch.pl with the options --strict --max-line-length=83
except for CHECKs, which are all intentional; e.g. CamelCase is used in the
IEC standard to describe field names, and these names have been used in the
code.

The code has been tested extensively on rings of up to 32 avr32-based nodes.
Less intense testing has included an x86_64 (SMP) system, and running HSR
over a USB-to-Ethernet device.

Changes in v5:

* Tried to address the issues with aligning the skb header that was brought 
  up by David Miller in v4. Fixed comment where true/false was called 1/0.

Changes in v4:

* Fixed racy test-and-modify lock for device operstate in __hsr_set_operstate().

Changes in v3:

* hsr_dev_open() no longer calls dev_open() on the HSR slave interfaces.
  This is to let user space decide how to treat any errors on slave open.
* is_operstate_up() is now called is_slave_up() and checks both admin
  state and operational state. Slaves going DOWN could otherwise be
  detected as still UP when HSR decided its own state.
* Files under net/ now have header comments that follow the network code
  standard.

Changes in v2:

* Aligned newlines with open parenthesis (Joe Perches)
* Removed unneccessary WARN_ON_ONCE from is_hsr_master() (Joe Perches)
* Removed deprecated comments about "actually signed" for HSR_A_IFn_AGE (Joe Perches)
* Broke out userspace hsr_netlink.h into include/uapi/linux/ (Joe Perches)
* Use netif_oper_up() to check for operational state UP (Stephen Hemminger)

Possibly unresolved:

* dev_base_lock vs rtnl_mutex when setting operstate? (Stephen Hemminger)
  David Miller asked about this on 2013-09-03; no response.
* Move more (local) header files into include/net/? (checkpatch still warns
  about CamelCase) (Joe Perches / with help from David Miller?)

Earlier RFC:s:
RFC v1: http://www.spinics.net/lists/netdev/msg192817.html
RFC v2: http://www.spinics.net/lists/netdev/msg203397.html
RFC v3: http://www.spinics.net/lists/netdev/msg207816.html
RFC v4: http://www.spinics.net/lists/netdev/msg213309.html

Thanks to Stephen Hemminger, Joe Perches, and others for their comments on
these RFC:s.


 include/uapi/linux/hsr_netlink.h |  50 ++++
 include/uapi/linux/if_ether.h    |   1 +
 include/uapi/linux/if_link.h     |  13 +
 net/Kconfig                      |   1 +
 net/Makefile                     |   1 +
 net/hsr/Kconfig                  |  27 ++
 net/hsr/Makefile                 |   7 +
 net/hsr/hsr_device.c             | 594 +++++++++++++++++++++++++++++++++++++++
 net/hsr/hsr_device.h             |  29 ++
 net/hsr/hsr_framereg.c           | 501 +++++++++++++++++++++++++++++++++
 net/hsr/hsr_framereg.h           |  53 ++++
 net/hsr/hsr_main.c               | 469 +++++++++++++++++++++++++++++++
 net/hsr/hsr_main.h               | 166 +++++++++++
 net/hsr/hsr_netlink.c            | 457 ++++++++++++++++++++++++++++++
 net/hsr/hsr_netlink.h            |  30 ++
 15 files changed, 2399 insertions(+)
 create mode 100644 include/uapi/linux/hsr_netlink.h
 create mode 100644 net/hsr/Kconfig
 create mode 100644 net/hsr/Makefile
 create mode 100644 net/hsr/hsr_device.c
 create mode 100644 net/hsr/hsr_device.h
 create mode 100644 net/hsr/hsr_framereg.c
 create mode 100644 net/hsr/hsr_framereg.h
 create mode 100644 net/hsr/hsr_main.c
 create mode 100644 net/hsr/hsr_main.h
 create mode 100644 net/hsr/hsr_netlink.c
 create mode 100644 net/hsr/hsr_netlink.h

diff --git a/include/uapi/linux/hsr_netlink.h b/include/uapi/linux/hsr_netlink.h
new file mode 100644
index 0000000..2475cb8
--- /dev/null
+++ b/include/uapi/linux/hsr_netlink.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef __UAPI_HSR_NETLINK_H
+#define __UAPI_HSR_NETLINK_H
+
+/* Generic Netlink HSR family definition
+ */
+
+/* attributes */
+enum {
+	HSR_A_UNSPEC,
+	HSR_A_NODE_ADDR,
+	HSR_A_IFINDEX,
+	HSR_A_IF1_AGE,
+	HSR_A_IF2_AGE,
+	HSR_A_NODE_ADDR_B,
+	HSR_A_IF1_SEQ,
+	HSR_A_IF2_SEQ,
+	HSR_A_IF1_IFINDEX,
+	HSR_A_IF2_IFINDEX,
+	HSR_A_ADDR_B_IFINDEX,
+	__HSR_A_MAX,
+};
+#define HSR_A_MAX (__HSR_A_MAX - 1)
+
+
+/* commands */
+enum {
+	HSR_C_UNSPEC,
+	HSR_C_RING_ERROR,
+	HSR_C_NODE_DOWN,
+	HSR_C_GET_NODE_STATUS,
+	HSR_C_SET_NODE_STATUS,
+	HSR_C_GET_NODE_LIST,
+	HSR_C_SET_NODE_LIST,
+	__HSR_C_MAX,
+};
+#define HSR_C_MAX (__HSR_C_MAX - 1)
+
+#endif /* __UAPI_HSR_NETLINK_H */
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index ade07f1..2ce0f6a 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -85,6 +85,7 @@
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */
 #define ETH_P_MVRP	0x88F5          /* 802.1Q MVRP                  */
 #define ETH_P_1588	0x88F7		/* IEEE 1588 Timesync */
+#define ETH_P_PRP	0x88FB		/* IEC 62439-3 PRP/HSRv0	*/
 #define ETH_P_FCOE	0x8906		/* Fibre Channel over Ethernet  */
 #define ETH_P_TDLS	0x890D          /* TDLS */
 #define ETH_P_FIP	0x8914		/* FCoE Initialization Protocol */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8a1e346..b78566f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -481,4 +481,17 @@ enum {
 
 #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
 
+
+/* HSR section */
+
+enum {
+	IFLA_HSR_UNSPEC,
+	IFLA_HSR_SLAVE1,
+	IFLA_HSR_SLAVE2,
+	IFLA_HSR_MULTICAST_SPEC,
+	__IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/net/Kconfig b/net/Kconfig
index b50dacc..0715db6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -220,6 +220,7 @@ source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
 source "net/netlink/Kconfig"
 source "net/mpls/Kconfig"
+source "net/hsr/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index 9492e8c..8fa2f91 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -71,3 +71,4 @@ obj-$(CONFIG_NFC)		+= nfc/
 obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
 obj-$(CONFIG_VSOCKETS)	+= vmw_vsock/
 obj-$(CONFIG_NET_MPLS_GSO)	+= mpls/
+obj-$(CONFIG_HSR)		+= hsr/
diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig
new file mode 100644
index 0000000..0d3d709
--- /dev/null
+++ b/net/hsr/Kconfig
@@ -0,0 +1,27 @@
+#
+# IEC 62439-3 High-availability Seamless Redundancy
+#
+
+config HSR
+	tristate "High-availability Seamless Redundancy (HSR)"
+	---help---
+	  If you say Y here, then your Linux box will be able to act as a
+	  DANH ("Doubly attached node implementing HSR"). For this to work,
+	  your Linux box needs (at least) two physical Ethernet interfaces,
+	  and it must be connected as a node in a ring network together with
+	  other HSR capable nodes.
+
+	  All Ethernet frames sent over the hsr device will be sent in both
+	  directions on the ring (over both slave ports), giving a redundant,
+	  instant fail-over network. Each HSR node in the ring acts like a
+	  bridge for HSR frames, but filters frames that have been forwarded
+	  earlier.
+
+	  This code is a "best effort" to comply with the HSR standard as
+	  described in IEC 62439-3:2010 (HSRv0), but no compliancy tests have
+	  been made.
+
+	  You need to perform any and all necessary tests yourself before
+	  relying on this code in a safety critical system!
+
+	  If unsure, say N.
diff --git a/net/hsr/Makefile b/net/hsr/Makefile
new file mode 100644
index 0000000..b68359f
--- /dev/null
+++ b/net/hsr/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for HSR
+#
+
+obj-$(CONFIG_HSR)	+= hsr.o
+
+hsr-y			:= hsr_main.o hsr_framereg.o hsr_device.o hsr_netlink.o
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
new file mode 100644
index 0000000..636d79d
--- /dev/null
+++ b/net/hsr/hsr_device.c
@@ -0,0 +1,594 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * This file contains device methods for creating, using and destroying
+ * virtual HSR devices.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_sched.h>
+#include "hsr_device.h"
+#include "hsr_framereg.h"
+#include "hsr_main.h"
+
+
+static bool is_admin_up(struct net_device *dev)
+{
+	return dev && (dev->flags & IFF_UP);
+}
+
+static bool is_slave_up(struct net_device *dev)
+{
+	return dev && is_admin_up(dev) && netif_oper_up(dev);
+}
+
+static void __hsr_set_operstate(struct net_device *dev, int transition)
+{
+	write_lock_bh(&dev_base_lock);
+	if (dev->operstate != transition) {
+		dev->operstate = transition;
+		write_unlock_bh(&dev_base_lock);
+		netdev_state_change(dev);
+	} else {
+		write_unlock_bh(&dev_base_lock);
+	}
+}
+
+void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1,
+		       struct net_device *slave2)
+{
+	if (!is_admin_up(hsr_dev)) {
+		__hsr_set_operstate(hsr_dev, IF_OPER_DOWN);
+		return;
+	}
+
+	if (is_slave_up(slave1) || is_slave_up(slave2))
+		__hsr_set_operstate(hsr_dev, IF_OPER_UP);
+	else
+		__hsr_set_operstate(hsr_dev, IF_OPER_LOWERLAYERDOWN);
+}
+
+void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1,
+		     struct net_device *slave2)
+{
+	if (is_slave_up(slave1) || is_slave_up(slave2))
+		netif_carrier_on(hsr_dev);
+	else
+		netif_carrier_off(hsr_dev);
+}
+
+
+void hsr_check_announce(struct net_device *hsr_dev, int old_operstate)
+{
+	struct hsr_priv *hsr_priv;
+
+	hsr_priv = netdev_priv(hsr_dev);
+
+	if ((hsr_dev->operstate == IF_OPER_UP) && (old_operstate != IF_OPER_UP)) {
+		/* Went up */
+		hsr_priv->announce_count = 0;
+		hsr_priv->announce_timer.expires = jiffies +
+				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+		add_timer(&hsr_priv->announce_timer);
+	}
+
+	if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP))
+		/* Went down */
+		del_timer(&hsr_priv->announce_timer);
+}
+
+
+int hsr_get_max_mtu(struct hsr_priv *hsr_priv)
+{
+	int mtu_max;
+
+	if (hsr_priv->slave[0] && hsr_priv->slave[1])
+		mtu_max = min(hsr_priv->slave[0]->mtu, hsr_priv->slave[1]->mtu);
+	else if (hsr_priv->slave[0])
+		mtu_max = hsr_priv->slave[0]->mtu;
+	else if (hsr_priv->slave[1])
+		mtu_max = hsr_priv->slave[1]->mtu;
+	else
+		mtu_max = HSR_TAGLEN;
+
+	return mtu_max - HSR_TAGLEN;
+}
+
+static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct hsr_priv *hsr_priv;
+
+	hsr_priv = netdev_priv(dev);
+
+	if (new_mtu > hsr_get_max_mtu(hsr_priv)) {
+		netdev_info(hsr_priv->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n",
+			    HSR_TAGLEN);
+		return -EINVAL;
+	}
+
+	dev->mtu = new_mtu;
+
+	return 0;
+}
+
+static int hsr_dev_open(struct net_device *dev)
+{
+	struct hsr_priv *hsr_priv;
+	int i;
+	char *slave_name;
+
+	hsr_priv = netdev_priv(dev);
+
+	for (i = 0; i < HSR_MAX_SLAVE; i++) {
+		if (hsr_priv->slave[i])
+			slave_name = hsr_priv->slave[i]->name;
+		else
+			slave_name = "null";
+
+		if (!is_slave_up(hsr_priv->slave[i]))
+			netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a working HSR network\n",
+				    'A' + i, slave_name);
+	}
+
+	return 0;
+}
+
+static int hsr_dev_close(struct net_device *dev)
+{
+	/* Nothing to do here. We could try to restore the state of the slaves
+	 * to what they were before being changed by the hsr master dev's state,
+	 * but they might have been changed manually in the mean time too, so
+	 * taking them up or down here might be confusing and is probably not a
+	 * good idea.
+	 */
+	return 0;
+}
+
+
+static void hsr_fill_tag(struct hsr_ethhdr *hsr_ethhdr, struct hsr_priv *hsr_priv)
+{
+	unsigned long irqflags;
+
+	/* IEC 62439-1:2010, p 48, says the 4-bit "path" field can take values
+	 * between 0001-1001 ("ring identifier", for regular HSR frames),
+	 * or 1111 ("HSR management", supervision frames). Unfortunately, the
+	 * spec writers forgot to explain what a "ring identifier" is, or
+	 * how it is used. So we just set this to 0001 for regular frames,
+	 * and 1111 for supervision frames.
+	 */
+	set_hsr_tag_path(&hsr_ethhdr->hsr_tag, 0x1);
+
+	/* IEC 62439-1:2010, p 12: "The link service data unit in an Ethernet
+	 * frame is the content of the frame located between the Length/Type
+	 * field and the Frame Check Sequence."
+	 *
+	 * IEC 62439-3, p 48, specifies the "original LPDU" to include the
+	 * original "LT" field (what "LT" means is not explained anywhere as
+	 * far as I can see - perhaps "Length/Type"?). So LSDU_size might
+	 * equal original length + 2.
+	 *   Also, the fact that this field is not used anywhere (might be used
+	 * by a RedBox connecting HSR and PRP nets?) means I cannot test its
+	 * correctness. Instead of guessing, I set this to 0 here, to make any
+	 * problems immediately apparent. Anyone using this driver with PRP/HSR
+	 * RedBoxes might need to fix this...
+	 */
+	set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, 0);
+
+	spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags);
+	hsr_ethhdr->hsr_tag.sequence_nr = htons(hsr_priv->sequence_nr);
+	hsr_priv->sequence_nr++;
+	spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags);
+
+	hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto;
+
+	hsr_ethhdr->ethhdr.h_proto = htons(ETH_P_PRP);
+}
+
+static int slave_xmit(struct sk_buff *skb, struct hsr_priv *hsr_priv,
+		      enum hsr_dev_idx dev_idx)
+{
+	struct hsr_ethhdr *hsr_ethhdr;
+
+	hsr_ethhdr = (struct hsr_ethhdr *) skb->data;
+
+	skb->dev = hsr_priv->slave[dev_idx];
+
+	hsr_addr_subst_dest(hsr_priv, &hsr_ethhdr->ethhdr, dev_idx);
+
+	/* Address substitution (IEC62439-3 pp 26, 50): replace mac
+	 * address of outgoing frame with that of the outgoing slave's.
+	 */
+	memcpy(hsr_ethhdr->ethhdr.h_source, skb->dev->dev_addr, ETH_ALEN);
+
+	return dev_queue_xmit(skb);
+}
+
+
+static int hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct hsr_priv *hsr_priv;
+	struct hsr_ethhdr *hsr_ethhdr;
+	struct sk_buff *skb2;
+	int res1, res2;
+
+	hsr_priv = netdev_priv(dev);
+	hsr_ethhdr = (struct hsr_ethhdr *) skb->data;
+
+	if ((skb->protocol != htons(ETH_P_PRP)) ||
+	    (hsr_ethhdr->ethhdr.h_proto != htons(ETH_P_PRP))) {
+		hsr_fill_tag(hsr_ethhdr, hsr_priv);
+		skb->protocol = htons(ETH_P_PRP);
+	}
+
+	skb2 = pskb_copy(skb, GFP_ATOMIC);
+
+	res1 = NET_XMIT_DROP;
+	if (likely(hsr_priv->slave[HSR_DEV_SLAVE_A]))
+		res1 = slave_xmit(skb, hsr_priv, HSR_DEV_SLAVE_A);
+
+	res2 = NET_XMIT_DROP;
+	if (likely(skb2 && hsr_priv->slave[HSR_DEV_SLAVE_B]))
+		res2 = slave_xmit(skb2, hsr_priv, HSR_DEV_SLAVE_B);
+
+	if (likely(res1 == NET_XMIT_SUCCESS || res1 == NET_XMIT_CN ||
+		   res2 == NET_XMIT_SUCCESS || res2 == NET_XMIT_CN)) {
+		hsr_priv->dev->stats.tx_packets++;
+		hsr_priv->dev->stats.tx_bytes += skb->len;
+	} else {
+		hsr_priv->dev->stats.tx_dropped++;
+	}
+
+	return NETDEV_TX_OK;
+}
+
+
+static int hsr_header_create(struct sk_buff *skb, struct net_device *dev,
+			     unsigned short type, const void *daddr,
+			     const void *saddr, unsigned int len)
+{
+	int res;
+
+	/* Make room for the HSR tag now. We will fill it in later (in
+	 * hsr_dev_xmit)
+	 */
+	if (skb_headroom(skb) < HSR_TAGLEN + ETH_HLEN)
+		return -ENOBUFS;
+	skb_push(skb, HSR_TAGLEN);
+
+	/* To allow VLAN/HSR combos we should probably use
+	 * res = dev_hard_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN);
+	 * here instead. It would require other changes too, though - e.g.
+	 * separate headers for each slave etc...
+	 */
+	res = eth_header(skb, dev, type, daddr, saddr, len + HSR_TAGLEN);
+	if (res <= 0)
+		return res;
+	skb_reset_mac_header(skb);
+
+	return res + HSR_TAGLEN;
+}
+
+
+static const struct header_ops hsr_header_ops = {
+	.create	 = hsr_header_create,
+	.parse	 = eth_header_parse,
+};
+
+
+/* HSR:2010 supervision frames should be padded so that the whole frame,
+ * including headers and FCS, is 64 bytes (without VLAN).
+ */
+static int hsr_pad(int size)
+{
+	const int min_size = ETH_ZLEN - HSR_TAGLEN - ETH_HLEN;
+
+	if (size >= min_size)
+		return size;
+	return min_size;
+}
+
+static void send_hsr_supervision_frame(struct net_device *hsr_dev, u8 type)
+{
+	struct hsr_priv *hsr_priv;
+	struct sk_buff *skb;
+	int hlen, tlen;
+	struct hsr_sup_tag *hsr_stag;
+	struct hsr_sup_payload *hsr_sp;
+	unsigned long irqflags;
+
+	hlen = LL_RESERVED_SPACE(hsr_dev);
+	tlen = hsr_dev->needed_tailroom;
+	skb = alloc_skb(hsr_pad(sizeof(struct hsr_sup_payload)) + hlen + tlen,
+			GFP_ATOMIC);
+
+	if (skb == NULL)
+		return;
+
+	hsr_priv = netdev_priv(hsr_dev);
+
+	skb_reserve(skb, hlen);
+
+	skb->dev = hsr_dev;
+	skb->protocol = htons(ETH_P_PRP);
+	skb->priority = TC_PRIO_CONTROL;
+
+	if (dev_hard_header(skb, skb->dev, ETH_P_PRP,
+			    hsr_priv->sup_multicast_addr,
+			    skb->dev->dev_addr, skb->len) < 0)
+		goto out;
+
+	skb_pull(skb, sizeof(struct ethhdr));
+	hsr_stag = (typeof(hsr_stag)) skb->data;
+
+	set_hsr_stag_path(hsr_stag, 0xf);
+	set_hsr_stag_HSR_Ver(hsr_stag, 0);
+
+	spin_lock_irqsave(&hsr_priv->seqnr_lock, irqflags);
+	hsr_stag->sequence_nr = htons(hsr_priv->sequence_nr);
+	hsr_priv->sequence_nr++;
+	spin_unlock_irqrestore(&hsr_priv->seqnr_lock, irqflags);
+
+	hsr_stag->HSR_TLV_Type = type;
+	hsr_stag->HSR_TLV_Length = 12;
+
+	skb_push(skb, sizeof(struct ethhdr));
+
+	/* Payload: MacAddressA */
+	hsr_sp = (typeof(hsr_sp)) skb_put(skb, sizeof(*hsr_sp));
+	memcpy(hsr_sp->MacAddressA, hsr_dev->dev_addr, ETH_ALEN);
+
+	dev_queue_xmit(skb);
+	return;
+
+out:
+	kfree_skb(skb);
+}
+
+
+/* Announce (supervision frame) timer function
+ */
+static void hsr_announce(unsigned long data)
+{
+	struct hsr_priv *hsr_priv;
+
+	hsr_priv = (struct hsr_priv *) data;
+
+	if (hsr_priv->announce_count < 3) {
+		send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_ANNOUNCE);
+		hsr_priv->announce_count++;
+	} else {
+		send_hsr_supervision_frame(hsr_priv->dev, HSR_TLV_LIFE_CHECK);
+	}
+
+	if (hsr_priv->announce_count < 3)
+		hsr_priv->announce_timer.expires = jiffies +
+				msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL);
+	else
+		hsr_priv->announce_timer.expires = jiffies +
+				msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL);
+
+	if (is_admin_up(hsr_priv->dev))
+		add_timer(&hsr_priv->announce_timer);
+}
+
+
+static void restore_slaves(struct net_device *hsr_dev)
+{
+	struct hsr_priv *hsr_priv;
+	int i;
+	int res;
+
+	hsr_priv = netdev_priv(hsr_dev);
+
+	rtnl_lock();
+
+	/* Restore promiscuity */
+	for (i = 0; i < 2; i++) {
+		if (!hsr_priv->slave[i])
+			continue;
+		res = dev_set_promiscuity(hsr_priv->slave[i], -1);
+		if (res)
+			netdev_info(hsr_dev, "Cannot restore slave promiscuity (%s, %d)\n",
+				    hsr_priv->slave[i]->name, res);
+	}
+
+	rtnl_unlock();
+}
+
+static void reclaim_hsr_dev(struct rcu_head *rh)
+{
+	struct hsr_priv *hsr_priv;
+
+	hsr_priv = container_of(rh, struct hsr_priv, rcu_head);
+	free_netdev(hsr_priv->dev);
+}
+
+
+/* According to comments in the declaration of struct net_device, this function
+ * is "Called from unregister, can be used to call free_netdev". Ok then...
+ */
+static void hsr_dev_destroy(struct net_device *hsr_dev)
+{
+	struct hsr_priv *hsr_priv;
+
+	hsr_priv = netdev_priv(hsr_dev);
+
+	del_timer(&hsr_priv->announce_timer);
+	unregister_hsr_master(hsr_priv);    /* calls list_del_rcu on hsr_priv */
+	restore_slaves(hsr_dev);
+	call_rcu(&hsr_priv->rcu_head, reclaim_hsr_dev);   /* reclaim hsr_priv */
+}
+
+static const struct net_device_ops hsr_device_ops = {
+	.ndo_change_mtu = hsr_dev_change_mtu,
+	.ndo_open = hsr_dev_open,
+	.ndo_stop = hsr_dev_close,
+	.ndo_start_xmit = hsr_dev_xmit,
+};
+
+
+void hsr_dev_setup(struct net_device *dev)
+{
+	random_ether_addr(dev->dev_addr);
+
+	ether_setup(dev);
+	dev->header_ops		 = &hsr_header_ops;
+	dev->netdev_ops		 = &hsr_device_ops;
+	dev->tx_queue_len	 = 0;
+
+	dev->destructor = hsr_dev_destroy;
+}
+
+
+/* Return true if dev is a HSR master; return false otherwise.
+ */
+bool is_hsr_master(struct net_device *dev)
+{
+	return (dev->netdev_ops->ndo_start_xmit == hsr_dev_xmit);
+}
+
+static int check_slave_ok(struct net_device *dev)
+{
+	/* Don't allow HSR on non-ethernet like devices */
+	if ((dev->flags & IFF_LOOPBACK) || (dev->type != ARPHRD_ETHER) ||
+	    (dev->addr_len != ETH_ALEN)) {
+		netdev_info(dev, "Cannot use loopback or non-ethernet device as HSR slave.\n");
+		return -EINVAL;
+	}
+
+	/* Don't allow enslaving hsr devices */
+	if (is_hsr_master(dev)) {
+		netdev_info(dev, "Cannot create trees of HSR devices.\n");
+		return -EINVAL;
+	}
+
+	if (is_hsr_slave(dev)) {
+		netdev_info(dev, "This device is already a HSR slave.\n");
+		return -EINVAL;
+	}
+
+	if (dev->priv_flags & IFF_802_1Q_VLAN) {
+		netdev_info(dev, "HSR on top of VLAN is not yet supported in this driver.\n");
+		return -EINVAL;
+	}
+
+	/* HSR over bonded devices has not been tested, but I'm not sure it
+	 * won't work...
+	 */
+
+	return 0;
+}
+
+
+/* Default multicast address for HSR Supervision frames */
+static const unsigned char def_multicast_addr[ETH_ALEN] = {
+	0x01, 0x15, 0x4e, 0x00, 0x01, 0x00
+};
+
+int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+		     unsigned char multicast_spec)
+{
+	struct hsr_priv *hsr_priv;
+	int i;
+	int res;
+
+	hsr_priv = netdev_priv(hsr_dev);
+	hsr_priv->dev = hsr_dev;
+	INIT_LIST_HEAD(&hsr_priv->node_db);
+	INIT_LIST_HEAD(&hsr_priv->self_node_db);
+	for (i = 0; i < 2; i++)
+		hsr_priv->slave[i] = slave[i];
+
+	spin_lock_init(&hsr_priv->seqnr_lock);
+	/* Overflow soon to find bugs easier: */
+	hsr_priv->sequence_nr = USHRT_MAX - 1024;
+
+	init_timer(&hsr_priv->announce_timer);
+	hsr_priv->announce_timer.function = hsr_announce;
+	hsr_priv->announce_timer.data = (unsigned long) hsr_priv;
+
+	memcpy(hsr_priv->sup_multicast_addr, def_multicast_addr, ETH_ALEN);
+	hsr_priv->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
+
+/* FIXME: should I modify the value of these?
+ *
+ * - hsr_dev->flags - i.e.
+ *			IFF_MASTER/SLAVE?
+ * - hsr_dev->priv_flags - i.e.
+ *			IFF_EBRIDGE?
+ *			IFF_TX_SKB_SHARING?
+ *			IFF_HSR_MASTER/SLAVE?
+ */
+
+	for (i = 0; i < 2; i++) {
+		res = check_slave_ok(slave[i]);
+		if (res)
+			return res;
+	}
+
+	hsr_dev->features = slave[0]->features & slave[1]->features;
+	/* Prevent recursive tx locking */
+	hsr_dev->features |= NETIF_F_LLTX;
+	/* VLAN on top of HSR needs testing and probably some work on
+	 * hsr_header_create() etc.
+	 */
+	hsr_dev->features |= NETIF_F_VLAN_CHALLENGED;
+
+	/* Set hsr_dev's MAC address to that of mac_slave1 */
+	memcpy(hsr_dev->dev_addr, hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+
+	/* Set required header length */
+	for (i = 0; i < 2; i++)
+		if (slave[i]->hard_header_len + HSR_TAGLEN >
+						hsr_dev->hard_header_len)
+			hsr_dev->hard_header_len =
+					slave[i]->hard_header_len + HSR_TAGLEN;
+
+	/* MTU */
+	for (i = 0; i < 2; i++)
+		if (slave[i]->mtu - HSR_TAGLEN < hsr_dev->mtu)
+			hsr_dev->mtu = slave[i]->mtu - HSR_TAGLEN;
+
+	/* Make sure the 1st call to netif_carrier_on() gets through */
+	netif_carrier_off(hsr_dev);
+
+	/* Promiscuity */
+	for (i = 0; i < 2; i++) {
+		res = dev_set_promiscuity(slave[i], 1);
+		if (res) {
+			netdev_info(hsr_dev, "Cannot set slave promiscuity (%s, %d)\n",
+				    slave[i]->name, res);
+			goto fail;
+		}
+	}
+
+	/* Make sure we recognize frames from ourselves in hsr_rcv() */
+	res = hsr_create_self_node(&hsr_priv->self_node_db,
+					hsr_dev->dev_addr,
+					hsr_priv->slave[1]->dev_addr);
+	if (res < 0)
+		goto fail;
+
+	res = register_netdevice(hsr_dev);
+	if (res)
+		goto fail;
+
+	register_hsr_master(hsr_priv);
+
+	return 0;
+
+fail:
+	restore_slaves(hsr_dev);
+	return res;
+}
diff --git a/net/hsr/hsr_device.h b/net/hsr/hsr_device.h
new file mode 100644
index 0000000..2c7148e
--- /dev/null
+++ b/net/hsr/hsr_device.h
@@ -0,0 +1,29 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef __HSR_DEVICE_H
+#define __HSR_DEVICE_H
+
+#include <linux/netdevice.h>
+#include "hsr_main.h"
+
+void hsr_dev_setup(struct net_device *dev);
+int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
+		     unsigned char multicast_spec);
+void hsr_set_operstate(struct net_device *hsr_dev, struct net_device *slave1,
+		       struct net_device *slave2);
+void hsr_set_carrier(struct net_device *hsr_dev, struct net_device *slave1,
+		     struct net_device *slave2);
+void hsr_check_announce(struct net_device *hsr_dev, int old_operstate);
+bool is_hsr_master(struct net_device *dev);
+int hsr_get_max_mtu(struct hsr_priv *hsr_priv);
+
+#endif /* __HSR_DEVICE_H */
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
new file mode 100644
index 0000000..fbcd7fc
--- /dev/null
+++ b/net/hsr/hsr_framereg.c
@@ -0,0 +1,501 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * The HSR spec says never to forward the same frame twice on the same
+ * interface. A frame is identified by its source MAC address and its HSR
+ * sequence number. This code keeps track of senders and their sequence numbers
+ * to allow filtering of duplicate frames, and to detect HSR ring errors.
+ */
+
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include "hsr_main.h"
+#include "hsr_framereg.h"
+#include "hsr_netlink.h"
+
+
+struct node_entry {
+	struct list_head mac_list;
+	unsigned char	MacAddressA[ETH_ALEN];
+	unsigned char	MacAddressB[ETH_ALEN];
+	enum hsr_dev_idx   AddrB_if;	/* The local slave through which AddrB
+					 * frames are received from this node
+					 */
+	unsigned long	time_in[HSR_MAX_SLAVE];
+	bool		time_in_stale[HSR_MAX_SLAVE];
+	u16		seq_out[HSR_MAX_DEV];
+	struct rcu_head rcu_head;
+};
+
+/*	TODO: use hash lists for mac addresses (linux/jhash.h)?    */
+
+
+
+/* Search for mac entry. Caller must hold rcu read lock.
+ */
+static struct node_entry *find_node_by_AddrA(struct list_head *node_db,
+					     const unsigned char addr[ETH_ALEN])
+{
+	struct node_entry *node;
+
+	list_for_each_entry_rcu(node, node_db, mac_list) {
+		if (!compare_ether_addr(node->MacAddressA, addr))
+			return node;
+	}
+
+	return NULL;
+}
+
+
+/* Search for mac entry. Caller must hold rcu read lock.
+ */
+static struct node_entry *find_node_by_AddrB(struct list_head *node_db,
+					     const unsigned char addr[ETH_ALEN])
+{
+	struct node_entry *node;
+
+	list_for_each_entry_rcu(node, node_db, mac_list) {
+		if (!compare_ether_addr(node->MacAddressB, addr))
+			return node;
+	}
+
+	return NULL;
+}
+
+
+/* Search for mac entry. Caller must hold rcu read lock.
+ */
+struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb)
+{
+	struct node_entry *node;
+	struct ethhdr *ethhdr;
+
+	if (!skb_mac_header_was_set(skb))
+		return NULL;
+
+	ethhdr = (struct ethhdr *) skb_mac_header(skb);
+
+	list_for_each_entry_rcu(node, node_db, mac_list) {
+		if (!compare_ether_addr(node->MacAddressA, ethhdr->h_source))
+			return node;
+		if (!compare_ether_addr(node->MacAddressB, ethhdr->h_source))
+			return node;
+	}
+
+	return NULL;
+}
+
+
+/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
+ * frames from self that's been looped over the HSR ring.
+ */
+int hsr_create_self_node(struct list_head *self_node_db,
+			 unsigned char addr_a[ETH_ALEN],
+			 unsigned char addr_b[ETH_ALEN])
+{
+	struct node_entry *node, *oldnode;
+
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	memcpy(node->MacAddressA, addr_a, ETH_ALEN);
+	memcpy(node->MacAddressB, addr_b, ETH_ALEN);
+
+	rcu_read_lock();
+	oldnode = list_first_or_null_rcu(self_node_db,
+						struct node_entry, mac_list);
+	if (oldnode) {
+		list_replace_rcu(&oldnode->mac_list, &node->mac_list);
+		rcu_read_unlock();
+		synchronize_rcu();
+		kfree(oldnode);
+	} else {
+		rcu_read_unlock();
+		list_add_tail_rcu(&node->mac_list, self_node_db);
+	}
+
+	return 0;
+}
+
+static void node_entry_reclaim(struct rcu_head *rh)
+{
+	kfree(container_of(rh, struct node_entry, rcu_head));
+}
+
+
+/* Add/merge node to the database of nodes. 'skb' must contain an HSR
+ * supervision frame.
+ * - If the supervision header's MacAddressA field is not yet in the database,
+ * this frame is from an hitherto unknown node - add it to the database.
+ * - If the sender's MAC address is not the same as its MacAddressA address,
+ * the node is using PICS_SUBS (address substitution). Record the sender's
+ * address as the node's MacAddressB.
+ *
+ * This function needs to work even if the sender node has changed one of its
+ * slaves' MAC addresses. In this case, there are four different cases described
+ * by (Addr-changed, received-from) pairs as follows. Note that changing the
+ * SlaveA address is equal to changing the node's own address:
+ *
+ * - (AddrB, SlaveB): The new AddrB will be recorded by PICS_SUBS code since
+ *		      node == NULL.
+ * - (AddrB, SlaveA): Will work as usual (the AddrB change won't be detected
+ *		      from this frame).
+ *
+ * - (AddrA, SlaveB): The old node will be found. We need to detect this and
+ *		      remove the node.
+ * - (AddrA, SlaveA): A new node will be registered (non-PICS_SUBS at first).
+ *		      The old one will be pruned after HSR_NODE_FORGET_TIME.
+ *
+ * We also need to detect if the sender's SlaveA and SlaveB cables have been
+ * swapped.
+ */
+struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
+				  struct node_entry *node,
+				  struct sk_buff *skb,
+				  enum hsr_dev_idx dev_idx)
+{
+	struct hsr_sup_payload *hsr_sp;
+	struct hsr_ethhdr_sp *hsr_ethsup;
+	int i;
+	unsigned long now;
+
+	hsr_ethsup = (struct hsr_ethhdr_sp *) skb_mac_header(skb);
+	hsr_sp = (struct hsr_sup_payload *) skb->data;
+
+	if (node && compare_ether_addr(node->MacAddressA, hsr_sp->MacAddressA)) {
+		/* Node has changed its AddrA, frame was received from SlaveB */
+		list_del_rcu(&node->mac_list);
+		call_rcu(&node->rcu_head, node_entry_reclaim);
+		node = NULL;
+	}
+
+	if (node && (dev_idx == node->AddrB_if) &&
+	    compare_ether_addr(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) {
+		/* Cables have been swapped */
+		list_del_rcu(&node->mac_list);
+		call_rcu(&node->rcu_head, node_entry_reclaim);
+		node = NULL;
+	}
+
+	if (node && (dev_idx != node->AddrB_if) &&
+	    (node->AddrB_if != HSR_DEV_NONE) &&
+	    compare_ether_addr(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) {
+		/* Cables have been swapped */
+		list_del_rcu(&node->mac_list);
+		call_rcu(&node->rcu_head, node_entry_reclaim);
+		node = NULL;
+	}
+
+	if (node)
+		return node;
+
+	node = find_node_by_AddrA(&hsr_priv->node_db, hsr_sp->MacAddressA);
+	if (node) {
+		/* Node is known, but frame was received from an unknown
+		 * address. Node is PICS_SUBS capable; merge its AddrB.
+		 */
+		memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+		node->AddrB_if = dev_idx;
+		return node;
+	}
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node)
+		return NULL;
+
+	memcpy(node->MacAddressA, hsr_sp->MacAddressA, ETH_ALEN);
+	memcpy(node->MacAddressB, hsr_ethsup->ethhdr.h_source, ETH_ALEN);
+	if (compare_ether_addr(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source))
+		node->AddrB_if = dev_idx;
+	else
+		node->AddrB_if = HSR_DEV_NONE;
+
+	/* We are only interested in time diffs here, so use current jiffies
+	 * as initialization. (0 could trigger an spurious ring error warning).
+	 */
+	now = jiffies;
+	for (i = 0; i < HSR_MAX_SLAVE; i++)
+		node->time_in[i] = now;
+	for (i = 0; i < HSR_MAX_DEV; i++)
+		node->seq_out[i] = ntohs(hsr_ethsup->hsr_sup.sequence_nr) - 1;
+
+	list_add_tail_rcu(&node->mac_list, &hsr_priv->node_db);
+
+	return node;
+}
+
+
+/* 'skb' is a frame meant for this host, that is to be passed to upper layers.
+ *
+ * If the frame was sent by a node's B interface, replace the sender
+ * address with that node's "official" address (MacAddressA) so that upper
+ * layers recognize where it came from.
+ */
+void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb)
+{
+	struct ethhdr *ethhdr;
+	struct node_entry *node;
+
+	if (!skb_mac_header_was_set(skb)) {
+		WARN_ONCE(1, "%s: Mac header not set\n", __func__);
+		return;
+	}
+	ethhdr = (struct ethhdr *) skb_mac_header(skb);
+
+	rcu_read_lock();
+	node = find_node_by_AddrB(&hsr_priv->node_db, ethhdr->h_source);
+	if (node)
+		memcpy(ethhdr->h_source, node->MacAddressA, ETH_ALEN);
+	rcu_read_unlock();
+}
+
+
+/* 'skb' is a frame meant for another host.
+ * 'hsr_dev_idx' is the HSR index of the outgoing device
+ *
+ * Substitute the target (dest) MAC address if necessary, so the it matches the
+ * recipient interface MAC address, regardless of whether that is the
+ * recipient's A or B interface.
+ * This is needed to keep the packets flowing through switches that learn on
+ * which "side" the different interfaces are.
+ */
+void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
+			 enum hsr_dev_idx dev_idx)
+{
+	struct node_entry *node;
+
+	rcu_read_lock();
+	node = find_node_by_AddrA(&hsr_priv->node_db, ethhdr->h_dest);
+	if (node && (node->AddrB_if == dev_idx))
+		memcpy(ethhdr->h_dest, node->MacAddressB, ETH_ALEN);
+	rcu_read_unlock();
+}
+
+
+/* above(a, b) - return 1 if a > b, 0 otherwise.
+ */
+static bool above(u16 a, u16 b)
+{
+	/* Remove inconsistency where above(a, b) == below(a, b) */
+	if ((int) b - a == 32768)
+		return 0;
+
+	return (((s16) (b - a)) < 0);
+}
+#define below(a, b)		above((b), (a))
+#define above_or_eq(a, b)	(!below((a), (b)))
+#define below_or_eq(a, b)	(!above((a), (b)))
+
+
+void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx)
+{
+	if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
+		WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
+		return;
+	}
+	node->time_in[dev_idx] = jiffies;
+	node->time_in_stale[dev_idx] = 0;
+}
+
+
+/* 'skb' is a HSR Ethernet frame (with a HSR tag inserted), with a valid
+ * ethhdr->h_source address and skb->mac_header set.
+ *
+ * Return:
+ *	 1 if frame can be shown to have been sent recently on this interface,
+ *	 0 otherwise, or
+ *	 negative error code on error
+ */
+int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx,
+			   struct sk_buff *skb)
+{
+	struct hsr_ethhdr *hsr_ethhdr;
+	u16 sequence_nr;
+
+	if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
+		WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
+		return -EINVAL;
+	}
+	if (!skb_mac_header_was_set(skb)) {
+		WARN_ONCE(1, "%s: Mac header not set\n", __func__);
+		return -EINVAL;
+	}
+	hsr_ethhdr = (struct hsr_ethhdr *) skb_mac_header(skb);
+
+	sequence_nr = ntohs(hsr_ethhdr->hsr_tag.sequence_nr);
+	if (below_or_eq(sequence_nr, node->seq_out[dev_idx]))
+		return 1;
+
+	node->seq_out[dev_idx] = sequence_nr;
+	return 0;
+}
+
+
+
+static bool is_late(struct node_entry *node, enum hsr_dev_idx dev_idx)
+{
+	enum hsr_dev_idx other;
+
+	if (node->time_in_stale[dev_idx])
+		return 1;
+
+	if (dev_idx == HSR_DEV_SLAVE_A)
+		other = HSR_DEV_SLAVE_B;
+	else
+		other = HSR_DEV_SLAVE_A;
+
+	if (node->time_in_stale[other])
+		return 0;
+
+	if (time_after(node->time_in[other], node->time_in[dev_idx] +
+		       msecs_to_jiffies(MAX_SLAVE_DIFF)))
+		return 1;
+
+	return 0;
+}
+
+
+/* Remove stale sequence_nr records. Called by timer every
+ * HSR_LIFE_CHECK_INTERVAL (two seconds or so).
+ */
+void hsr_prune_nodes(struct hsr_priv *hsr_priv)
+{
+	struct node_entry *node;
+	unsigned long timestamp;
+	unsigned long time_a, time_b;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(node, &hsr_priv->node_db, mac_list) {
+		/* Shorthand */
+		time_a = node->time_in[HSR_DEV_SLAVE_A];
+		time_b = node->time_in[HSR_DEV_SLAVE_B];
+
+		/* Check for timestamps old enough to risk wrap-around */
+		if (time_after(jiffies, time_a + MAX_JIFFY_OFFSET/2))
+			node->time_in_stale[HSR_DEV_SLAVE_A] = 1;
+		if (time_after(jiffies, time_b + MAX_JIFFY_OFFSET/2))
+			node->time_in_stale[HSR_DEV_SLAVE_B] = 1;
+
+		/* Get age of newest frame from node.
+		 * At least one time_in is OK here; nodes get pruned long
+		 * before both time_ins can get stale
+		 */
+		timestamp = time_a;
+		if (node->time_in_stale[HSR_DEV_SLAVE_A] ||
+		    (!node->time_in_stale[HSR_DEV_SLAVE_B] &&
+		    time_after(time_b, time_a)))
+			timestamp = time_b;
+
+		/* Warn of ring error only as long as we get frames at all */
+		if (time_is_after_jiffies(timestamp +
+					msecs_to_jiffies(1.5*MAX_SLAVE_DIFF))) {
+
+			if (is_late(node, HSR_DEV_SLAVE_A))
+				hsr_nl_ringerror(hsr_priv, node->MacAddressA,
+						 HSR_DEV_SLAVE_A);
+			else if (is_late(node, HSR_DEV_SLAVE_B))
+				hsr_nl_ringerror(hsr_priv, node->MacAddressA,
+						 HSR_DEV_SLAVE_B);
+		}
+
+		/* Prune old entries */
+		if (time_is_before_jiffies(timestamp +
+					msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
+			hsr_nl_nodedown(hsr_priv, node->MacAddressA);
+			list_del_rcu(&node->mac_list);
+			/* Note that we need to free this entry later: */
+			call_rcu(&node->rcu_head, node_entry_reclaim);
+		}
+	}
+	rcu_read_unlock();
+}
+
+
+void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
+			unsigned char addr[ETH_ALEN])
+{
+	struct node_entry *node;
+
+	if (!_pos) {
+		node = list_first_or_null_rcu(&hsr_priv->node_db,
+						struct node_entry, mac_list);
+		if (node)
+			memcpy(addr, node->MacAddressA, ETH_ALEN);
+		return node;
+	}
+
+	node = _pos;
+	list_for_each_entry_continue_rcu(node, &hsr_priv->node_db, mac_list) {
+		memcpy(addr, node->MacAddressA, ETH_ALEN);
+		return node;
+	}
+
+	return NULL;
+}
+
+
+int hsr_get_node_data(struct hsr_priv *hsr_priv,
+		      const unsigned char *addr,
+		      unsigned char addr_b[ETH_ALEN],
+		      unsigned int *addr_b_ifindex,
+		      int *if1_age,
+		      u16 *if1_seq,
+		      int *if2_age,
+		      u16 *if2_seq)
+{
+	struct node_entry *node;
+	unsigned long tdiff;
+
+
+	rcu_read_lock();
+	node = find_node_by_AddrA(&hsr_priv->node_db, addr);
+	if (!node) {
+		rcu_read_unlock();
+		return -ENOENT;	/* No such entry */
+	}
+
+	memcpy(addr_b, node->MacAddressB, ETH_ALEN);
+
+	tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_A];
+	if (node->time_in_stale[HSR_DEV_SLAVE_A])
+		*if1_age = INT_MAX;
+#if HZ <= MSEC_PER_SEC
+	else if (tdiff > msecs_to_jiffies(INT_MAX))
+		*if1_age = INT_MAX;
+#endif
+	else
+		*if1_age = jiffies_to_msecs(tdiff);
+
+	tdiff = jiffies - node->time_in[HSR_DEV_SLAVE_B];
+	if (node->time_in_stale[HSR_DEV_SLAVE_B])
+		*if2_age = INT_MAX;
+#if HZ <= MSEC_PER_SEC
+	else if (tdiff > msecs_to_jiffies(INT_MAX))
+		*if2_age = INT_MAX;
+#endif
+	else
+		*if2_age = jiffies_to_msecs(tdiff);
+
+	/* Present sequence numbers as if they were incoming on interface */
+	*if1_seq = node->seq_out[HSR_DEV_SLAVE_B];
+	*if2_seq = node->seq_out[HSR_DEV_SLAVE_A];
+
+	if ((node->AddrB_if != HSR_DEV_NONE) && hsr_priv->slave[node->AddrB_if])
+		*addr_b_ifindex = hsr_priv->slave[node->AddrB_if]->ifindex;
+	else
+		*addr_b_ifindex = -1;
+
+	rcu_read_unlock();
+
+	return 0;
+}
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
new file mode 100644
index 0000000..e6c4022
--- /dev/null
+++ b/net/hsr/hsr_framereg.h
@@ -0,0 +1,53 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef _HSR_FRAMEREG_H
+#define _HSR_FRAMEREG_H
+
+#include "hsr_main.h"
+
+struct node_entry;
+
+struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb);
+
+struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
+				  struct node_entry *node,
+				  struct sk_buff *skb,
+				  enum hsr_dev_idx dev_idx);
+
+void hsr_addr_subst_source(struct hsr_priv *hsr_priv, struct sk_buff *skb);
+void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr,
+			 enum hsr_dev_idx dev_idx);
+
+void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx);
+
+int hsr_register_frame_out(struct node_entry *node, enum hsr_dev_idx dev_idx,
+			   struct sk_buff *skb);
+
+void hsr_prune_nodes(struct hsr_priv *hsr_priv);
+
+int hsr_create_self_node(struct list_head *self_node_db,
+			 unsigned char addr_a[ETH_ALEN],
+			 unsigned char addr_b[ETH_ALEN]);
+
+void *hsr_get_next_node(struct hsr_priv *hsr_priv, void *_pos,
+			unsigned char addr[ETH_ALEN]);
+
+int hsr_get_node_data(struct hsr_priv *hsr_priv,
+		      const unsigned char *addr,
+		      unsigned char addr_b[ETH_ALEN],
+		      unsigned int *addr_b_ifindex,
+		      int *if1_age,
+		      u16 *if1_seq,
+		      int *if2_age,
+		      u16 *if2_seq);
+
+#endif /* _HSR_FRAMEREG_H */
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
new file mode 100644
index 0000000..b08197d
--- /dev/null
+++ b/net/hsr/hsr_main.c
@@ -0,0 +1,469 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * In addition to routines for registering and unregistering HSR support, this
+ * file also contains the receive routine that handles all incoming frames with
+ * Ethertype (protocol) ETH_P_PRP (HSRv0), and network device event handling.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/timer.h>
+#include <linux/etherdevice.h>
+#include "hsr_main.h"
+#include "hsr_device.h"
+#include "hsr_netlink.h"
+#include "hsr_framereg.h"
+
+
+/* List of all registered virtual HSR devices */
+static LIST_HEAD(hsr_list);
+
+void register_hsr_master(struct hsr_priv *hsr_priv)
+{
+	list_add_tail_rcu(&hsr_priv->hsr_list, &hsr_list);
+}
+
+void unregister_hsr_master(struct hsr_priv *hsr_priv)
+{
+	struct hsr_priv *hsr_priv_it;
+
+	list_for_each_entry(hsr_priv_it, &hsr_list, hsr_list)
+		if (hsr_priv_it == hsr_priv) {
+			list_del_rcu(&hsr_priv_it->hsr_list);
+			return;
+		}
+}
+
+bool is_hsr_slave(struct net_device *dev)
+{
+	struct hsr_priv *hsr_priv_it;
+
+	list_for_each_entry_rcu(hsr_priv_it, &hsr_list, hsr_list) {
+		if (dev == hsr_priv_it->slave[0])
+			return true;
+		if (dev == hsr_priv_it->slave[1])
+			return true;
+	}
+
+	return false;
+}
+
+
+/* If dev is a HSR slave device, return the virtual master device. Return NULL
+ * otherwise.
+ */
+static struct hsr_priv *get_hsr_master(struct net_device *dev)
+{
+	struct hsr_priv *hsr_priv;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list)
+		if ((dev == hsr_priv->slave[0]) ||
+		    (dev == hsr_priv->slave[1])) {
+			rcu_read_unlock();
+			return hsr_priv;
+		}
+
+	rcu_read_unlock();
+	return NULL;
+}
+
+
+/* If dev is a HSR slave device, return the other slave device. Return NULL
+ * otherwise.
+ */
+static struct net_device *get_other_slave(struct hsr_priv *hsr_priv,
+					  struct net_device *dev)
+{
+	if (dev == hsr_priv->slave[0])
+		return hsr_priv->slave[1];
+	if (dev == hsr_priv->slave[1])
+		return hsr_priv->slave[0];
+
+	return NULL;
+}
+
+
+static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
+			     void *ptr)
+{
+	struct net_device *slave, *other_slave;
+	struct hsr_priv *hsr_priv;
+	int old_operstate;
+	int mtu_max;
+	int res;
+	struct net_device *dev;
+
+	dev = netdev_notifier_info_to_dev(ptr);
+
+	hsr_priv = get_hsr_master(dev);
+	if (hsr_priv) {
+		/* dev is a slave device */
+		slave = dev;
+		other_slave = get_other_slave(hsr_priv, slave);
+	} else {
+		if (!is_hsr_master(dev))
+			return NOTIFY_DONE;
+		hsr_priv = netdev_priv(dev);
+		slave = hsr_priv->slave[0];
+		other_slave = hsr_priv->slave[1];
+	}
+
+	switch (event) {
+	case NETDEV_UP:		/* Administrative state DOWN */
+	case NETDEV_DOWN:	/* Administrative state UP */
+	case NETDEV_CHANGE:	/* Link (carrier) state changes */
+		old_operstate = hsr_priv->dev->operstate;
+		hsr_set_carrier(hsr_priv->dev, slave, other_slave);
+		/* netif_stacked_transfer_operstate() cannot be used here since
+		 * it doesn't set IF_OPER_LOWERLAYERDOWN (?)
+		 */
+		hsr_set_operstate(hsr_priv->dev, slave, other_slave);
+		hsr_check_announce(hsr_priv->dev, old_operstate);
+		break;
+	case NETDEV_CHANGEADDR:
+
+		/* This should not happen since there's no ndo_set_mac_address()
+		 * for HSR devices - i.e. not supported.
+		 */
+		if (dev == hsr_priv->dev)
+			break;
+
+		if (dev == hsr_priv->slave[0])
+			memcpy(hsr_priv->dev->dev_addr,
+			       hsr_priv->slave[0]->dev_addr, ETH_ALEN);
+
+		/* Make sure we recognize frames from ourselves in hsr_rcv() */
+		res = hsr_create_self_node(&hsr_priv->self_node_db,
+					   hsr_priv->dev->dev_addr,
+					   hsr_priv->slave[1] ?
+						hsr_priv->slave[1]->dev_addr :
+						hsr_priv->dev->dev_addr);
+		if (res)
+			netdev_warn(hsr_priv->dev,
+				    "Could not update HSR node address.\n");
+
+		if (dev == hsr_priv->slave[0])
+			call_netdevice_notifiers(NETDEV_CHANGEADDR, hsr_priv->dev);
+		break;
+	case NETDEV_CHANGEMTU:
+		if (dev == hsr_priv->dev)
+			break; /* Handled in ndo_change_mtu() */
+		mtu_max = hsr_get_max_mtu(hsr_priv);
+		if (hsr_priv->dev->mtu > mtu_max)
+			dev_set_mtu(hsr_priv->dev, mtu_max);
+		break;
+	case NETDEV_UNREGISTER:
+		if (dev == hsr_priv->slave[0])
+			hsr_priv->slave[0] = NULL;
+		if (dev == hsr_priv->slave[1])
+			hsr_priv->slave[1] = NULL;
+
+		/* There should really be a way to set a new slave device... */
+
+		break;
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* HSR works only on Ethernet devices. Refuse slave to change
+		 * its type.
+		 */
+		return NOTIFY_BAD;
+	}
+
+	return NOTIFY_DONE;
+}
+
+
+static struct timer_list prune_timer;
+
+static void prune_nodes_all(unsigned long data)
+{
+	struct hsr_priv *hsr_priv;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(hsr_priv, &hsr_list, hsr_list)
+		hsr_prune_nodes(hsr_priv);
+	rcu_read_unlock();
+
+	prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
+	add_timer(&prune_timer);
+}
+
+
+static struct sk_buff *hsr_pull_tag(struct sk_buff *skb)
+{
+	struct hsr_tag *hsr_tag;
+	struct sk_buff *skb2;
+
+	skb2 = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb2))
+		goto err_free;
+	skb = skb2;
+
+	if (unlikely(!pskb_may_pull(skb, HSR_TAGLEN)))
+		goto err_free;
+
+	hsr_tag = (struct hsr_tag *) skb->data;
+	skb->protocol = hsr_tag->encap_proto;
+	skb_pull(skb, HSR_TAGLEN);
+
+	return skb;
+
+err_free:
+	kfree_skb(skb);
+	return NULL;
+}
+
+
+/* The uses I can see for these HSR supervision frames are:
+ * 1) Use the frames that are sent after node initialization ("HSR_TLV.Type =
+ *    22") to reset any sequence_nr counters belonging to that node. Useful if
+ *    the other node's counter has been reset for some reason.
+ *    --
+ *    Or not - resetting the counter and bridging the frame would create a
+ *    loop, unfortunately.
+ *
+ * 2) Use the LifeCheck frames to detect ring breaks. I.e. if no LifeCheck
+ *    frame is received from a particular node, we know something is wrong.
+ *    We just register these (as with normal frames) and throw them away.
+ *
+ * 3) Allow different MAC addresses for the two slave interfaces, using the
+ *    MacAddressA field.
+ */
+static bool is_supervision_frame(struct hsr_priv *hsr_priv, struct sk_buff *skb)
+{
+	struct hsr_sup_tag *hsr_stag;
+
+	if (compare_ether_addr(eth_hdr(skb)->h_dest,
+			       hsr_priv->sup_multicast_addr))
+		return 0;
+
+	hsr_stag = (struct hsr_sup_tag *) skb->data;
+	if (get_hsr_stag_path(hsr_stag) != 0x0f)
+		return 0;
+	if ((hsr_stag->HSR_TLV_Type != HSR_TLV_ANNOUNCE) &&
+	    (hsr_stag->HSR_TLV_Type != HSR_TLV_LIFE_CHECK))
+		return 0;
+	if (hsr_stag->HSR_TLV_Length != 12)
+		return 0;
+
+	return 1;
+}
+
+
+/* Implementation somewhat according to IEC-62439-3, p. 43
+ */
+static int hsr_rcv(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct hsr_priv *hsr_priv;
+	struct net_device *other_slave;
+	struct node_entry *node;
+	int deliver_to_self;
+	struct sk_buff *skb_deliver;
+	enum hsr_dev_idx dev_in_idx, dev_other_idx;
+	bool dup_out;
+	int ret;
+
+	hsr_priv = get_hsr_master(dev);
+
+	if (!hsr_priv) {
+		/* Non-HSR-slave device 'dev' is connected to a HSR network */
+		kfree_skb(skb);
+		dev->stats.rx_errors++;
+		return NET_RX_SUCCESS;
+	}
+
+	if (dev == hsr_priv->slave[0]) {
+		dev_in_idx = HSR_DEV_SLAVE_A;
+		dev_other_idx = HSR_DEV_SLAVE_B;
+	} else {
+		dev_in_idx = HSR_DEV_SLAVE_B;
+		dev_other_idx = HSR_DEV_SLAVE_A;
+	}
+
+	node = hsr_find_node(&hsr_priv->self_node_db, skb);
+	if (node) {
+		/* Always kill frames sent by ourselves */
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+	/* Is this frame a candidate for local reception? */
+	deliver_to_self = 0;
+	if ((skb->pkt_type == PACKET_HOST) ||
+	    (skb->pkt_type == PACKET_MULTICAST) ||
+	    (skb->pkt_type == PACKET_BROADCAST))
+		deliver_to_self = 1;
+	else if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+				     hsr_priv->dev->dev_addr)) {
+		skb->pkt_type = PACKET_HOST;
+		deliver_to_self = 1;
+	}
+
+
+	rcu_read_lock(); /* node_db */
+	node = hsr_find_node(&hsr_priv->node_db, skb);
+
+	if (is_supervision_frame(hsr_priv, skb)) {
+		skb_pull(skb, sizeof(struct hsr_sup_tag));
+		node = hsr_merge_node(hsr_priv, node, skb, dev_in_idx);
+		if (!node) {
+			rcu_read_unlock(); /* node_db */
+			kfree_skb(skb);
+			hsr_priv->dev->stats.rx_dropped++;
+			return NET_RX_DROP;
+		}
+		skb_push(skb, sizeof(struct hsr_sup_tag));
+		deliver_to_self = 0;
+	}
+
+	if (!node) {
+		/* Source node unknown; this might be a HSR frame from
+		 * another net (different multicast address). Ignore it.
+		 */
+		rcu_read_unlock(); /* node_db */
+		kfree_skb(skb);
+		return NET_RX_SUCCESS;
+	}
+
+	/* Register ALL incoming frames as outgoing through the other interface.
+	 * This allows us to register frames as incoming only if they are valid
+	 * for the receiving interface, without using a specific counter for
+	 * incoming frames.
+	 */
+	dup_out = hsr_register_frame_out(node, dev_other_idx, skb);
+	if (!dup_out)
+		hsr_register_frame_in(node, dev_in_idx);
+
+	/* Forward this frame? */
+	if (!dup_out && (skb->pkt_type != PACKET_HOST))
+		other_slave = get_other_slave(hsr_priv, dev);
+	else
+		other_slave = NULL;
+
+	if (hsr_register_frame_out(node, HSR_DEV_MASTER, skb))
+		deliver_to_self = 0;
+
+	rcu_read_unlock(); /* node_db */
+
+	if (!deliver_to_self && !other_slave) {
+		kfree_skb(skb);
+		/* Circulated frame; silently remove it. */
+		return NET_RX_SUCCESS;
+	}
+
+	skb_deliver = skb;
+	if (deliver_to_self && other_slave) {
+		/* skb_clone() is not enough since we will strip the hsr tag
+		 * and do address substitution below
+		 */
+		skb_deliver = pskb_copy(skb, GFP_ATOMIC);
+		if (!skb_deliver) {
+			deliver_to_self = 0;
+			hsr_priv->dev->stats.rx_dropped++;
+		}
+	}
+
+	if (deliver_to_self) {
+		bool multicast_frame;
+
+		skb_deliver = hsr_pull_tag(skb_deliver);
+		if (!skb_deliver) {
+			hsr_priv->dev->stats.rx_dropped++;
+			goto forward;
+		}
+#if !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+		/* Move everything in the header that is after the HSR tag,
+		 * to work around alignment problems caused by the 6-byte HSR
+		 * tag. In practice, this removes/overwrites the HSR tag in
+		 * the header and restores a "standard" packet.
+		 */
+		memmove(skb_deliver->data - HSR_TAGLEN, skb_deliver->data,
+			skb_headlen(skb_deliver));
+
+		/* Adjust skb members so they correspond with the move above.
+		 * This cannot possibly underflow skb->data since hsr_pull_tag()
+		 * above succeeded.
+		 * At this point in the protocol stack, the transport and
+		 * network headers have not been set yet, and we haven't touched
+		 * the mac header nor the head. So we only need to adjust data
+		 * and tail:
+		 */
+		skb_deliver->data -= HSR_TAGLEN;
+		skb_deliver->tail -= HSR_TAGLEN;
+#endif
+		skb_deliver->dev = hsr_priv->dev;
+		hsr_addr_subst_source(hsr_priv, skb_deliver);
+		multicast_frame = (skb_deliver->pkt_type == PACKET_MULTICAST);
+		ret = netif_rx(skb_deliver);
+		if (ret == NET_RX_DROP) {
+			hsr_priv->dev->stats.rx_dropped++;
+		} else {
+			hsr_priv->dev->stats.rx_packets++;
+			hsr_priv->dev->stats.rx_bytes += skb->len;
+			if (multicast_frame)
+				hsr_priv->dev->stats.multicast++;
+		}
+	}
+
+forward:
+	if (other_slave) {
+		skb_push(skb, ETH_HLEN);
+		skb->dev = other_slave;
+		dev_queue_xmit(skb);
+	}
+
+	return NET_RX_SUCCESS;
+}
+
+
+static struct packet_type hsr_pt __read_mostly = {
+	.type = htons(ETH_P_PRP),
+	.func = hsr_rcv,
+};
+
+static struct notifier_block hsr_nb = {
+	.notifier_call = hsr_netdev_notify,	/* Slave event notifications */
+};
+
+
+static int __init hsr_init(void)
+{
+	int res;
+
+	BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_TAGLEN);
+
+	dev_add_pack(&hsr_pt);
+
+	init_timer(&prune_timer);
+	prune_timer.function = prune_nodes_all;
+	prune_timer.data = 0;
+	prune_timer.expires = jiffies + msecs_to_jiffies(PRUNE_PERIOD);
+	add_timer(&prune_timer);
+
+	register_netdevice_notifier(&hsr_nb);
+
+	res = hsr_netlink_init();
+
+	return res;
+}
+
+static void __exit hsr_exit(void)
+{
+	unregister_netdevice_notifier(&hsr_nb);
+	del_timer(&prune_timer);
+	hsr_netlink_exit();
+	dev_remove_pack(&hsr_pt);
+}
+
+module_init(hsr_init);
+module_exit(hsr_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
new file mode 100644
index 0000000..56fe060
--- /dev/null
+++ b/net/hsr/hsr_main.h
@@ -0,0 +1,166 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef _HSR_PRIVATE_H
+#define _HSR_PRIVATE_H
+
+#include <linux/netdevice.h>
+#include <linux/list.h>
+
+
+/* Time constants as specified in the HSR specification (IEC-62439-3 2010)
+ * Table 8.
+ * All values in milliseconds.
+ */
+#define HSR_LIFE_CHECK_INTERVAL		 2000 /* ms */
+#define HSR_NODE_FORGET_TIME		60000 /* ms */
+#define HSR_ANNOUNCE_INTERVAL		  100 /* ms */
+
+
+/* By how much may slave1 and slave2 timestamps of latest received frame from
+ * each node differ before we notify of communication problem?
+ */
+#define MAX_SLAVE_DIFF			 3000 /* ms */
+
+
+/* How often shall we check for broken ring and remove node entries older than
+ * HSR_NODE_FORGET_TIME?
+ */
+#define PRUNE_PERIOD			 3000 /* ms */
+
+
+#define HSR_TLV_ANNOUNCE		   22
+#define HSR_TLV_LIFE_CHECK		   23
+
+
+/* HSR Tag.
+ * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB,
+ * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest,
+ * h_source, h_proto = 0x88FB }, and add { path, LSDU_size, sequence Nr,
+ * encapsulated protocol } instead.
+ */
+#define HSR_TAGLEN	6
+
+/* Field names below as defined in the IEC:2010 standard for HSR. */
+struct hsr_tag {
+	__be16		path_and_LSDU_size;
+	__be16		sequence_nr;
+	__be16		encap_proto;
+} __packed;
+
+
+/* The helper functions below assumes that 'path' occupies the 4 most
+ * significant bits of the 16-bit field shared by 'path' and 'LSDU_size' (or
+ * equivalently, the 4 most significant bits of HSR tag byte 14).
+ *
+ * This is unclear in the IEC specification; its definition of MAC addresses
+ * indicates the spec is written with the least significant bit first (to the
+ * left). This, however, would mean that the LSDU field would be split in two
+ * with the path field in-between, which seems strange. I'm guessing the MAC
+ * address definition is in error.
+ */
+static inline u16 get_hsr_tag_path(struct hsr_tag *ht)
+{
+	return ntohs(ht->path_and_LSDU_size) >> 12;
+}
+
+static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht)
+{
+	return ntohs(ht->path_and_LSDU_size) & 0x0FFF;
+}
+
+static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path)
+{
+	ht->path_and_LSDU_size = htons(
+			(ntohs(ht->path_and_LSDU_size) & 0x0FFF) | (path << 12));
+}
+
+static inline void set_hsr_tag_LSDU_size(struct hsr_tag *ht, u16 LSDU_size)
+{
+	ht->path_and_LSDU_size = htons(
+			(ntohs(ht->path_and_LSDU_size) & 0xF000) |
+			(LSDU_size & 0x0FFF));
+}
+
+struct hsr_ethhdr {
+	struct ethhdr	ethhdr;
+	struct hsr_tag	hsr_tag;
+} __packed;
+
+
+/* HSR Supervision Frame data types.
+ * Field names as defined in the IEC:2010 standard for HSR.
+ */
+struct hsr_sup_tag {
+	__be16		path_and_HSR_Ver;
+	__be16		sequence_nr;
+	__u8		HSR_TLV_Type;
+	__u8		HSR_TLV_Length;
+} __packed;
+
+struct hsr_sup_payload {
+	unsigned char	MacAddressA[ETH_ALEN];
+} __packed;
+
+static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst)
+{
+	return get_hsr_tag_path((struct hsr_tag *) hst);
+}
+
+static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst)
+{
+	return get_hsr_tag_LSDU_size((struct hsr_tag *) hst);
+}
+
+static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path)
+{
+	set_hsr_tag_path((struct hsr_tag *) hst, path);
+}
+
+static inline void set_hsr_stag_HSR_Ver(struct hsr_sup_tag *hst, u16 HSR_Ver)
+{
+	set_hsr_tag_LSDU_size((struct hsr_tag *) hst, HSR_Ver);
+}
+
+struct hsr_ethhdr_sp {
+	struct ethhdr		ethhdr;
+	struct hsr_sup_tag	hsr_sup;
+} __packed;
+
+
+enum hsr_dev_idx {
+	HSR_DEV_NONE = -1,
+	HSR_DEV_SLAVE_A = 0,
+	HSR_DEV_SLAVE_B,
+	HSR_DEV_MASTER,
+};
+#define HSR_MAX_SLAVE	(HSR_DEV_SLAVE_B + 1)
+#define HSR_MAX_DEV	(HSR_DEV_MASTER + 1)
+
+struct hsr_priv {
+	struct list_head	hsr_list;	/* List of hsr devices */
+	struct rcu_head		rcu_head;
+	struct net_device	*dev;
+	struct net_device	*slave[HSR_MAX_SLAVE];
+	struct list_head	node_db;	/* Other HSR nodes */
+	struct list_head	self_node_db;	/* MACs of slaves */
+	struct timer_list	announce_timer;	/* Supervision frame dispatch */
+	int announce_count;
+	u16 sequence_nr;
+	spinlock_t seqnr_lock;			/* locking for sequence_nr */
+	unsigned char		sup_multicast_addr[ETH_ALEN];
+};
+
+void register_hsr_master(struct hsr_priv *hsr_priv);
+void unregister_hsr_master(struct hsr_priv *hsr_priv);
+bool is_hsr_slave(struct net_device *dev);
+
+#endif /*  _HSR_PRIVATE_H */
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
new file mode 100644
index 0000000..4e66bf6
--- /dev/null
+++ b/net/hsr/hsr_netlink.c
@@ -0,0 +1,457 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ *
+ * Routines for handling Netlink messages for HSR.
+ */
+
+#include "hsr_netlink.h"
+#include <linux/kernel.h>
+#include <net/rtnetlink.h>
+#include <net/genetlink.h>
+#include "hsr_main.h"
+#include "hsr_device.h"
+#include "hsr_framereg.h"
+
+static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = {
+	[IFLA_HSR_SLAVE1]		= { .type = NLA_U32 },
+	[IFLA_HSR_SLAVE2]		= { .type = NLA_U32 },
+	[IFLA_HSR_MULTICAST_SPEC]	= { .type = NLA_U8 },
+};
+
+
+/* Here, it seems a netdevice has already been allocated for us, and the
+ * hsr_dev_setup routine has been executed. Nice!
+ */
+static int hsr_newlink(struct net *src_net, struct net_device *dev,
+		       struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net_device *link[2];
+	unsigned char multicast_spec;
+
+	if (!data[IFLA_HSR_SLAVE1]) {
+		netdev_info(dev, "IFLA_HSR_SLAVE1 missing!\n");
+		return -EINVAL;
+	}
+	link[0] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE1]));
+	if (!data[IFLA_HSR_SLAVE2]) {
+		netdev_info(dev, "IFLA_HSR_SLAVE2 missing!\n");
+		return -EINVAL;
+	}
+	link[1] = __dev_get_by_index(src_net, nla_get_u32(data[IFLA_HSR_SLAVE2]));
+
+	if (!link[0] || !link[1])
+		return -ENODEV;
+	if (link[0] == link[1])
+		return -EINVAL;
+
+	if (!data[IFLA_HSR_MULTICAST_SPEC])
+		multicast_spec = 0;
+	else
+		multicast_spec = nla_get_u8(data[IFLA_HSR_MULTICAST_SPEC]);
+
+	return hsr_dev_finalize(dev, link, multicast_spec);
+}
+
+static struct rtnl_link_ops hsr_link_ops __read_mostly = {
+	.kind		= "hsr",
+	.maxtype	= IFLA_HSR_MAX,
+	.policy		= hsr_policy,
+	.priv_size	= sizeof(struct hsr_priv),
+	.setup		= hsr_dev_setup,
+	.newlink	= hsr_newlink,
+};
+
+
+
+/* attribute policy */
+/* NLA_BINARY missing in libnl; use NLA_UNSPEC in userspace instead. */
+static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
+	[HSR_A_NODE_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN },
+	[HSR_A_NODE_ADDR_B] = { .type = NLA_BINARY, .len = ETH_ALEN },
+	[HSR_A_IFINDEX] = { .type = NLA_U32 },
+	[HSR_A_IF1_AGE] = { .type = NLA_U32 },
+	[HSR_A_IF2_AGE] = { .type = NLA_U32 },
+	[HSR_A_IF1_SEQ] = { .type = NLA_U16 },
+	[HSR_A_IF2_SEQ] = { .type = NLA_U16 },
+};
+
+static struct genl_family hsr_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = 0,
+	.name = "HSR",
+	.version = 1,
+	.maxattr = HSR_A_MAX,
+};
+
+static struct genl_multicast_group hsr_network_genl_mcgrp = {
+	.name = "hsr-network",
+};
+
+
+
+/* This is called if for some node with MAC address addr, we only get frames
+ * over one of the slave interfaces. This would indicate an open network ring
+ * (i.e. a link has failed somewhere).
+ */
+void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
+		      enum hsr_dev_idx dev_idx)
+{
+	struct sk_buff *skb;
+	void *msg_head;
+	int res;
+	int ifindex;
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		goto fail;
+
+	msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_RING_ERROR);
+	if (!msg_head)
+		goto nla_put_failure;
+
+	res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr);
+	if (res < 0)
+		goto nla_put_failure;
+
+	if (hsr_priv->slave[dev_idx])
+		ifindex = hsr_priv->slave[dev_idx]->ifindex;
+	else
+		ifindex = -1;
+	res = nla_put_u32(skb, HSR_A_IFINDEX, ifindex);
+	if (res < 0)
+		goto nla_put_failure;
+
+	genlmsg_end(skb, msg_head);
+	genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+
+	return;
+
+nla_put_failure:
+	kfree_skb(skb);
+
+fail:
+	netdev_warn(hsr_priv->dev, "Could not send HSR ring error message\n");
+}
+
+/* This is called when we haven't heard from the node with MAC address addr for
+ * some time (just before the node is removed from the node table/list).
+ */
+void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN])
+{
+	struct sk_buff *skb;
+	void *msg_head;
+	int res;
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		goto fail;
+
+	msg_head = genlmsg_put(skb, 0, 0, &hsr_genl_family, 0, HSR_C_NODE_DOWN);
+	if (!msg_head)
+		goto nla_put_failure;
+
+
+	res = nla_put(skb, HSR_A_NODE_ADDR, ETH_ALEN, addr);
+	if (res < 0)
+		goto nla_put_failure;
+
+	genlmsg_end(skb, msg_head);
+	genlmsg_multicast(skb, 0, hsr_network_genl_mcgrp.id, GFP_ATOMIC);
+
+	return;
+
+nla_put_failure:
+	kfree_skb(skb);
+
+fail:
+	netdev_warn(hsr_priv->dev, "Could not send HSR node down\n");
+}
+
+
+/* HSR_C_GET_NODE_STATUS lets userspace query the internal HSR node table
+ * about the status of a specific node in the network, defined by its MAC
+ * address.
+ *
+ * Input: hsr ifindex, node mac address
+ * Output: hsr ifindex, node mac address (copied from request),
+ *	   age of latest frame from node over slave 1, slave 2 [ms]
+ */
+static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info)
+{
+	/* For receiving */
+	struct nlattr *na;
+	struct net_device *hsr_dev;
+
+	/* For sending */
+	struct sk_buff *skb_out;
+	void *msg_head;
+	struct hsr_priv *hsr_priv;
+	unsigned char hsr_node_addr_b[ETH_ALEN];
+	int hsr_node_if1_age;
+	u16 hsr_node_if1_seq;
+	int hsr_node_if2_age;
+	u16 hsr_node_if2_seq;
+	int addr_b_ifindex;
+	int res;
+
+	if (!info)
+		goto invalid;
+
+	na = info->attrs[HSR_A_IFINDEX];
+	if (!na)
+		goto invalid;
+	na = info->attrs[HSR_A_NODE_ADDR];
+	if (!na)
+		goto invalid;
+
+	hsr_dev = __dev_get_by_index(genl_info_net(info),
+					nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+	if (!hsr_dev)
+		goto invalid;
+	if (!is_hsr_master(hsr_dev))
+		goto invalid;
+
+
+	/* Send reply */
+
+	skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb_out) {
+		res = -ENOMEM;
+		goto fail;
+	}
+
+	msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid,
+				info->snd_seq, &hsr_genl_family, 0,
+				HSR_C_SET_NODE_STATUS);
+	if (!msg_head) {
+		res = -ENOMEM;
+		goto nla_put_failure;
+	}
+
+	res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+	if (res < 0)
+		goto nla_put_failure;
+
+	hsr_priv = netdev_priv(hsr_dev);
+	res = hsr_get_node_data(hsr_priv,
+			(unsigned char *) nla_data(info->attrs[HSR_A_NODE_ADDR]),
+			hsr_node_addr_b,
+			&addr_b_ifindex,
+			&hsr_node_if1_age,
+			&hsr_node_if1_seq,
+			&hsr_node_if2_age,
+			&hsr_node_if2_seq);
+	if (res < 0)
+		goto fail;
+
+	res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN,
+					nla_data(info->attrs[HSR_A_NODE_ADDR]));
+	if (res < 0)
+		goto nla_put_failure;
+
+	if (addr_b_ifindex > -1) {
+		res = nla_put(skb_out, HSR_A_NODE_ADDR_B, ETH_ALEN,
+								hsr_node_addr_b);
+		if (res < 0)
+			goto nla_put_failure;
+
+		res = nla_put_u32(skb_out, HSR_A_ADDR_B_IFINDEX, addr_b_ifindex);
+		if (res < 0)
+			goto nla_put_failure;
+	}
+
+	res = nla_put_u32(skb_out, HSR_A_IF1_AGE, hsr_node_if1_age);
+	if (res < 0)
+		goto nla_put_failure;
+	res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq);
+	if (res < 0)
+		goto nla_put_failure;
+	if (hsr_priv->slave[0])
+		res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX,
+						hsr_priv->slave[0]->ifindex);
+	if (res < 0)
+		goto nla_put_failure;
+
+	res = nla_put_u32(skb_out, HSR_A_IF2_AGE, hsr_node_if2_age);
+	if (res < 0)
+		goto nla_put_failure;
+	res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq);
+	if (res < 0)
+		goto nla_put_failure;
+	if (hsr_priv->slave[1])
+		res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX,
+						hsr_priv->slave[1]->ifindex);
+
+	genlmsg_end(skb_out, msg_head);
+	genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
+
+	return 0;
+
+invalid:
+	netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+	return 0;
+
+nla_put_failure:
+	kfree_skb(skb_out);
+	/* Fall through */
+
+fail:
+	return res;
+}
+
+static struct genl_ops hsr_ops_get_node_status = {
+	.cmd = HSR_C_GET_NODE_STATUS,
+	.flags = 0,
+	.policy = hsr_genl_policy,
+	.doit = hsr_get_node_status,
+	.dumpit = NULL,
+};
+
+
+/* Get a list of MacAddressA of all nodes known to this node (other than self).
+ */
+static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info)
+{
+	/* For receiving */
+	struct nlattr *na;
+	struct net_device *hsr_dev;
+
+	/* For sending */
+	struct sk_buff *skb_out;
+	void *msg_head;
+	struct hsr_priv *hsr_priv;
+	void *pos;
+	unsigned char addr[ETH_ALEN];
+	int res;
+
+	if (!info)
+		goto invalid;
+
+	na = info->attrs[HSR_A_IFINDEX];
+	if (!na)
+		goto invalid;
+
+	hsr_dev = __dev_get_by_index(genl_info_net(info),
+				     nla_get_u32(info->attrs[HSR_A_IFINDEX]));
+	if (!hsr_dev)
+		goto invalid;
+	if (!is_hsr_master(hsr_dev))
+		goto invalid;
+
+
+	/* Send reply */
+
+	skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb_out) {
+		res = -ENOMEM;
+		goto fail;
+	}
+
+	msg_head = genlmsg_put(skb_out, NETLINK_CB(skb_in).portid,
+				info->snd_seq, &hsr_genl_family, 0,
+				HSR_C_SET_NODE_LIST);
+	if (!msg_head) {
+		res = -ENOMEM;
+		goto nla_put_failure;
+	}
+
+	res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex);
+	if (res < 0)
+		goto nla_put_failure;
+
+	hsr_priv = netdev_priv(hsr_dev);
+
+	rcu_read_lock();
+	pos = hsr_get_next_node(hsr_priv, NULL, addr);
+	while (pos) {
+		res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr);
+		if (res < 0) {
+			rcu_read_unlock();
+			goto nla_put_failure;
+		}
+		pos = hsr_get_next_node(hsr_priv, pos, addr);
+	}
+	rcu_read_unlock();
+
+	genlmsg_end(skb_out, msg_head);
+	genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid);
+
+	return 0;
+
+invalid:
+	netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL);
+	return 0;
+
+nla_put_failure:
+	kfree_skb(skb_out);
+	/* Fall through */
+
+fail:
+	return res;
+}
+
+
+static struct genl_ops hsr_ops_get_node_list = {
+	.cmd = HSR_C_GET_NODE_LIST,
+	.flags = 0,
+	.policy = hsr_genl_policy,
+	.doit = hsr_get_node_list,
+	.dumpit = NULL,
+};
+
+int __init hsr_netlink_init(void)
+{
+	int rc;
+
+	rc = rtnl_link_register(&hsr_link_ops);
+	if (rc)
+		goto fail_rtnl_link_register;
+
+	rc = genl_register_family(&hsr_genl_family);
+	if (rc)
+		goto fail_genl_register_family;
+
+	rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_status);
+	if (rc)
+		goto fail_genl_register_ops;
+
+	rc = genl_register_ops(&hsr_genl_family, &hsr_ops_get_node_list);
+	if (rc)
+		goto fail_genl_register_ops_node_list;
+
+	rc = genl_register_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp);
+	if (rc)
+		goto fail_genl_register_mc_group;
+
+	return 0;
+
+fail_genl_register_mc_group:
+	genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_list);
+fail_genl_register_ops_node_list:
+	genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status);
+fail_genl_register_ops:
+	genl_unregister_family(&hsr_genl_family);
+fail_genl_register_family:
+	rtnl_link_unregister(&hsr_link_ops);
+fail_rtnl_link_register:
+
+	return rc;
+}
+
+void __exit hsr_netlink_exit(void)
+{
+	genl_unregister_mc_group(&hsr_genl_family, &hsr_network_genl_mcgrp);
+	genl_unregister_ops(&hsr_genl_family, &hsr_ops_get_node_status);
+	genl_unregister_family(&hsr_genl_family);
+
+	rtnl_link_unregister(&hsr_link_ops);
+}
+
+MODULE_ALIAS_RTNL_LINK("hsr");
diff --git a/net/hsr/hsr_netlink.h b/net/hsr/hsr_netlink.h
new file mode 100644
index 0000000..d4579dc
--- /dev/null
+++ b/net/hsr/hsr_netlink.h
@@ -0,0 +1,30 @@
+/* Copyright 2011-2013 Autronica Fire and Security AS
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * Author(s):
+ *	2011-2013 Arvid Brodin, arvid.brodin@xdin.com
+ */
+
+#ifndef __HSR_NETLINK_H
+#define __HSR_NETLINK_H
+
+#include <linux/if_ether.h>
+#include <linux/module.h>
+#include <uapi/linux/hsr_netlink.h>
+
+struct hsr_priv;
+
+int __init hsr_netlink_init(void);
+void __exit hsr_netlink_exit(void);
+
+void hsr_nl_ringerror(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN],
+		      int dev_idx);
+void hsr_nl_nodedown(struct hsr_priv *hsr_priv, unsigned char addr[ETH_ALEN]);
+void hsr_nl_framedrop(int dropcount, int dev_idx);
+void hsr_nl_linkdown(int dev_idx);
+
+#endif /* __HSR_NETLINK_H */
-- 
1.8.1.5



-- 
Arvid Brodin | Consultant (Linux)
XDIN AB | Knarrarnäsgatan 7 | SE-164 40 Kista | Sweden | xdin.com

^ permalink raw reply related

* [PATCH 14/16] batman-adv: include the sync-flags when compute the global/local table CRC
From: Antonio Quartulli @ 2013-10-23 16:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Flags covered by TT_SYNC_MASK are kept in sync among the
nodes in the network and therefore they have to be
considered while computing the global/local table CRC.

In this way a generic originator is able to understand if
its table contains the correct flags or not.

Bits from 4 to 7 in the TT flags fields are now reserved for
"synchronized" flags only.

This allows future developers to add more flags of this type
without breaking compatibility.

It's important to note that not all the remote TT flags are
synchronised. This comes from the fact that some flags are
used to inject an information once only.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/packet.h            |  5 +++--
 net/batman-adv/translation-table.c | 19 +++++++++++++++++++
 net/batman-adv/types.h             |  6 ++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 9fbcaac..843b96a 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -110,12 +110,13 @@ enum batadv_tt_data_flags {
 
 /* BATADV_TT_CLIENT flags.
  * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to
- * BIT(15) are used for local computation only
+ * BIT(15) are used for local computation only.
+ * Flags from BIT(4) to BIT(7) are kept in sync with the rest of the network.
  */
 enum batadv_tt_client_flags {
 	BATADV_TT_CLIENT_DEL     = BIT(0),
 	BATADV_TT_CLIENT_ROAM    = BIT(1),
-	BATADV_TT_CLIENT_WIFI    = BIT(2),
+	BATADV_TT_CLIENT_WIFI    = BIT(4),
 	BATADV_TT_CLIENT_NOPURGE = BIT(8),
 	BATADV_TT_CLIENT_NEW     = BIT(9),
 	BATADV_TT_CLIENT_PENDING = BIT(10),
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 267780f..4add57d 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1959,6 +1959,7 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
 	struct batadv_tt_global_entry *tt_global;
 	struct hlist_head *head;
 	uint32_t i, crc_tmp, crc = 0;
+	uint8_t flags;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -1997,6 +1998,13 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
 
 			crc_tmp = crc32c(0, &tt_common->vid,
 					 sizeof(tt_common->vid));
+
+			/* compute the CRC on flags that have to be kept in sync
+			 * among nodes
+			 */
+			flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+			crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
+
 			crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
 		}
 		rcu_read_unlock();
@@ -2022,6 +2030,7 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
 	struct batadv_tt_common_entry *tt_common;
 	struct hlist_head *head;
 	uint32_t i, crc_tmp, crc = 0;
+	uint8_t flags;
 
 	for (i = 0; i < hash->size; i++) {
 		head = &hash->table[i];
@@ -2042,6 +2051,13 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
 
 			crc_tmp = crc32c(0, &tt_common->vid,
 					 sizeof(tt_common->vid));
+
+			/* compute the CRC on flags that have to be kept in sync
+			 * among nodes
+			 */
+			flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+			crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
+
 			crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
 		}
 		rcu_read_unlock();
@@ -3524,6 +3540,9 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
 {
 	int ret;
 
+	/* synchronized flags must be remote */
+	BUILD_BUG_ON(!(BATADV_TT_SYNC_MASK & BATADV_TT_REMOTE_MASK));
+
 	ret = batadv_tt_local_init(bat_priv);
 	if (ret < 0)
 		return ret;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 61297b6..3c21162 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -42,6 +42,12 @@
 #define BATADV_TT_REMOTE_MASK	0x00FF
 
 /**
+ * BATADV_TT_SYNC_MASK - bitmask of the flags that need to be kept in sync
+ *  among the nodes. These flags are used to compute the global/local CRC
+ */
+#define BATADV_TT_SYNC_MASK	0x00F0
+
+/**
  * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data
  * @ogm_buff: buffer holding the OGM packet
  * @ogm_buff_len: length of the OGM packet buffer
-- 
1.8.4

^ permalink raw reply related

* [PATCH 11/16] batman-adv: send GW_DEL event in case of soft-iface destruction
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

In case of soft_iface destruction send a GW DEL event to
userspace so that applications which are listening for GW
events are informed about the lost of connectivity and can
react accordingly.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/hard-interface.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c60d3ed..1ba8a55 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -28,6 +28,7 @@
 #include "originator.h"
 #include "hash.h"
 #include "bridge_loop_avoidance.h"
+#include "gateway_client.h"
 
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
@@ -535,8 +536,12 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
 	dev_put(hard_iface->soft_iface);
 
 	/* nobody uses this interface anymore */
-	if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
-		batadv_softif_destroy_sysfs(hard_iface->soft_iface);
+	if (!bat_priv->num_ifaces) {
+		batadv_gw_check_client_stop(bat_priv);
+
+		if (autodel == BATADV_IF_CLEANUP_AUTO)
+			batadv_softif_destroy_sysfs(hard_iface->soft_iface);
+	}
 
 	netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
 	hard_iface->soft_iface = NULL;
-- 
1.8.4

^ permalink raw reply related

* [PATCH 12/16] batman-adv: invoke dev_get_by_index() outside of is_wifi_iface()
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Antonio Quartulli <antonio@open-mesh.com>

Upcoming changes need to perform other checks on the
incoming net_device struct.

To avoid performing dev_get_by_index() for each and every
check, it is better to move it outside of is_wifi_iface()
and search the netdev object once only.

Signed-off-by: Antonio Quartulli <antonio@open-mesh.com>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
---
 net/batman-adv/hard-interface.c    | 33 ++++-----------------------------
 net/batman-adv/hard-interface.h    |  2 +-
 net/batman-adv/translation-table.c |  8 +++++++-
 3 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 1ba8a55..57c2a19 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -125,8 +125,11 @@ static int batadv_is_valid_iface(const struct net_device *net_dev)
  *
  * Returns true if the net device is a 802.11 wireless device, false otherwise.
  */
-static bool batadv_is_wifi_netdev(struct net_device *net_device)
+bool batadv_is_wifi_netdev(struct net_device *net_device)
 {
+	if (!net_device)
+		return false;
+
 #ifdef CONFIG_WIRELESS_EXT
 	/* pre-cfg80211 drivers have to implement WEXT, so it is possible to
 	 * check for wireless_handlers != NULL
@@ -142,34 +145,6 @@ static bool batadv_is_wifi_netdev(struct net_device *net_device)
 	return false;
 }
 
-/**
- * batadv_is_wifi_iface - check if the given interface represented by ifindex
- *  is a wifi interface
- * @ifindex: interface index to check
- *
- * Returns true if the interface represented by ifindex is a 802.11 wireless
- * device, false otherwise.
- */
-bool batadv_is_wifi_iface(int ifindex)
-{
-	struct net_device *net_device = NULL;
-	bool ret = false;
-
-	if (ifindex == BATADV_NULL_IFINDEX)
-		goto out;
-
-	net_device = dev_get_by_index(&init_net, ifindex);
-	if (!net_device)
-		goto out;
-
-	ret = batadv_is_wifi_netdev(net_device);
-
-out:
-	if (net_device)
-		dev_put(net_device);
-	return ret;
-}
-
 static struct batadv_hard_iface *
 batadv_hardif_get_active(const struct net_device *soft_iface)
 {
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 4989288..df4c8bd 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -41,6 +41,7 @@ enum batadv_hard_if_cleanup {
 
 extern struct notifier_block batadv_hard_if_notifier;
 
+bool batadv_is_wifi_netdev(struct net_device *net_device);
 struct batadv_hard_iface*
 batadv_hardif_get_by_netdev(const struct net_device *net_dev);
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
@@ -51,7 +52,6 @@ void batadv_hardif_remove_interfaces(void);
 int batadv_hardif_min_mtu(struct net_device *soft_iface);
 void batadv_update_min_mtu(struct net_device *soft_iface);
 void batadv_hardif_free_rcu(struct rcu_head *rcu);
-bool batadv_is_wifi_iface(int ifindex);
 
 static inline void
 batadv_hardif_free_ref(struct batadv_hard_iface *hard_iface)
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a3c965d..b0fe177 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -477,11 +477,15 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
 	struct batadv_tt_local_entry *tt_local;
 	struct batadv_tt_global_entry *tt_global;
+	struct net_device *in_dev = NULL;
 	struct hlist_head *head;
 	struct batadv_tt_orig_list_entry *orig_entry;
 	int hash_added, table_size, packet_size_max;
 	bool ret = false, roamed_back = false;
 
+	if (ifindex != BATADV_NULL_IFINDEX)
+		in_dev = dev_get_by_index(&init_net, ifindex);
+
 	tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
 	tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
 
@@ -542,7 +546,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	 */
 	tt_local->common.flags = BATADV_TT_CLIENT_NEW;
 	tt_local->common.vid = vid;
-	if (batadv_is_wifi_iface(ifindex))
+	if (batadv_is_wifi_netdev(in_dev))
 		tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
 	atomic_set(&tt_local->common.refcount, 2);
 	tt_local->last_seen = jiffies;
@@ -595,6 +599,8 @@ check_roaming:
 	ret = true;
 
 out:
+	if (in_dev)
+		dev_put(in_dev);
 	if (tt_local)
 		batadv_tt_local_entry_free_ref(tt_local);
 	if (tt_global)
-- 
1.8.4

^ permalink raw reply related

* [PATCH 10/16] batman-adv: limit local translation table max size
From: Antonio Quartulli @ 2013-10-23 16:04 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Marek Lindner
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Marek Lindner <lindner_marek@yahoo.de>

The local translation table size is limited by what can be
transferred from one node to another via a full table request.

The number of entries fitting into a full table request depend
on whether the fragmentation is enabled or not. Therefore this
patch introduces a max table size check and refuses to add
more local clients when that size is reached. Moreover, if the
max full table packet size changes (MTU change or fragmentation
is disabled) the local table is downsized instantaneously.

Signed-off-by: Marek Lindner <lindner_marek@yahoo.de>
Acked-by: Antonio Quartulli <ordex@autistici.org>
---
 net/batman-adv/hard-interface.c    |  41 ++++++----
 net/batman-adv/soft-interface.c    |  12 ++-
 net/batman-adv/translation-table.c | 150 +++++++++++++++++++++++++++++++++----
 net/batman-adv/translation-table.h |   3 +-
 net/batman-adv/types.h             |   3 +
 5 files changed, 173 insertions(+), 36 deletions(-)

diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c5f871f..c60d3ed 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -266,16 +266,9 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
 
 int batadv_hardif_min_mtu(struct net_device *soft_iface)
 {
-	const struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
 	const struct batadv_hard_iface *hard_iface;
-	/* allow big frames if all devices are capable to do so
-	 * (have MTU > 1500 + batadv_max_header_len())
-	 */
 	int min_mtu = ETH_DATA_LEN;
-	int max_header_len = batadv_max_header_len();
-
-	if (atomic_read(&bat_priv->fragmentation))
-		goto out;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -286,22 +279,40 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
 		if (hard_iface->soft_iface != soft_iface)
 			continue;
 
-		min_mtu = min_t(int, hard_iface->net_dev->mtu - max_header_len,
-				min_mtu);
+		min_mtu = min_t(int, hard_iface->net_dev->mtu, min_mtu);
 	}
 	rcu_read_unlock();
+
+	atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+	if (atomic_read(&bat_priv->fragmentation) == 0)
+		goto out;
+
+	/* with fragmentation enabled the maximum size of internally generated
+	 * packets such as translation table exchanges or tvlv containers, etc
+	 * has to be calculated
+	 */
+	min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE);
+	min_mtu -= sizeof(struct batadv_frag_packet);
+	min_mtu *= BATADV_FRAG_MAX_FRAGMENTS;
+	atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+	/* with fragmentation enabled we can fragment external packets easily */
+	min_mtu = min_t(int, min_mtu, ETH_DATA_LEN);
+
 out:
-	return min_mtu;
+	return min_mtu - batadv_max_header_len();
 }
 
 /* adjusts the MTU if a new interface with a smaller MTU appeared. */
 void batadv_update_min_mtu(struct net_device *soft_iface)
 {
-	int min_mtu;
+	soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
 
-	min_mtu = batadv_hardif_min_mtu(soft_iface);
-	if (soft_iface->mtu != min_mtu)
-		soft_iface->mtu = min_mtu;
+	/* Check if the local translate table should be cleaned up to match a
+	 * new (and smaller) MTU.
+	 */
+	batadv_tt_local_resize_to_mtu(soft_iface);
 }
 
 static void
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index e70f530..36f0508 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -166,7 +166,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
 	unsigned int header_len = 0;
 	int data_len = skb->len, ret;
 	unsigned long brd_delay = 1;
-	bool do_bcast = false;
+	bool do_bcast = false, client_added;
 	unsigned short vid;
 	uint32_t seqno;
 
@@ -196,9 +196,12 @@ static int batadv_interface_tx(struct sk_buff *skb,
 	ethhdr = (struct ethhdr *)skb->data;
 
 	/* Register the client MAC in the transtable */
-	if (!is_multicast_ether_addr(ethhdr->h_source))
-		batadv_tt_local_add(soft_iface, ethhdr->h_source, vid,
-				    skb->skb_iif);
+	if (!is_multicast_ether_addr(ethhdr->h_source)) {
+		client_added = batadv_tt_local_add(soft_iface, ethhdr->h_source,
+						   vid, skb->skb_iif);
+		if (!client_added)
+			goto dropped;
+	}
 
 	/* don't accept stp packets. STP does not help in meshes.
 	 * better use the bridge loop avoidance ...
@@ -674,6 +677,7 @@ static int batadv_softif_init_late(struct net_device *dev)
 	atomic_set(&bat_priv->log_level, 0);
 #endif
 	atomic_set(&bat_priv->fragmentation, 1);
+	atomic_set(&bat_priv->packet_size_max, ETH_DATA_LEN);
 	atomic_set(&bat_priv->bcast_queue_left, BATADV_BCAST_QUEUE_LEN);
 	atomic_set(&bat_priv->batman_queue_left, BATADV_BATMAN_QUEUE_LEN);
 
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 4c28251..a3c965d 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -401,6 +401,35 @@ static uint16_t batadv_tt_entries(uint16_t tt_len)
 	return tt_len / batadv_tt_len(1);
 }
 
+/**
+ * batadv_tt_local_table_transmit_size - calculates the local translation table
+ *  size when transmitted over the air
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Returns local translation table size in bytes.
+ */
+static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv)
+{
+	uint16_t num_vlan = 0, tt_local_entries = 0;
+	struct batadv_softif_vlan *vlan;
+	int hdr_size;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) {
+		num_vlan++;
+		tt_local_entries += atomic_read(&vlan->tt.num_entries);
+	}
+	rcu_read_unlock();
+
+	/* header size of tvlv encapsulated tt response payload */
+	hdr_size = sizeof(struct batadv_unicast_tvlv_packet);
+	hdr_size += sizeof(struct batadv_tvlv_hdr);
+	hdr_size += sizeof(struct batadv_tvlv_tt_data);
+	hdr_size += num_vlan * sizeof(struct batadv_tvlv_tt_vlan_data);
+
+	return hdr_size + batadv_tt_len(tt_local_entries);
+}
+
 static int batadv_tt_local_init(struct batadv_priv *bat_priv)
 {
 	if (bat_priv->tt.local_hash)
@@ -439,8 +468,10 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv,
  * @vid: VLAN identifier
  * @ifindex: index of the interface where the client is connected to (useful to
  *  identify wireless clients)
+ *
+ * Returns true if the client was successfully added, false otherwise.
  */
-void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
+bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 			 unsigned short vid, int ifindex)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
@@ -448,8 +479,8 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 	struct batadv_tt_global_entry *tt_global;
 	struct hlist_head *head;
 	struct batadv_tt_orig_list_entry *orig_entry;
-	int hash_added;
-	bool roamed_back = false;
+	int hash_added, table_size, packet_size_max;
+	bool ret = false, roamed_back = false;
 
 	tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
 	tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
@@ -484,6 +515,17 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 		goto check_roaming;
 	}
 
+	/* Ignore the client if we cannot send it in a full table response. */
+	table_size = batadv_tt_local_table_transmit_size(bat_priv);
+	table_size += batadv_tt_len(1);
+	packet_size_max = atomic_read(&bat_priv->packet_size_max);
+	if (table_size > packet_size_max) {
+		net_ratelimited_function(batadv_info, soft_iface,
+					 "Local translation table size (%i) exceeds maximum packet size (%i); Ignoring new local tt entry: %pM\n",
+					 table_size, packet_size_max, addr);
+		goto out;
+	}
+
 	tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
 	if (!tt_local)
 		goto out;
@@ -550,11 +592,14 @@ check_roaming:
 		}
 	}
 
+	ret = true;
+
 out:
 	if (tt_local)
 		batadv_tt_local_entry_free_ref(tt_local);
 	if (tt_global)
 		batadv_tt_global_entry_free_ref(tt_global);
+	return ret;
 }
 
 /**
@@ -926,8 +971,16 @@ out:
 	return curr_flags;
 }
 
+/**
+ * batadv_tt_local_purge_list - purge inactive tt local entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @head: pointer to the list containing the local tt entries
+ * @timeout: parameter deciding whether a given tt local entry is considered
+ *  inactive or not
+ */
 static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
-				       struct hlist_head *head)
+				       struct hlist_head *head,
+				       int timeout)
 {
 	struct batadv_tt_local_entry *tt_local_entry;
 	struct batadv_tt_common_entry *tt_common_entry;
@@ -945,8 +998,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
 		if (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)
 			continue;
 
-		if (!batadv_has_timed_out(tt_local_entry->last_seen,
-					  BATADV_TT_LOCAL_TIMEOUT))
+		if (!batadv_has_timed_out(tt_local_entry->last_seen, timeout))
 			continue;
 
 		batadv_tt_local_set_pending(bat_priv, tt_local_entry,
@@ -954,7 +1006,14 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
 	}
 }
 
-static void batadv_tt_local_purge(struct batadv_priv *bat_priv)
+/**
+ * batadv_tt_local_purge - purge inactive tt local entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @timeout: parameter deciding whether a given tt local entry is considered
+ *  inactive or not
+ */
+static void batadv_tt_local_purge(struct batadv_priv *bat_priv,
+				  int timeout)
 {
 	struct batadv_hashtable *hash = bat_priv->tt.local_hash;
 	struct hlist_head *head;
@@ -966,7 +1025,7 @@ static void batadv_tt_local_purge(struct batadv_priv *bat_priv)
 		list_lock = &hash->list_locks[i];
 
 		spin_lock_bh(list_lock);
-		batadv_tt_local_purge_list(bat_priv, head);
+		batadv_tt_local_purge_list(bat_priv, head, timeout);
 		spin_unlock_bh(list_lock);
 	}
 }
@@ -2383,6 +2442,15 @@ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv,
 					req_dst_orig_node);
 	}
 
+	/* Don't send the response, if larger than fragmented packet. */
+	tt_len = sizeof(struct batadv_unicast_tvlv_packet) + tvlv_len;
+	if (tt_len > atomic_read(&bat_priv->packet_size_max)) {
+		net_ratelimited_function(batadv_info, bat_priv->soft_iface,
+					 "Ignoring TT_REQUEST from %pM; Response size exceeds max packet size.\n",
+					 res_dst_orig_node->orig);
+		goto out;
+	}
+
 	tvlv_tt_data->flags = BATADV_TT_RESPONSE;
 	tvlv_tt_data->ttvn = req_ttvn;
 
@@ -2859,7 +2927,7 @@ static void batadv_tt_purge(struct work_struct *work)
 	priv_tt = container_of(delayed_work, struct batadv_priv_tt, work);
 	bat_priv = container_of(priv_tt, struct batadv_priv, tt);
 
-	batadv_tt_local_purge(bat_priv);
+	batadv_tt_local_purge(bat_priv, BATADV_TT_LOCAL_TIMEOUT);
 	batadv_tt_global_purge(bat_priv);
 	batadv_tt_req_purge(bat_priv);
 	batadv_tt_roam_purge(bat_priv);
@@ -2972,18 +3040,18 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
 }
 
 /**
- * batadv_tt_local_commit_changes - commit all pending local tt changes which
- *  have been queued in the time since the last commit
+ * batadv_tt_local_commit_changes_nolock - commit all pending local tt changes
+ *  which have been queued in the time since the last commit
  * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Caller must hold tt->commit_lock.
  */
-void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
+static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
 {
-	spin_lock_bh(&bat_priv->tt.commit_lock);
-
 	if (atomic_read(&bat_priv->tt.local_changes) < 1) {
 		if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
 			batadv_tt_tvlv_container_update(bat_priv);
-		goto out;
+		return;
 	}
 
 	batadv_tt_local_set_flags(bat_priv, BATADV_TT_CLIENT_NEW, false, true);
@@ -3000,8 +3068,17 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
 	/* reset the sending counter */
 	atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX);
 	batadv_tt_tvlv_container_update(bat_priv);
+}
 
-out:
+/**
+ * batadv_tt_local_commit_changes - commit all pending local tt changes which
+ *  have been queued in the time since the last commit
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
+{
+	spin_lock_bh(&bat_priv->tt.commit_lock);
+	batadv_tt_local_commit_changes_nolock(bat_priv);
 	spin_unlock_bh(&bat_priv->tt.commit_lock);
 }
 
@@ -3197,6 +3274,47 @@ out:
 }
 
 /**
+ * batadv_tt_local_resize_to_mtu - resize the local translation table fit the
+ *  maximum packet size that can be transported through the mesh
+ * @soft_iface: netdev struct of the mesh interface
+ *
+ * Remove entries older than 'timeout' and half timeout if more entries need
+ * to be removed.
+ */
+void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
+{
+	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	int packet_size_max = atomic_read(&bat_priv->packet_size_max);
+	int table_size, timeout = BATADV_TT_LOCAL_TIMEOUT / 2;
+	bool reduced = false;
+
+	spin_lock_bh(&bat_priv->tt.commit_lock);
+
+	while (true) {
+		table_size = batadv_tt_local_table_transmit_size(bat_priv);
+		if (packet_size_max >= table_size)
+			break;
+
+		batadv_tt_local_purge(bat_priv, timeout);
+		batadv_tt_local_purge_pending_clients(bat_priv);
+
+		timeout /= 2;
+		reduced = true;
+		net_ratelimited_function(batadv_info, soft_iface,
+					 "Forced to purge local tt entries to fit new maximum fragment MTU (%i)\n",
+					 packet_size_max);
+	}
+
+	/* commit these changes immediately, to avoid synchronization problem
+	 * with the TTVN
+	 */
+	if (reduced)
+		batadv_tt_local_commit_changes_nolock(bat_priv);
+
+	spin_unlock_bh(&bat_priv->tt.commit_lock);
+}
+
+/**
  * batadv_tt_tvlv_ogm_handler_v1 - process incoming tt tvlv container
  * @bat_priv: the bat priv with all the soft interface information
  * @orig: the orig_node of the ogm
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index dc6db4e..026b1ff 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -21,7 +21,7 @@
 #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_
 
 int batadv_tt_init(struct batadv_priv *bat_priv);
-void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
+bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
 			 unsigned short vid, int ifindex);
 uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
 				const uint8_t *addr, unsigned short vid,
@@ -45,6 +45,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
 					uint8_t *addr, unsigned short vid);
 bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv,
 				       uint8_t *addr, unsigned short vid);
+void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface);
 bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
 					  struct batadv_orig_node *orig_node,
 					  const unsigned char *addr,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 672a813..04b6b0b 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -612,6 +612,8 @@ struct batadv_softif_vlan {
  * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
  * @bonding: bool indicating whether traffic bonding is enabled
  * @fragmentation: bool indicating whether traffic fragmentation is enabled
+ * @packet_size_max: max packet size that can be transmitted via
+ *  multiple fragmented skbs or a single frame if fragmentation is disabled
  * @frag_seqno: incremental counter to identify chains of egress fragments
  * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
  *  enabled
@@ -658,6 +660,7 @@ struct batadv_priv {
 	atomic_t aggregated_ogms;
 	atomic_t bonding;
 	atomic_t fragmentation;
+	atomic_t packet_size_max;
 	atomic_t frag_seqno;
 #ifdef CONFIG_BATMAN_ADV_BLA
 	atomic_t bridge_loop_avoidance;
-- 
1.8.4

^ permalink raw reply related

* [PATCH 15/16] batman-adv: Start new development cycle
From: Antonio Quartulli @ 2013-10-23 16:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, b.a.t.m.a.n, Simon Wunderlich, Antonio Quartulli
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Simon Wunderlich <sw@simonwunderlich.de>

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
---
 net/batman-adv/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 773301a..f94f287b 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.4.0"
+#define BATADV_SOURCE_VERSION "2013.5.0"
 #endif
 
 /* B.A.T.M.A.N. parameters */
-- 
1.8.4

^ permalink raw reply related

* [PATCH 16/16] batman-adv: generalize batman-adv icmp packet handling
From: Antonio Quartulli @ 2013-10-23 16:05 UTC (permalink / raw)
  To: davem
  Cc: netdev, b.a.t.m.a.n, Simon Wunderlich, Marek Lindner,
	Antonio Quartulli
In-Reply-To: <1382544303-2694-1-git-send-email-antonio@meshcoding.com>

From: Simon Wunderlich <sw@simonwunderlich.de>

Instead of handling icmp packets only up to length of icmp_packet_rr,
the code should handle any icmp length size. Therefore the length
truncating is moved to when the packet is actually sent to userspace
(this does not support lengths longer than icmp_packet_rr yet). Longer
packets are forwarded without truncating.

This patch also cleans up some parts where the icmp header struct could
be used instead of other icmp_packet(_rr) structs to make the code more
readable.

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <antonio@meshcoding.com>
---
 net/batman-adv/icmp_socket.c | 128 +++++++++++++++++++++++++++----------------
 net/batman-adv/icmp_socket.h |   2 +-
 net/batman-adv/packet.h      |   2 +
 net/batman-adv/routing.c     | 120 ++++++++++++++++++++++++----------------
 net/batman-adv/types.h       |   2 +-
 5 files changed, 157 insertions(+), 97 deletions(-)

diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 82ac647..29ae4ef 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -29,7 +29,7 @@
 static struct batadv_socket_client *batadv_socket_client_hash[256];
 
 static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
-				     struct batadv_icmp_packet_rr *icmp_packet,
+				     struct batadv_icmp_header *icmph,
 				     size_t icmp_len);
 
 void batadv_socket_init(void)
@@ -155,13 +155,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
 	struct batadv_priv *bat_priv = socket_client->bat_priv;
 	struct batadv_hard_iface *primary_if = NULL;
 	struct sk_buff *skb;
-	struct batadv_icmp_packet_rr *icmp_packet;
-
+	struct batadv_icmp_packet_rr *icmp_packet_rr;
+	struct batadv_icmp_header *icmp_header;
 	struct batadv_orig_node *orig_node = NULL;
 	struct batadv_neigh_node *neigh_node = NULL;
 	size_t packet_len = sizeof(struct batadv_icmp_packet);
 
-	if (len < sizeof(struct batadv_icmp_packet)) {
+	if (len < sizeof(struct batadv_icmp_header)) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Error - can't send packet from char device: invalid packet size\n");
 		return -EINVAL;
@@ -174,8 +174,10 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
 		goto out;
 	}
 
-	if (len >= sizeof(struct batadv_icmp_packet_rr))
-		packet_len = sizeof(struct batadv_icmp_packet_rr);
+	if (len >= BATADV_ICMP_MAX_PACKET_SIZE)
+		packet_len = BATADV_ICMP_MAX_PACKET_SIZE;
+	else
+		packet_len = len;
 
 	skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN);
 	if (!skb) {
@@ -185,67 +187,78 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
 
 	skb->priority = TC_PRIO_CONTROL;
 	skb_reserve(skb, ETH_HLEN);
-	icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
+	icmp_header = (struct batadv_icmp_header *)skb_put(skb, packet_len);
 
-	if (copy_from_user(icmp_packet, buff, packet_len)) {
+	if (copy_from_user(icmp_header, buff, packet_len)) {
 		len = -EFAULT;
 		goto free_skb;
 	}
 
-	if (icmp_packet->icmph.header.packet_type != BATADV_ICMP) {
+	if (icmp_header->header.packet_type != BATADV_ICMP) {
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n");
 		len = -EINVAL;
 		goto free_skb;
 	}
 
-	if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
+	switch (icmp_header->msg_type) {
+	case BATADV_ECHO_REQUEST:
+		if (len < sizeof(struct batadv_icmp_packet)) {
+			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+				   "Error - can't send packet from char device: invalid packet size\n");
+			len = -EINVAL;
+			goto free_skb;
+		}
+
+		if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+			goto dst_unreach;
+
+		orig_node = batadv_orig_hash_find(bat_priv, icmp_header->dst);
+		if (!orig_node)
+			goto dst_unreach;
+
+		neigh_node = batadv_orig_node_get_router(orig_node);
+		if (!neigh_node)
+			goto dst_unreach;
+
+		if (!neigh_node->if_incoming)
+			goto dst_unreach;
+
+		if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE)
+			goto dst_unreach;
+
+		icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmp_header;
+		if (packet_len == sizeof(*icmp_packet_rr))
+			memcpy(icmp_packet_rr->rr,
+			       neigh_node->if_incoming->net_dev->dev_addr,
+			       ETH_ALEN);
+
+		break;
+	default:
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n");
+			   "Error - can't send packet from char device: got unknown message type\n");
 		len = -EINVAL;
 		goto free_skb;
 	}
 
-	icmp_packet->icmph.uid = socket_client->index;
+	icmp_header->uid = socket_client->index;
 
-	if (icmp_packet->icmph.header.version != BATADV_COMPAT_VERSION) {
-		icmp_packet->icmph.msg_type = BATADV_PARAMETER_PROBLEM;
-		icmp_packet->icmph.header.version = BATADV_COMPAT_VERSION;
-		batadv_socket_add_packet(socket_client, icmp_packet,
+	if (icmp_header->header.version != BATADV_COMPAT_VERSION) {
+		icmp_header->msg_type = BATADV_PARAMETER_PROBLEM;
+		icmp_header->header.version = BATADV_COMPAT_VERSION;
+		batadv_socket_add_packet(socket_client, icmp_header,
 					 packet_len);
 		goto free_skb;
 	}
 
-	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
-		goto dst_unreach;
-
-	orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
-	if (!orig_node)
-		goto dst_unreach;
-
-	neigh_node = batadv_orig_node_get_router(orig_node);
-	if (!neigh_node)
-		goto dst_unreach;
-
-	if (!neigh_node->if_incoming)
-		goto dst_unreach;
-
-	if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE)
-		goto dst_unreach;
-
-	memcpy(icmp_packet->icmph.orig,
-	       primary_if->net_dev->dev_addr, ETH_ALEN);
-
-	if (packet_len == sizeof(struct batadv_icmp_packet_rr))
-		memcpy(icmp_packet->rr,
-		       neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN);
+	memcpy(icmp_header->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
 
 	batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
 	goto out;
 
 dst_unreach:
-	icmp_packet->icmph.msg_type = BATADV_DESTINATION_UNREACHABLE;
-	batadv_socket_add_packet(socket_client, icmp_packet, packet_len);
+	icmp_header->msg_type = BATADV_DESTINATION_UNREACHABLE;
+	batadv_socket_add_packet(socket_client, icmp_header, packet_len);
 free_skb:
 	kfree_skb(skb);
 out:
@@ -298,27 +311,40 @@ err:
 	return -ENOMEM;
 }
 
+/**
+ * batadv_socket_receive_packet - schedule an icmp packet to be sent to userspace
+ *  on an icmp socket.
+ * @socket_client: the socket this packet belongs to
+ * @icmph: pointer to the header of the icmp packet
+ * @icmp_len: total length of the icmp packet
+ */
 static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
-				     struct batadv_icmp_packet_rr *icmp_packet,
+				     struct batadv_icmp_header *icmph,
 				     size_t icmp_len)
 {
 	struct batadv_socket_packet *socket_packet;
+	size_t len;
 
 	socket_packet = kmalloc(sizeof(*socket_packet), GFP_ATOMIC);
 
 	if (!socket_packet)
 		return;
 
+	len = icmp_len;
+	/* check the maximum length before filling the buffer */
+	if (len > sizeof(socket_packet->icmp_packet))
+		len = sizeof(socket_packet->icmp_packet);
+
 	INIT_LIST_HEAD(&socket_packet->list);
-	memcpy(&socket_packet->icmp_packet, icmp_packet, icmp_len);
-	socket_packet->icmp_len = icmp_len;
+	memcpy(&socket_packet->icmp_packet, icmph, len);
+	socket_packet->icmp_len = len;
 
 	spin_lock_bh(&socket_client->lock);
 
 	/* while waiting for the lock the socket_client could have been
 	 * deleted
 	 */
-	if (!batadv_socket_client_hash[icmp_packet->icmph.uid]) {
+	if (!batadv_socket_client_hash[icmph->uid]) {
 		spin_unlock_bh(&socket_client->lock);
 		kfree(socket_packet);
 		return;
@@ -342,12 +368,18 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
 	wake_up(&socket_client->queue_wait);
 }
 
-void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet,
+/**
+ * batadv_socket_receive_packet - schedule an icmp packet to be received
+ *  locally and sent to userspace.
+ * @icmph: pointer to the header of the icmp packet
+ * @icmp_len: total length of the icmp packet
+ */
+void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
 				  size_t icmp_len)
 {
 	struct batadv_socket_client *hash;
 
-	hash = batadv_socket_client_hash[icmp_packet->icmph.uid];
+	hash = batadv_socket_client_hash[icmph->uid];
 	if (hash)
-		batadv_socket_add_packet(hash, icmp_packet, icmp_len);
+		batadv_socket_add_packet(hash, icmph, icmp_len);
 }
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 1fcca37..6665080 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -24,7 +24,7 @@
 
 void batadv_socket_init(void);
 int batadv_socket_setup(struct batadv_priv *bat_priv);
-void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet,
+void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
 				  size_t icmp_len);
 
 #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 843b96a..207459b 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -239,6 +239,8 @@ struct batadv_icmp_packet_rr {
 	uint8_t  rr[BATADV_RR_LEN][ETH_ALEN];
 };
 
+#define BATADV_ICMP_MAX_PACKET_SIZE	sizeof(struct batadv_icmp_packet_rr)
+
 /* All packet headers in front of an ethernet header have to be completely
  * divisible by 2 but not by 4 to make the payload after the ethernet
  * header again 4 bytes boundary aligned.
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 71fba14..d4114d7 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -260,47 +260,65 @@ bool batadv_check_management_packet(struct sk_buff *skb,
 	return true;
 }
 
+/**
+ * batadv_recv_my_icmp_packet - receive an icmp packet locally
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: icmp packet to process
+ *
+ * Returns NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP
+ * otherwise.
+ */
 static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
-				      struct sk_buff *skb, size_t icmp_len)
+				      struct sk_buff *skb)
 {
 	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_orig_node *orig_node = NULL;
-	struct batadv_icmp_packet_rr *icmp_packet;
-	int ret = NET_RX_DROP;
+	struct batadv_icmp_header *icmph;
+	int res, ret = NET_RX_DROP;
 
-	icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+	icmph = (struct batadv_icmp_header *)skb->data;
 
-	/* add data to device queue */
-	if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
-		batadv_socket_receive_packet(icmp_packet, icmp_len);
-		goto out;
-	}
+	switch (icmph->msg_type) {
+	case BATADV_ECHO_REPLY:
+	case BATADV_DESTINATION_UNREACHABLE:
+	case BATADV_TTL_EXCEEDED:
+		/* receive the packet */
+		if (skb_linearize(skb) < 0)
+			break;
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
+		batadv_socket_receive_packet(icmph, skb->len);
+		break;
+	case BATADV_ECHO_REQUEST:
+		/* answer echo request (ping) */
+		primary_if = batadv_primary_if_get_selected(bat_priv);
+		if (!primary_if)
+			goto out;
 
-	/* answer echo request (ping) */
-	/* get routing information */
-	orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig);
-	if (!orig_node)
-		goto out;
+		/* get routing information */
+		orig_node = batadv_orig_hash_find(bat_priv, icmph->orig);
+		if (!orig_node)
+			goto out;
 
-	/* create a copy of the skb, if needed, to modify it. */
-	if (skb_cow(skb, ETH_HLEN) < 0)
-		goto out;
+		/* create a copy of the skb, if needed, to modify it. */
+		if (skb_cow(skb, ETH_HLEN) < 0)
+			goto out;
 
-	icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+		icmph = (struct batadv_icmp_header *)skb->data;
 
-	memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN);
-	memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr,
-	       ETH_ALEN);
-	icmp_packet->icmph.msg_type = BATADV_ECHO_REPLY;
-	icmp_packet->icmph.header.ttl = BATADV_TTL;
+		memcpy(icmph->dst, icmph->orig, ETH_ALEN);
+		memcpy(icmph->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
+		icmph->msg_type = BATADV_ECHO_REPLY;
+		icmph->header.ttl = BATADV_TTL;
 
-	if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
-		ret = NET_RX_SUCCESS;
+		res = batadv_send_skb_to_orig(skb, orig_node, NULL);
+		if (res != NET_XMIT_DROP)
+			ret = NET_RX_SUCCESS;
 
+		break;
+	default:
+		/* drop unknown type */
+		goto out;
+	}
 out:
 	if (primary_if)
 		batadv_hardif_free_ref(primary_if);
@@ -363,16 +381,13 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 			    struct batadv_hard_iface *recv_if)
 {
 	struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
-	struct batadv_icmp_packet_rr *icmp_packet;
+	struct batadv_icmp_header *icmph;
+	struct batadv_icmp_packet_rr *icmp_packet_rr;
 	struct ethhdr *ethhdr;
 	struct batadv_orig_node *orig_node = NULL;
-	int hdr_size = sizeof(struct batadv_icmp_packet);
+	int hdr_size = sizeof(struct batadv_icmp_header);
 	int ret = NET_RX_DROP;
 
-	/* we truncate all incoming icmp packets if they don't match our size */
-	if (skb->len >= sizeof(struct batadv_icmp_packet_rr))
-		hdr_size = sizeof(struct batadv_icmp_packet_rr);
-
 	/* drop packet if it has not necessary minimum size */
 	if (unlikely(!pskb_may_pull(skb, hdr_size)))
 		goto out;
@@ -391,28 +406,39 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 	if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
 		goto out;
 
-	icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+	icmph = (struct batadv_icmp_header *)skb->data;
 
 	/* add record route information if not full */
-	if ((icmp_packet->icmph.msg_type == BATADV_ECHO_REPLY ||
-	     icmp_packet->icmph.msg_type == BATADV_ECHO_REQUEST) &&
-	    (hdr_size == sizeof(struct batadv_icmp_packet_rr)) &&
-	    (icmp_packet->rr_cur < BATADV_RR_LEN)) {
-		memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]),
+	if ((icmph->msg_type == BATADV_ECHO_REPLY ||
+	     icmph->msg_type == BATADV_ECHO_REQUEST) &&
+	    (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+		if (skb_linearize(skb) < 0)
+			goto out;
+
+		/* create a copy of the skb, if needed, to modify it. */
+		if (skb_cow(skb, ETH_HLEN) < 0)
+			goto out;
+
+		icmph = (struct batadv_icmp_header *)skb->data;
+		icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
+		if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
+			goto out;
+
+		memcpy(&(icmp_packet_rr->rr[icmp_packet_rr->rr_cur]),
 		       ethhdr->h_dest, ETH_ALEN);
-		icmp_packet->rr_cur++;
+		icmp_packet_rr->rr_cur++;
 	}
 
 	/* packet for me */
-	if (batadv_is_my_mac(bat_priv, icmp_packet->icmph.dst))
-		return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size);
+	if (batadv_is_my_mac(bat_priv, icmph->dst))
+		return batadv_recv_my_icmp_packet(bat_priv, skb);
 
 	/* TTL exceeded */
-	if (icmp_packet->icmph.header.ttl < 2)
+	if (icmph->header.ttl < 2)
 		return batadv_recv_icmp_ttl_exceeded(bat_priv, skb);
 
 	/* get routing information */
-	orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
+	orig_node = batadv_orig_hash_find(bat_priv, icmph->dst);
 	if (!orig_node)
 		goto out;
 
@@ -420,10 +446,10 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 	if (skb_cow(skb, ETH_HLEN) < 0)
 		goto out;
 
-	icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
+	icmph = (struct batadv_icmp_header *)skb->data;
 
 	/* decrement ttl */
-	icmp_packet->icmph.header.ttl--;
+	icmph->header.ttl--;
 
 	/* route it */
 	if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 3c21162..91dd369 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -749,7 +749,7 @@ struct batadv_socket_client {
 struct batadv_socket_packet {
 	struct list_head list;
 	size_t icmp_len;
-	struct batadv_icmp_packet_rr icmp_packet;
+	uint8_t icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE];
 };
 
 /**
-- 
1.8.4

^ permalink raw reply related

* Re: Neterion and UFO handling [was: Re: [PATCH] ipv6: udp packets following an UFO enqueued packet need also be handled by UFO]
From: Jon Mason @ 2013-10-23 16:35 UTC (permalink / raw)
  To: Jon Mason, Jiri Pirko, netdev, yoshfuji, David Miller,
	Alexey Kuznetsov, jmorris, Patrick McHardy, Herbert Xu,
	Eric Dumazet
In-Reply-To: <20131017044551.GE18135@order.stressinduktion.org>

On Wed, Oct 16, 2013 at 9:45 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> Hi Jon and Jiri!
>
> Just wanted to remind you if you could have a look at this?
>
> If you don't have time to test this may I know your assessment of the
> situation? I could send a compile-time tested patch to disable UFO or if you
> say so we could leave this as is.

So, bad news.  My Xframe 2 adapter (the only variety that does UFO
offload) won't fit in a standard PCI(32) slot.  Since my PCI-X system
at home is faulty (I'm trying to fix it , but it won't be in the time
frame you want), there is no way for me to test it on the hardware.
Terribly sorry.

I am fine with this patch going out, since UFO is off by default.
I'll handle any issues once they are discovered.  Alternatively, we
could just kill UFO and make everyone's lives easier.

Thanks,
Jon

> Jiri, I would suggest you resend your patches then.
>
> Thanks,
>
>   Hannes
>
> [top-posted by intention]
>
> On Tue, Oct 08, 2013 at 04:53:31PM +0200, Hannes Frederic Sowa wrote:
>> On Tue, Oct 08, 2013 at 01:07:29AM -0700, Jon Mason wrote:
>> > On Wed, Oct 2, 2013 at 9:27 AM, Hannes Frederic Sowa
>> > <hannes@stressinduktion.org> wrote:
>> > > Hi!
>> > >
>> > > I have a question regarding UFO and the neterion driver, which as the only one
>> > > advertises hardware UFO support:
>> > >
>> > > The patch discusses in this thread
>> > > http://thread.gmane.org/gmane.linux.network/284348/focus=285405 could change
>> > > some semantics how packets are constructed before submitted to the driver.
>> > >
>> > > We currently guarantee that we have the MAC/IP/UDP header in skb->data and the
>> > > payload is attached in the skb's frags. With the changes discussed in this
>> > > thread it is possible that we also append to skb->data some amount of data
>> > > which is not targeted for the header. From reading the driver sources it seems
>> > > the hardware interprets the skb->data to skb_headlen as the header, so we
>> > > could include some data in the fragments more than once.
>> >
>> > From my reading of the HW Spec and a quick look at the driver, it
>> > appears that the driver is using one entry in the TX ring for the
>> > header and another for the body of the packet to be fragmented (which
>> > is what the hardware wants).  I don't understand what you are saying,
>> > but if you are asking if simply appending a new header & data to the
>> > end of skb->data will get it out on the wire correct, I don't believe
>> > it will.
>>
>> No this is not what I tried to say. I'll try to be more clear this
>> time. ;)
>>
>> We start with an UDP socket which is corked. As soon as we write the
>> first few bytes (smaller than the mtu) onto this socket we put the
>> header in place and the rest of the data is just appended behind the
>> header directly in skb->data via plain ip_append_data.
>>
>> Now a second write with a length > mtu happens: The ip(6)_append_data
>> will branch to ufo_append. This will fetch the first skb and append
>> to skb->frags.  gso_type and gso_size will be updated on this skb (this
>> currently does not happen but will with the patches discussed in this
>> thread).
>>
>> If this packet is transmitted down to the device driver we have the udp
>> header in skb->data *and* also the payload from the first write. The
>> payload from the second write is appended as a frag and gso_type and
>> gso_size are set. This header+payload seem to be mapped just after the
>> ufo_in_band_v descriptor as the header in the first tx descriptor:
>>
>>    4174         txdp->Buffer_Pointer = pci_map_single(sp->pdev, skb->data,
>>    4175                                               frg_len, PCI_DMA_TODEVICE);
>>
>> frg_len is set to skb_headlen(skb). This happens right after setting up
>> the descriptor for the in-band ufo data.
>>
>> My guess is that this data isn't split currently by the neterion driver
>> (at least I could not find it in the driver as Eric showed it for bnx2x)
>> so it might reappear in the packets when the hardware fragments the
>> packet and places the first tx ring in front of every packet.
>>
>> Before these changes we never updated the gso_type and gso_size even when
>> we did append via UFO. So we never had payload in an UFO marked skb->data,
>> only the headers. Now we could also end up with a some payload in the
>> first TX ring, which you said is only for the header.
>>
>> > I do have hardware that I can try the patch on, if you can walk me
>> > through the use case (unless it is as easy as setup an IPv6 connection
>> > and ping).
>>
>> Ok, testing this should not be that complicated:
>>
>> We can test this with plain IPv4/UDP sockets. I would suggest a net-next kernel
>> with this patch from Jiri applied: http://patchwork.ozlabs.org/patch/279691/
>>
>> --- >8 ---
>> #include <sys/types.h>
>> #include <sys/socket.h>
>> #include <netinet/in.h>
>> #include <arpa/inet.h>
>> #include <linux/udp.h>
>> #include <stdio.h>
>>
>> int test(int mtu)
>> {
>>         int fd;
>>         const int one = 1;
>>         const int off = 0;
>>         struct sockaddr_in addr = {.sin_family = AF_INET, .sin_port = htons(53) };
>>         unsigned char buffer[3701];
>>
>>         inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);
>>
>>         fd = socket(AF_INET, SOCK_DGRAM, 0);
>>         connect(fd, (struct sockaddr *) &addr, sizeof(addr));
>>
>>         setsockopt(fd, IPPROTO_UDP, UDP_CORK, &one, sizeof(one));
>>
>>         write(fd, "    ", 4);
>>         write(fd, buffer, sizeof(buffer));
>>         write(fd, " ", 1);
>>
>>         setsockopt(fd, IPPROTO_UDP, UDP_CORK, &off, sizeof(off));
>>
>>         close(fd);
>> }
>>
>> int main() {
>>         test(1280);
>> }
>> --- >8 ---
>>
>> I left out error handling so it is better observed with strace if
>> something went wrong.
>>
>> You should change the port number and ip address to something reasonable
>> for your network. My guess would be that the spaces (0x20) of the first
>> write is now placed between UDP header and payload of every packet
>> fragmented by the hardware. Would be nice to hear that I am wrong. ;)
>>
>> Be aware that the above program can cause memory corruption in the kernel
>> if you did not apply Jiri's patch.
>>
>> Thanks for helping!
>>
>>   Hannes
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>
> --
> gruss,
>
>   Hannes
>

^ permalink raw reply

* Re: [PATCH 3/3] netfilter: x_tables: fix ordering of jumpstack allocation and table update
From: Will Deacon @ 2013-10-23 16:37 UTC (permalink / raw)
  To: David Laight
  Cc: Pablo Neira Ayuso, netfilter-devel@vger.kernel.org,
	davem@davemloft.net, netdev@vger.kernel.org
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B73A2@saturn3.aculab.com>

Hi David,

On Wed, Oct 23, 2013 at 10:45:04AM +0100, David Laight wrote:
> > Subject: [PATCH 3/3] netfilter: x_tables: fix ordering of jumpstack allocation and table update
> ...
> > Meanwhile, CPU0 is handling the network receive path and ends up in
> > ipt_do_table, resulting in:
> > 
> > 	private = table->private;
> > 
> > 	[...]
> > 
> > 	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
> > 
> > On weakly ordered memory architectures, the writes to table->private
> > and newinfo->jumpstack from CPU1 can be observed out of order by CPU0.
> > Furthermore, on architectures which don't respect ordering of address
> > dependencies (i.e. Alpha), the reads from CPU0 can also be re-ordered.
> 
> Which reads might be out of order?
> AFAICT they are strongly sequenced because they second depends on the
> value read by the first.
> So I don't see why the read barrier is needed.

That is why this is a dependent read barrier. Some architectures (e.g.
Alpha) *do* allow dependent reads to be observed out of order, so you can
effectively load the data pointed to by a pointer before you load the
pointer itself!

Take a look at Paul's paper about memory ordering if you're curious:

  http://www.rdrop.com/users/paulmck/scalability/paper/whymb.2009.04.05a.pdf

> > -	table->private = newinfo;
> >  	newinfo->initial_entries = private->initial_entries;
> > +	/*
> > +	 * Ensure contents of newinfo are visible before assigning to
> > +	 * private.
> > +	 */
> > +	smp_wmb();
> > +	table->private = newinfo;
> 
> Those writes were in the wrong order on all systems.
> Also gcc needs to be told not to reorder the writes even on non-smp
> systems (if the code might be pre-empted).
> So an asm volatile (:::"memory") is needed there even if no specific
> synchronisation instruction is needed.

The smp_* barriers expand to barrier() when !CONFIG_SMP, which gives you the
memory clobber you want.

What I'm *not* 100% sure about is the table freeing path. There is a mutex
there for removing the table from a list, but I'm not sure how we ensure
that there are no parallel readers at that point.

Will

^ permalink raw reply

* Re: [PATCH v5] net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)
From: Joe Perches @ 2013-10-23 16:52 UTC (permalink / raw)
  To: Arvid Brodin
  Cc: netdev@vger.kernel.org, David Miller, Stephen Hemminger,
	Javier Boticario, balferreira@googlemail.com,
	Elías Molina Muñoz
In-Reply-To: <5267F4BD.70409@xdin.com>

On Wed, 2013-10-23 at 18:09 +0200, Arvid Brodin wrote:
> High-availability Seamless Redundancy ("HSR") provides instant failover
> redundancy for Ethernet networks. It requires a special network topology where
> all nodes are connected in a ring (each node having two physical network
> interfaces). It is suited for applications that demand high availability and
> very short reaction time.

trivia: (can be ignored/fixed later)

> +static void restore_slaves(struct net_device *hsr_dev)
> +{
> +	struct hsr_priv *hsr_priv;
> +	int i;
> +	int res;
> +
> +	hsr_priv = netdev_priv(hsr_dev);
> +
> +	rtnl_lock();
> +
> +	/* Restore promiscuity */
> +	for (i = 0; i < 2; i++) {

I presume all of these for slave loops that use
for (i = 0; i < 2; i++) should be i < HSR_MAX_SLAVE

Maybe it'd be useful to add a foreach_slave() helper.

> +static struct node_entry *find_node_by_AddrA(struct list_head *node_db,
> +					     const unsigned char addr[ETH_ALEN])
> +{
> +	struct node_entry *node;
> +
> +	list_for_each_entry_rcu(node, node_db, mac_list) {
> +		if (!compare_ether_addr(node->MacAddressA, addr))

Please use ether_addr_equal instead for all these uses.
compare_ether_addr should be removed one day.

> +static struct node_entry *find_node_by_AddrB(struct list_head *node_db,
> +					     const unsigned char addr[ETH_ALEN])
[]
> +		if (!compare_ether_addr(node->MacAddressB, addr))
[]
> +struct node_entry *hsr_find_node(struct list_head *node_db, struct sk_buff *skb)
[]
> +		if (!compare_ether_addr(node->MacAddressA, ethhdr->h_source))
> +			return node;
> +		if (!compare_ether_addr(node->MacAddressB, ethhdr->h_source))
> +			return node;

> +struct node_entry *hsr_merge_node(struct hsr_priv *hsr_priv,
> +				  struct node_entry *node,
> +				  struct sk_buff *skb,
> +				  enum hsr_dev_idx dev_idx)
[]
> +	if (node && compare_ether_addr(node->MacAddressA, hsr_sp->MacAddressA)) {
[]
> +	if (node && (dev_idx == node->AddrB_if) &&
> +	    compare_ether_addr(node->MacAddressB, hsr_ethsup->ethhdr.h_source)) {
[]
> +	if (node && (dev_idx != node->AddrB_if) &&
> +	    (node->AddrB_if != HSR_DEV_NONE) &&
> +	    compare_ether_addr(node->MacAddressA, hsr_ethsup->ethhdr.h_source)) {
[]
> +	if (compare_ether_addr(hsr_sp->MacAddressA, hsr_ethsup->ethhdr.h_source))

[]

> +/* above(a, b) - return 1 if a > b, 0 otherwise.
> + */
> +static bool above(u16 a, u16 b)
> +{
> +	/* Remove inconsistency where above(a, b) == below(a, b) */
> +	if ((int) b - a == 32768)
> +		return 0;
> +
> +	return (((s16) (b - a)) < 0);
> +}
> +#define below(a, b)		above((b), (a))
> +#define above_or_eq(a, b)	(!below((a), (b)))
> +#define below_or_eq(a, b)	(!above((a), (b)))

This looks odd.  Perhaps

static bool above(u16 a, u16 b)
{
	return a > b;
}
#define below(a, b) above(b, a)

static bool above_or_eq(u16 a, u16 b)
{
	return a >= b;
}
#define below_or_eq(a, b) above_or_eq(b, a)

^ permalink raw reply

* Re: [PATCH 03/16] batman-adv: add bat_orig_print API function
From: Joe Perches @ 2013-10-23 17:00 UTC (permalink / raw)
  To: Antonio Quartulli
  Cc: davem, netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382544303-2694-4-git-send-email-antonio@meshcoding.com>

On Wed, 2013-10-23 at 18:04 +0200, Antonio Quartulli wrote:
> Each routing protocol has its own metric and private
> variables, therefore it is useful to introduce a new API
> for originator information printing.
> 
> This API needs to be implemented by each protocol in order
> to provide its specific originator table output.
[]
> +static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
> +				     struct seq_file *seq)
[]
> +	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
> +		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
> +		   "Nexthop", "outgoingIF", "Potential nexthops");

This header printf really doesn't add much with the formatting sizes.
It's pretty obscure why some of these are sized and others not sized.
For instance: %-15s doesn't refer to a mac address size.
Perhaps it'd be better to just emit the fixed string just using
BATADV_TO_MAX_VALUE.  It'd also be easier to find via grep.

^ permalink raw reply

* Fwd: Re: Big performance loss from 3.4.63 to 3.10.13 when routing ipv4
From: Wolfgang Walter @ 2013-10-23 17:01 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, hannes, klassert

Am Mittwoch, 23. Oktober 2013, 09:17:30 schrieben Sie:
> On Wed, 2013-10-23 at 17:57 +0200, Wolfgang Walter wrote:
> > Am Mittwoch, 23. Oktober 2013, 05:00:01 schrieb Eric Dumazet:
> > > On Wed, 2013-10-23 at 13:33 +0200, Wolfgang Walter wrote:
> > > > I don't know what this value actually means. But on 3.4.x it is much
> > > > higher. On a machine with 512MB ram it is 32768, on a machine with 1GB
> > > > ram it is 262144 and with 16GB ram it is 4194304.
> > > 
> > > Such huge values should not be needed. We should have at most one dst
> > > per packet in flight.
> > > 
> > > On a loaded router, a NIC not using BQL could queue around 16,000
> > > packets.
> > > 
> > > Of course, Qdisc layers could also store a lot of packets, but using the
> > > default pfifo_fast is only adding 1000 packets per interface.
> > > 
> > > I guess using 65536 as the default value should be safe and reasonable
> > > 
> > > Have you tried using 32768 or 65536 ?
> > 
> > I use 32768 on routers with 512MB. They usually have around 50
> > ipsec-tunnels and only about 10 interfaces including vlan-interfaces.
> > 
> > On larger ones I set the bigger values from 3.4.x. That one with 16GB has
> > about 2000 ipsec-tunnels. It has about 80 interfaces (inlcuding vlan-
> > interfaces). Physically it has 8 interfaces with together about 30
> > hardware- queues.
> 
> offlist
> 
> OK, but you do not have the idea of actual number of entries ?
> 
> If you use SLAB, you probably can try "grep dst /proc/slabinfo"

Ah, ok. I use SLUB, but SLABINFO=y.

Without much traffic it is:

# grep dst /proc/slabinfo
xfrm_dst_cache      4435   4608    448   36    4 : tunables    0    0    0 : 
slabdata    128    128      0

on the big one. 

I can recompile the kernels with SLAB instead of SLUB if SLAB gives more 
usefull infos.

Regards,
-- 
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts

^ permalink raw reply

* Re: [PATCH 3/3] netfilter: x_tables: fix ordering of jumpstack allocation and table update
From: Eric Dumazet @ 2013-10-23 17:04 UTC (permalink / raw)
  To: Will Deacon
  Cc: David Laight, Pablo Neira Ayuso, netfilter-devel@vger.kernel.org,
	davem@davemloft.net, netdev@vger.kernel.org
In-Reply-To: <20131023163706.GA5117@mudshark.cambridge.arm.com>

On Wed, 2013-10-23 at 17:37 +0100, Will Deacon wrote:

> What I'm *not* 100% sure about is the table freeing path. There is a mutex
> there for removing the table from a list, but I'm not sure how we ensure
> that there are no parallel readers at that point.

Sequence is :

xt_replace_table();
get_counters();
xt_free_table_info();

get_counters() is the way we ensure no cpu is using old copy of the
table before freeing.





^ permalink raw reply

* Re: [PATCH 03/16] batman-adv: add bat_orig_print API function
From: Antonio Quartulli @ 2013-10-23 17:18 UTC (permalink / raw)
  To: Joe Perches; +Cc: davem, netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382547630.22433.23.camel@joe-AO722>

[-- Attachment #1: Type: text/plain, Size: 2648 bytes --]

On Wed, Oct 23, 2013 at 10:00:30AM -0700, Joe Perches wrote:
> On Wed, 2013-10-23 at 18:04 +0200, Antonio Quartulli wrote:
> > Each routing protocol has its own metric and private
> > variables, therefore it is useful to introduce a new API
> > for originator information printing.
> > 
> > This API needs to be implemented by each protocol in order
> > to provide its specific originator table output.
> []
> > +static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
> > +				     struct seq_file *seq)
> []
> > +	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
> > +		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
> > +		   "Nexthop", "outgoingIF", "Potential nexthops");
> 
> This header printf really doesn't add much with the formatting sizes.
> It's pretty obscure why some of these are sized and others not sized.
> For instance: %-15s doesn't refer to a mac address size.
> Perhaps it'd be better to just emit the fixed string just using
> BATADV_TO_MAX_VALUE.  It'd also be easier to find via grep.

This string is printed out of a debugfs file and follows a format that we have
been using for long time (this patch is just moving this code from one point to
another).

I didn't get what you mean with BATADV_TO_MAX_VALUE (maybe you meant
BATADV_TQ_MAX_VALUE? but even in this case I don't get it).

The sized parameters in the format string are such that this header is correctly
aligned with the entries of the table that follow.
Moreover we tried to keep this table human readable as much as possible.

Here you have an example of what we get (this output is not printed with exactly
that format string because it belongs to an older batman-adv version)

[B.A.T.M.A.N. adv 2013.3.0, MainIF/MAC: mesh0/xx:xx:xx:xx:xx:xx (bat0)]
  Originator      last-seen (#/255)           Nexthop [outgoingIF]:   Potential nexthops ...
             yyyy    4.720s   (247)              yyyy [     mesh0]:             aaaaa (221)            bbbbbb (200)
            aaaaa    4.270s   (237)             ddddd [     mesh0]:              aaaa (195)            vvvvvv (215)
           bbbbbb    4.480s   (248)            cccccc [     mesh0]:              bbbb (213)            cccccc (200)
            ccccc    0.480s   (255)             bbbbb [     mesh0]:              cccc (217)            dddddd (190)
           dddddd    1.050s   (223)            dddddd [     mesh0]:            dddddd (191)              eeee (203)

If the width of your terminal is adequate you get a nice table which is rather
helpful for debugging purposes.


Regards,


-- 
Antonio Quartulli

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH 03/16] batman-adv: add bat_orig_print API function
From: Joe Perches @ 2013-10-23 17:27 UTC (permalink / raw)
  To: Antonio Quartulli
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	b.a.t.m.a.n-ZwoEplunGu2X36UT3dwllkB+6BGkLq7r, Marek Lindner,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, Antonio Quartulli
In-Reply-To: <20131023171810.GB2651-rVWd3aGhH2zPj3vggD0kEA@public.gmane.org>

On Wed, 2013-10-23 at 19:18 +0200, Antonio Quartulli wrote:
> On Wed, Oct 23, 2013 at 10:00:30AM -0700, Joe Perches wrote:
> > On Wed, 2013-10-23 at 18:04 +0200, Antonio Quartulli wrote:
> > > Each routing protocol has its own metric and private
> > > variables, therefore it is useful to introduce a new API
> > > for originator information printing.
> > > 
> > > This API needs to be implemented by each protocol in order
> > > to provide its specific originator table output.
> > []
> > > +static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
> > > +				     struct seq_file *seq)
> > []
> > > +	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
> > > +		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
> > > +		   "Nexthop", "outgoingIF", "Potential nexthops");
> > 
> > This header printf really doesn't add much with the formatting sizes.
> > It's pretty obscure why some of these are sized and others not sized.
> > For instance: %-15s doesn't refer to a mac address size.
> > Perhaps it'd be better to just emit the fixed string just using
> > BATADV_TO_MAX_VALUE.  It'd also be easier to find via grep.
> 
> This string is printed out of a debugfs file and follows a format that we have
> been using for long time (this patch is just moving this code from one point to
> another).

I saw.

> I didn't get what you mean with BATADV_TO_MAX_VALUE (maybe you meant
> BATADV_TQ_MAX_VALUE? but even in this case I don't get it).

Yeah, TQ not TO.  Both look similar with squiggles
underneath them when using spell checking.

It's a #define, all the others are fixed strings.

Anyway, using
	seq_printf(seq, "  Originator      last-seen (#/%d)           Nexthop [outgoingIF]:   Potential nexthops ...\n",
		   BATADV_TQ_MAX_VALUE);
is probably trivially smaller overall code size too.

Your code, you decide...

cheers, Joe

^ permalink raw reply

* Re: [PATCH 03/16] batman-adv: add bat_orig_print API function
From: Antonio Quartulli @ 2013-10-23 17:37 UTC (permalink / raw)
  To: Joe Perches; +Cc: davem, netdev, b.a.t.m.a.n, Antonio Quartulli, Marek Lindner
In-Reply-To: <1382549233.22433.33.camel@joe-AO722>

[-- Attachment #1: Type: text/plain, Size: 2466 bytes --]

On Wed, Oct 23, 2013 at 10:27:13AM -0700, Joe Perches wrote:
> On Wed, 2013-10-23 at 19:18 +0200, Antonio Quartulli wrote:
> > On Wed, Oct 23, 2013 at 10:00:30AM -0700, Joe Perches wrote:
> > > On Wed, 2013-10-23 at 18:04 +0200, Antonio Quartulli wrote:
> > > > Each routing protocol has its own metric and private
> > > > variables, therefore it is useful to introduce a new API
> > > > for originator information printing.
> > > > 
> > > > This API needs to be implemented by each protocol in order
> > > > to provide its specific originator table output.
> > > []
> > > > +static void batadv_iv_ogm_orig_print(struct batadv_priv *bat_priv,
> > > > +				     struct seq_file *seq)
> > > []
> > > > +	seq_printf(seq, "  %-15s %s (%s/%i) %17s [%10s]: %20s ...\n",
> > > > +		   "Originator", "last-seen", "#", BATADV_TQ_MAX_VALUE,
> > > > +		   "Nexthop", "outgoingIF", "Potential nexthops");
> > > 
> > > This header printf really doesn't add much with the formatting sizes.
> > > It's pretty obscure why some of these are sized and others not sized.
> > > For instance: %-15s doesn't refer to a mac address size.
> > > Perhaps it'd be better to just emit the fixed string just using
> > > BATADV_TO_MAX_VALUE.  It'd also be easier to find via grep.
> > 
> > This string is printed out of a debugfs file and follows a format that we have
> > been using for long time (this patch is just moving this code from one point to
> > another).
> 
> I saw.
> 
> > I didn't get what you mean with BATADV_TO_MAX_VALUE (maybe you meant
> > BATADV_TQ_MAX_VALUE? but even in this case I don't get it).
> 
> Yeah, TQ not TO.  Both look similar with squiggles
> underneath them when using spell checking.
> 
> It's a #define, all the others are fixed strings.
> 
> Anyway, using
> 	seq_printf(seq, "  Originator      last-seen (#/%d)           Nexthop [outgoingIF]:   Potential nexthops ...\n",
> 		   BATADV_TQ_MAX_VALUE);
> is probably trivially smaller overall code size too.
> 
> Your code, you decide...

Now I fully understand your point.

Honestly I agree with you, but this string is there since a while and I'd
like to get a feedback from the other guys before changing it.

If for David it is not a problem I'd like keep it like that and I'll discuss and
possibly add this change (this is not the only header in the code printed this
way) later.

Thanks a lot for your feedback!

Cheers,

-- 
Antonio Quartulli

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [PATCH net-next v2 1/2] net: make net_get_random_once irq safe
From: Hannes Frederic Sowa @ 2013-10-23 18:05 UTC (permalink / raw)
  To: netdev, davem, edumazet
In-Reply-To: <20131023111200.GB26236@order.stressinduktion.org>

I initial build non irq safe version of net_get_random_once because I
would liked to have the freedom to defer even the extraction process of
get_random_bytes until the nonblocking pool is fully seeded.

I don't think this is a good idea anymore and thus this patch makes
net_get_random_once irq safe. Now someone using net_get_random_once does
not need to care from where it is called.

Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
v2: Reword the commit message only. It looked horribly.

 include/linux/net.h | 1 -
 net/core/utils.c    | 7 ++++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index aca446b..b292a04 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -250,7 +250,6 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 #define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
 #endif /* HAVE_JUMP_LABEL */
 
-/* BE CAREFUL: this function is not interrupt safe */
 #define net_get_random_once(buf, nbytes)				\
 	({								\
 		bool ___ret = false;					\
diff --git a/net/core/utils.c b/net/core/utils.c
index bf09371..2f737bf 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -370,16 +370,17 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 			   struct static_key *done_key)
 {
 	static DEFINE_SPINLOCK(lock);
+	unsigned long flags;
 
-	spin_lock_bh(&lock);
+	spin_lock_irqsave(&lock, flags);
 	if (*done) {
-		spin_unlock_bh(&lock);
+		spin_unlock_irqrestore(&lock, flags);
 		return false;
 	}
 
 	get_random_bytes(buf, nbytes);
 	*done = true;
-	spin_unlock_bh(&lock);
+	spin_unlock_irqrestore(&lock, flags);
 
 	__net_random_once_disable_jump(done_key);
 
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox