Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 0/3 - GIT PULL] include/linux: Remove externs from networking function prototypes
From: Joe Perches @ 2013-09-26 22:18 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, linux-kernel, linux-hippi, netfilter-devel,
	netfilter, coreteam


The following changes since commit aae8c287e664d49df4aa315ad263c33b9a2af3e1:

  Merge branch 'bonding_neighbours' (2013-09-26 16:02:19 -0400)

are available in the git repository at:

  git://repo.or.cz/linux-2.6/trivial-mods.git 20130926_include_linux_networking_externs

for you to fetch changes up to f629d208d27a22f495b7734eede585b5d207e912:

  [networking]device.h: Remove extern from function prototypes (2013-09-26 15:06:58 -0700)

----------------------------------------------------------------

Joe Perches (3):
  netfilter: Remove extern from function prototypes
  net.h/skbuff.h: Remove extern from function prototypes
  [networking]device.h: Remove extern from function prototypes

 include/linux/etherdevice.h                      |  35 +-
 include/linux/fcdevice.h                         |   2 +-
 include/linux/fddidevice.h                       |   7 +-
 include/linux/hippidevice.h                      |  10 +-
 include/linux/inetdevice.h                       |  28 +-
 include/linux/net.h                              |  82 ++---
 include/linux/netdevice.h                        | 432 +++++++++++------------
 include/linux/netfilter.h                        |  10 +-
 include/linux/netfilter/nf_conntrack_common.h    |   2 +-
 include/linux/netfilter/nf_conntrack_h323.h      |  14 +-
 include/linux/netfilter/nf_conntrack_proto_gre.h |   4 +-
 include/linux/netfilter/nf_conntrack_sip.h       |  57 ++-
 include/linux/netfilter/nfnetlink.h              |  28 +-
 include/linux/netfilter/nfnetlink_acct.h         |   6 +-
 include/linux/netfilter/x_tables.h               | 128 ++++---
 include/linux/netfilter_bridge.h                 |   4 +-
 include/linux/netfilter_ipv4.h                   |   6 +-
 include/linux/netfilter_ipv6.h                   |  10 +-
 include/linux/skbuff.h                           | 250 ++++++-------
 19 files changed, 529 insertions(+), 586 deletions(-)

-- 
1.8.1.2.459.gbcd45b4.dirty


^ permalink raw reply

* [PATCH 1/3] netfilter: Remove extern from function prototypes
From: Joe Perches @ 2013-09-26 22:18 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Pablo Neira Ayuso, Patrick McHardy,
	Jozsef Kadlecsik, netfilter-devel, netfilter, coreteam,
	linux-kernel
In-Reply-To: <cover.1380233637.git.joe@perches.com>

There is a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/netfilter.h                        |  10 +-
 include/linux/netfilter/nf_conntrack_common.h    |   2 +-
 include/linux/netfilter/nf_conntrack_h323.h      |  14 +--
 include/linux/netfilter/nf_conntrack_proto_gre.h |   4 +-
 include/linux/netfilter/nf_conntrack_sip.h       |  57 +++++-----
 include/linux/netfilter/nfnetlink.h              |  28 ++---
 include/linux/netfilter/nfnetlink_acct.h         |   6 +-
 include/linux/netfilter/x_tables.h               | 128 +++++++++++------------
 include/linux/netfilter_bridge.h                 |   4 +-
 include/linux/netfilter_ipv4.h                   |   6 +-
 include/linux/netfilter_ipv6.h                   |  10 +-
 11 files changed, 133 insertions(+), 136 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 708fe72ab9..61223c5 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -35,7 +35,7 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
 	result->all[3] = a1->all[3] & mask->all[3];
 }
 
-extern int netfilter_init(void);
+int netfilter_init(void);
 
 /* Largest hook number + 1 */
 #define NF_MAX_HOOKS 8
@@ -208,7 +208,7 @@ int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval,
 /* Call this before modifying an existing packet: ensures it is
    modifiable and linear to the point you care about (writable_len).
    Returns true or false. */
-extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len);
 
 struct flowi;
 struct nf_queue_entry;
@@ -269,8 +269,8 @@ nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 }
 
-extern int nf_register_afinfo(const struct nf_afinfo *afinfo);
-extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+int nf_register_afinfo(const struct nf_afinfo *afinfo);
+void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
 
 #include <net/flow.h>
 extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
@@ -315,7 +315,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
-extern void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
 extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
 
 struct nf_conn;
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 127d0b9..2755057 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -23,6 +23,6 @@ struct ip_conntrack_stat {
 };
 
 /* call to create an explicit dependency on nf_conntrack. */
-extern void need_conntrack(void);
+void need_conntrack(void);
 
 #endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index f381020..858d9b2 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -29,13 +29,13 @@ struct nf_ct_h323_master {
 
 struct nf_conn;
 
-extern int get_h225_addr(struct nf_conn *ct, unsigned char *data,
-			 TransportAddress *taddr,
-			 union nf_inet_addr *addr, __be16 *port);
-extern void nf_conntrack_h245_expect(struct nf_conn *new,
-				     struct nf_conntrack_expect *this);
-extern void nf_conntrack_q931_expect(struct nf_conn *new,
-				     struct nf_conntrack_expect *this);
+int get_h225_addr(struct nf_conn *ct, unsigned char *data,
+		  TransportAddress *taddr, union nf_inet_addr *addr,
+		  __be16 *port);
+void nf_conntrack_h245_expect(struct nf_conn *new,
+			      struct nf_conntrack_expect *this);
+void nf_conntrack_q931_expect(struct nf_conn *new,
+			      struct nf_conntrack_expect *this);
 extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 				  unsigned char **data, int dataoff,
 				  H245_TransportAddress *taddr,
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 6a0664c..ec2ffaf 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -87,8 +87,8 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 /* delete keymap entries */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
-extern void nf_ct_gre_keymap_flush(struct net *net);
-extern void nf_nat_need_gre(void);
+void nf_ct_gre_keymap_flush(struct net *net);
+void nf_nat_need_gre(void);
 
 #endif /* __KERNEL__ */
 #endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index ba7f571..5cac020 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -157,35 +157,34 @@ extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
 					     unsigned int medialen,
 					     union nf_inet_addr *rtp_addr);
 
-extern int ct_sip_parse_request(const struct nf_conn *ct,
-				const char *dptr, unsigned int datalen,
-				unsigned int *matchoff, unsigned int *matchlen,
-				union nf_inet_addr *addr, __be16 *port);
-extern int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
-			     unsigned int dataoff, unsigned int datalen,
-			     enum sip_header_types type,
-			     unsigned int *matchoff, unsigned int *matchlen);
-extern int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
-				   unsigned int *dataoff, unsigned int datalen,
-				   enum sip_header_types type, int *in_header,
-				   unsigned int *matchoff, unsigned int *matchlen,
-				   union nf_inet_addr *addr, __be16 *port);
-extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
-				      unsigned int dataoff, unsigned int datalen,
-				      const char *name,
-				      unsigned int *matchoff, unsigned int *matchlen,
-				      union nf_inet_addr *addr, bool delim);
-extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
-					unsigned int off, unsigned int datalen,
-					const char *name,
-					unsigned int *matchoff, unsigned int *matchen,
-					unsigned int *val);
-
-extern int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
-				 unsigned int dataoff, unsigned int datalen,
-				 enum sdp_header_types type,
-				 enum sdp_header_types term,
-				 unsigned int *matchoff, unsigned int *matchlen);
+int ct_sip_parse_request(const struct nf_conn *ct, const char *dptr,
+			 unsigned int datalen, unsigned int *matchoff,
+			 unsigned int *matchlen, union nf_inet_addr *addr,
+			 __be16 *port);
+int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
+		      unsigned int dataoff, unsigned int datalen,
+		      enum sip_header_types type, unsigned int *matchoff,
+		      unsigned int *matchlen);
+int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
+			    unsigned int *dataoff, unsigned int datalen,
+			    enum sip_header_types type, int *in_header,
+			    unsigned int *matchoff, unsigned int *matchlen,
+			    union nf_inet_addr *addr, __be16 *port);
+int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
+			       unsigned int dataoff, unsigned int datalen,
+			       const char *name, unsigned int *matchoff,
+			       unsigned int *matchlen, union nf_inet_addr *addr,
+			       bool delim);
+int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
+				 unsigned int off, unsigned int datalen,
+				 const char *name, unsigned int *matchoff,
+				 unsigned int *matchen, unsigned int *val);
+
+int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
+			  unsigned int dataoff, unsigned int datalen,
+			  enum sdp_header_types type,
+			  enum sdp_header_types term,
+			  unsigned int *matchoff, unsigned int *matchlen);
 
 #endif /* __KERNEL__ */
 #endif /* __NF_CONNTRACK_SIP_H__ */
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index cadb740..4f68cd7 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -25,20 +25,20 @@ struct nfnetlink_subsystem {
 	const struct nfnl_callback *cb;	/* callback for individual types */
 };
 
-extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
-extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
-
-extern int nfnetlink_has_listeners(struct net *net, unsigned int group);
-extern struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
-					   u32 dst_portid, gfp_t gfp_mask);
-extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
-			  unsigned int group, int echo, gfp_t flags);
-extern int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
-extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net,
-			     u32 portid, int flags);
-
-extern void nfnl_lock(__u8 subsys_id);
-extern void nfnl_unlock(__u8 subsys_id);
+int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
+int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n);
+
+int nfnetlink_has_listeners(struct net *net, unsigned int group);
+struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
+				    u32 dst_portid, gfp_t gfp_mask);
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
+		   unsigned int group, int echo, gfp_t flags);
+int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error);
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
+		      int flags);
+
+void nfnl_lock(__u8 subsys_id);
+void nfnl_unlock(__u8 subsys_id);
 
 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
 	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index bb4bbc9..b2e85e5 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -6,8 +6,8 @@
 
 struct nf_acct;
 
-extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
-extern void nfnl_acct_put(struct nf_acct *acct);
-extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+struct nf_acct *nfnl_acct_find_get(const char *filter_name);
+void nfnl_acct_put(struct nf_acct *acct);
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
 
 #endif /* _NFNL_ACCT_H */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index dd49566..a3e215b 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -229,50 +229,48 @@ struct xt_table_info {
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
 			  + nr_cpu_ids * sizeof(char *))
-extern int xt_register_target(struct xt_target *target);
-extern void xt_unregister_target(struct xt_target *target);
-extern int xt_register_targets(struct xt_target *target, unsigned int n);
-extern void xt_unregister_targets(struct xt_target *target, unsigned int n);
-
-extern int xt_register_match(struct xt_match *target);
-extern void xt_unregister_match(struct xt_match *target);
-extern int xt_register_matches(struct xt_match *match, unsigned int n);
-extern void xt_unregister_matches(struct xt_match *match, unsigned int n);
-
-extern int xt_check_match(struct xt_mtchk_param *,
-			  unsigned int size, u_int8_t proto, bool inv_proto);
-extern int xt_check_target(struct xt_tgchk_param *,
-			   unsigned int size, u_int8_t proto, bool inv_proto);
-
-extern struct xt_table *xt_register_table(struct net *net,
-					  const struct xt_table *table,
-					  struct xt_table_info *bootstrap,
-					  struct xt_table_info *newinfo);
-extern void *xt_unregister_table(struct xt_table *table);
-
-extern struct xt_table_info *xt_replace_table(struct xt_table *table,
-					      unsigned int num_counters,
-					      struct xt_table_info *newinfo,
-					      int *error);
-
-extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
-extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
-extern struct xt_match *xt_request_find_match(u8 af, const char *name,
-					      u8 revision);
-extern struct xt_target *xt_request_find_target(u8 af, const char *name,
-						u8 revision);
-extern int xt_find_revision(u8 af, const char *name, u8 revision,
-			    int target, int *err);
-
-extern struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
-					   const char *name);
-extern void xt_table_unlock(struct xt_table *t);
-
-extern int xt_proto_init(struct net *net, u_int8_t af);
-extern void xt_proto_fini(struct net *net, u_int8_t af);
-
-extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
-extern void xt_free_table_info(struct xt_table_info *info);
+int xt_register_target(struct xt_target *target);
+void xt_unregister_target(struct xt_target *target);
+int xt_register_targets(struct xt_target *target, unsigned int n);
+void xt_unregister_targets(struct xt_target *target, unsigned int n);
+
+int xt_register_match(struct xt_match *target);
+void xt_unregister_match(struct xt_match *target);
+int xt_register_matches(struct xt_match *match, unsigned int n);
+void xt_unregister_matches(struct xt_match *match, unsigned int n);
+
+int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
+		   bool inv_proto);
+int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
+		    bool inv_proto);
+
+struct xt_table *xt_register_table(struct net *net,
+				   const struct xt_table *table,
+				   struct xt_table_info *bootstrap,
+				   struct xt_table_info *newinfo);
+void *xt_unregister_table(struct xt_table *table);
+
+struct xt_table_info *xt_replace_table(struct xt_table *table,
+				       unsigned int num_counters,
+				       struct xt_table_info *newinfo,
+				       int *error);
+
+struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
+struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
+struct xt_match *xt_request_find_match(u8 af, const char *name, u8 revision);
+struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision);
+int xt_find_revision(u8 af, const char *name, u8 revision, int target,
+		     int *err);
+
+struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
+				    const char *name);
+void xt_table_unlock(struct xt_table *t);
+
+int xt_proto_init(struct net *net, u_int8_t af);
+void xt_proto_fini(struct net *net, u_int8_t af);
+
+struct xt_table_info *xt_alloc_table_info(unsigned int size);
+void xt_free_table_info(struct xt_table_info *info);
 
 /**
  * xt_recseq - recursive seqcount for netfilter use
@@ -353,8 +351,8 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
 	return ret;
 }
 
-extern struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
-extern void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
+struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
+void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
@@ -414,25 +412,25 @@ struct _compat_xt_align {
 
 #define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align))
 
-extern void xt_compat_lock(u_int8_t af);
-extern void xt_compat_unlock(u_int8_t af);
-
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
-extern void xt_compat_flush_offsets(u_int8_t af);
-extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
-extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
-
-extern int xt_compat_match_offset(const struct xt_match *match);
-extern int xt_compat_match_from_user(struct xt_entry_match *m,
-				     void **dstptr, unsigned int *size);
-extern int xt_compat_match_to_user(const struct xt_entry_match *m,
-				   void __user **dstptr, unsigned int *size);
-
-extern int xt_compat_target_offset(const struct xt_target *target);
-extern void xt_compat_target_from_user(struct xt_entry_target *t,
-				       void **dstptr, unsigned int *size);
-extern int xt_compat_target_to_user(const struct xt_entry_target *t,
-				    void __user **dstptr, unsigned int *size);
+void xt_compat_lock(u_int8_t af);
+void xt_compat_unlock(u_int8_t af);
+
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
+void xt_compat_flush_offsets(u_int8_t af);
+void xt_compat_init_offsets(u_int8_t af, unsigned int number);
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
+
+int xt_compat_match_offset(const struct xt_match *match);
+int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+			      unsigned int *size);
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+			    void __user **dstptr, unsigned int *size);
+
+int xt_compat_target_offset(const struct xt_target *target);
+void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
+				unsigned int *size);
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+			     void __user **dstptr, unsigned int *size);
 
 #endif /* CONFIG_COMPAT */
 #endif /* _X_TABLES_H */
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index dfb4d9e..8ab1c27 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -25,7 +25,7 @@ enum nf_br_hook_priorities {
 #define BRNF_PPPoE			0x20
 
 /* Only used in br_forward.c */
-extern int nf_bridge_copy_header(struct sk_buff *skb);
+int nf_bridge_copy_header(struct sk_buff *skb);
 static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
 	if (skb->nf_bridge &&
@@ -53,7 +53,7 @@ static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 	return 0;
 }
 
-extern int br_handle_frame_finish(struct sk_buff *skb);
+int br_handle_frame_finish(struct sk_buff *skb);
 /* Only used in br_device.c */
 static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 {
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index dfaf116..6e4591b 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -6,7 +6,7 @@
 
 #include <uapi/linux/netfilter_ipv4.h>
 
-extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
-extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
-				   unsigned int dataoff, u_int8_t protocol);
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+		       unsigned int dataoff, u_int8_t protocol);
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 2d4df6ce..64dad1cc 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -11,12 +11,12 @@
 
 
 #ifdef CONFIG_NETFILTER
-extern int ip6_route_me_harder(struct sk_buff *skb);
-extern __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-				    unsigned int dataoff, u_int8_t protocol);
+int ip6_route_me_harder(struct sk_buff *skb);
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u_int8_t protocol);
 
-extern int ipv6_netfilter_init(void);
-extern void ipv6_netfilter_fini(void);
+int ipv6_netfilter_init(void);
+void ipv6_netfilter_fini(void);
 
 /*
  * Hook functions for ipv6 to allow xt_* modules to be built-in even
-- 
1.8.1.2.459.gbcd45b4.dirty

^ permalink raw reply related

* [PATCH 2/3] net.h/skbuff.h: Remove extern from function prototypes
From: Joe Perches @ 2013-09-26 22:18 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, linux-kernel
In-Reply-To: <cover.1380233637.git.joe@perches.com>

There is a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/net.h    |  82 ++++++++--------
 include/linux/skbuff.h | 250 ++++++++++++++++++++++---------------------------
 2 files changed, 148 insertions(+), 184 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4f27575..ca9ec85 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -195,27 +195,23 @@ enum {
 	SOCK_WAKE_URG,
 };
 
-extern int	     sock_wake_async(struct socket *sk, int how, int band);
-extern int	     sock_register(const struct net_proto_family *fam);
-extern void	     sock_unregister(int family);
-extern int	     __sock_create(struct net *net, int family, int type, int proto,
-				 struct socket **res, int kern);
-extern int	     sock_create(int family, int type, int proto,
-				 struct socket **res);
-extern int	     sock_create_kern(int family, int type, int proto,
-				      struct socket **res);
-extern int	     sock_create_lite(int family, int type, int proto,
-				      struct socket **res); 
-extern void	     sock_release(struct socket *sock);
-extern int   	     sock_sendmsg(struct socket *sock, struct msghdr *msg,
-				  size_t len);
-extern int	     sock_recvmsg(struct socket *sock, struct msghdr *msg,
-				  size_t size, int flags);
-extern struct file  *sock_alloc_file(struct socket *sock, int flags, const char *dname);
-extern struct socket *sockfd_lookup(int fd, int *err);
-extern struct socket *sock_from_file(struct file *file, int *err);
+int sock_wake_async(struct socket *sk, int how, int band);
+int sock_register(const struct net_proto_family *fam);
+void sock_unregister(int family);
+int __sock_create(struct net *net, int family, int type, int proto,
+		  struct socket **res, int kern);
+int sock_create(int family, int type, int proto, struct socket **res);
+int sock_create_kern(int family, int type, int proto, struct socket **res);
+int sock_create_lite(int family, int type, int proto, struct socket **res);
+void sock_release(struct socket *sock);
+int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+		 int flags);
+struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname);
+struct socket *sockfd_lookup(int fd, int *err);
+struct socket *sock_from_file(struct file *file, int *err);
 #define		     sockfd_put(sock) fput(sock->file)
-extern int	     net_ratelimit(void);
+int net_ratelimit(void);
 
 #define net_ratelimited_function(function, ...)			\
 do {								\
@@ -243,32 +239,28 @@ do {								\
 #define net_random()		prandom_u32()
 #define net_srandom(seed)	prandom_seed((__force u32)(seed))
 
-extern int   	     kernel_sendmsg(struct socket *sock, struct msghdr *msg,
-				    struct kvec *vec, size_t num, size_t len);
-extern int   	     kernel_recvmsg(struct socket *sock, struct msghdr *msg,
-				    struct kvec *vec, size_t num,
-				    size_t len, int flags);
+int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+		   size_t num, size_t len);
+int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
+		   size_t num, size_t len, int flags);
 
-extern int kernel_bind(struct socket *sock, struct sockaddr *addr,
-		       int addrlen);
-extern int kernel_listen(struct socket *sock, int backlog);
-extern int kernel_accept(struct socket *sock, struct socket **newsock,
-			 int flags);
-extern int kernel_connect(struct socket *sock, struct sockaddr *addr,
-			  int addrlen, int flags);
-extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
-			      int *addrlen);
-extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
-			      int *addrlen);
-extern int kernel_getsockopt(struct socket *sock, int level, int optname,
-			     char *optval, int *optlen);
-extern int kernel_setsockopt(struct socket *sock, int level, int optname,
-			     char *optval, unsigned int optlen);
-extern int kernel_sendpage(struct socket *sock, struct page *page, int offset,
-			   size_t size, int flags);
-extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
-extern int kernel_sock_shutdown(struct socket *sock,
-				enum sock_shutdown_cmd how);
+int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen);
+int kernel_listen(struct socket *sock, int backlog);
+int kernel_accept(struct socket *sock, struct socket **newsock, int flags);
+int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
+		   int flags);
+int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
+		       int *addrlen);
+int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
+		       int *addrlen);
+int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
+		      int *optlen);
+int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
+		      unsigned int optlen);
+int kernel_sendpage(struct socket *sock, struct page *page, int offset,
+		    size_t size, int flags);
+int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
+int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
 #define MODULE_ALIAS_NETPROTO(proto) \
 	MODULE_ALIAS("net-pf-" __stringify(proto))
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ddb48d..6d56840 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -585,8 +585,8 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 	skb->_skb_refdst = (unsigned long)dst;
 }
 
-extern void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
-				bool force);
+void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst,
+			 bool force);
 
 /**
  * skb_dst_set_noref - sets skb dst, hopefully, without taking reference
@@ -634,20 +634,20 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 	return (struct rtable *)skb_dst(skb);
 }
 
-extern void kfree_skb(struct sk_buff *skb);
-extern void kfree_skb_list(struct sk_buff *segs);
-extern void skb_tx_error(struct sk_buff *skb);
-extern void consume_skb(struct sk_buff *skb);
-extern void	       __kfree_skb(struct sk_buff *skb);
+void kfree_skb(struct sk_buff *skb);
+void kfree_skb_list(struct sk_buff *segs);
+void skb_tx_error(struct sk_buff *skb);
+void consume_skb(struct sk_buff *skb);
+void  __kfree_skb(struct sk_buff *skb);
 extern struct kmem_cache *skbuff_head_cache;
 
-extern void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
-extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
-			     bool *fragstolen, int *delta_truesize);
+void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
+bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
+		      bool *fragstolen, int *delta_truesize);
 
-extern struct sk_buff *__alloc_skb(unsigned int size,
-				   gfp_t priority, int flags, int node);
-extern struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
+			    int node);
+struct sk_buff *build_skb(void *data, unsigned int frag_size);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
@@ -660,41 +660,33 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
 }
 
-extern struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
+struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
 static inline struct sk_buff *alloc_skb_head(gfp_t priority)
 {
 	return __alloc_skb_head(priority, -1);
 }
 
-extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
-extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
-extern struct sk_buff *skb_clone(struct sk_buff *skb,
-				 gfp_t priority);
-extern struct sk_buff *skb_copy(const struct sk_buff *skb,
-				gfp_t priority);
-extern struct sk_buff *__pskb_copy(struct sk_buff *skb,
-				 int headroom, gfp_t gfp_mask);
-
-extern int	       pskb_expand_head(struct sk_buff *skb,
-					int nhead, int ntail,
-					gfp_t gfp_mask);
-extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
-					    unsigned int headroom);
-extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
-				       int newheadroom, int newtailroom,
-				       gfp_t priority);
-extern int	       skb_to_sgvec(struct sk_buff *skb,
-				    struct scatterlist *sg, int offset,
-				    int len);
-extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
-				    struct sk_buff **trailer);
-extern int	       skb_pad(struct sk_buff *skb, int pad);
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
+struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
+struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+
+int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
+struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
+				     unsigned int headroom);
+struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
+				int newtailroom, gfp_t priority);
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
+		 int len);
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
+int skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
 
-extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int getfrag(void *from, char *to, int offset,
-			int len,int odd, struct sk_buff *skb),
-			void *from, int length);
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+			    int getfrag(void *from, char *to, int offset,
+					int len, int odd, struct sk_buff *skb),
+			    void *from, int length);
 
 struct skb_seq_state {
 	__u32		lower_offset;
@@ -706,18 +698,17 @@ struct skb_seq_state {
 	__u8		*frag_data;
 };
 
-extern void	      skb_prepare_seq_read(struct sk_buff *skb,
-					   unsigned int from, unsigned int to,
-					   struct skb_seq_state *st);
-extern unsigned int   skb_seq_read(unsigned int consumed, const u8 **data,
-				   struct skb_seq_state *st);
-extern void	      skb_abort_seq_read(struct skb_seq_state *st);
+void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
+			  unsigned int to, struct skb_seq_state *st);
+unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
+			  struct skb_seq_state *st);
+void skb_abort_seq_read(struct skb_seq_state *st);
 
-extern unsigned int   skb_find_text(struct sk_buff *skb, unsigned int from,
-				    unsigned int to, struct ts_config *config,
-				    struct ts_state *state);
+unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
+			   unsigned int to, struct ts_config *config,
+			   struct ts_state *state);
 
-extern void __skb_get_rxhash(struct sk_buff *skb);
+void __skb_get_rxhash(struct sk_buff *skb);
 static inline __u32 skb_get_rxhash(struct sk_buff *skb)
 {
 	if (!skb->l4_rxhash)
@@ -1095,7 +1086,8 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
  *	The "__skb_xxxx()" functions are the non-atomic ones that
  *	can only be called with interrupts disabled.
  */
-extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list);
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk,
+		struct sk_buff_head *list);
 static inline void __skb_insert(struct sk_buff *newsk,
 				struct sk_buff *prev, struct sk_buff *next,
 				struct sk_buff_head *list)
@@ -1201,8 +1193,8 @@ static inline void __skb_queue_after(struct sk_buff_head *list,
 	__skb_insert(newsk, prev, prev->next, list);
 }
 
-extern void skb_append(struct sk_buff *old, struct sk_buff *newsk,
-		       struct sk_buff_head *list);
+void skb_append(struct sk_buff *old, struct sk_buff *newsk,
+		struct sk_buff_head *list);
 
 static inline void __skb_queue_before(struct sk_buff_head *list,
 				      struct sk_buff *next,
@@ -1221,7 +1213,7 @@ static inline void __skb_queue_before(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_head(struct sk_buff_head *list,
 				    struct sk_buff *newsk)
 {
@@ -1238,7 +1230,7 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_tail(struct sk_buff_head *list,
 				   struct sk_buff *newsk)
 {
@@ -1249,7 +1241,7 @@ static inline void __skb_queue_tail(struct sk_buff_head *list,
  * remove sk_buff from list. _Must_ be called atomically, and with
  * the list known..
  */
-extern void	   skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
 static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
 	struct sk_buff *next, *prev;
@@ -1270,7 +1262,7 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The head item is
  *	returned or %NULL if the list is empty.
  */
-extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
+struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek(list);
@@ -1287,7 +1279,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The tail item is
  *	returned or %NULL if the list is empty.
  */
-extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek_tail(list);
@@ -1373,8 +1365,8 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
-extern void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page,
-			    int off, int size, unsigned int truesize);
+void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
+		     int size, unsigned int truesize);
 
 #define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_has_frag_list(skb))
@@ -1418,7 +1410,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 /*
  *	Add data to an sk_buff
  */
-extern unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_put(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 {
 	unsigned char *tmp = skb_tail_pointer(skb);
@@ -1428,7 +1420,7 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 	return tmp;
 }
 
-extern unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_push(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 {
 	skb->data -= len;
@@ -1436,7 +1428,7 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
 	return skb->data;
 }
 
-extern unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
+unsigned char *skb_pull(struct sk_buff *skb, unsigned int len);
 static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
 {
 	skb->len -= len;
@@ -1449,7 +1441,7 @@ static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int l
 	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
 }
 
-extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
 {
@@ -1753,7 +1745,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 #define NET_SKB_PAD	max(32, L1_CACHE_BYTES)
 #endif
 
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
+int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1765,7 +1757,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 	skb_set_tail_pointer(skb, len);
 }
 
-extern void skb_trim(struct sk_buff *skb, unsigned int len);
+void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -1838,7 +1830,7 @@ static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
  *	the list and one reference dropped. This function does not take the
  *	list lock and the caller must hold the relevant locks to use it.
  */
-extern void skb_queue_purge(struct sk_buff_head *list);
+void skb_queue_purge(struct sk_buff_head *list);
 static inline void __skb_queue_purge(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
@@ -1850,11 +1842,10 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 #define NETDEV_FRAG_PAGE_MAX_SIZE  (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
 #define NETDEV_PAGECNT_MAX_BIAS	   NETDEV_FRAG_PAGE_MAX_SIZE
 
-extern void *netdev_alloc_frag(unsigned int fragsz);
+void *netdev_alloc_frag(unsigned int fragsz);
 
-extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
-					  unsigned int length,
-					  gfp_t gfp_mask);
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
+				   gfp_t gfp_mask);
 
 /**
  *	netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -2342,60 +2333,42 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag)
 #define skb_walk_frags(skb, iter)	\
 	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
 
-extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
-					   int *peeked, int *off, int *err);
-extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-					 int noblock, int *err);
-extern unsigned int    datagram_poll(struct file *file, struct socket *sock,
-				     struct poll_table_struct *wait);
-extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
-					       int offset, struct iovec *to,
-					       int size);
-extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
-							int hlen,
-							struct iovec *iov);
-extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
-						    int offset,
-						    const struct iovec *from,
-						    int from_offset,
-						    int len);
-extern int	       zerocopy_sg_from_iovec(struct sk_buff *skb,
-					      const struct iovec *frm,
-					      int offset,
-					      size_t count);
-extern int	       skb_copy_datagram_const_iovec(const struct sk_buff *from,
-						     int offset,
-						     const struct iovec *to,
-						     int to_offset,
-						     int size);
-extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
-extern void	       skb_free_datagram_locked(struct sock *sk,
-						struct sk_buff *skb);
-extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
-					 unsigned int flags);
-extern __wsum	       skb_checksum(const struct sk_buff *skb, int offset,
-				    int len, __wsum csum);
-extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
-				     void *to, int len);
-extern int	       skb_store_bits(struct sk_buff *skb, int offset,
-				      const void *from, int len);
-extern __wsum	       skb_copy_and_csum_bits(const struct sk_buff *skb,
-					      int offset, u8 *to, int len,
-					      __wsum csum);
-extern int             skb_splice_bits(struct sk_buff *skb,
-						unsigned int offset,
-						struct pipe_inode_info *pipe,
-						unsigned int len,
-						unsigned int flags);
-extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
-extern void	       skb_split(struct sk_buff *skb,
-				 struct sk_buff *skb1, const u32 len);
-extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
-				 int shiftlen);
-extern void	       skb_scrub_packet(struct sk_buff *skb, bool xnet);
-
-extern struct sk_buff *skb_segment(struct sk_buff *skb,
-				   netdev_features_t features);
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+				    int *peeked, int *off, int *err);
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
+				  int *err);
+unsigned int datagram_poll(struct file *file, struct socket *sock,
+			   struct poll_table_struct *wait);
+int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
+			    struct iovec *to, int size);
+int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
+				     struct iovec *iov);
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+				 const struct iovec *from, int from_offset,
+				 int len);
+int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *frm,
+			   int offset, size_t count);
+int skb_copy_datagram_const_iovec(const struct sk_buff *from, int offset,
+				  const struct iovec *to, int to_offset,
+				  int size);
+void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);
+__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
+		    __wsum csum);
+int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
+__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
+			      int len, __wsum csum);
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
+		    struct pipe_inode_info *pipe, unsigned int len,
+		    unsigned int flags);
+void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
+int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
+void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
@@ -2440,7 +2413,7 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
 	memcpy(skb->data + offset, from, len);
 }
 
-extern void skb_init(void);
+void skb_init(void);
 
 static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
 {
@@ -2483,12 +2456,12 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
-extern void skb_timestamping_init(void);
+void skb_timestamping_init(void);
 
 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
 
-extern void skb_clone_tx_timestamp(struct sk_buff *skb);
-extern bool skb_defer_rx_timestamp(struct sk_buff *skb);
+void skb_clone_tx_timestamp(struct sk_buff *skb);
+bool skb_defer_rx_timestamp(struct sk_buff *skb);
 
 #else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
 
@@ -2529,8 +2502,8 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
  * generates a software time stamp (otherwise), then queues the clone
  * to the error queue of the socket.  Errors are silently ignored.
  */
-extern void skb_tstamp_tx(struct sk_buff *orig_skb,
-			struct skb_shared_hwtstamps *hwtstamps);
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+		   struct skb_shared_hwtstamps *hwtstamps);
 
 static inline void sw_tx_timestamp(struct sk_buff *skb)
 {
@@ -2562,8 +2535,8 @@ static inline void skb_tx_timestamp(struct sk_buff *skb)
  */
 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
 
-extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
-extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
+__sum16 __skb_checksum_complete(struct sk_buff *skb);
 
 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
 {
@@ -2593,7 +2566,7 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 }
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void nf_conntrack_destroy(struct nf_conntrack *nfct);
+void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
 	if (nfct && atomic_dec_and_test(&nfct->use))
@@ -2732,9 +2705,8 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
 	return skb->queue_mapping != 0;
 }
 
-extern u16 __skb_tx_hash(const struct net_device *dev,
-			 const struct sk_buff *skb,
-			 unsigned int num_tx_queues);
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues);
 
 #ifdef CONFIG_XFRM
 static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
@@ -2788,7 +2760,7 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
-extern void __skb_warn_lro_forwarding(const struct sk_buff *skb);
+void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
 {
-- 
1.8.1.2.459.gbcd45b4.dirty

^ permalink raw reply related

* [PATCH 3/3] [networking]device.h: Remove extern from function prototypes
From: Joe Perches @ 2013-09-26 22:18 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Jes Sorensen, linux-kernel, linux-hippi
In-Reply-To: <cover.1380233637.git.joe@perches.com>

There is a mix of function prototypes with and without extern
in the kernel sources.  Standardize on not using extern for
function prototypes.

Function prototypes don't need to be written with extern.
extern is assumed by the compiler.  Its use is as unnecessary as
using auto to declare automatic/local variables in a block.

Signed-off-by: Joe Perches <joe@perches.com>
---
 include/linux/etherdevice.h |  35 ++--
 include/linux/fcdevice.h    |   2 +-
 include/linux/fddidevice.h  |   7 +-
 include/linux/hippidevice.h |  10 +-
 include/linux/inetdevice.h  |  28 +--
 include/linux/netdevice.h   | 432 +++++++++++++++++++++-----------------------
 6 files changed, 248 insertions(+), 266 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index d8b5124..fc4a9aa 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -28,27 +28,24 @@
 #include <asm/unaligned.h>
 
 #ifdef __KERNEL__
-extern __be16		eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
 
-extern int eth_header(struct sk_buff *skb, struct net_device *dev,
-		      unsigned short type,
-		      const void *daddr, const void *saddr, unsigned len);
-extern int eth_rebuild_header(struct sk_buff *skb);
-extern int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
-extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type);
-extern void eth_header_cache_update(struct hh_cache *hh,
-				    const struct net_device *dev,
-				    const unsigned char *haddr);
-extern int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
-extern void eth_commit_mac_addr_change(struct net_device *dev, void *p);
-extern int eth_mac_addr(struct net_device *dev, void *p);
-extern int eth_change_mtu(struct net_device *dev, int new_mtu);
-extern int eth_validate_addr(struct net_device *dev);
-
-
-
-extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
+int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+	       const void *daddr, const void *saddr, unsigned len);
+int eth_rebuild_header(struct sk_buff *skb);
+int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
+int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
+		     __be16 type);
+void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev,
+			     const unsigned char *haddr);
+int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
+void eth_commit_mac_addr_change(struct net_device *dev, void *p);
+int eth_mac_addr(struct net_device *dev, void *p);
+int eth_change_mtu(struct net_device *dev, int new_mtu);
+int eth_validate_addr(struct net_device *dev);
+
+struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 					    unsigned int rxqs);
 #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1)
 #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count)
diff --git a/include/linux/fcdevice.h b/include/linux/fcdevice.h
index e460ef8..5009fa1 100644
--- a/include/linux/fcdevice.h
+++ b/include/linux/fcdevice.h
@@ -27,7 +27,7 @@
 #include <linux/if_fc.h>
 
 #ifdef __KERNEL__
-extern struct net_device *alloc_fcdev(int sizeof_priv);
+struct net_device *alloc_fcdev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_FCDEVICE_H */
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index 155bafd..9a79f01 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -25,10 +25,9 @@
 #include <linux/if_fddi.h>
 
 #ifdef __KERNEL__
-extern __be16	fddi_type_trans(struct sk_buff *skb,
-				struct net_device *dev);
-extern int fddi_change_mtu(struct net_device *dev, int new_mtu);
-extern struct net_device *alloc_fddidev(int sizeof_priv);
+__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev);
+int fddi_change_mtu(struct net_device *dev, int new_mtu);
+struct net_device *alloc_fddidev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_FDDIDEVICE_H */
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index f148e49..8ec23fb 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -31,11 +31,11 @@ struct hippi_cb {
 	__u32	ifield;
 };
 
-extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
-extern int hippi_change_mtu(struct net_device *dev, int new_mtu);
-extern int hippi_mac_addr(struct net_device *dev, void *p);
-extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
-extern struct net_device *alloc_hippi_dev(int sizeof_priv);
+__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
+int hippi_change_mtu(struct net_device *dev, int new_mtu);
+int hippi_mac_addr(struct net_device *dev, void *p);
+int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
+struct net_device *alloc_hippi_dev(int sizeof_priv);
 #endif
 
 #endif	/* _LINUX_HIPPIDEVICE_H */
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 79640e0..0d678ae 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -147,25 +147,27 @@ struct in_ifaddr {
 	unsigned long		ifa_tstamp; /* updated timestamp */
 };
 
-extern int register_inetaddr_notifier(struct notifier_block *nb);
-extern int unregister_inetaddr_notifier(struct notifier_block *nb);
+int register_inetaddr_notifier(struct notifier_block *nb);
+int unregister_inetaddr_notifier(struct notifier_block *nb);
 
-extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
-					struct ipv4_devconf *devconf);
+void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
+				 struct ipv4_devconf *devconf);
 
-extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
+struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref);
 static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
 {
 	return __ip_dev_find(net, addr, true);
 }
 
-extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
-extern int		devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
-extern void		devinet_init(void);
-extern struct in_device	*inetdev_by_index(struct net *, int);
-extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
-extern __be32		inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local, int scope);
-extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask);
+int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
+int devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+void devinet_init(void);
+struct in_device *inetdev_by_index(struct net *, int);
+__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
+__be32 inet_confirm_addr(struct in_device *in_dev, __be32 dst, __be32 local,
+			 int scope);
+struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
+				    __be32 mask);
 
 static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
 {
@@ -218,7 +220,7 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev)
 	return rtnl_dereference(dev->ip_ptr);
 }
 
-extern void in_dev_finish_destroy(struct in_device *idev);
+void in_dev_finish_destroy(struct in_device *idev);
 
 static inline void in_dev_put(struct in_device *idev)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b4cfb63..5f01af3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -60,8 +60,8 @@ struct wireless_dev;
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
 
-extern void netdev_set_default_ethtool_ops(struct net_device *dev,
-					   const struct ethtool_ops *ops);
+void netdev_set_default_ethtool_ops(struct net_device *dev,
+				    const struct ethtool_ops *ops);
 
 /* hardware address assignment types */
 #define NET_ADDR_PERM		0	/* address is permanent (default) */
@@ -298,7 +298,7 @@ struct netdev_boot_setup {
 };
 #define NETDEV_BOOT_SETUP_MAX 8
 
-extern int __init netdev_boot_setup(char *str);
+int __init netdev_boot_setup(char *str);
 
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
@@ -394,7 +394,7 @@ enum rx_handler_result {
 typedef enum rx_handler_result rx_handler_result_t;
 typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb);
 
-extern void __napi_schedule(struct napi_struct *n);
+void __napi_schedule(struct napi_struct *n);
 
 static inline bool napi_disable_pending(struct napi_struct *n)
 {
@@ -445,8 +445,8 @@ static inline bool napi_reschedule(struct napi_struct *napi)
  *
  * Mark NAPI processing as complete.
  */
-extern void __napi_complete(struct napi_struct *n);
-extern void napi_complete(struct napi_struct *n);
+void __napi_complete(struct napi_struct *n);
+void napi_complete(struct napi_struct *n);
 
 /**
  *	napi_by_id - lookup a NAPI by napi_id
@@ -455,7 +455,7 @@ extern void napi_complete(struct napi_struct *n);
  * lookup @napi_id in napi_hash table
  * must be called under rcu_read_lock()
  */
-extern struct napi_struct *napi_by_id(unsigned int napi_id);
+struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /**
  *	napi_hash_add - add a NAPI to global hashtable
@@ -463,7 +463,7 @@ extern struct napi_struct *napi_by_id(unsigned int napi_id);
  *
  * generate a new napi_id and store a @napi under it in napi_hash
  */
-extern void napi_hash_add(struct napi_struct *napi);
+void napi_hash_add(struct napi_struct *napi);
 
 /**
  *	napi_hash_del - remove a NAPI from global table
@@ -472,7 +472,7 @@ extern void napi_hash_add(struct napi_struct *napi);
  * Warning: caller must observe rcu grace period
  * before freeing memory containing @napi
  */
-extern void napi_hash_del(struct napi_struct *napi);
+void napi_hash_del(struct napi_struct *napi);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -664,8 +664,8 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 
 #ifdef CONFIG_RFS_ACCEL
-extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
-				u32 flow_id, u16 filter_id);
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
+			 u16 filter_id);
 #endif
 
 /* This structure contains an instance of an RX queue. */
@@ -1497,9 +1497,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 		f(dev, &dev->_tx[i], arg);
 }
 
-extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
-					   struct sk_buff *skb);
-extern u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb);
+u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
 
 /*
  * Net namespace inlines
@@ -1683,8 +1683,8 @@ struct packet_offload {
 #define NETDEV_CHANGEUPPER	0x0015
 #define NETDEV_RESEND_IGMP	0x0016
 
-extern int register_netdevice_notifier(struct notifier_block *nb);
-extern int unregister_netdevice_notifier(struct notifier_block *nb);
+int register_netdevice_notifier(struct notifier_block *nb);
+int unregister_netdevice_notifier(struct notifier_block *nb);
 
 struct netdev_notifier_info {
 	struct net_device *dev;
@@ -1707,9 +1707,9 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
 	return info->dev;
 }
 
-extern int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
-					 struct netdev_notifier_info *info);
-extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+				  struct netdev_notifier_info *info);
+int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 
 
 extern rwlock_t				dev_base_lock;		/* Device list lock */
@@ -1764,54 +1764,52 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
 	return lh == &net->dev_base_head ? NULL : net_device_entry(lh);
 }
 
-extern int 			netdev_boot_setup_check(struct net_device *dev);
-extern unsigned long		netdev_boot_base(const char *prefix, int unit);
-extern struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
-					      const char *hwaddr);
-extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
-extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
-extern void		dev_add_pack(struct packet_type *pt);
-extern void		dev_remove_pack(struct packet_type *pt);
-extern void		__dev_remove_pack(struct packet_type *pt);
-extern void		dev_add_offload(struct packet_offload *po);
-extern void		dev_remove_offload(struct packet_offload *po);
-extern void		__dev_remove_offload(struct packet_offload *po);
-
-extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
-						      unsigned short mask);
-extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
-extern struct net_device	*dev_get_by_name_rcu(struct net *net, const char *name);
-extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
-extern int		dev_alloc_name(struct net_device *dev, const char *name);
-extern int		dev_open(struct net_device *dev);
-extern int		dev_close(struct net_device *dev);
-extern void		dev_disable_lro(struct net_device *dev);
-extern int		dev_loopback_xmit(struct sk_buff *newskb);
-extern int		dev_queue_xmit(struct sk_buff *skb);
-extern int		register_netdevice(struct net_device *dev);
-extern void		unregister_netdevice_queue(struct net_device *dev,
-						   struct list_head *head);
-extern void		unregister_netdevice_many(struct list_head *head);
+int netdev_boot_setup_check(struct net_device *dev);
+unsigned long netdev_boot_base(const char *prefix, int unit);
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *hwaddr);
+struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
+struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
+void dev_add_pack(struct packet_type *pt);
+void dev_remove_pack(struct packet_type *pt);
+void __dev_remove_pack(struct packet_type *pt);
+void dev_add_offload(struct packet_offload *po);
+void dev_remove_offload(struct packet_offload *po);
+void __dev_remove_offload(struct packet_offload *po);
+
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
+					unsigned short mask);
+struct net_device *dev_get_by_name(struct net *net, const char *name);
+struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
+struct net_device *__dev_get_by_name(struct net *net, const char *name);
+int dev_alloc_name(struct net_device *dev, const char *name);
+int dev_open(struct net_device *dev);
+int dev_close(struct net_device *dev);
+void dev_disable_lro(struct net_device *dev);
+int dev_loopback_xmit(struct sk_buff *newskb);
+int dev_queue_xmit(struct sk_buff *skb);
+int register_netdevice(struct net_device *dev);
+void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
+void unregister_netdevice_many(struct list_head *head);
 static inline void unregister_netdevice(struct net_device *dev)
 {
 	unregister_netdevice_queue(dev, NULL);
 }
 
-extern int 		netdev_refcnt_read(const struct net_device *dev);
-extern void		free_netdev(struct net_device *dev);
-extern void		synchronize_net(void);
-extern int		init_dummy_netdev(struct net_device *dev);
+int netdev_refcnt_read(const struct net_device *dev);
+void free_netdev(struct net_device *dev);
+void synchronize_net(void);
+int init_dummy_netdev(struct net_device *dev);
 
-extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
-extern struct net_device	*__dev_get_by_index(struct net *net, int ifindex);
-extern struct net_device	*dev_get_by_index_rcu(struct net *net, int ifindex);
-extern int		netdev_get_name(struct net *net, char *name, int ifindex);
-extern int		dev_restart(struct net_device *dev);
+struct net_device *dev_get_by_index(struct net *net, int ifindex);
+struct net_device *__dev_get_by_index(struct net *net, int ifindex);
+struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
+int netdev_get_name(struct net *net, char *name, int ifindex);
+int dev_restart(struct net_device *dev);
 #ifdef CONFIG_NETPOLL_TRAP
-extern int		netpoll_trap(void);
+int netpoll_trap(void);
 #endif
-extern int	       skb_gro_receive(struct sk_buff **head,
-				       struct sk_buff *skb);
+int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -1883,7 +1881,7 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 }
 
 typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
-extern int		register_gifconf(unsigned int family, gifconf_func_t * gifconf);
+int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
 static inline int unregister_gifconf(unsigned int family)
 {
 	return register_gifconf(family, NULL);
@@ -1954,7 +1952,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
-extern void __netif_schedule(struct Qdisc *q);
+void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
@@ -2274,8 +2272,8 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 }
 
 #ifdef CONFIG_XPS
-extern int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
-			       u16 index);
+int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask,
+			u16 index);
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      struct cpumask *mask,
@@ -2306,12 +2304,10 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 	return dev->num_tx_queues > 1;
 }
 
-extern int netif_set_real_num_tx_queues(struct net_device *dev,
-					unsigned int txq);
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 
 #ifdef CONFIG_RPS
-extern int netif_set_real_num_rx_queues(struct net_device *dev,
-					unsigned int rxq);
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
 						unsigned int rxq)
@@ -2338,28 +2334,27 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
 }
 
 #define DEFAULT_MAX_NUM_RSS_QUEUES	(8)
-extern int netif_get_num_default_rss_queues(void);
+int netif_get_num_default_rss_queues(void);
 
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
  */
-extern void dev_kfree_skb_irq(struct sk_buff *skb);
+void dev_kfree_skb_irq(struct sk_buff *skb);
 
 /* Use this variant in places where it could be invoked
  * from either hardware interrupt or other context, with hardware interrupts
  * either disabled or enabled.
  */
-extern void dev_kfree_skb_any(struct sk_buff *skb);
+void dev_kfree_skb_any(struct sk_buff *skb);
 
-extern int		netif_rx(struct sk_buff *skb);
-extern int		netif_rx_ni(struct sk_buff *skb);
-extern int		netif_receive_skb(struct sk_buff *skb);
-extern gro_result_t	napi_gro_receive(struct napi_struct *napi,
-					 struct sk_buff *skb);
-extern void		napi_gro_flush(struct napi_struct *napi, bool flush_old);
-extern struct sk_buff *	napi_get_frags(struct napi_struct *napi);
-extern gro_result_t	napi_gro_frags(struct napi_struct *napi);
+int netif_rx(struct sk_buff *skb);
+int netif_rx_ni(struct sk_buff *skb);
+int netif_receive_skb(struct sk_buff *skb);
+gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
+void napi_gro_flush(struct napi_struct *napi, bool flush_old);
+struct sk_buff *napi_get_frags(struct napi_struct *napi);
+gro_result_t napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)
 {
@@ -2367,40 +2362,35 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
-extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler,
-				      void *rx_handler_data);
-extern void netdev_rx_handler_unregister(struct net_device *dev);
-
-extern bool		dev_valid_name(const char *name);
-extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
-extern int		dev_ethtool(struct net *net, struct ifreq *);
-extern unsigned int	dev_get_flags(const struct net_device *);
-extern int		__dev_change_flags(struct net_device *, unsigned int flags);
-extern int		dev_change_flags(struct net_device *, unsigned int);
-extern void		__dev_notify_flags(struct net_device *, unsigned int old_flags);
-extern int		dev_change_name(struct net_device *, const char *);
-extern int		dev_set_alias(struct net_device *, const char *, size_t);
-extern int		dev_change_net_namespace(struct net_device *,
-						 struct net *, const char *);
-extern int		dev_set_mtu(struct net_device *, int);
-extern void		dev_set_group(struct net_device *, int);
-extern int		dev_set_mac_address(struct net_device *,
-					    struct sockaddr *);
-extern int		dev_change_carrier(struct net_device *,
-					   bool new_carrier);
-extern int		dev_get_phys_port_id(struct net_device *dev,
-					     struct netdev_phys_port_id *ppid);
-extern int		dev_hard_start_xmit(struct sk_buff *skb,
-					    struct net_device *dev,
-					    struct netdev_queue *txq);
-extern int		dev_forward_skb(struct net_device *dev,
-					struct sk_buff *skb);
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data);
+void netdev_rx_handler_unregister(struct net_device *dev);
+
+bool dev_valid_name(const char *name);
+int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
+int dev_ethtool(struct net *net, struct ifreq *);
+unsigned int dev_get_flags(const struct net_device *);
+int __dev_change_flags(struct net_device *, unsigned int flags);
+int dev_change_flags(struct net_device *, unsigned int);
+void __dev_notify_flags(struct net_device *, unsigned int old_flags);
+int dev_change_name(struct net_device *, const char *);
+int dev_set_alias(struct net_device *, const char *, size_t);
+int dev_change_net_namespace(struct net_device *, struct net *, const char *);
+int dev_set_mtu(struct net_device *, int);
+void dev_set_group(struct net_device *, int);
+int dev_set_mac_address(struct net_device *, struct sockaddr *);
+int dev_change_carrier(struct net_device *, bool new_carrier);
+int dev_get_phys_port_id(struct net_device *dev,
+			 struct netdev_phys_port_id *ppid);
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+			struct netdev_queue *txq);
+int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 
 extern int		netdev_budget;
 
 /* Called by rtnetlink.c:rtnl_unlock() */
-extern void netdev_run_todo(void);
+void netdev_run_todo(void);
 
 /**
  *	dev_put - release reference to device
@@ -2433,9 +2423,9 @@ static inline void dev_hold(struct net_device *dev)
  * kind of lower layer not just hardware media.
  */
 
-extern void linkwatch_init_dev(struct net_device *dev);
-extern void linkwatch_fire_event(struct net_device *dev);
-extern void linkwatch_forget_dev(struct net_device *dev);
+void linkwatch_init_dev(struct net_device *dev);
+void linkwatch_fire_event(struct net_device *dev);
+void linkwatch_forget_dev(struct net_device *dev);
 
 /**
  *	netif_carrier_ok - test if carrier present
@@ -2448,13 +2438,13 @@ static inline bool netif_carrier_ok(const struct net_device *dev)
 	return !test_bit(__LINK_STATE_NOCARRIER, &dev->state);
 }
 
-extern unsigned long dev_trans_start(struct net_device *dev);
+unsigned long dev_trans_start(struct net_device *dev);
 
-extern void __netdev_watchdog_up(struct net_device *dev);
+void __netdev_watchdog_up(struct net_device *dev);
 
-extern void netif_carrier_on(struct net_device *dev);
+void netif_carrier_on(struct net_device *dev);
 
-extern void netif_carrier_off(struct net_device *dev);
+void netif_carrier_off(struct net_device *dev);
 
 /**
  *	netif_dormant_on - mark device as dormant.
@@ -2522,9 +2512,9 @@ static inline bool netif_device_present(struct net_device *dev)
 	return test_bit(__LINK_STATE_PRESENT, &dev->state);
 }
 
-extern void netif_device_detach(struct net_device *dev);
+void netif_device_detach(struct net_device *dev);
 
-extern void netif_device_attach(struct net_device *dev);
+void netif_device_attach(struct net_device *dev);
 
 /*
  * Network interface message level settings
@@ -2733,98 +2723,93 @@ static inline void netif_addr_unlock_bh(struct net_device *dev)
 
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
 
-extern void		ether_setup(struct net_device *dev);
+void ether_setup(struct net_device *dev);
 
 /* Support for loadable net-drivers */
-extern struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
-				       void (*setup)(struct net_device *),
-				       unsigned int txqs, unsigned int rxqs);
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+				    void (*setup)(struct net_device *),
+				    unsigned int txqs, unsigned int rxqs);
 #define alloc_netdev(sizeof_priv, name, setup) \
 	alloc_netdev_mqs(sizeof_priv, name, setup, 1, 1)
 
 #define alloc_netdev_mq(sizeof_priv, name, setup, count) \
 	alloc_netdev_mqs(sizeof_priv, name, setup, count, count)
 
-extern int		register_netdev(struct net_device *dev);
-extern void		unregister_netdev(struct net_device *dev);
+int register_netdev(struct net_device *dev);
+void unregister_netdev(struct net_device *dev);
 
 /* General hardware address lists handling functions */
-extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len, unsigned char addr_type);
-extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len, unsigned char addr_type);
-extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len);
-extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len);
-extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
-extern void __hw_addr_init(struct netdev_hw_addr_list *list);
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type);
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type);
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list, int addr_len);
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list, int addr_len);
+void __hw_addr_flush(struct netdev_hw_addr_list *list);
+void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
-extern int dev_addr_add(struct net_device *dev, const unsigned char *addr,
-			unsigned char addr_type);
-extern int dev_addr_del(struct net_device *dev, const unsigned char *addr,
-			unsigned char addr_type);
-extern int dev_addr_add_multiple(struct net_device *to_dev,
-				 struct net_device *from_dev,
-				 unsigned char addr_type);
-extern int dev_addr_del_multiple(struct net_device *to_dev,
-				 struct net_device *from_dev,
-				 unsigned char addr_type);
-extern void dev_addr_flush(struct net_device *dev);
-extern int dev_addr_init(struct net_device *dev);
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
+		 unsigned char addr_type);
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
+		 unsigned char addr_type);
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev, unsigned char addr_type);
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev, unsigned char addr_type);
+void dev_addr_flush(struct net_device *dev);
+int dev_addr_init(struct net_device *dev);
 
 /* Functions used for unicast addresses handling */
-extern int dev_uc_add(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_del(struct net_device *dev, const unsigned char *addr);
-extern int dev_uc_sync(struct net_device *to, struct net_device *from);
-extern int dev_uc_sync_multiple(struct net_device *to, struct net_device *from);
-extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
-extern void dev_uc_flush(struct net_device *dev);
-extern void dev_uc_init(struct net_device *dev);
+int dev_uc_add(struct net_device *dev, const unsigned char *addr);
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
+int dev_uc_del(struct net_device *dev, const unsigned char *addr);
+int dev_uc_sync(struct net_device *to, struct net_device *from);
+int dev_uc_sync_multiple(struct net_device *to, struct net_device *from);
+void dev_uc_unsync(struct net_device *to, struct net_device *from);
+void dev_uc_flush(struct net_device *dev);
+void dev_uc_init(struct net_device *dev);
 
 /* Functions used for multicast addresses handling */
-extern int dev_mc_add(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_del(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
-extern int dev_mc_sync(struct net_device *to, struct net_device *from);
-extern int dev_mc_sync_multiple(struct net_device *to, struct net_device *from);
-extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
-extern void dev_mc_flush(struct net_device *dev);
-extern void dev_mc_init(struct net_device *dev);
+int dev_mc_add(struct net_device *dev, const unsigned char *addr);
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
+int dev_mc_del(struct net_device *dev, const unsigned char *addr);
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
+int dev_mc_sync(struct net_device *to, struct net_device *from);
+int dev_mc_sync_multiple(struct net_device *to, struct net_device *from);
+void dev_mc_unsync(struct net_device *to, struct net_device *from);
+void dev_mc_flush(struct net_device *dev);
+void dev_mc_init(struct net_device *dev);
 
 /* Functions used for secondary unicast and multicast support */
-extern void		dev_set_rx_mode(struct net_device *dev);
-extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int		dev_set_promiscuity(struct net_device *dev, int inc);
-extern int		dev_set_allmulti(struct net_device *dev, int inc);
-extern void		netdev_state_change(struct net_device *dev);
-extern void		netdev_notify_peers(struct net_device *dev);
-extern void		netdev_features_change(struct net_device *dev);
+void dev_set_rx_mode(struct net_device *dev);
+void __dev_set_rx_mode(struct net_device *dev);
+int dev_set_promiscuity(struct net_device *dev, int inc);
+int dev_set_allmulti(struct net_device *dev, int inc);
+void netdev_state_change(struct net_device *dev);
+void netdev_notify_peers(struct net_device *dev);
+void netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
-extern void		dev_load(struct net *net, const char *name);
-extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-					       struct rtnl_link_stats64 *storage);
-extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
-				    const struct net_device_stats *netdev_stats);
+void dev_load(struct net *net, const char *name);
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage);
+void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+			     const struct net_device_stats *netdev_stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		bpf_jit_enable;
 
-extern bool netdev_has_upper_dev(struct net_device *dev,
-				 struct net_device *upper_dev);
-extern bool netdev_has_any_upper_dev(struct net_device *dev);
-extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
-							    struct list_head **iter);
+bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
+bool netdev_has_any_upper_dev(struct net_device *dev);
+struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
+						     struct list_head **iter);
 
 /* iterate through upper list, must be called under RCU read lock */
 #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
@@ -2833,10 +2818,10 @@ extern struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *d
 	     updev; \
 	     updev = netdev_all_upper_get_next_dev_rcu(dev, &(iter)))
 
-extern void *netdev_lower_get_next_private(struct net_device *dev,
-					   struct list_head **iter);
-extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
-					       struct list_head **iter);
+void *netdev_lower_get_next_private(struct net_device *dev,
+				    struct list_head **iter);
+void *netdev_lower_get_next_private_rcu(struct net_device *dev,
+					struct list_head **iter);
 
 #define netdev_for_each_lower_private(dev, priv, iter) \
 	for (iter = (dev)->adj_list.lower.next, \
@@ -2850,27 +2835,26 @@ extern void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
-extern void *netdev_adjacent_get_private(struct list_head *adj_list);
-extern struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
-extern struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
-extern int netdev_upper_dev_link(struct net_device *dev,
+void *netdev_adjacent_get_private(struct list_head *adj_list);
+struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
+struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
+int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
+int netdev_master_upper_dev_link(struct net_device *dev,
 				 struct net_device *upper_dev);
-extern int netdev_master_upper_dev_link(struct net_device *dev,
-					struct net_device *upper_dev);
-extern int netdev_master_upper_dev_link_private(struct net_device *dev,
-						struct net_device *upper_dev,
-						void *private);
-extern void netdev_upper_dev_unlink(struct net_device *dev,
-				    struct net_device *upper_dev);
-extern void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
-					      struct net_device *lower_dev);
-extern void *netdev_lower_dev_get_private(struct net_device *dev,
-					  struct net_device *lower_dev);
-extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
-	netdev_features_t features, bool tx_path);
-extern struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
-					  netdev_features_t features);
+int netdev_master_upper_dev_link_private(struct net_device *dev,
+					 struct net_device *upper_dev,
+					 void *private);
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev);
+void *netdev_lower_dev_get_private_rcu(struct net_device *dev,
+				       struct net_device *lower_dev);
+void *netdev_lower_dev_get_private(struct net_device *dev,
+				   struct net_device *lower_dev);
+int skb_checksum_help(struct sk_buff *skb);
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+				  netdev_features_t features, bool tx_path);
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features);
 
 static inline
 struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
@@ -2892,30 +2876,30 @@ static inline bool can_checksum_protocol(netdev_features_t features,
 }
 
 #ifdef CONFIG_BUG
-extern void netdev_rx_csum_fault(struct net_device *dev);
+void netdev_rx_csum_fault(struct net_device *dev);
 #else
 static inline void netdev_rx_csum_fault(struct net_device *dev)
 {
 }
 #endif
 /* rx skb timestamps */
-extern void		net_enable_timestamp(void);
-extern void		net_disable_timestamp(void);
+void net_enable_timestamp(void);
+void net_disable_timestamp(void);
 
 #ifdef CONFIG_PROC_FS
-extern int __init dev_proc_init(void);
+int __init dev_proc_init(void);
 #else
 #define dev_proc_init() 0
 #endif
 
-extern int netdev_class_create_file(struct class_attribute *class_attr);
-extern void netdev_class_remove_file(struct class_attribute *class_attr);
+int netdev_class_create_file(struct class_attribute *class_attr);
+void netdev_class_remove_file(struct class_attribute *class_attr);
 
 extern struct kobj_ns_type_operations net_ns_type_operations;
 
-extern const char *netdev_drivername(const struct net_device *dev);
+const char *netdev_drivername(const struct net_device *dev);
 
-extern void linkwatch_run_queue(void);
+void linkwatch_run_queue(void);
 
 static inline netdev_features_t netdev_get_wanted_features(
 	struct net_device *dev)
@@ -3007,22 +2991,22 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
-extern __printf(3, 4)
+__printf(3, 4)
 int netdev_printk(const char *level, const struct net_device *dev,
 		  const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_emerg(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_alert(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_crit(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_err(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_warn(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_notice(const struct net_device *dev, const char *format, ...);
-extern __printf(2, 3)
+__printf(2, 3)
 int netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define MODULE_ALIAS_NETDEV(device) \
-- 
1.8.1.2.459.gbcd45b4.dirty

^ permalink raw reply related

* Re: [PATCH 7/7] sysfs: @name comes before @ns
From: Greg KH @ 2013-09-26 22:32 UTC (permalink / raw)
  To: Tejun Heo; +Cc: Eric W. Biederman, linux-kernel, kay, netdev, lizefan
In-Reply-To: <20130912034943.GB4723@mtj.dyndns.org>

On Wed, Sep 11, 2013 at 11:49:43PM -0400, Tejun Heo wrote:
> Hello, Eric.
> 
> On Wed, Sep 11, 2013 at 08:39:27PM -0700, Eric W. Biederman wrote:
> > @ns is more significant so it should come first.
> > 
> > Where do we have the backwards convention of putting @name first?
> 
> Because @ns is optional and you end up with stupid stuff like
> 
> 	sysfs_xxx_ns(@param, @ns, @name)
> 	sysfs_xxx(@param, @name)
> 
> You put optional params after the mandatory ones.  It may be difficult
> to accept for you but @ns is a *clearly* optional thing for sysfs.

Sorry Eric, but I agree with Tejun here, the optional part is @ns, not
name, so it should go at the end.

thanks,

greg k-h

^ permalink raw reply

* Introduce support to lazy initialize mostly static keys
From: Hannes Frederic Sowa @ 2013-09-26 23:16 UTC (permalink / raw)
  To: netdev; +Cc: edumazet, davem, fw, ycheng

Hi!

This series implements support for delaying the initialization of secret
keys, e.g. used for hashing, for as long as possible. This functionality
is implemented by a new macro, net_get_random_once.

I already used it to protect the socket hashes, the syncookie secret
(most important) and the tcp_fastopen secrets.

This series depends on a patch from Eric Dumazet (already in patchworks):
"net: net_secret should not depend on TCP"

Included patches:
  ipv4: split inet_ehashfn to one hash
  ipv6: split inet6_ehashfn to one hash
  net: introduce new macro net_get_random_once
  inet: split syncookie keys for ipv4 and ipv6 and
  inet: convert inet_ehash_secret and
  tcp: switch tcp_fastopen key generation to
  net: switch net_secret key generation to

Diffstat:
 include/linux/net.h            |   14 ++++++++++++++
 include/net/inet6_hashtables.h |   29 +++++++----------------------
 include/net/inet_sock.h        |   26 ++++++--------------------
 include/net/ipv6.h             |    4 ++--
 include/net/tcp.h              |    3 +--
 net/core/secure_seq.c          |   14 ++------------
 net/core/utils.c               |   21 +++++++++++++++++++++
 net/ipv4/af_inet.c             |   27 ---------------------------
 net/ipv4/inet_hashtables.c     |   25 +++++++++++++++++++++++++
 net/ipv4/syncookies.c          |   15 +++++----------
 net/ipv4/sysctl_net_ipv4.c     |    5 +++++
 net/ipv4/tcp_fastopen.c        |   21 ++++++++++-----------
 net/ipv4/udp.c                 |   22 +++++++++++++++++-----
 net/ipv6/af_inet6.c            |    5 -----
 net/ipv6/inet6_hashtables.c    |   40 +++++++++++++++++++++++++++++++++++++---
 net/ipv6/syncookies.c          |   12 +++++++++---
 net/ipv6/udp.c                 |   37 ++++++++++++++++++++++++++++++-------
 net/rds/connection.c           |   18 ++++++++++++------
 18 files changed, 203 insertions(+), 135 deletions(-)


Greetings,

  Hannes

^ permalink raw reply

* [PATCH net-next] bonding: trivial: remove forgotten bond_next_vlan()
From: Veaceslav Falico @ 2013-09-26 23:22 UTC (permalink / raw)
  To: netdev; +Cc: Veaceslav Falico, Jay Vosburgh, Andy Gospodarek

It's a forgotten declaration of a function which was removed some time
ago already.

CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bonding.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 5b71601..05d6268 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -429,7 +429,6 @@ static inline bool slave_can_tx(struct slave *slave)
 struct bond_net;
 
 int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
-struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id);
 int bond_create(struct net *net, const char *name);
-- 
1.8.4

^ permalink raw reply related

* Re: Introduce support to lazy initialize mostly static keys
From: Hannes Frederic Sowa @ 2013-09-26 23:30 UTC (permalink / raw)
  To: netdev, edumazet, davem, fw, ycheng
In-Reply-To: <1380236199-3726-1-git-send-email-hannes@stressinduktion.org>

On Fri, Sep 27, 2013 at 01:16:57AM +0200, Hannes Frederic Sowa wrote:
> This series implements support for delaying the initialization of secret
> keys, e.g. used for hashing, for as long as possible. This functionality
> is implemented by a new macro, net_get_random_bytes.
> 
> I already used it to protect the socket hashes, the syncookie secret
> (most important) and the tcp_fastopen secrets.
> 
> This series depends on a patch from Eric Dumazet (already in patchworks):
> "net: net_secret should not depend on TCP"

Oh, sorry. My tunnel was not working. Mails hit a strange fallback. Will
resend shortly.

^ permalink raw reply

* Introduce support to lazy initialize mostly static keys
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev

Hi!

This series implements support for delaying the initialization of secret
keys, e.g. used for hashing, for as long as possible. This functionality
is implemented by a new macro, net_get_random_once.

I already used it to protect the socket hashes, the syncookie secret
(most important) and the tcp_fastopen secrets.

This series depends on a patch from Eric Dumazet (already in patchworks):
"net: net_secret should not depend on TCP"

Included patches:
  ipv4: split inet_ehashfn to one hash
  ipv6: split inet6_ehashfn to one hash
  net: introduce new macro net_get_random_once
  inet: split syncookie keys for ipv4 and ipv6 and
  inet: convert inet_ehash_secret and
  tcp: switch tcp_fastopen key generation to
  net: switch net_secret key generation to

Diffstat:
 include/linux/net.h            |   14 ++++++++++++++
 include/net/inet6_hashtables.h |   29 +++++++----------------------
 include/net/inet_sock.h        |   26 ++++++--------------------
 include/net/ipv6.h             |    4 ++--
 include/net/tcp.h              |    3 +--
 net/core/secure_seq.c          |   14 ++------------
 net/core/utils.c               |   21 +++++++++++++++++++++
 net/ipv4/af_inet.c             |   27 ---------------------------
 net/ipv4/inet_hashtables.c     |   25 +++++++++++++++++++++++++
 net/ipv4/syncookies.c          |   15 +++++----------
 net/ipv4/sysctl_net_ipv4.c     |    5 +++++
 net/ipv4/tcp_fastopen.c        |   21 ++++++++++-----------
 net/ipv4/udp.c                 |   22 +++++++++++++++++-----
 net/ipv6/af_inet6.c            |    5 -----
 net/ipv6/inet6_hashtables.c    |   40 +++++++++++++++++++++++++++++++++++++---
 net/ipv6/syncookies.c          |   12 +++++++++---
 net/ipv6/udp.c                 |   37 ++++++++++++++++++++++++++++++-------
 net/rds/connection.c           |   18 ++++++++++++------
 18 files changed, 203 insertions(+), 135 deletions(-)


Greetings,

  Hannes

^ permalink raw reply

* [PATCH net-next 1/7] ipv4: split inet_ehashfn to hash functions per compilation unit
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev; +Cc: Hannes Frederic Sowa, Eric Dumazet, David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

This duplicates a bit of code but lets us easily introduce
separate secret keys later. The separate compilation units are
ipv4/inet_hashtables.o, ipv4/udp.o and rds/connection.o.

Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/inet_sock.h    | 22 ++++++----------------
 net/ipv4/inet_hashtables.c | 21 +++++++++++++++++++++
 net/ipv4/udp.c             | 16 ++++++++++++----
 net/rds/connection.c       |  6 +++---
 4 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 636d203..fba0b23 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -205,26 +205,16 @@ extern u32 inet_ehash_secret;
 extern u32 ipv6_hash_secret;
 void build_ehash_secret(void);
 
-static inline unsigned int inet_ehashfn(struct net *net,
-					const __be32 laddr, const __u16 lport,
-					const __be32 faddr, const __be16 fport)
+static inline unsigned int __inet_ehashfn(const __be32 laddr,
+					  const __u16 lport,
+					  const __be32 faddr,
+					  const __be16 fport,
+					  u32 initval)
 {
 	return jhash_3words((__force __u32) laddr,
 			    (__force __u32) faddr,
 			    ((__u32) lport) << 16 | (__force __u32)fport,
-			    inet_ehash_secret + net_hash_mix(net));
-}
-
-static inline int inet_sk_ehashfn(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const __be32 laddr = inet->inet_rcv_saddr;
-	const __u16 lport = inet->inet_num;
-	const __be32 faddr = inet->inet_daddr;
-	const __be16 fport = inet->inet_dport;
-	struct net *net = sock_net(sk);
-
-	return inet_ehashfn(net, laddr, lport, faddr, fport);
+			    initval);
 }
 
 static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7bd8983..f29bdf6 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,27 @@
 #include <net/secure_seq.h>
 #include <net/ip.h>
 
+static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
+				 const __u16 lport, const __be32 faddr,
+				 const __be16 fport)
+{
+	return __inet_ehashfn(laddr, lport, faddr, fport,
+			      inet_ehash_secret + net_hash_mix(net));
+}
+
+
+static unsigned int inet_sk_ehashfn(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const __be32 laddr = inet->inet_rcv_saddr;
+	const __u16 lport = inet->inet_num;
+	const __be32 faddr = inet->inet_daddr;
+	const __be16 fport = inet->inet_dport;
+	struct net *net = sock_net(sk);
+
+	return inet_ehashfn(net, laddr, lport, faddr, fport);
+}
+
 /*
  * Allocate and initialize a new local port bind bucket.
  * The bindhash mutex for snum's hash chain must be held here.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95..86b43f5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -406,6 +406,14 @@ static inline int compute_score2(struct sock *sk, struct net *net,
 	return score;
 }
 
+static unsigned int udp_ehashfn(struct net *net, const __be32 laddr,
+				 const __u16 lport, const __be32 faddr,
+				 const __be16 fport)
+{
+	return __inet_ehashfn(laddr, lport, faddr, fport,
+			      inet_ehash_secret + net_hash_mix(net));
+}
+
 
 /* called with read_rcu_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
@@ -429,8 +437,8 @@ begin:
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				hash = inet_ehashfn(net, daddr, hnum,
-						    saddr, sport);
+				hash = udp_ehashfn(net, daddr, hnum,
+						   saddr, sport);
 				matches = 1;
 			}
 		} else if (score == badness && reuseport) {
@@ -510,8 +518,8 @@ begin:
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				hash = inet_ehashfn(net, daddr, hnum,
-						    saddr, sport);
+				hash = udp_ehashfn(net, daddr, hnum,
+						   saddr, sport);
 				matches = 1;
 			}
 		} else if (score == badness && reuseport) {
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 642ad42..45e2366 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -52,9 +52,9 @@ static struct kmem_cache *rds_conn_slab;
 static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
 {
 	/* Pass NULL, don't need struct net for hash */
-	unsigned long hash = inet_ehashfn(NULL,
-					  be32_to_cpu(laddr), 0,
-					  be32_to_cpu(faddr), 0);
+	unsigned long hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
+					    be32_to_cpu(faddr), 0,
+					    inet_ehash_secret);
 	return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
 }
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 3/7] net: introduce new macro net_get_random_once
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev; +Cc: Hannes Frederic Sowa, Eric Dumazet, David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

net_get_random_once is a new macro which handles the initialization
of secret keys. It is possible to call it in the fast path. Only the
initialization depends on the spinlock and is rather slow. Otherwise
it should get used just before the key is used to delay the entropy
extraction as late as possible to get better randomness. It returns true
if the key got initialized.

Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/linux/net.h | 14 ++++++++++++++
 net/core/utils.c    | 21 +++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4f27575..d14fad5 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -243,6 +243,20 @@ do {								\
 #define net_random()		prandom_u32()
 #define net_srandom(seed)	prandom_seed((__force u32)(seed))
 
+bool __net_get_random_once(void *buf, int nbytes, bool *done);
+
+/* BE CAREFUL: this function is not interrupt safe */
+#define net_get_random_once(buf, nbytes)				\
+	({								\
+		static bool ___done = false;				\
+		bool ___ret = false;					\
+		if (unlikely(!___done))					\
+			___ret = __net_get_random_once(buf,		\
+						       nbytes,		\
+						       &___done);	\
+		___ret;							\
+	})
+
 extern int   	     kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 				    struct kvec *vec, size_t num, size_t len);
 extern int   	     kernel_recvmsg(struct socket *sock, struct msghdr *msg,
diff --git a/net/core/utils.c b/net/core/utils.c
index aa88e23..b420547 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -338,3 +338,24 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
 				  csum_unfold(*sum)));
 }
 EXPORT_SYMBOL(inet_proto_csum_replace16);
+
+bool __net_get_random_once(void *buf, int nbytes, bool *done)
+{
+	static DEFINE_SPINLOCK(lock);
+
+	spin_lock_bh(&lock);
+	if (*done) {
+		spin_unlock_bh(&lock);
+		return false;
+	}
+
+	get_random_bytes(buf, nbytes);
+	/* Make sure random data is published before toggeling done.
+	 * There is no corresponding rmb.
+	 */
+	smp_wmb();
+	*done = true;
+	spin_unlock_bh(&lock);
+	return true;
+}
+EXPORT_SYMBOL(__net_get_random_once);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 4/7] inet: split syncookie keys for ipv4 and ipv6 and initialize with net_get_random_once
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev
  Cc: Hannes Frederic Sowa, Florian Westphal, Eric Dumazet,
	David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

This patch splits the secret key for syncookies for ipv4 and ipv6 and
initializes them with net_get_random_once. This change was the reason I
did this series. I think the initialization of the syncookie_secret is
way too early.

Cc: Florian Westphal <fw@strlen.de>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/tcp.h     |  1 -
 net/ipv4/syncookies.c | 15 +++++----------
 net/ipv6/syncookies.c | 12 +++++++++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index de870ee..9299560 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -475,7 +475,6 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size);
 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
 
 /* From syncookies.c */
-extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
 int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 		      u32 cookie);
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 15e0241..22f5409 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -25,15 +25,7 @@
 
 extern int sysctl_tcp_syncookies;
 
-__u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-EXPORT_SYMBOL(syncookie_secret);
-
-static __init int init_syncookies(void)
-{
-	get_random_bytes(syncookie_secret, sizeof(syncookie_secret));
-	return 0;
-}
-__initcall(init_syncookies);
+static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
 
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
@@ -44,8 +36,11 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
 static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 		       u32 count, int c)
 {
-	__u32 *tmp = __get_cpu_var(ipv4_cookie_scratch);
+	__u32 *tmp;
+
+	net_get_random_once(syncookie_secret, sizeof(syncookie_secret));
 
+	tmp  = __get_cpu_var(ipv4_cookie_scratch);
 	memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c]));
 	tmp[0] = (__force u32)saddr;
 	tmp[1] = (__force u32)daddr;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d703218..413eb7c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -24,6 +24,8 @@
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
 
+static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS];
+
 /* RFC 2460, Section 8.3:
  * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..]
  *
@@ -61,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
 static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,
 		       __be16 sport, __be16 dport, u32 count, int c)
 {
-	__u32 *tmp = __get_cpu_var(ipv6_cookie_scratch);
+	__u32 *tmp;
+
+	net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret));
+
+	tmp  = __get_cpu_var(ipv6_cookie_scratch);
 
 	/*
 	 * we have 320 bits of information to hash, copy in the remaining
-	 * 192 bits required for sha_transform, from the syncookie_secret
+	 * 192 bits required for sha_transform, from the syncookie6_secret
 	 * and overwrite the digest with the secret
 	 */
-	memcpy(tmp + 10, syncookie_secret[c], 44);
+	memcpy(tmp + 10, syncookie6_secret[c], 44);
 	memcpy(tmp, saddr, 16);
 	memcpy(tmp + 4, daddr, 16);
 	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 5/7] inet: convert inet_ehash_secret and ipv6_hash_secret to net_get_random_once
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev; +Cc: Hannes Frederic Sowa, Eric Dumazet, David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

Initialize the ehash and ipv6_hash_secrets with net_get_random_once.

Each compilation unit gets its own secret now:
  ipv4/inet_hashtables.o
  ipv4/udp.o
  ipv6/inet6_hashtables.o
  ipv6/udp.o
  rds/connection.o

The functions still get inlined into the hashing functions. In the fast
path we have at most two (needed in ipv6) if (unlikely(...)).

Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/inet_sock.h     |  4 ----
 net/ipv4/af_inet.c          | 27 ---------------------------
 net/ipv4/inet_hashtables.c  |  4 ++++
 net/ipv4/udp.c              |  6 +++++-
 net/ipv6/af_inet6.c         |  5 -----
 net/ipv6/inet6_hashtables.c | 15 ++++++++++++---
 net/ipv6/udp.c              | 17 ++++++++++++++---
 net/rds/connection.c        | 12 +++++++++---
 8 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index fba0b23..10cda2f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -201,10 +201,6 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 
 int inet_sk_rebuild_header(struct sock *sk);
 
-extern u32 inet_ehash_secret;
-extern u32 ipv6_hash_secret;
-void build_ehash_secret(void);
-
 static inline unsigned int __inet_ehashfn(const __be32 laddr,
 					  const __u16 lport,
 					  const __be32 faddr,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cfeb85c..c3352ce 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -245,29 +245,6 @@ out:
 }
 EXPORT_SYMBOL(inet_listen);
 
-u32 inet_ehash_secret __read_mostly;
-EXPORT_SYMBOL(inet_ehash_secret);
-
-u32 ipv6_hash_secret __read_mostly;
-EXPORT_SYMBOL(ipv6_hash_secret);
-
-/*
- * inet_ehash_secret must be set exactly once, and to a non nul value
- * ipv6_hash_secret must be set exactly once.
- */
-void build_ehash_secret(void)
-{
-	u32 rnd;
-
-	do {
-		get_random_bytes(&rnd, sizeof(rnd));
-	} while (rnd == 0);
-
-	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
-		get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-}
-EXPORT_SYMBOL(build_ehash_secret);
-
 /*
  *	Create an inet socket.
  */
@@ -284,10 +261,6 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
-	if (unlikely(!inet_ehash_secret))
-		if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
-			build_ehash_secret();
-
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f29bdf6..1427818 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -28,6 +28,10 @@ static unsigned int inet_ehashfn(struct net *net, const __be32 laddr,
 				 const __u16 lport, const __be32 faddr,
 				 const __be16 fport)
 {
+	static u32 inet_ehash_secret __read_mostly;
+
+	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
+
 	return __inet_ehashfn(laddr, lport, faddr, fport,
 			      inet_ehash_secret + net_hash_mix(net));
 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 86b43f5..4536feb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -410,8 +410,12 @@ static unsigned int udp_ehashfn(struct net *net, const __be32 laddr,
 				 const __u16 lport, const __be32 faddr,
 				 const __be16 fport)
 {
+	static u32 udp_ehash_secret __read_mostly;
+
+	net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));
+
 	return __inet_ehashfn(laddr, lport, faddr, fport,
-			      inet_ehash_secret + net_hash_mix(net));
+			      udp_ehash_secret + net_hash_mix(net));
 }
 
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 4966b12..5bd9b25 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -110,11 +110,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
 	int try_loading_module = 0;
 	int err;
 
-	if (sock->type != SOCK_RAW &&
-	    sock->type != SOCK_DGRAM &&
-	    !inet_ehash_secret)
-		build_ehash_secret();
-
 	/* Look for the requested type/protocol pair. */
 lookup_protocol:
 	err = -ESOCKTNOSUPPORT;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 0b8e101..02abe8f 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -29,10 +29,19 @@ static unsigned int inet6_ehashfn(struct net *net,
 				  const struct in6_addr *faddr,
 				  const __be16 fport)
 {
-	const u32 lhash = (__force u32)laddr->s6_addr32[3];
-	const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+	static u32 inet6_ehash_secret __read_mostly;
+	static u32 ipv6_hash_secret __read_mostly;
+
+	u32 lhash, fhash;
+
+	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
+	net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+
+	lhash = (__force u32)laddr->s6_addr32[3];
+	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+
 	return __inet6_ehashfn(lhash, lport, fhash, fport,
-			       inet_ehash_secret + net_hash_mix(net));
+			       inet6_ehash_secret + net_hash_mix(net));
 }
 
 static int inet6_sk_ehashfn(const struct sock *sk)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index df32619..dea278a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -59,10 +59,21 @@ static unsigned int udp6_ehashfn(struct net *net,
 				  const struct in6_addr *faddr,
 				  const __be16 fport)
 {
-	const u32 lhash = (__force u32)laddr->s6_addr32[3];
-	const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+	static u32 udp6_ehash_secret __read_mostly;
+	static u32 udp_ipv6_hash_secret __read_mostly;
+
+	u32 lhash, fhash;
+
+	net_get_random_once(&udp6_ehash_secret,
+			    sizeof(udp6_ehash_secret));
+	net_get_random_once(&udp_ipv6_hash_secret,
+			    sizeof(udp_ipv6_hash_secret));
+
+	lhash = (__force u32)laddr->s6_addr32[3];
+	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
+
 	return __inet6_ehashfn(lhash, lport, fhash, fport,
-			       inet_ehash_secret + net_hash_mix(net));
+			       udp_ipv6_hash_secret + net_hash_mix(net));
 }
 
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 45e2366..378c3a6 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -51,10 +51,16 @@ static struct kmem_cache *rds_conn_slab;
 
 static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
 {
+	static u32 rds_hash_secret __read_mostly;
+
+	unsigned long hash;
+
+	net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret));
+
 	/* Pass NULL, don't need struct net for hash */
-	unsigned long hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
-					    be32_to_cpu(faddr), 0,
-					    inet_ehash_secret);
+	hash = __inet_ehashfn(be32_to_cpu(laddr), 0,
+			      be32_to_cpu(faddr), 0,
+			      rds_hash_secret);
 	return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
 }
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 6/7] tcp: switch tcp_fastopen key generation to net_get_random_once
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev; +Cc: Hannes Frederic Sowa, Yuchung Cheng, Eric Dumazet,
	David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

Changed key initialization of tcp_fastopen cookies to net_get_random_once.

If the user sets a custom key net_get_random_once must be called at
least once to ensure we don't overwrite the user provided key when the
first cookie is generated later on.

Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/tcp.h          |  2 +-
 net/ipv4/sysctl_net_ipv4.c |  5 +++++
 net/ipv4/tcp_fastopen.c    | 21 ++++++++++-----------
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9299560..2a26100 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1322,7 +1322,7 @@ extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
 void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 			     struct tcp_fastopen_cookie *foc);
-
+void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
 /* Fastopen key context */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 540279f..d2b5140 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -267,6 +267,11 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
 			ret = -EINVAL;
 			goto bad_key;
 		}
+		/* Generate a dummy secret but don't publish it. This
+		 * is needed so we don't regenerate a new key on the
+		 * first invocation of tcp_fastopen_cookie_gen
+		 */
+		tcp_fastopen_init_key_once(false);
 		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
 	}
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index ab7bd35..316bfdc 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -14,6 +14,14 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 
 static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
 
+void tcp_fastopen_init_key_once(bool publish)
+{
+	static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+	if (net_get_random_once(key, sizeof(key)) && publish)
+		tcp_fastopen_reset_cipher(key, sizeof(key));
+}
+
 static void tcp_fastopen_ctx_free(struct rcu_head *head)
 {
 	struct tcp_fastopen_context *ctx =
@@ -70,6 +78,8 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	__be32 path[4] = { src, dst, 0, 0 };
 	struct tcp_fastopen_context *ctx;
 
+	tcp_fastopen_init_key_once(true);
+
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
@@ -78,14 +88,3 @@ void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
 	}
 	rcu_read_unlock();
 }
-
-static int __init tcp_fastopen_init(void)
-{
-	__u8 key[TCP_FASTOPEN_KEY_LENGTH];
-
-	get_random_bytes(key, sizeof(key));
-	tcp_fastopen_reset_cipher(key, sizeof(key));
-	return 0;
-}
-
-late_initcall(tcp_fastopen_init);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 2/7] ipv6: split inet6_ehashfn to hash functions per compilation unit
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev; +Cc: Hannes Frederic Sowa, Eric Dumazet, David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

This patch splits the inet6_ehashfn into separate ones in
ipv6/inet6_hashtables.o and ipv6/udp.o.

Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/inet6_hashtables.h | 29 +++++++----------------------
 include/net/ipv6.h             |  4 ++--
 net/ipv6/inet6_hashtables.c    | 25 +++++++++++++++++++++++++
 net/ipv6/udp.c                 | 20 ++++++++++++++++----
 4 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index f52fa88..ae06135 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -28,29 +28,14 @@
 
 struct inet_hashinfo;
 
-static inline unsigned int inet6_ehashfn(struct net *net,
-				const struct in6_addr *laddr, const u16 lport,
-				const struct in6_addr *faddr, const __be16 fport)
+static inline unsigned int __inet6_ehashfn(const u32 lhash,
+				    const u16 lport,
+				    const u32 fhash,
+				    const __be16 fport,
+				    const u32 initval)
 {
-	u32 ports = (((u32)lport) << 16) | (__force u32)fport;
-
-	return jhash_3words((__force u32)laddr->s6_addr32[3],
-			    ipv6_addr_jhash(faddr),
-			    ports,
-			    inet_ehash_secret + net_hash_mix(net));
-}
-
-static inline int inet6_sk_ehashfn(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *laddr = &np->rcv_saddr;
-	const struct in6_addr *faddr = &np->daddr;
-	const __u16 lport = inet->inet_num;
-	const __be16 fport = inet->inet_dport;
-	struct net *net = sock_net(sk);
-
-	return inet6_ehashfn(net, laddr, lport, faddr, fport);
+	const u32 ports = (((u32)lport) << 16) | (__force u32)fport;
+	return jhash_3words(lhash, fhash, ports, initval);
 }
 
 int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index fe1c7f6..a35055f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -539,14 +539,14 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
 }
 
 /* more secured version of ipv6_addr_hash() */
-static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
 {
 	u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
 
 	return jhash_3words(v,
 			    (__force u32)a->s6_addr32[2],
 			    (__force u32)a->s6_addr32[3],
-			    ipv6_hash_secret);
+			    initval);
 }
 
 static inline bool ipv6_addr_loopback(const struct in6_addr *a)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32b4a16..0b8e101 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -23,6 +23,31 @@
 #include <net/secure_seq.h>
 #include <net/ip.h>
 
+static unsigned int inet6_ehashfn(struct net *net,
+				  const struct in6_addr *laddr,
+				  const u16 lport,
+				  const struct in6_addr *faddr,
+				  const __be16 fport)
+{
+	const u32 lhash = (__force u32)laddr->s6_addr32[3];
+	const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+	return __inet6_ehashfn(lhash, lport, fhash, fport,
+			       inet_ehash_secret + net_hash_mix(net));
+}
+
+static int inet6_sk_ehashfn(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct in6_addr *laddr = &np->rcv_saddr;
+	const struct in6_addr *faddr = &np->daddr;
+	const __u16 lport = inet->inet_num;
+	const __be16 fport = inet->inet_dport;
+	struct net *net = sock_net(sk);
+
+	return inet6_ehashfn(net, laddr, lport, faddr, fport);
+}
+
 int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f405815..df32619 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -53,6 +53,18 @@
 #include <trace/events/skb.h>
 #include "udp_impl.h"
 
+static unsigned int udp6_ehashfn(struct net *net,
+				  const struct in6_addr *laddr,
+				  const u16 lport,
+				  const struct in6_addr *faddr,
+				  const __be16 fport)
+{
+	const u32 lhash = (__force u32)laddr->s6_addr32[3];
+	const u32 fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+	return __inet6_ehashfn(lhash, lport, fhash, fport,
+			       inet_ehash_secret + net_hash_mix(net));
+}
+
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
@@ -219,8 +231,8 @@ begin:
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				hash = inet6_ehashfn(net, daddr, hnum,
-						     saddr, sport);
+				hash = udp6_ehashfn(net, daddr, hnum,
+						    saddr, sport);
 				matches = 1;
 			} else if (score == SCORE2_MAX)
 				goto exact_match;
@@ -300,8 +312,8 @@ begin:
 			badness = score;
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-				hash = inet6_ehashfn(net, daddr, hnum,
-						     saddr, sport);
+				hash = udp6_ehashfn(net, daddr, hnum,
+						    saddr, sport);
 				matches = 1;
 			}
 		} else if (score == badness && reuseport) {
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next 7/7] net: switch net_secret key generation to net_get_random_once
From: Hannes Frederic Sowa @ 2013-09-26 23:32 UTC (permalink / raw)
  To: netdev; +Cc: Hannes Frederic Sowa, Eric Dumazet, David S. Miller
In-Reply-To: <1380238343-4318-1-git-send-email-hannes@stressinduktion.org>

Cc: Eric Dumazet <edumazet@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 net/core/secure_seq.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 3f1ec15..b02fd16 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -7,6 +7,7 @@
 #include <linux/hrtimer.h>
 #include <linux/ktime.h>
 #include <linux/string.h>
+#include <linux/net.h>
 
 #include <net/secure_seq.h>
 
@@ -16,18 +17,7 @@ static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
 
 static void net_secret_init(void)
 {
-	u32 tmp;
-	int i;
-
-	if (likely(net_secret[0]))
-		return;
-
-	for (i = NET_SECRET_SIZE; i > 0;) {
-		do {
-			get_random_bytes(&tmp, sizeof(tmp));
-		} while (!tmp);
-		cmpxchg(&net_secret[--i], 0, tmp);
-	}
+	net_get_random_once(net_secret, sizeof(net_secret));
 }
 
 #ifdef CONFIG_INET
-- 
1.8.3.1

^ permalink raw reply related

* Re: Question on Netlink IPv6 routing table lookup
From: Hannes Frederic Sowa @ 2013-09-27  0:16 UTC (permalink / raw)
  To: Fernando Gont; +Cc: netdev
In-Reply-To: <52440E48.3030102@gont.com.ar>

On Thu, Sep 26, 2013 at 07:36:56AM -0300, Fernando Gont wrote:
> Hi, Hannes,
> 
> On 09/23/2013 09:04 PM, Hannes Frederic Sowa wrote:
> > On Mon, Sep 23, 2013 at 04:41:07PM -0300, Fernando Gont wrote:
> >> If that's not (currently) possible, should I expect RTA_SRC to work as
> >> described above at some point in the future?
> > 
> > The RTA_SRC attribute matches on subtrees in the ipv6 routing table:
> > 
> > ip -6 r a default via fe80::1 dev eth0 from 2000::/64
> > ip -6 r a default via fe80::2 dev eth0 from 2000:1::/64
> > 
> > ip -6 r g :: from 2000::
> > ip -6 r g :: from 2000:1::
> > 
> > ...should return different routes. The from parameter is the RTA_SRC
> > attribute.
> 
> Isn't the "from" the "source network" in /proc/net/ipv6_route? (as
> described in <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/proc-net.html>)

Correct.

> I have a node with multiple Ethernet interfaces, each of which connected
> to a different network where IPv6 Router Advertisements are received...
> and the "source network" is set to all-zeros in all cases.

As soon as you add a route with RTA_SRC you will see those two fields != 0.
Subtrees don't get used by router advertisements. The only way to set these
fields is by using the RTA_SRC from user-land.

Greetings,

  Hannes

^ permalink raw reply

* [PATCH v2.40 3/7] ofp-actions: Add OFPUTIL_OFPAT13_PUSH_MPLS
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
	Jesse Gross, Ben Pfaff
  Cc: Isaku Yamahata, Ravi K
In-Reply-To: <1380241116-7661-1-git-send-email-horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

From: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>

This patch adds a new compatibility enum for use with MPLS, so that the
differing behaviour between OpenFlow 1.2 and 1.3 can be implemented in
ofproto-dpif-xlate.

Signed-off-by: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>
Signed-off-by: Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

---

v2.36 - v2.39
* No change

v2.35
* First post
---
 lib/ofp-actions.c | 5 ++++-
 lib/ofp-parse.c   | 1 +
 lib/ofp-util.c    | 3 +++
 lib/ofp-util.h    | 1 +
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index dcc82db..6d33711 100644
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -322,6 +322,7 @@ ofpact_from_nxast(const union ofp_action *a, enum ofputil_action_code code,
 #define OFPAT10_ACTION(ENUM, STRUCT, NAME) case OFPUTIL_##ENUM:
 #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) case OFPUTIL_##ENUM:
 #include "ofp-util.def"
+    case OFPUTIL_OFPAT13_PUSH_MPLS:
         NOT_REACHED();
 
     case OFPUTIL_NXAST_RESUBMIT:
@@ -480,6 +481,7 @@ ofpact_from_openflow10(const union ofp_action *a, struct ofpbuf *out)
     case OFPUTIL_ACTION_INVALID:
 #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) case OFPUTIL_##ENUM:
 #include "ofp-util.def"
+    case OFPUTIL_OFPAT13_PUSH_MPLS:
         NOT_REACHED();
 
     case OFPUTIL_OFPAT10_OUTPUT:
@@ -842,7 +844,8 @@ ofpact_from_openflow11(const union ofp_action *a, struct ofpbuf *out)
         ofpact_put_DEC_MPLS_TTL(out);
         break;
 
-    case OFPUTIL_OFPAT11_PUSH_MPLS: {
+    case OFPUTIL_OFPAT11_PUSH_MPLS:
+    case OFPUTIL_OFPAT13_PUSH_MPLS: {
         struct ofp11_action_push *oap = (struct ofp11_action_push *)a;
         if (!eth_type_mpls(oap->ethertype)) {
             return OFPERR_OFPBAC_BAD_ARGUMENT;
diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c
index 522bd95..85eff4d 100644
--- a/lib/ofp-parse.c
+++ b/lib/ofp-parse.c
@@ -804,6 +804,7 @@ parse_named_action(enum ofputil_action_code code,
         break;
 
     case OFPUTIL_OFPAT11_PUSH_MPLS:
+    case OFPUTIL_OFPAT13_PUSH_MPLS:
     case OFPUTIL_NXAST_PUSH_MPLS:
         error = str_to_u16(arg, "push_mpls", &ethertype);
         if (!error) {
diff --git a/lib/ofp-util.c b/lib/ofp-util.c
index 173b534..c9a2731 100644
--- a/lib/ofp-util.c
+++ b/lib/ofp-util.c
@@ -4777,6 +4777,9 @@ ofputil_put_action(enum ofputil_action_code code, struct ofpbuf *buf)
     case OFPUTIL_ACTION_INVALID:
         NOT_REACHED();
 
+    case OFPUTIL_OFPAT13_PUSH_MPLS:
+        return ofputil_put_OFPAT11_PUSH_MPLS(buf);
+
 #define OFPAT10_ACTION(ENUM, STRUCT, NAME)                  \
     case OFPUTIL_##ENUM: return ofputil_put_##ENUM(buf);
 #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)      \
diff --git a/lib/ofp-util.h b/lib/ofp-util.h
index d5f34d7..51e8f7e 100644
--- a/lib/ofp-util.h
+++ b/lib/ofp-util.h
@@ -795,6 +795,7 @@ enum OVS_PACKED_ENUM ofputil_action_code {
 #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) OFPUTIL_##ENUM,
 #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)   OFPUTIL_##ENUM,
 #include "ofp-util.def"
+    OFPUTIL_OFPAT13_PUSH_MPLS
 };
 
 /* The number of values of "enum ofputil_action_code". */
-- 
1.8.4

^ permalink raw reply related

* [PATCH v2.40 4/7] ofp-actions: Add separate OpenFlow 1.3 action parser
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
	Jesse Gross, Ben Pfaff
  Cc: Isaku Yamahata, Ravi K
In-Reply-To: <1380241116-7661-1-git-send-email-horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

From: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>

This patch adds new ofpact_from_openflow13() and
ofpacts_from_openflow13() functions parallel to the existing ofpact
handling code. In the OpenFlow 1.3 version, push_mpls is handled
differently, but all other actions are handled by the existing code.

For push_mpls, ofpact_push_mpls.ofpact.compat is set to
OFPUTIL_OFPAT13_PUSH_MPLS, which allows correct VLAN+MPLS datapath
behaviour to be determined at odp translation time.

Signed-off-by: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>
Signed-off-by: Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

---

v2.36 - v2.39
* No change

v2.35
* First post
---
 lib/ofp-actions.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index 6d33711..399560d 100644
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -884,6 +884,40 @@ ofpacts_from_openflow11(const union ofp_action *in, size_t n_in,
     return ofpacts_from_openflow(in, n_in, out, ofpact_from_openflow11);
 }
 \f
+static enum ofperr
+ofpact_from_openflow13(const union ofp_action *a, struct ofpbuf *out)
+{
+    enum ofputil_action_code code;
+    enum ofperr error;
+
+    error = decode_openflow11_action(a, &code);
+    if (error) {
+        return error;
+    }
+
+    if (code == OFPUTIL_OFPAT11_PUSH_MPLS) {
+        struct ofpact_push_mpls *oam;
+        struct ofp11_action_push *oap = (struct ofp11_action_push *)a;
+        if (!eth_type_mpls(oap->ethertype)) {
+            return OFPERR_OFPBAC_BAD_ARGUMENT;
+        }
+        oam = ofpact_put_PUSH_MPLS(out);
+        oam->ethertype = oap->ethertype;
+        oam->ofpact.compat = OFPUTIL_OFPAT13_PUSH_MPLS;
+    } else {
+        return ofpact_from_openflow11(a, out);
+    }
+
+    return error;
+}
+
+static enum ofperr
+ofpacts_from_openflow13(const union ofp_action *in, size_t n_in,
+                        struct ofpbuf *out)
+{
+    return ofpacts_from_openflow(in, n_in, out, ofpact_from_openflow13);
+}
+\f
 /* OpenFlow 1.1 instructions. */
 
 #define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME)             \
@@ -1088,6 +1122,17 @@ get_actions_from_instruction(const struct ofp11_instruction *inst,
     *n_actions = (ntohs(inst->len) - sizeof *inst) / OFP11_INSTRUCTION_ALIGN;
 }
 
+static uint8_t
+get_version_from_ofpbuf(const struct ofpbuf *openflow)
+{
+    if (openflow && openflow->l2) {
+        struct ofp_header *oh = openflow->l2;
+        return oh->version;
+    }
+
+    return OFP10_VERSION;
+}
+
 /* Attempts to convert 'actions_len' bytes of OpenFlow 1.1 actions from the
  * front of 'openflow' into ofpacts.  On success, replaces any existing content
  * in 'ofpacts' by the converted ofpacts; on failure, clears 'ofpacts'.
@@ -1107,8 +1152,15 @@ ofpacts_pull_openflow11_actions(struct ofpbuf *openflow,
                                 unsigned int actions_len,
                                 struct ofpbuf *ofpacts)
 {
-    return ofpacts_pull_actions(openflow, actions_len, ofpacts,
-                                ofpacts_from_openflow11);
+    uint8_t version = get_version_from_ofpbuf(openflow);
+
+    if (version < OFP13_VERSION) {
+        return ofpacts_pull_actions(openflow, actions_len, ofpacts,
+                                    ofpacts_from_openflow11);
+    } else {
+        return ofpacts_pull_actions(openflow, actions_len, ofpacts,
+                                    ofpacts_from_openflow13);
+    }
 }
 
 enum ofperr
@@ -1160,10 +1212,15 @@ ofpacts_pull_openflow11_instructions(struct ofpbuf *openflow,
     if (insts[OVSINST_OFPIT11_APPLY_ACTIONS]) {
         const union ofp_action *actions;
         size_t n_actions;
+        uint8_t version = get_version_from_ofpbuf(openflow);
 
         get_actions_from_instruction(insts[OVSINST_OFPIT11_APPLY_ACTIONS],
                                      &actions, &n_actions);
-        error = ofpacts_from_openflow11(actions, n_actions, ofpacts);
+        if (version < OFP13_VERSION) {
+            error = ofpacts_from_openflow11(actions, n_actions, ofpacts);
+        } else {
+            error = ofpacts_from_openflow13(actions, n_actions, ofpacts);
+        }
         if (error) {
             goto exit;
         }
-- 
1.8.4

^ permalink raw reply related

* [PATCH v2.40 0/7] MPLS actions and matches
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev, netdev, Jesse Gross, Ben Pfaff
  Cc: Pravin B Shelar, Ravi K, Isaku Yamahata, Joe Stringer

Hi,

This series implements MPLS actions and matches based on work by
Ravi K, Leo Alterman, Yamahata-san and Joe Stringer.

This series provides two changes

* Patches 1 - 5

  Provide user-space support for the VLAN/MPLS tag insertion order
  up to and including OpenFlow 1.2, and the different ordering
  specified from OpenFlow 1.3. In a nutshell the datapath always
  uses the OpenFlow 1.3 ordering, which is to always insert tags
  immediately after the L2 header, regardless of the presence of other
  tags. And ovs-vswitchd provides compatibility for the behaviour up
  to OpenFlow 1.2, which is that MPLS tags should follow VLAN tags
  if present.

  Ben, these are for you to review.

* Patches 6 and 7

  Adding basic MPLS action and match support to the kernel datapath

  Jesse, these are for you to review.


Differences between v2.40 and v2.39:

* Rebase for:
  + New dev_queue_xmit compat code
  + Updated put_vlan()
  + Removal of mpls_depth field from struct flow
* As suggested by Jesse Gross
  + Remove bogus mac_len update from push_mpls()
  + Slightly simplify push_mpls() by using eth_hdr()
  + Remove dubious condition !eth_p_mpls(inner_protocol) on
    an skb being considered to be MPLS in netdev_send()
  + Only use compatibility code for MPLS GSO segmentation on kernels
    older than 3.11
  + Revamp setting of inner_protocol
    1. Do not unconditionally set inner_protocol to the value of
       skb->protocol in ovs_execute_actions().
    2. Initialise inner_protocol to zero only if compatibility code is in
       use. In the case where compatibility code is not in use it will either
       be zero since the allocation of the skb or some other value set
       by some other user.
    3. Conditionally set the inner_protocol in push_mpls() to the value of
       skb->protocol when entering push_mpls(). The condition is that
       inner_protocol is zero and the value of skb->protocol is not an MPLS
       ethernet type.
    - This new scheme:
      + Pushes logic to set inner_protocol closer to the case where it is
	needed.
      + Avoids over-writing values set by other users.
* As suggested by Pravin Shelar
  + Only set and restore skb->protocol in rpl___skb_gso_segment() in the
    case of MPLS
  + Add inner_protocol field to struct ovs_gso_cb instead of ovs_skb_cb.
    This moves compatibility code closer to where it is used
    and creates fewer differences with mainline.
* Update comment on mac_len updates in datapath/actions.c
* Remove HAVE_INNER_PROCOTOL and instead just check
  against kernel version 3.11 directly.
  HAVE_INNER_PROCOTOL is a hang-over from work done prior
  to the merge of inner_protocol into the kernel.
* Remove dubious condition !eth_p_mpls(inner_protocol) on
  using inner_protocol as the type in rpl_skb_network_protocol()
* Do not update type of features in rpl_dev_queue_xmit.
  Though arguably correct this is not an inherent part of
  the changes made by this patch.
* Use skb_cow_head() in push_mpls()
  + Call skb_cow_head(skb, MPLS_HLEN) instead of
    make_writable(skb, skb->mac_len) to ensure that there is enough head
    room to push an MPLS LSE regardless of whether the skb is cloned or not.
  + This is consistent with the behaviour of rpl__vlan_put_tag().
  + This is a fix for crashes reported when performing mpls_push
    with headroom less than 4. This problem was introduced in v2.36.
* Skip popping in mpls_pop if the skb is too short to contain an MPLS LSE


Differences between v2.39 and v2.38:

* Rebase for removal of vlan, checksum and skb->mark compat code
  - This includes adding a new patch,
    "[PATCH v2.39 6/7] datapath: Break out deacceleration portion of
    vlan_push" to allow re-use of some existing code.


Differences between v2.38 and v2.37:

* Rebase for SCTP support
* Refactor validate_tp_port() to iterate over eth_types rather
  than open-coding the loop. With the addition of SCTP this logic
  is now used three times.


Differences between v2.37 and v2.36:

* Rebase


Differences between v2.36 and v2.35:

* Rebase

* Do not add set_ethertype() to datapath/actions.c.
  As this patch has evolved this function has devolved into
  two sets of functionality wrapped into a single function with
  only one line of common code. Refactor things to simply
  open-code setting the ether type in the two locations where
  set_ethertype() was previously used. The aim here is to improve
  readability.

* Update setting skb->ethertype after mpls push and pop.
  - In the case of push_mpls it should be set unconditionally
    as since v2.35 the behaviour of this function is to always push
    an MPLS LSE before any VLAN tags.
  - In the case of mpls_pop eth_p_mpls(skb->protocol) is a better
    test than skb->protocol != htons(ETH_P_8021Q) as it will give the
    correct behaviour in the presence of other VLAN ethernet types,
    for example 0x88a8 which is used by 802.1ad. Moreover, it seems
    correct to update the ethernet type if it was previously set
    according to the top-most MPLS LSE.

* Deaccelerate VLANs when pushing MPLS tags
  - Since v2.35 MPLS push will insert an MPLS LSE before any VLAN tags.
    This means that if an accelerated tag is present it should be
    deaccelerated to ensure it ends up in the correct position.

* Update skb->mac_len in push_mpls() so that it will be correct
  when used by a subsequent call to pop_mpls().

  As things stand I do not believe this is strictly necessary as
  ovs-vswitchd will not send a pop MPLS action after a push MPLS action.
  However, I have added this in order to code more defensively as I believe
  that if such a sequence did occur it would be rather unobvious why
  it didn't work.

* Do not add skb_cow_head() call in push_mpls().
  It is unnecessary as there is a make_writable() call.
  This change was also made in v2.30 but somehow the
  code regressed between then and v2.35.


Differences between v2.35 and v2.34:

* Add support for the tag ordering specified up until OpenFlow 1.2 and
  the ordering specified from OpenFlow 1.3.

* Correct error in datapath patch's handling of GSO in the presence
  of MPLS and absence of VLANs.


Pre-requisites.

This series applies on top of "[PATCH v3] Remove mpls_depth field from flow"

To aid review this series and its pre-requisite is available in git at:

git://github.com/horms/openvswitch.git devel/mpls-v2.40


Patch list and overall diffstat:

Joe Stringer (5):
  odp: Only pass vlan_tci to commit_vlan_action()
  odp: Allow VLAN actions after MPLS actions
  ofp-actions: Add OFPUTIL_OFPAT13_PUSH_MPLS
  ofp-actions: Add separate OpenFlow 1.3 action parser
  lib: Push MPLS tags in the OpenFlow 1.3 ordering

Simon Horman (2):
  datapath: Break out deacceleration portion of vlan_push
  datapath: Add basic MPLS support to kernel

 datapath/Modules.mk                             |   1 +
 datapath/actions.c                              | 156 ++++++++-
 datapath/datapath.c                             | 259 ++++++++++++--
 datapath/datapath.h                             |   2 +
 datapath/flow.c                                 |  58 ++-
 datapath/flow.h                                 |  17 +-
 datapath/linux/compat/gso.c                     | 117 ++++++-
 datapath/linux/compat/gso.h                     |  53 +++
 datapath/linux/compat/include/linux/netdevice.h |  14 +-
 datapath/linux/compat/netdevice.c               |  28 --
 datapath/mpls.h                                 |  15 +
 include/linux/openvswitch.h                     |   7 +-
 lib/flow.c                                      |   2 +-
 lib/odp-util.c                                  |  21 +-
 lib/odp-util.h                                  |   2 +-
 lib/ofp-actions.c                               |  68 +++-
 lib/ofp-parse.c                                 |   1 +
 lib/ofp-util.c                                  |   3 +
 lib/ofp-util.h                                  |   1 +
 lib/packets.c                                   |  10 +-
 lib/packets.h                                   |   2 +-
 ofproto/ofproto-dpif-xlate.c                    |  98 ++++--
 ofproto/ofproto-dpif-xlate.h                    |   5 +
 tests/ofproto-dpif.at                           | 446 ++++++++++++++++++++++++
 24 files changed, 1246 insertions(+), 140 deletions(-)
 create mode 100644 datapath/mpls.h

^ permalink raw reply

* [PATCH v2.40 1/7] odp: Only pass vlan_tci to commit_vlan_action()
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev, netdev, Jesse Gross, Ben Pfaff
  Cc: Pravin B Shelar, Ravi K, Isaku Yamahata, Joe Stringer
In-Reply-To: <1380241116-7661-1-git-send-email-horms@verge.net.au>

From: Joe Stringer <joe@wand.net.nz>

This allows for future patches to pass different tci values to
commit_vlan_action() without passing an entire flow structure.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Simon Horman <horms@verge.net.au>

---

v2.36 - v2.39
* No change

v2.35
* First post
---
 lib/odp-util.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/odp-util.c b/lib/odp-util.c
index 85256b7..0785c6a 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -3318,10 +3318,10 @@ commit_set_ether_addr_action(const struct flow *flow, struct flow *base,
 }
 
 static void
-commit_vlan_action(const struct flow *flow, struct flow *base,
+commit_vlan_action(ovs_be16 vlan_tci, struct flow *base,
                    struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
-    if (base->vlan_tci == flow->vlan_tci) {
+    if (base->vlan_tci == vlan_tci) {
         return;
     }
 
@@ -3331,15 +3331,15 @@ commit_vlan_action(const struct flow *flow, struct flow *base,
         nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN);
     }
 
-    if (flow->vlan_tci & htons(VLAN_CFI)) {
+    if (vlan_tci & htons(VLAN_CFI)) {
         struct ovs_action_push_vlan vlan;
 
         vlan.vlan_tpid = htons(ETH_TYPE_VLAN);
-        vlan.vlan_tci = flow->vlan_tci;
+        vlan.vlan_tci = vlan_tci;
         nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN,
                           &vlan, sizeof vlan);
     }
-    base->vlan_tci = flow->vlan_tci;
+    base->vlan_tci = vlan_tci;
 }
 
 static void
@@ -3556,7 +3556,7 @@ commit_odp_actions(const struct flow *flow, struct flow *base,
                    int *mpls_depth_delta)
 {
     commit_set_ether_addr_action(flow, base, odp_actions, wc);
-    commit_vlan_action(flow, base, odp_actions, wc);
+    commit_vlan_action(flow->vlan_tci, base, odp_actions, wc);
     commit_set_nw_action(flow, base, odp_actions, wc);
     commit_set_port_action(flow, base, odp_actions, wc);
     /* Committing MPLS actions should occur after committing nw and port
-- 
1.8.4

^ permalink raw reply related

* [PATCH v2.40 2/7] odp: Allow VLAN actions after MPLS actions
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev, netdev, Jesse Gross, Ben Pfaff
  Cc: Pravin B Shelar, Ravi K, Isaku Yamahata, Joe Stringer
In-Reply-To: <1380241116-7661-1-git-send-email-horms@verge.net.au>

From: Joe Stringer <joe@wand.net.nz>

OpenFlow 1.2 and 1.3 differ on their handling of MPLS actions in the
presence of VLAN tags. To allow correct behaviour to be committed in
each situation, this patch adds a second round of VLAN tag action
handling to commit_odp_actions(), which occurs after MPLS actions. This
is implemented with a new field in 'struct xlate_in' called 'vlan_tci'.

When a push_mpls action is composed, the flow's current VLAN state is
stored into xin->vlan_tci, and flow->vlan_tci is set to 0 (pop_vlan). If
a VLAN tag is present, it is stripped; if not, then there is no change.
Any later modifications to the VLAN state are written to xin->vlan_tci.
When committing the actions, flow->vlan_tci is used before MPLS actions,
and xin->vlan_tci is used afterwards. This retains the current datapath
behaviour, but allows VLAN actions to be applied in a more flexible
manner.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Simon Horman <horms@verge.net.au>

---

v2.40
* Rebase for removal of mpls_depth from struct flow

v2.38 - v2.39
* No change

v2.37
* Rebase

v2.36
* No change

v2.35
* First post
---
 lib/odp-util.c               |   9 +-
 lib/odp-util.h               |   2 +-
 ofproto/ofproto-dpif-xlate.c |  90 ++++++++++++++-----
 ofproto/ofproto-dpif-xlate.h |   5 ++
 tests/ofproto-dpif.at        | 209 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 292 insertions(+), 23 deletions(-)

diff --git a/lib/odp-util.c b/lib/odp-util.c
index 0785c6a..fcfa91b 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -3549,11 +3549,15 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base,
  * key from 'base' into 'flow', and then changes 'base' the same way.  Does not
  * commit set_tunnel actions.  Users should call commit_odp_tunnel_action()
  * in addition to this function if needed.  Sets fields in 'wc' that are
- * used as part of the action. */
+ * used as part of the action.
+ *
+ * VLAN actions may be committed twice; If vlan_tci in 'flow' differs from the
+ * one in 'base', then it is committed before MPLS actions. If 'final_vlan_tci'
+ * differs from 'flow->vlan_tci', it is committed afterwards. */
 void
 commit_odp_actions(const struct flow *flow, struct flow *base,
                    struct ofpbuf *odp_actions, struct flow_wildcards *wc,
-                   int *mpls_depth_delta)
+                   int *mpls_depth_delta, ovs_be16 final_vlan_tci)
 {
     commit_set_ether_addr_action(flow, base, odp_actions, wc);
     commit_vlan_action(flow->vlan_tci, base, odp_actions, wc);
@@ -3564,6 +3568,7 @@ commit_odp_actions(const struct flow *flow, struct flow *base,
      * that it is no longer IP and thus nw and port actions are no longer valid.
      */
     commit_mpls_action(flow, base, odp_actions, wc, mpls_depth_delta);
+    commit_vlan_action(final_vlan_tci, base, odp_actions, wc);
     commit_set_priority_action(flow, base, odp_actions, wc);
     commit_set_pkt_mark_action(flow, base, odp_actions, wc);
 }
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 4abf543..c7fc1eb 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -131,7 +131,7 @@ void commit_odp_tunnel_action(const struct flow *, struct flow *base,
                               struct ofpbuf *odp_actions);
 void commit_odp_actions(const struct flow *, struct flow *base,
                         struct ofpbuf *odp_actions, struct flow_wildcards *wc,
-                        int *mpls_depth_delta);
+                        int *mpls_depth_delta, ovs_be16 final_vlan_tci);
 \f
 /* ofproto-dpif interface.
  *
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 5482323..1cf5d52 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -982,10 +982,11 @@ static void
 output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
               uint16_t vlan)
 {
-    ovs_be16 *flow_tci = &ctx->xin->flow.vlan_tci;
+    ovs_be16 *flow_tci = &ctx->xin->vlan_tci;
     uint16_t vid;
     ovs_be16 tci, old_tci;
     struct xport *xport;
+    bool flow_tci_equal_to_xin = (*flow_tci == ctx->xin->flow.vlan_tci);
 
     vid = output_vlan_to_vid(out_xbundle, vlan);
     if (list_is_empty(&out_xbundle->xports)) {
@@ -1016,9 +1017,15 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
         }
     }
     *flow_tci = tci;
+    if (flow_tci_equal_to_xin) {
+        ctx->xin->flow.vlan_tci = tci;
+    }
 
     compose_output_action(ctx, xport->ofp_port);
     *flow_tci = old_tci;
+    if (flow_tci_equal_to_xin) {
+        ctx->xin->flow.vlan_tci = old_tci;
+    }
 }
 
 /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
@@ -1251,7 +1258,7 @@ xlate_normal(struct xlate_ctx *ctx)
 
     /* Drop malformed frames. */
     if (flow->dl_type == htons(ETH_TYPE_VLAN) &&
-        !(flow->vlan_tci & htons(VLAN_CFI))) {
+        !(ctx->xin->vlan_tci & htons(VLAN_CFI))) {
         if (ctx->xin->packet != NULL) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
             VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
@@ -1275,7 +1282,7 @@ xlate_normal(struct xlate_ctx *ctx)
     }
 
     /* Check VLAN. */
-    vid = vlan_tci_to_vid(flow->vlan_tci);
+    vid = vlan_tci_to_vid(ctx->xin->vlan_tci);
     if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) {
         xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
         return;
@@ -1533,7 +1540,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
     const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
     struct flow_wildcards *wc = &ctx->xout->wc;
     struct flow *flow = &ctx->xin->flow;
-    ovs_be16 flow_vlan_tci;
+    ovs_be16 flow_vlan_tci, xin_vlan_tci;
     uint32_t flow_pkt_mark;
     uint8_t flow_nw_tos;
     odp_port_t out_port, odp_port;
@@ -1602,6 +1609,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
     }
 
     flow_vlan_tci = flow->vlan_tci;
+    xin_vlan_tci = ctx->xin->vlan_tci;
     flow_pkt_mark = flow->pkt_mark;
     flow_nw_tos = flow->nw_tos;
 
@@ -1641,19 +1649,20 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
             wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
         }
         vlandev_port = vsp_realdev_to_vlandev(ctx->xbridge->ofproto, ofp_port,
-                                              flow->vlan_tci);
+                                              ctx->xin->vlan_tci);
         if (vlandev_port == ofp_port) {
             out_port = odp_port;
         } else {
             out_port = ofp_port_to_odp_port(ctx->xbridge, vlandev_port);
             flow->vlan_tci = htons(0);
+            ctx->xin->vlan_tci = htons(0);
         }
     }
 
     if (out_port != ODPP_NONE) {
         commit_odp_actions(flow, &ctx->base_flow,
                            &ctx->xout->odp_actions, &ctx->xout->wc,
-                           &ctx->mpls_depth_delta);
+                           &ctx->mpls_depth_delta, ctx->xin->vlan_tci);
         nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT,
                             out_port);
 
@@ -1665,6 +1674,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
  out:
     /* Restore flow */
     flow->vlan_tci = flow_vlan_tci;
+    ctx->xin->vlan_tci = xin_vlan_tci;
     flow->pkt_mark = flow_pkt_mark;
     flow->nw_tos = flow_nw_tos;
 }
@@ -1809,7 +1819,7 @@ execute_controller_action(struct xlate_ctx *ctx, int len,
 
     commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
                        &ctx->xout->odp_actions, &ctx->xout->wc,
-                       &ctx->mpls_depth_delta);
+                       &ctx->mpls_depth_delta, ctx->xin->vlan_tci);
 
     odp_execute_actions(NULL, packet, &key, ctx->xout->odp_actions.data,
                         ctx->xout->odp_actions.size, NULL, NULL);
@@ -2197,7 +2207,7 @@ xlate_sample_action(struct xlate_ctx *ctx,
 
   commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
                      &ctx->xout->odp_actions, &ctx->xout->wc,
-                     &ctx->mpls_depth_delta);
+                     &ctx->mpls_depth_delta, ctx->xin->vlan_tci);
 
   compose_flow_sample_cookie(os->probability, os->collector_set_id,
                              os->obs_domain_id, os->obs_point_id, &cookie);
@@ -2226,11 +2236,23 @@ may_receive(const struct xport *xport, struct xlate_ctx *ctx)
 }
 
 static void
+vlan_tci_restore(struct xlate_in *xin, ovs_be16 *tci_ptr, ovs_be16 orig_tci)
+{
+    /* If VLAN actions were executed after MPLS, copy the final vlan_tci out
+     * and restore the intermediate VLAN state. */
+    if (xin->flow.vlan_tci != orig_tci && tci_ptr == &xin->vlan_tci) {
+        xin->vlan_tci = xin->flow.vlan_tci;
+        xin->flow.vlan_tci = orig_tci;
+    }
+}
+
+static void
 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                  struct xlate_ctx *ctx)
 {
     struct flow_wildcards *wc = &ctx->xout->wc;
     struct flow *flow = &ctx->xin->flow;
+    ovs_be16 *vlan_tci = &ctx->xin->flow.vlan_tci;
     const struct ofpact *a;
 
     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
@@ -2241,6 +2263,15 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
         }
 
+        /* Update the final vlan state to be equal to the current state.
+         * - If 'vlan_tci' points to 'xin->flow.vlan_tci', then additional
+         *   VLAN actions will be applied before MPLS actions. 'xin->vlan_tci'
+         *   is updated to reflect the final state of the flow.
+         * - If 'vlan_tci' already points to 'xin->vlan_tci', then additional
+         *   VLAN actions will be applied after MPLS actions. 'xin->vlan_tci'
+         *   is already equal to the current state. */
+        ctx->xin->vlan_tci = *vlan_tci;
+
         switch (a->type) {
         case OFPACT_OUTPUT:
             xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
@@ -2264,28 +2295,28 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
 
         case OFPACT_SET_VLAN_VID:
             wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
-            flow->vlan_tci &= ~htons(VLAN_VID_MASK);
-            flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
-                               | htons(VLAN_CFI));
+            *vlan_tci &= ~htons(VLAN_VID_MASK);
+            *vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
+                          | htons(VLAN_CFI));
             break;
 
         case OFPACT_SET_VLAN_PCP:
-            wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
-            flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
-            flow->vlan_tci |=
+            wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
+            *vlan_tci &= ~htons(VLAN_PCP_MASK);
+            *vlan_tci |=
                 htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp << VLAN_PCP_SHIFT)
                       | VLAN_CFI);
             break;
 
         case OFPACT_STRIP_VLAN:
             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
-            flow->vlan_tci = htons(0);
+            *vlan_tci = htons(0);
             break;
 
         case OFPACT_PUSH_VLAN:
             /* XXX 802.1AD(QinQ) */
             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
-            flow->vlan_tci = htons(VLAN_CFI);
+            *vlan_tci = htons(VLAN_CFI);
             break;
 
         case OFPACT_SET_ETH_SRC:
@@ -2353,29 +2384,47 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             flow->skb_priority = ctx->orig_skb_priority;
             break;
 
-        case OFPACT_REG_MOVE:
+        case OFPACT_REG_MOVE: {
+            ovs_be16 orig_tci = flow->vlan_tci;
             nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
+            vlan_tci_restore(ctx->xin, vlan_tci, orig_tci);
             break;
+        }
 
-        case OFPACT_REG_LOAD:
+        case OFPACT_REG_LOAD: {
+            ovs_be16 orig_tci = flow->vlan_tci;
             nxm_execute_reg_load(ofpact_get_REG_LOAD(a), flow);
+            vlan_tci_restore(ctx->xin, vlan_tci, orig_tci);
             break;
+        }
 
-        case OFPACT_STACK_PUSH:
+        case OFPACT_STACK_PUSH: {
+            ovs_be16 orig_tci = flow->vlan_tci;
+            flow->vlan_tci = *vlan_tci;
             nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
                                    &ctx->stack);
+            flow->vlan_tci = orig_tci;
             break;
+        }
 
-        case OFPACT_STACK_POP:
+        case OFPACT_STACK_POP: {
+            ovs_be16 orig_tci = flow->vlan_tci;
             nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
                                   &ctx->stack);
+            vlan_tci_restore(ctx->xin, vlan_tci, orig_tci);
             break;
+        }
 
         case OFPACT_PUSH_MPLS:
             if (compose_mpls_push_action(ctx,
                                          ofpact_get_PUSH_MPLS(a)->ethertype)) {
                 return;
             }
+
+            /* Save and pop any existing VLAN tags if running in OF1.2 mode. */
+            ctx->xin->vlan_tci = *vlan_tci;
+            flow->vlan_tci = htons(0);
+            vlan_tci = &ctx->xin->vlan_tci;
             break;
 
         case OFPACT_POP_MPLS:
@@ -2477,6 +2526,7 @@ xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
 {
     xin->ofproto = ofproto;
     xin->flow = *flow;
+    xin->vlan_tci = flow->vlan_tci;
     xin->packet = packet;
     xin->may_learn = packet != NULL;
     xin->rule = rule;
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index a54a9e4..6ce3b31 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -60,6 +60,11 @@ struct xlate_in {
      * this flow when actions change header fields. */
     struct flow flow;
 
+    /* If MPLS and VLAN actions were both present in the translation, and VLAN
+     * actions should occur after the MPLS actions, then this field is used
+     * to store the final vlan_tci state. */
+    ovs_be16 vlan_tci;
+
     /* The packet corresponding to 'flow', or a null pointer if we are
      * revalidating without a packet to refer to. */
     const struct ofpbuf *packet;
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 652304e..c07c64e 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -869,6 +869,215 @@ done
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif - OF1.2 VLAN+MPLS handling])
+OVS_VSWITCHD_START([dnl
+   add-port br0 p1 -- set Interface p1 type=dummy
+])
+ON_EXIT([kill `cat ovs-ofctl.pid`])
+
+AT_CAPTURE_FILE([ofctl_monitor.log])
+AT_DATA([flows.txt], [dnl
+cookie=0xa dl_src=40:44:44:44:54:50 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],push_vlan:0x8100,mod_vlan_vid:99,mod_vlan_pcp:1,controller
+cookie=0xa dl_src=40:44:44:44:54:51 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],push_vlan:0x8100,mod_vlan_vid:99,mod_vlan_pcp:1,controller
+cookie=0xa dl_src=40:44:44:44:54:52 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,controller
+cookie=0xa dl_src=40:44:44:44:54:53 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,controller
+cookie=0xa dl_src=40:44:44:44:54:54 actions=push_vlan:0x8100,mod_vlan_vid:99,mod_vlan_pcp:1,push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],controller
+cookie=0xa dl_src=40:44:44:44:54:55 actions=push_vlan:0x8100,mod_vlan_vid:99,mod_vlan_pcp:1,push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],controller
+cookie=0xa dl_src=40:44:44:44:54:56 actions=push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],controller
+cookie=0xa dl_src=40:44:44:44:54:57 actions=push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],controller
+])
+AT_CHECK([ovs-ofctl --protocols=OpenFlow12 add-flows br0 flows.txt])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push the MPLS tag before pushing a VLAN tag, so we see
+dnl both of these in the final flow
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:50,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:50,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:50,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:50,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged, which should be stripped
+dnl before we push the MPLS and VLAN tags.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:51,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:51,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:51,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:51,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push the MPLS tag before pushing a VLAN tag, so we see
+dnl both of these in the final flow
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:52,dst=52:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:52,dl_dst=52:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:52,dl_dst=52:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:52,dl_dst=52:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged, which should be stripped
+dnl before we push the MPLS and VLAN tags.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:53,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:53,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:53,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:53,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push the VLAN tag before pushing a MPLS tag, but these
+dnl actions are reordered, so we see both of these in the final flow.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:54,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:54,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:54,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:54,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged, which should be stripped
+dnl before we push the MPLS and VLAN tags.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:55,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:55,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:55,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:55,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push the VLAN tag before pushing a MPLS tag, but these
+dnl actions are reordered, so we see both of these in the final flow.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:56,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:56,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:56,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:56,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged, which should be stripped
+dnl before we push the MPLS and VLAN tags.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:54:57,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:57,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:57,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:54:57,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=0,mpls_ttl=64,mpls_bos=1
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:50 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],mod_vlan_vid:99,mod_vlan_pcp:1,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:51 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],mod_vlan_vid:99,mod_vlan_pcp:1,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:52 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x63->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:53 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x63->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:54 actions=mod_vlan_vid:99,mod_vlan_pcp:1,push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:55 actions=mod_vlan_vid:99,mod_vlan_pcp:1,push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:56 actions=load:0x63->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:54:57 actions=load:0x63->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535
+NXST_FLOW reply:
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - fragment handling])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [2], [3], [4], [5], [6], [90])
-- 
1.8.4

^ permalink raw reply related

* [PATCH v2.40 6/7] datapath: Break out deacceleration portion of vlan_push
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev, netdev, Jesse Gross, Ben Pfaff
  Cc: Pravin B Shelar, Ravi K, Isaku Yamahata, Joe Stringer
In-Reply-To: <1380241116-7661-1-git-send-email-horms@verge.net.au>

Break out deacceleration portion of vlan_push into vlan_put
so that it may be re-used by mpls_push.

For both vlan_push and mpls_push if there is an accelerated VLAN tag
present then it should be deaccelerated, adding it to the data of
the skb, before the new tag is added.

Signed-off-by: Simon Horman <horms@verge.net.au>

---
v2.40
* As suggested by Jesse Gross
  + Simplify vlan_push by returning an error code
    rather than an error code encoded as a struct sk_buff *

v2.39
* First post
---
 datapath/actions.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 30ea1d2..d961e5d 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -105,22 +105,31 @@ static int pop_vlan(struct sk_buff *skb)
 	return 0;
 }
 
-static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+/* push down current VLAN tag */
+static int put_vlan(struct sk_buff *skb)
 {
-	if (unlikely(vlan_tx_tag_present(skb))) {
-		u16 current_tag;
+	u16 current_tag = vlan_tx_tag_get(skb);
 
-		/* push down current VLAN tag */
-		current_tag = vlan_tx_tag_get(skb);
+	if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
+		return -ENOMEM;
 
-		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
-			return -ENOMEM;
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_add(skb->csum, csum_partial(skb->data
+				+ (2 * ETH_ALEN), VLAN_HLEN, 0));
 
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+	return 0;
+}
 
+static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+{
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		int err;
+
+		err = put_vlan(skb);
+		if (unlikely(err))
+			return err;
 	}
+
 	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 	return 0;
 }
-- 
1.8.4

^ permalink raw reply related

* [PATCH v2.40 5/7] lib: Push MPLS tags in the OpenFlow 1.3 ordering
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev, netdev, Jesse Gross, Ben Pfaff
  Cc: Pravin B Shelar, Ravi K, Isaku Yamahata, Joe Stringer
In-Reply-To: <1380241116-7661-1-git-send-email-horms@verge.net.au>

From: Joe Stringer <joe@wand.net.nz>

This patch modifies the push_mpls behaviour to follow the OpenFlow 1.3
specification in the presence of VLAN tagged packets. From the spec:

"Newly pushed tags should always be inserted as the outermost tag in the
outermost valid location for that tag. When a new VLAN tag is pushed, it
should be the outermost tag inserted, immediately after the Ethernet
header and before other tags. Likewise, when a new MPLS tag is pushed,
it should be the outermost tag inserted, immediately after the Ethernet
header and before other tags."

When the push_mpls action was inserted using OpenFlow 1.2, we implement
the previous behaviour by inserting VLAN actions around the MPLS action
in the odp translation: pop VLAN tags before committing MPLS actions,
and push the expected VLAN tag afterwards. The trigger condition for
this is based on the ofpact->compat field.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Simon Horman <horms@verge.net.au>

---

v2.40
* Trivial rebase for removal of set_ethertype()

v2.36 - v2.39
* No change

v2.35
* First post
---
 lib/flow.c                   |   2 +-
 lib/packets.c                |  10 +-
 lib/packets.h                |   2 +-
 ofproto/ofproto-dpif-xlate.c |  10 +-
 tests/ofproto-dpif.at        | 237 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 253 insertions(+), 8 deletions(-)

diff --git a/lib/flow.c b/lib/flow.c
index 0ce694d..1039222 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1064,7 +1064,7 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
     }
 
     if (eth_type_mpls(flow->dl_type)) {
-        b->l2_5 = b->l3;
+        b->l2_5 = (char*)b->l2 + ETH_HEADER_LEN;
         push_mpls(b, flow->dl_type, flow->mpls_lse);
     }
 }
diff --git a/lib/packets.c b/lib/packets.c
index 922c5db..f8a58b6 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -220,11 +220,11 @@ eth_pop_vlan(struct ofpbuf *packet)
 
 /* Set ethertype of the packet. */
 void
-set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type)
+set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type, bool inner)
 {
     struct eth_header *eh = packet->data;
 
-    if (eh->eth_type == htons(ETH_TYPE_VLAN)) {
+    if (inner && eh->eth_type == htons(ETH_TYPE_VLAN)) {
         ovs_be16 *p;
         p = ALIGNED_CAST(ovs_be16 *,
                 (char *)(packet->l2_5 ? packet->l2_5 : packet->l3) - 2);
@@ -332,8 +332,8 @@ push_mpls(struct ofpbuf *packet, ovs_be16 ethtype, ovs_be32 lse)
 
     if (!is_mpls(packet)) {
         /* Set ethtype and MPLS label stack entry. */
-        set_ethertype(packet, ethtype);
-        packet->l2_5 = packet->l3;
+        set_ethertype(packet, ethtype, false);
+        packet->l2_5 = (char*)packet->l2 + ETH_HEADER_LEN;
     }
 
     /* Push new MPLS shim header onto packet. */
@@ -354,7 +354,7 @@ pop_mpls(struct ofpbuf *packet, ovs_be16 ethtype)
         size_t len;
         mh = packet->l2_5;
         len = (char*)packet->l2_5 - (char*)packet->l2;
-        set_ethertype(packet, ethtype);
+        set_ethertype(packet, ethtype, true);
         if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) {
             packet->l2_5 = NULL;
         } else {
diff --git a/lib/packets.h b/lib/packets.h
index 7388152..38fec70 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -143,7 +143,7 @@ void compose_rarp(struct ofpbuf *, const uint8_t eth_src[ETH_ADDR_LEN]);
 void eth_push_vlan(struct ofpbuf *, ovs_be16 tci);
 void eth_pop_vlan(struct ofpbuf *);
 
-void set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type);
+void set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type, bool inner);
 
 const char *eth_from_hex(const char *hex, struct ofpbuf **packetp);
 void eth_format_masked(const uint8_t eth[ETH_ADDR_LEN],
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 1cf5d52..9f7298b 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -2235,6 +2235,12 @@ may_receive(const struct xport *xport, struct xlate_ctx *ctx)
     return true;
 }
 
+static bool
+mpls_compat_behaviour(enum ofputil_action_code compat)
+{
+    return (compat != OFPUTIL_OFPAT13_PUSH_MPLS);
+}
+
 static void
 vlan_tci_restore(struct xlate_in *xin, ovs_be16 *tci_ptr, ovs_be16 orig_tci)
 {
@@ -2423,7 +2429,9 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
 
             /* Save and pop any existing VLAN tags if running in OF1.2 mode. */
             ctx->xin->vlan_tci = *vlan_tci;
-            flow->vlan_tci = htons(0);
+            if (mpls_compat_behaviour(a->compat)) {
+                flow->vlan_tci = htons(0);
+            }
             vlan_tci = &ctx->xin->vlan_tci;
             break;
 
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index c07c64e..17b2b30 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -1078,6 +1078,243 @@ NXST_FLOW reply:
 OVS_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([ofproto-dpif - OF1.3+ VLAN+MPLS handling])
+OVS_VSWITCHD_START([dnl
+   add-port br0 p1 -- set Interface p1 type=dummy
+])
+ON_EXIT([kill `cat ovs-ofctl.pid`])
+
+AT_CAPTURE_FILE([ofctl_monitor.log])
+AT_DATA([flows.txt], [dnl
+cookie=0xa dl_src=40:44:44:44:55:44 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
+cookie=0xa dl_src=40:44:44:44:55:45 actions=push_vlan:0x8100,mod_vlan_vid:99,push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
+cookie=0xa dl_src=40:44:44:44:55:46 actions=push_vlan:0x8100,mod_vlan_vid:99,push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
+cookie=0xa dl_src=40:44:44:44:55:47 actions=push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
+cookie=0xa dl_src=40:44:44:44:55:48 actions=push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller
+cookie=0xa dl_src=40:44:44:44:55:49 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],push_vlan:0x8100,mod_vlan_vid:99,controller
+cookie=0xa dl_src=40:44:44:44:55:50 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],push_vlan:0x8100,mod_vlan_vid:99,controller
+cookie=0xa dl_src=40:44:44:44:55:51 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,controller
+cookie=0xa dl_src=40:44:44:44:55:52 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],push_vlan:0x8100,load:99->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,controller
+])
+AT_CHECK([ovs-ofctl --protocols=OpenFlow13 add-flows br0 flows.txt])
+
+dnl Modified MPLS controller action.
+dnl The input packet has a VLAN tag, but because we push an MPLS tag in
+dnl OF1.3 mode, we can no longer see it.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:44,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:44,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:44,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:44,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push a VLAN tag, then an MPLS tag in OF1.3 mode, so we
+dnl can only see the MPLS tag in the result.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:45,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:45,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:45,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:45,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged; we update this tag then
+dnl push an MPLS tag in OF1.3 mode. As such, we can only see the MPLS tag in
+dnl the result.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:46,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:46,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:46,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:46,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push a VLAN tag, then an MPLS tag in OF1.3 mode, so we
+dnl can only see the MPLS tag in the result.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:47,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:47,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:47,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:47,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged; we update this tag then
+dnl push an MPLS tag in OF1.3 mode. As such, we can only see the MPLS tag in
+dnl the result.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:48,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:48,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:48,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered)
+mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:55:48,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push the MPLS tag before pushing a VLAN tag, so we see
+dnl both of these in the final flow.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:49,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=0,dl_src=40:44:44:44:55:49,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=0,dl_src=40:44:44:44:55:49,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=0,dl_src=40:44:44:44:55:49,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged, which should be stripped
+dnl before we push the MPLS and VLAN tags.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:50,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=0,dl_src=40:44:44:44:55:50,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=0,dl_src=40:44:44:44:55:50,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=0,dl_src=40:44:44:44:55:50,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, we push the MPLS tag before pushing a VLAN tag, so we see
+dnl both of these in the final flow.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:51,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:55:51,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:55:51,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:55:51,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+dnl Modified MPLS controller action.
+dnl In this test, the input packet is vlan-tagged, which should be stripped
+dnl before we push the MPLS and VLAN tags.
+AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log])
+
+for i in 1 2 3; do
+    ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:55:52,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=88,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))'
+done
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+ovs-appctl -t ovs-ofctl exit
+
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:55:52,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:55:52,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+dnl
+NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=68 in_port=1 (via action) data_len=68 (unbuffered)
+mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:55:52,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1
+])
+
+AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
+AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:44 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:45 actions=mod_vlan_vid:99,push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:46 actions=mod_vlan_vid:99,push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:47 actions=load:0x63->OXM_OF_VLAN_VID[[]],push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:48 actions=load:0x63->OXM_OF_VLAN_VID[[]],push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:49 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],mod_vlan_vid:99,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:50 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],mod_vlan_vid:99,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:51 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],load:0x63->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,CONTROLLER:65535
+ cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:55:52 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],load:0x63->OXM_OF_VLAN_VID[[]],mod_vlan_pcp:1,CONTROLLER:65535
+NXST_FLOW reply:
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([ofproto-dpif - fragment handling])
 OVS_VSWITCHD_START
 ADD_OF_PORTS([br0], [1], [2], [3], [4], [5], [6], [90])
-- 
1.8.4

^ permalink raw reply related

* [PATCH v2.40 7/7] datapath: Add basic MPLS support to kernel
From: Simon Horman @ 2013-09-27  0:18 UTC (permalink / raw)
  To: dev, netdev, Jesse Gross, Ben Pfaff
  Cc: Pravin B Shelar, Ravi K, Isaku Yamahata, Joe Stringer
In-Reply-To: <1380241116-7661-1-git-send-email-horms@verge.net.au>

Allow datapath to recognize and extract MPLS labels into flow keys
and execute actions which push, pop, and set labels on packets.

Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer.

Cc: Ravi K <rkerur@gmail.com>
Cc: Leo Alterman <lalterman@nicira.com>
Cc: Isaku Yamahata <yamahata@valinux.co.jp>
Cc: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Simon Horman <horms@verge.net.au>

---

v2.40
* Rebase for:
  + New dev_queue_xmit compat code
  + Updated put_vlan()
* As suggested by Jesse Gross
  + Remove bogus mac_len update from push_mpls()
  + Slightly simplify push_mpls() by using eth_hdr()
  + Remove dubious condition !eth_p_mpls(inner_protocol) on
    an skb being considered to be MPLS in netdev_send()
  + Only use compatibility code for MPLS GSO segmentation on kernels
    older than 3.11
  + Revamp setting of inner_protocol
    1. Do not unconditionally set inner_protocol to the value of
       skb->protocol in ovs_execute_actions().
    2. Initialise inner_protocol to zero only if compatibility code is in
       use. In the case where compatibility code is not in use it will either
       be zero since the allocation of the skb or some other value set
       by some other user.
    3. Conditionally set the inner_protocol in push_mpls() to the value of
       skb->protocol when entering push_mpls(). The condition is that
       inner_protocol is zero and the value of skb->protocol is not an MPLS
       ethernet type.
    - This new scheme:
      + Pushes logic to set inner_protocol closer to the case where it is
	needed.
      + Avoids over-writing values set by other users.
* As suggested by Pravin Shelar
  + Only set and restore skb->protocol in rpl___skb_gso_segment() in the
    case of MPLS
  + Add inner_protocol field to struct ovs_gso_cb instead of ovs_skb_cb.
    This moves compatibility code closer to where it is used
    and creates fewer differences with mainline.
* Update comment on mac_len updates in datapath/actions.c
* Remove HAVE_INNER_PROTOCOL and instead just check
  against kernel version 3.11 directly.
  HAVE_INNER_PROTOCOL is a hang-over from work done prior
  to the merge of inner_protocol into the kernel.
* Remove dubious condition !eth_p_mpls(inner_protocol) on
  using inner_protocol as the type in rpl_skb_network_protocol()
* Do not update type of features in rpl_dev_queue_xmit.
  Though arguably correct this is not an inherent part of
  the changes made by this patch.
* Use skb_cow_head() in push_mpls()
  + Call skb_cow_head(skb, MPLS_HLEN) instead of
    make_writable(skb, skb->mac_len) to ensure that there is enough head
    room to push an MPLS LSE regardless of whether the skb is cloned or not.
  + This is consistent with the behaviour of rpl__vlan_put_tag().
  + This is a fix for crashes reported when performing mpls_push
    with headroom less than 4. This problem was introduced in v3.36.
* Skip popping in mpls_pop if the skb is too short to contain an MPLS LSE

v2.39
* Rebase for removal of vlan, checksum and skb->mark compat code

v2.38
* Rebase for SCTP support
* Refactor validate_tp_port() to iterate over eth_types rather
  than open-coding the loop. With the addition of SCTP this logic
  is now used three times.

v2.36 - v2.37
* Rebase

* Do not add set_ethertype() to datapath/actions.c.
  As this patch has evolved this function has devolved into
  two sets of functionality wrapped into a single function with
  only one line of common code. Refactor things to simply
  open-code setting the ether type in the two locations where
  set_ethertype() was previously used. The aim here is to improve
  readability.

* Update setting skb->ethertype after mpls push and pop.
  - In the case of push_mpls it should be set unconditionally
    as in v2.35 the behaviour of this function is to always push
    an MPLS LSE before any VLAN tags.
  - In the case of mpls_pop eth_p_mpls(skb->protocol) is a better
    test than skb->protocol != htons(ETH_P_8021Q) as it will give the
    correct behaviour in the presence of other VLAN ethernet types,
    for example 0x88a8 which is used by 802.1ad. Moreover, it seems
    correct to update the ethernet type if it was previously set
    according to the top-most MPLS LSE.

* Deaccelerate VLANs when pushing MPLS tags
  - Since v2.35 MPLS push will insert an MPLS LSE before any VLAN tags.
    This means that if an accelerated tag is present it should be
    deaccelerated to ensure it ends up in the correct position.

* Update skb->mac_len in push_mpls() so that it will be correct
  when used by a subsequent call to pop_mpls().

  As things stand I do not believe this is strictly necessary as
  ovs-vswitchd will not send a pop MPLS action after a push MPLS action.
  However, I have added this in order to code more defensively as I believe
  that if such a sequence did occur it would be rather unobvious why
  it didn't work.

* Do not add skb_cow_head() call in push_mpls().
  It is unnecessary as there is a make_writable() call.
  This change was also made in v2.30 but somehow the
  code regressed between then and v2.35.

v2.35
* Rebase
* Move MPLS constants to mpls.h
* Push MPLS tags after ethernet, before VLAN tags
  - This is consistent with the OpenFlow 1.3 specification
  - Compatibility with OpenFlow 1.2 and earlier versions
    may be provided by ovs-vswitchd.
* Correct GSO behaviour in the presence of MPLS but absence of VLANs

v2.34
* Rebase for megaflow changes

v2.33
* Ensure that inner_protocol is always set to the current
  skb->protocol value in ovs_execute_actions(). This ensures
  it is set to the correct value in the absence of a push_mpls action.
  Also remove setting of inner_protocol in push_mpls() as
  it duplicates the code now in ovs_execute_actions().
* Call __skb_gso_segment() instead of skb_gso_segment() from
  rpl___skb_gso_segment() in the case that HAVE___SKB_GSO_SEGMENT is set.
  This was a typo.

v2.32
* As suggested by Jesse Gross
  - Use int instead of size_t in validate_and_copy_actions__().
  - Fix crazy edit mess in pop_mpls() action comment
  - Move eth_p_mpls() into mpls.h
  - Refactor skb_gso_segment MPLS handling into rpl_skb_gso_segment
    Address Jesse's comments regarding this code:
    "Can we push this completely into the skb_gso_segment() compatibility
     code? It's both nicer and may make the interactions with the vlan code
     less confusing."
  - Move GSO compatibility code into linux/compat/gso.*
  - Set skb->protocol on mpls_push and mpls_pop in the presence
    of an offloaded VLAN.

v2.31
* As suggested by Jesse Gross
  - There is no need to make mac_header_end inline as it is not in a header file
  - Remove dubious if (*skb_ethertype == ethertype) optimisation from
    set_ethertype
  - Only set skb->protocol in push_mpls() or pop_mpls() for non-VLAN packets
  - Use MAX_ETH_TYPES instead of SAMPLE_ACTION_DEPTH for array size
    of types in struct eth_types. This corrects a typo/thinko.
  - Correct eth type tracking logic such that start isn't advanced
    when entering a sample action, ensuring that all possible types
    are checked when verifying nested actions.
* Define HAVE_INNER_PROTOCOL based on kernel version.
  inner_protocol has been merged into net-next and should appear in
  v3.11 so there is no longer a need for an acinclude.m4 test to check for it.
* Add MPLS GSO compatibility code.
  This is for use on kernels that do not have MPLS GSO support.
  Thanks to Joe Stringer for his work on this.

v2.30
* As suggested by Jesse Gross
  - Use skb_cow_head in push_mpls to ensure there is sufficient headroom for
    skb_push
  - Call make_writable with skb->mac_len instead of skb->mac_len + MPLS_HLEN
    in push_mpls as only the first skb->mac_len bytes of existing packet data
    are modified.
  - Rename skb_mac_header_end as mac_header_end, this seems
    to be a more appropriate name for a local function.
  - Remove OVS_CSUM_COMPLETE code from set_ethertype().
    Inside OVS the ethernet header is not covered by OVS_CSUM_COMPLETE.
  - Use __skb_pull() instead of skb_pull() in pop_mpls()
  - Decrement and increment skb->mac_len when popping and pushing VLAN tags.
    Previously mac_len was reset, but this would result in forgetting
    the MPLS label stack.
  - Remove spurious comment from before do_execute_actions().
  - Move OVS_KEY_ATTR_MPLS attribute to its final, upstreamable, location.
  - Correct ethertype check for OVS_ACTION_ATTR_POP_MPLS case in
    validate_and_copy_actions() to check for MPLS ethertypes rather than
    ETH_P_IP.
  - Rewrite tracking of eth types used to verify actions in the presence
    of sample actions. There is a large comment above struct eth_types
    describing the new implementation.

v2.29
* Break include/ and lib/ portions of the patch out into a
  separate patch "datapath: Add basic MPLS support to kernel"
* Update for new MPLS GSO scheme
  - skb->protocol is set to the new ethertype of the packet
    on MPLS push and pop
  - When pushing the first MPLS LSE onto a previously non-MPLS
    packet set skb->inner_protocol to the original ethertype.
  - skb->inner_protocol may be used by the network stack
    for GSO of the inner-packet.
* Drop const from ethertype parameter of set_ethertype.
  This appears to be a legacy of this parameter being a pointer.
* Pass the ethertype parameter of pop_mpls as a value rather
  than a pointer.

v2.28
* Kernel Datapath changes as suggested by Jarno Rajahalme
  + Correct the logic introduced in v2.27 to set the network_header
    to after the MPLS label stack in the case of an MPLS packet.
    - Increment stack_len offset so that label stacks of depth greater
      than two do not cause an infinite loop.
    - Correct offset passed to check_header to include skb->mac_len

v2.27
* Kernel Datapath changes as suggested by Jarno Rajahalme and Jesse Gross:
  + Previously the mac_len and network_header of an skb corresponded
    to the end of the L2 header.  To support GSO, these were adjusted just
    before transmission, in do_output, with the results as follows:

    Input: non-MPLS skb: Output: network header and mac_len correspond
                         to the beginning of the L3 headers
    Input: MPLS:         Output: network header and mac_len correspond to the
                         end of the L2 headers.

    This is somewhat confusing.

  + The new scheme is as follows:
    - The mac_len always corresponds to the end of the L2 header.
    - The network header always corresponds to the beginning of the
      L3 header.

  + Note that in the case of MPLS output the end of the L2 headers and the
  beginning of the L3 headers will differ.

* Remove unused declaration of skb_cb_mpls_stack()

v2.26
* Rebase on master
* Kernel Datapath changes as suggested by Jarno Rajahalme
  - Use skb_network_header() instead of skb_mac_header() to locate
    the ethertype to set in set_ethertype() as the latter will
    be wrong in the presence of VLAN tags. This resolves
    a regression introduced in v2.24.
  - Enhance comment in do_output()
  - do_execute_actions(): Do not alter mpls_stack_depth if
    an MPLS push or pop action fails. This is achieved by altering
    mpls_stack_depth at the end of push_mpls() and pop_mpls().

v2.25
* Rebase on master
* Pass big-endian value as the last argument of eth_types_set() in
  validate_and_copy_actions__()
* Use revised GSO support as provided by the patch series
  "[PATCH 0/2] Small Modifications to GSO to allow segmentation of MPLS"
  - Set skb->mac_len to the length of the l2 header + MPLS stack length
  - Update skb->network_header accordingly
  - Set skb->encapsulated_features

v2.24
* Use skb_mac_header() in set_ethertype()
* Set skb->encapsulation in set_ethertype() to support MPLS GSO.
  Also add a note about the other requirements for MPLS GSO.
  MPLS GSO support will be posted as a patch to net-next (Linux mainline)
  "MPLS: Add limited GSO support"
* Do not add ETH_TYPE_MIN, it is no longer used

v2.23
* As suggested by Jesse Gross:
  - Verify the current ethernet type when validating sample actions
    both for the taken and not-taken paths of the sample action.
  - Document that the OVS_KEY_ATTR_MPLS attribute accepts a list of
    struct ovs_key_mpls but that an implementation may restrict
    the length it accepts.
  - Restrict the array length of the OVS_KEY_ATTR_MPLS to one.
    + Don't add ovs_flow_verify_key_len as it was added to
      handle attributes whose values are arrays but there are
      no attributes with values that are arrays (of length greater than one).

v2.22
* As suggested by Jesse Gross:
  - Fix sparse warning in validate_and_copy_actions()
    I have no idea why sparse doesn't show this up on my system.
  - Remove call to skb_cow_head() from push_mpls() as it
    is already covered by a call to make_writable()
  - Check (key_type > OVS_KEY_ATTR_MAX) in ovs_flow_verify_key_len()
  - Disallow set actions on l2.5+ data and MPLS push and pop actions
    after an MPLS pop action as there is no verification that the packet
    is actually of the new ethernet type. This may later be supported
    using recirculation or by other means.
  - Do not add spurious debugging message to ovs_flow_cmd_new_or_set()

v2.21
* As suggested by Jesse Gross:
  - Verify that l3 and l4 actions always occur prior to
    a push_mpls action and use the network header pointer of an skb
    to track the top of the MPLS stack. This avoids adding an l2_size
    element to the skb callback.

v2.20
* As suggested by Jesse Gross:
  - Do not add ovs_dp_ioctl_hook
    + This appears to be garbage from a rebase
  - Do not add skb_cb_set_l2_size. Instead set OVS_CB(skb)->l2_size
    in ovs_flow_extract().
  - Do not free skb on error in push_mpls(), it is freed in the caller
  - Call skb_reset_mac_len() in pop_mpls() and push_mpls()
  - Update checksums in pop_mpls(), push_mpls() and set_mpls().
  - Rename skb_cb_mpls_bos() as skb_cb_mpls_stack().
    It returns the top not the bottom of the stack.
  - Track the current eth_type in validate_and_copy_actions
    which is initially the eth_type of the flow and may be modified
    by push_mpls and pop_mpls actions. Use this to correctly validate
    mpls_set actions. This is to allow mpls_set actions to be applied
    to a non-MPLS frame after an mpls_push action (although ovs-vswitchd
    doesn't currently do that).
    Also:
    + Remove the check of the eth_type in set_mpls() as the new validation
      scheme should ensure it cannot be incorrect.
    + Use the current eth_type to validate mpls_pop actions and remove
      the eth_type check from pop_mpls().
  - Move OVS_KEY_ATTR_MPLS to non-upstream group in ovs_key_lens
  - Remove unnecessary memset of mpls_key in ovs_flow_to_nlattrs()
  - Make a union of the mpls and ip elements of struct sw_flow_key.
    Currently the code stops parsing after an MPLS header so it is
    not possible for the ip and mpls elements to be used simultaneously
    and some space can be saved by using a union.
  - Allow an array of MPLS key attributes
    + Currently all but the first element is ignored
    + User-space needs to be updated to accept more than one element,
      currently it will treat their presence as an error
  - Do not update network header in ovs_flow_extract() after parsing
    the MPLS stack as it is never used because no l3+ processing
    occurs on MPLS frames.
  - Allow multiple MPLS entries in a match by allowing the OVS_KEY_ATTR_MPLS
    to be an array of struct ovs_key_mpls with at least one entry.
    Currently only one entry is used which is byte-for-byte compatible with
    the previous scheme of having OVS_KEY_ATTR_MPLS as a struct
    ovs_key_mpls.
* Make skb writable in pop_mpls(), push_mpls() and set_mpls().

v2.18 - v2.19
* No change

v2.17
* As suggested by Ben Pfaff
  - Use consistent terminology for MPLS.
    + Consistently refer to the MPLS component of a packet as the
      MPLS label stack and entries in the stack as MPLS label stack entries
      (LSE).  An MPLS label is a component of an MPLS label stack entry.
      The other components are the traffic class (TC), time to live (TTL)
      and bottom of stack (BoS) bit.
  - Rename compose_.*mpls_ functions as execute_.*mpls_

v2.16
* No change

v2.15
* As suggested by Ben Pfaff
  - Use OVS_ACTION_SET to set OVS_KEY_ATTR_MPLS instead of
    OVS_ACTION_ATTR_SET_MPLS

v2.14
* Remove include/linux/openvswitch.h portion which added
  new key and action attributes. This is
  now present in "User-Space MPLS actions and matches"
  which is now a dependency of this patch

v2.13
* As suggested by Jarno Rajahalme
  - Rename mpls_bos element of ovs_skb_cb as l2_size as it is set and used
    regardless of if an MPLS stack is present or not. Update the name of
    helper functions and documentation accordingly.
  - Ensure that skb_cb_mpls_bos() never returns NULL
* Correct endianness in eth_p_mpls()

v2.12
* Update skb and network header on MPLS extraction in ovs_flow_extract()
* Use NULL in skb_cb_mpls_bos()
* Add eth_p_mpls helper

v2.10 - v2.11
* No change

v2.9
* datapath: Always update the mpls bos if vlan_pop is successful

  Regardless of the details of how a successful
  vlan_pop is achieved, the mpls bos needs to be updated.

  Without this fix it has been observed that the following
  results in malformed packets

v2.8
* No change

v2.7
* Rebase

v2.6
* As suggested by Yamahata-san
  - Do not guard against label == 0 for
    OVS_ACTION_ATTR_SET_MPLS in validate_actions().
    A label of 0 is valid
  - Remove comment stipulating that if
    the top_label element of struct sw_flow_key is 0 then
    there is no MPLS label. An MPLS label of 0 is valid
    and the correct check is whether the ethertype is
    ntohs(ETH_TYPE_MPLS) or ntohs(ETH_TYPE_MPLS_MCAST)

v2.4 - v2.5
* No change

v2.3
* s/mpls_stack/mpls_bos/
  This is in keeping with the naming used in the OpenFlow 1.3 specification

v2.2
* Call skb_reset_mac_header() in skb_cb_set_mpls_stack()
  eth_hdr(skb) is non-NULL when called in skb_cb_set_mpls_stack().
* Add a call to skb_cb_set_mpls_stack() in ovs_packet_cmd_execute().
  I apologise that I have mislaid my notes on this but
  it avoids a kernel panic. I can investigate again if necessary.
* Use struct ovs_action_push_mpls instead of
  __be16 to decode OVS_ACTION_ATTR_PUSH_MPLS in validate_actions(). This is
  consistent with the data format for the attribute.
* Indentation fix in skb_cb_mpls_stack(). [cosmetic]

v2.1
* Manual rebase
---
 datapath/Modules.mk                             |   1 +
 datapath/actions.c                              | 129 +++++++++++-
 datapath/datapath.c                             | 259 +++++++++++++++++++++---
 datapath/datapath.h                             |   2 +
 datapath/flow.c                                 |  58 +++++-
 datapath/flow.h                                 |  17 +-
 datapath/linux/compat/gso.c                     | 117 +++++++++--
 datapath/linux/compat/gso.h                     |  53 +++++
 datapath/linux/compat/include/linux/netdevice.h |  14 +-
 datapath/linux/compat/netdevice.c               |  28 ---
 datapath/mpls.h                                 |  15 ++
 include/linux/openvswitch.h                     |   7 +-
 12 files changed, 609 insertions(+), 91 deletions(-)
 create mode 100644 datapath/mpls.h

diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 7ddf79c..b54dc5b 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -22,6 +22,7 @@ openvswitch_headers = \
 	compat.h \
 	datapath.h \
 	flow.h \
+	mpls.h \
 	vlan.h \
 	vport.h \
 	vport-internal_dev.h \
diff --git a/datapath/actions.c b/datapath/actions.c
index d961e5d..bfab9ec 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -35,6 +35,8 @@
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
+#include "gso.h"
+#include "mpls.h"
 #include "vlan.h"
 #include "vport.h"
 
@@ -71,7 +73,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
 
 	vlan_set_encap_proto(skb, vhdr);
 	skb->mac_header += VLAN_HLEN;
-	skb_reset_mac_len(skb);
+	/* Update mac_len for subsequent MPLS actions */
+	skb->mac_len -= VLAN_HLEN;
 
 	return 0;
 }
@@ -113,6 +116,9 @@ static int put_vlan(struct sk_buff *skb)
 	if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
 		return -ENOMEM;
 
+	/* update mac_len for subsequent MPLS actions */
+	skb->mac_len += VLAN_HLEN;
+
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_add(skb->csum, csum_partial(skb->data
 				+ (2 * ETH_ALEN), VLAN_HLEN, 0));
@@ -134,6 +140,114 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 	return 0;
 }
 
+/* The end of the mac header.
+ *
+ * For non-MPLS skbs this will correspond to the network header.
+ * For MPLS skbs it will be before the network_header as the MPLS
+ * label stack lies between the end of the mac header and the network
+ * header. That is, for MPLS skbs the end of the mac header
+ * is the top of the MPLS label stack.
+ */
+static unsigned char *mac_header_end(const struct sk_buff *skb)
+{
+	return skb_mac_header(skb) + skb->mac_len;
+}
+
+/* Push MPLS after the ethernet header. */
+static int push_mpls(struct sk_buff *skb,
+		     const struct ovs_action_push_mpls *mpls)
+{
+	__be32 *new_mpls_lse;
+	struct ethhdr *hdr;
+
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		int err;
+
+		err = put_vlan(skb);
+		if (unlikely(err))
+			return err;
+
+	        vlan_set_tci(skb, 0);
+	}
+
+	if (skb_cow_head(skb, MPLS_HLEN) < 0) {
+		kfree_skb(skb);
+		return -ENOMEM;
+	}
+	skb_push(skb, MPLS_HLEN);
+
+	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
+		ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	new_mpls_lse = (__be32 *)(skb_mac_header(skb) + ETH_HLEN);
+	*new_mpls_lse = mpls->mpls_lse;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
+							     MPLS_HLEN, 0));
+
+	hdr = eth_hdr(skb);
+	hdr->h_proto = mpls->mpls_ethertype;
+	if (!eth_p_mpls(skb->protocol) && !ovs_skb_get_inner_protocol(skb))
+		ovs_skb_set_inner_protocol(skb, skb->protocol);
+	skb->protocol = mpls->mpls_ethertype;
+	return 0;
+}
+
+static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
+{
+	struct ethhdr *hdr;
+	int err;
+
+	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (unlikely(skb->len < skb->mac_len + MPLS_HLEN))
+		return -ENOMEM;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_sub(skb->csum,
+				     csum_partial(mac_header_end(skb),
+						  MPLS_HLEN, 0));
+
+	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
+		skb->mac_len);
+
+	__skb_pull(skb, MPLS_HLEN);
+	skb_reset_mac_header(skb);
+
+	/* mac_header_end() is used to locate the ethertype
+	 * field correctly in the presence of VLAN tags.
+	 */
+	hdr = (struct ethhdr *)(mac_header_end(skb) - ETH_HLEN);
+	hdr->h_proto = ethertype;
+	if (eth_p_mpls(skb->protocol))
+		skb->protocol = ethertype;
+	return 0;
+}
+
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+{
+	__be32 *stack = (__be32 *)mac_header_end(skb);
+	int err;
+
+	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		__be32 diff[] = { ~(*stack), *mpls_lse };
+		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+					  ~skb->csum);
+	}
+
+	*stack = *mpls_lse;
+
+	return 0;
+}
+
 static int set_eth_addr(struct sk_buff *skb,
 			const struct ovs_key_ethernet *eth_key)
 {
@@ -509,6 +623,9 @@ static int execute_set_action(struct sk_buff *skb,
 
 	case OVS_KEY_ATTR_SCTP:
 		err = set_sctp(skb, nla_data(nested_attr));
+
+	case OVS_KEY_ATTR_MPLS:
+		err = set_mpls(skb, nla_data(nested_attr));
 		break;
 	}
 
@@ -545,6 +662,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			output_userspace(dp, skb, a);
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_MPLS:
+			err = push_mpls(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_POP_MPLS:
+			err = pop_mpls(skb, nla_get_be16(a));
+			break;
+
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 			err = push_vlan(skb, nla_data(a));
 			if (unlikely(err)) /* skb already freed. */
@@ -618,6 +743,8 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
 		goto out_loop;
 	}
 
+	ovs_skb_init_inner_protocol(skb);
+
 	OVS_CB(skb)->tun_key = NULL;
 	error = do_execute_actions(dp, skb, acts->actions,
 					 acts->actions_len, false);
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 4defcdb..5a62201 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -56,6 +56,8 @@
 
 #include "datapath.h"
 #include "flow.h"
+#include "gso.h"
+#include "mpls.h"
 #include "vlan.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
@@ -543,18 +545,132 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_off
 	a->nla_len = sfa->actions_len - st_offset;
 }
 
-static int validate_and_copy_actions(const struct nlattr *attr,
+#define MAX_ETH_TYPES 16 /* Arbitrary Limit */
+
+/* struct eth_types - possible eth types
+ * @types: provides storage for the possible eth types.
+ * @start: is the index of the first entry of types which is possible.
+ * @end: is the index of the last entry of types which is possible.
+ * @cursor: is the index of the entry which should be updated if an action
+ * changes the eth type.
+ *
+ * Due to the sample action there may be multiple possible eth types.
+ * In order to correctly validate actions all possible types are tracked
+ * and verified. This is done using struct eth_types.
+ *
+ * Initially start, end and cursor should be 0, and the first element of
+ * types should be set to the eth type of the flow.
+ *
+ * When an action changes the eth type then the values of start and end are
+ * updated to the value of cursor. The new type is stored at types[cursor].
+ *
+ * When entering a sample action the start and cursor values are saved. The
+ * value of cursor is set to the value of end plus one.
+ *
+ * When leaving a sample action the start and cursor values are restored to
+ * their saved values.
+ *
+ * An example follows.
+ *
+ * actions: pop_mpls(A),sample(pop_mpls(B)),sample(pop_mpls(C)),pop_mpls(D)
+ *
+ * 0. Initial state:
+ *	types = { original_eth_type }
+ * 	start = end = cursor = 0;
+ *
+ * 1. pop_mpls(A)
+ *    a. Check types from start (0) to end (0) inclusive
+ *       i.e. Check against original_eth_type
+ *    b. Set start = end = cursor
+ *    c. Set types[cursor] = A
+ *    New state:
+ *	types = { A }
+ *	start = end = cursor = 0;
+ *
+ * 2. Enter first sample()
+ *    a. Save start and cursor
+ *    b. Set cursor = end + 1
+ *    New state:
+ *	types = { A }
+ *	start = end = 0;
+ *	cursor = 1;
+ *
+ * 3. pop_mpls(B)
+ *    a. Check types from start (0) to end (0)
+ *       i.e: Check against A
+ *    b. Set start = end = cursor
+ *    c. Set types[cursor] = B
+ *    New state:
+ *	types = { A, B }
+ *	start = end = cursor = 1;
+ *
+ * 4. Leave first sample()
+ *    a. Restore start and cursor to the values when entering 2.
+ *    New state:
+ *	types = { A, B }
+ *	start = cursor = 0;
+ *	end = 1;
+ *
+ * 5. Enter second sample()
+ *    a. Save start and cursor
+ *    b. Set cursor = end + 1
+ *    New state:
+ *	types = { A, B }
+ *	start = 0;
+ *	end = 1;
+ *	cursor = 2;
+ *
+ * 6. pop_mpls(C)
+ *    a. Check types from start (0) to end (1) inclusive
+ *       i.e: Check against A and B
+ *    b. Set start = end = cursor
+ *    c. Set types[cursor] = C
+ *    New state:
+ *	types = { A, B, C }
+ *	start = end = cursor = 2;
+ *
+ * 7. Leave second sample()
+ *    a. Restore start and cursor to the values when entering 5.
+ *    New state:
+ *	types = { A, B, C }
+ *	start = cursor = 0;
+ *	end = 2;
+ *
+ * 8. pop_mpls(D)
+ *    a. Check types from start (0) to end (2) inclusive
+ *       i.e: Check against A, B and C
+ *    b. Set start = end = cursor
+ *    c. Set types[cursor] = D
+ *    New state:
+ *	types = { D } // Trailing entries of type are no longer used end = 0
+ *	start = end = cursor = 0;
+ */
+struct eth_types {
+	int start, end, cursor;
+	__be16 types[MAX_ETH_TYPES];
+};
+
+static void eth_types_set(struct eth_types *types, __be16 type)
+{
+	types->start = types->end = types->cursor;
+	types->types[types->cursor] = type;
+}
+
+static int validate_and_copy_actions__(const struct nlattr *attr,
 				const struct sw_flow_key *key, int depth,
-				struct sw_flow_actions **sfa);
+				struct sw_flow_actions **sfa,
+				struct eth_types *eth_types);
 
 static int validate_and_copy_sample(const struct nlattr *attr,
 			   const struct sw_flow_key *key, int depth,
-			   struct sw_flow_actions **sfa)
+			   struct sw_flow_actions **sfa,
+			   struct eth_types *eth_types)
 {
 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
 	const struct nlattr *probability, *actions;
 	const struct nlattr *a;
 	int rem, start, err, st_acts;
+	int saved_eth_types_start, saved_eth_types_cursor;
 
 	memset(attrs, 0, sizeof(attrs));
 	nla_for_each_nested(a, attr, rem) {
@@ -585,22 +701,39 @@ static int validate_and_copy_sample(const struct nlattr *attr,
 	if (st_acts < 0)
 		return st_acts;
 
-	err = validate_and_copy_actions(actions, key, depth + 1, sfa);
+	/* Save and update eth_types cursor and start.  Please see the
+	 * comment for struct eth_types for a discussion of this.
+	 */
+	saved_eth_types_start = eth_types->start;
+	saved_eth_types_cursor = eth_types->cursor;
+	eth_types->cursor = eth_types->end + 1;
+	if (eth_types->cursor == MAX_ETH_TYPES)
+		return -EINVAL;
+
+	err = validate_and_copy_actions__(actions, key, depth + 1, sfa,
+					  eth_types);
 	if (err)
 		return err;
 
+	/* Restore eth_types cursor and start.  Please see the
+	 * comment for struct eth_types for a discussion of this.
+	 */
+	eth_types->cursor = saved_eth_types_cursor;
+	eth_types->start = saved_eth_types_start;
+
 	add_nested_action_end(*sfa, st_acts);
 	add_nested_action_end(*sfa, start);
 
 	return 0;
 }
 
-static int validate_tp_port(const struct sw_flow_key *flow_key)
+static int validate_tp_port__(const struct sw_flow_key *flow_key,
+			      __be16 eth_type)
 {
-	if (flow_key->eth.type == htons(ETH_P_IP)) {
+	if (eth_type == htons(ETH_P_IP)) {
 		if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
 			return 0;
-	} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
+	} else 	if (eth_type == htons(ETH_P_IPV6)) {
 		if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
 			return 0;
 	}
@@ -608,6 +741,21 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
 	return -EINVAL;
 }
 
+static int validate_tp_port(const struct sw_flow_key *flow_key,
+			    const struct eth_types *eth_types)
+{
+	int i;
+
+	for (i = eth_types->start; i < eth_types->end; i++) {
+		int ret = validate_tp_port__(flow_key, eth_types->types[i]);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int validate_and_copy_set_tun(const struct nlattr *attr,
 				     struct sw_flow_actions **sfa)
 {
@@ -634,7 +782,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 static int validate_set(const struct nlattr *a,
 			const struct sw_flow_key *flow_key,
 			struct sw_flow_actions **sfa,
-			bool *set_tun)
+			bool *set_tun, struct eth_types *eth_types)
 {
 	const struct nlattr *ovs_key = nla_data(a);
 	int key_type = nla_type(ovs_key);
@@ -665,9 +813,12 @@ static int validate_set(const struct nlattr *a,
 			return err;
 		break;
 
-	case OVS_KEY_ATTR_IPV4:
-		if (flow_key->eth.type != htons(ETH_P_IP))
-			return -EINVAL;
+	case OVS_KEY_ATTR_IPV4: {
+		int i;
+
+		for (i = eth_types->start; i <= eth_types->end; i++)
+			if (eth_types->types[i] != htons(ETH_P_IP))
+				return -EINVAL;
 
 		if (!flow_key->ip.proto)
 			return -EINVAL;
@@ -680,10 +831,14 @@ static int validate_set(const struct nlattr *a,
 			return -EINVAL;
 
 		break;
+	}
 
-	case OVS_KEY_ATTR_IPV6:
-		if (flow_key->eth.type != htons(ETH_P_IPV6))
-			return -EINVAL;
+	case OVS_KEY_ATTR_IPV6: {
+		int i;
+
+		for (i = eth_types->start; i <= eth_types->end; i++)
+			if (eth_types->types[i] != htons(ETH_P_IPV6))
+				return -EINVAL;
 
 		if (!flow_key->ip.proto)
 			return -EINVAL;
@@ -699,24 +854,34 @@ static int validate_set(const struct nlattr *a,
 			return -EINVAL;
 
 		break;
+	}
 
 	case OVS_KEY_ATTR_TCP:
 		if (flow_key->ip.proto != IPPROTO_TCP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key);
+		return validate_tp_port(flow_key, eth_types);
 
 	case OVS_KEY_ATTR_UDP:
 		if (flow_key->ip.proto != IPPROTO_UDP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key);
+		return validate_tp_port(flow_key, eth_types);
+
+	case OVS_KEY_ATTR_MPLS: {
+		int i;
+
+		for (i = eth_types->start; i < eth_types->end; i++)
+			if (!eth_p_mpls(eth_types->types[i]))
+				return -EINVAL;
+		break;
+	}
 
 	case OVS_KEY_ATTR_SCTP:
 		if (flow_key->ip.proto != IPPROTO_SCTP)
 			return -EINVAL;
 
-		return validate_tp_port(flow_key);
+		return validate_tp_port(flow_key, eth_types);
 
 	default:
 		return -EINVAL;
@@ -760,10 +925,10 @@ static int copy_action(const struct nlattr *from,
 	return 0;
 }
 
-static int validate_and_copy_actions(const struct nlattr *attr,
-				const struct sw_flow_key *key,
-				int depth,
-				struct sw_flow_actions **sfa)
+static int validate_and_copy_actions__(const struct nlattr *attr,
+				const struct sw_flow_key *key, int depth,
+				struct sw_flow_actions **sfa,
+				struct eth_types *eth_types)
 {
 	const struct nlattr *a;
 	int rem, err;
@@ -776,6 +941,8 @@ static int validate_and_copy_actions(const struct nlattr *attr,
 		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
 			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
 			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -806,6 +973,33 @@ static int validate_and_copy_actions(const struct nlattr *attr,
 				return -EINVAL;
 			break;
 
+		case OVS_ACTION_ATTR_PUSH_MPLS: {
+			const struct ovs_action_push_mpls *mpls = nla_data(a);
+			if (!eth_p_mpls(mpls->mpls_ethertype))
+				return -EINVAL;
+			eth_types_set(eth_types, mpls->mpls_ethertype);
+			break;
+		}
+
+		case OVS_ACTION_ATTR_POP_MPLS: {
+			int i;
+
+			for (i = eth_types->start; i <= eth_types->end; i++)
+				if (!eth_p_mpls(eth_types->types[i]))
+					return -EINVAL;
+
+			/* Disallow subsequent L2.5+ set and mpls_pop actions
+			 * as there is no check here to ensure that the new
+			 * eth_type is valid and thus set actions could
+			 * write off the end of the packet or otherwise
+			 * corrupt it.
+			 *
+			 * Support for these actions is planned using packet
+			 * recirculation.
+			 */
+			eth_types_set(eth_types, htons(0));
+			break;
+		}
 
 		case OVS_ACTION_ATTR_POP_VLAN:
 			break;
@@ -819,13 +1013,14 @@ static int validate_and_copy_actions(const struct nlattr *attr,
 			break;
 
 		case OVS_ACTION_ATTR_SET:
-			err = validate_set(a, key, sfa, &skip_copy);
+			err = validate_set(a, key, sfa, &skip_copy, eth_types);
 			if (err)
 				return err;
 			break;
 
 		case OVS_ACTION_ATTR_SAMPLE:
-			err = validate_and_copy_sample(a, key, depth, sfa);
+			err = validate_and_copy_sample(a, key, depth, sfa,
+						       eth_types);
 			if (err)
 				return err;
 			skip_copy = true;
@@ -847,6 +1042,20 @@ static int validate_and_copy_actions(const struct nlattr *attr,
 	return 0;
 }
 
+static int validate_and_copy_actions(const struct nlattr *attr,
+				const struct sw_flow_key *key,
+				struct sw_flow_actions **sfa)
+{
+	struct eth_types eth_type = {
+		.start = 0,
+		.end = 0,
+		.cursor = 0,
+		.types = { key->eth.type, },
+	};
+
+	return validate_and_copy_actions__(attr, key, 0, sfa, &eth_type);
+}
+
 static void clear_stats(struct sw_flow *flow)
 {
 	flow->used = 0;
@@ -910,7 +1119,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(acts))
 		goto err_flow_free;
 
-	err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts);
+	err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, &acts);
 	rcu_assign_pointer(flow->sf_acts, acts);
 	if (err)
 		goto err_flow_free;
@@ -1268,7 +1477,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 
 		ovs_flow_key_mask(&masked_key, &key, &mask);
 		error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
-						  &masked_key, 0, &acts);
+						  &masked_key, &acts);
 		if (error) {
 			OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
 			goto err_kfree;
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 4a49a7d..31fe10a 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -95,6 +95,8 @@ struct datapath {
  * @pkt_key: The flow information extracted from the packet.  Must be nonnull.
  * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
  * packet is not being tunneled.
+ * @inner_protocol: Provides a substitute for the skb->inner_protocol field on
+ * kernels before 3.11.
  */
 struct ovs_skb_cb {
 	struct sw_flow		*flow;
diff --git a/datapath/flow.c b/datapath/flow.c
index 29122af..51e7965 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -44,6 +44,7 @@
 #include <net/ipv6.h>
 #include <net/ndisc.h>
 
+#include "mpls.h"
 #include "vlan.h"
 
 static struct kmem_cache *flow_cache;
@@ -140,7 +141,8 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
 			| (1ULL << OVS_KEY_ATTR_ICMP)
 			| (1ULL << OVS_KEY_ATTR_ICMPV6)
 			| (1ULL << OVS_KEY_ATTR_ARP)
-			| (1ULL << OVS_KEY_ATTR_ND));
+			| (1ULL << OVS_KEY_ATTR_ND)
+			| (1ULL << OVS_KEY_ATTR_MPLS));
 
 	/* Always allowed mask fields. */
 	mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL)
@@ -155,6 +157,12 @@ static bool ovs_match_validate(const struct sw_flow_match *match,
 			mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP;
 	}
 
+	if (eth_p_mpls(match->key->eth.type)) {
+		key_expected |= 1ULL << OVS_KEY_ATTR_MPLS;
+		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+			mask_allowed |= 1ULL << OVS_KEY_ATTR_MPLS;
+	}
+
 	if (match->key->eth.type == htons(ETH_P_IP)) {
 		key_expected |= 1ULL << OVS_KEY_ATTR_IPV4;
 		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@@ -879,6 +887,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 		return -ENOMEM;
 
 	skb_reset_network_header(skb);
+	skb_reset_mac_len(skb);
 	__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	/* Network layer. */
@@ -961,6 +970,33 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
 			memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
 			memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
 		}
+	} else if (eth_p_mpls(key->eth.type)) {
+		size_t stack_len = MPLS_HLEN;
+
+		/* In the presence of an MPLS label stack the end of the L2
+		 * header and the beginning of the L3 header differ.
+		 *
+		 * Advance network_header to the beginning of the L3
+		 * header. mac_len corresponds to the end of the L2 header.
+		 */
+		while (1) {
+			__be32 lse;
+
+			error = check_header(skb, skb->mac_len + stack_len);
+			if (unlikely(error))
+				return 0;
+
+			memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+
+			if (stack_len == MPLS_HLEN)
+				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+
+			skb_set_network_header(skb, skb->mac_len + stack_len);
+			if (lse & htonl(MPLS_BOS_MASK))
+				break;
+
+			stack_len += MPLS_HLEN;
+		}
 	} else if (key->eth.type == htons(ETH_P_IPV6)) {
 		int nh_len;             /* IPv6 Header + Extensions */
 
@@ -1154,6 +1190,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
 	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
 	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
 	[OVS_KEY_ATTR_TUNNEL] = -1,
+	[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
 };
 
 static bool is_all_zero(const u8 *fp, size_t size)
@@ -1528,6 +1565,17 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match,  u64 attrs,
 		attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
 	}
 
+
+	if (attrs & (1ULL << OVS_KEY_ATTR_MPLS)) {
+		const struct ovs_key_mpls *mpls_key;
+
+		mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+		SW_FLOW_KEY_PUT(match, mpls.top_lse,
+				mpls_key->mpls_lse, is_mask);
+
+		attrs &= ~(1ULL << OVS_KEY_ATTR_MPLS);
+        }
+
 	if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
 		const struct ovs_key_tcp *tcp_key;
 
@@ -1891,6 +1939,14 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
 		arp_key->arp_op = htons(output->ip.proto);
 		memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
 		memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
+	} else if (eth_p_mpls(swkey->eth.type)) {
+		struct ovs_key_mpls *mpls_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+		if (!nla)
+			goto nla_put_failure;
+		mpls_key = nla_data(nla);
+		mpls_key->mpls_lse = output->mpls.top_lse;
 	}
 
 	if ((swkey->eth.type == htons(ETH_P_IP) ||
diff --git a/datapath/flow.h b/datapath/flow.h
index 03eae03..9376802 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -87,12 +87,17 @@ struct sw_flow_key {
 		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
 		__be16 type;		/* Ethernet frame type. */
 	} eth;
-	struct {
-		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
-		u8     tos;		/* IP ToS. */
-		u8     ttl;		/* IP TTL/hop limit. */
-		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
-	} ip;
+	union {
+		struct {
+			__be32 top_lse;		/* top label stack entry */
+		} mpls;
+		struct {
+			u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
+			u8     tos;		/* IP ToS. */
+			u8     ttl;		/* IP TTL/hop limit. */
+			u8     frag;		/* One of OVS_FRAG_TYPE_*. */
+		} ip;
+	};
 	union {
 		struct {
 			struct {
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
index 32f906c..f917356 100644
--- a/datapath/linux/compat/gso.c
+++ b/datapath/linux/compat/gso.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/if.h>
 #include <linux/if_tunnel.h>
+#include <linux/if_vlan.h>
 #include <linux/icmp.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -35,6 +36,8 @@
 #include <net/xfrm.h>
 
 #include "gso.h"
+#include "mpls.h"
+#include "vlan.h"
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \
 	!defined(HAVE_VLAN_BUG_WORKAROUND)
@@ -47,10 +50,12 @@ MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets");
 #define vlan_tso true
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
 static bool dev_supports_vlan_tx(struct net_device *dev)
 {
-#if defined(HAVE_VLAN_BUG_WORKAROUND)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+	return true;
+#elif defined(HAVE_VLAN_BUG_WORKAROUND)
 	return dev->features & NETIF_F_HW_VLAN_TX;
 #else
 	/* Assume that the driver is buggy. */
@@ -58,24 +63,66 @@ static bool dev_supports_vlan_tx(struct net_device *dev)
 #endif
 }
 
+/* Strictly this is not needed and will be optimised out
+ * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0).
+ * It is here to make things explicit should the compatibility
+ * code be extended in some way prior extending its life-span
+ * beyond v3.11.
+ */
+static bool supports_mpls_gso(void)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
+	return true;
+#else
+	return false;
+#endif
+}
+
 int rpl_dev_queue_xmit(struct sk_buff *skb)
 {
 #undef dev_queue_xmit
 	int err = -ENOMEM;
+	__be16 inner_protocol;
+	bool vlan, mpls;
 
-	if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
+	vlan = mpls = false;
+
+	inner_protocol = ovs_skb_get_inner_protocol(skb);
+	if (eth_p_mpls(skb->protocol) && !supports_mpls_gso())
+		mpls = true;
+
+	if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev))
+		vlan = true;
+
+	if (vlan || mpls) {
 		int features;
 
 		features = netif_skb_features(skb);
 
-		if (!vlan_tso)
-			features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
-				      NETIF_F_UFO | NETIF_F_FSO);
+		if (vlan) {
+			if (!vlan_tso)
+				features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
+					      NETIF_F_UFO | NETIF_F_FSO);
 
-		skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
-		if (unlikely(!skb))
-			return err;
-		vlan_set_tci(skb, 0);
+			skb = __vlan_put_tag(skb, skb->vlan_proto,
+					     vlan_tx_tag_get(skb));
+			if (unlikely(!skb))
+				return err;
+			vlan_set_tci(skb, 0);
+		}
+
+		/* As of v3.11 the kernel provides an mpls_features field in
+		 * struct net_device which allows devices to advertise which
+		 * features its supports for MPLS. This value defaults to
+		 * NETIF_F_SG and as of v3.11.
+		 *
+		 * This compatibility code is intended for kernels older
+		 * than v3.11 that do not support MPLS GSO and thus do not
+		 * provide mpls_features. Thus this code uses NETIF_F_SG
+		 * directly in place of mpls_features.
+		 */
+		if (mpls)
+			features &= NETIF_F_SG;
 
 		if (netif_needs_gso(skb, features)) {
 			struct sk_buff *nskb;
@@ -114,13 +161,17 @@ drop:
 	kfree_skb(skb);
 	return err;
 }
-#endif /* kernel version < 2.6.37 */
 
-static __be16 __skb_network_protocol(struct sk_buff *skb)
+__be16 rpl_skb_network_protocol(struct sk_buff *skb)
 {
 	__be16 type = skb->protocol;
+	__be16 inner_proto;
 	int vlan_depth = ETH_HLEN;
 
+	inner_proto = ovs_skb_get_inner_protocol(skb);
+	if (eth_p_mpls(skb->protocol))
+		type = inner_proto;
+
 	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
 		struct vlan_hdr *vh;
 
@@ -135,6 +186,46 @@ static __be16 __skb_network_protocol(struct sk_buff *skb)
 	return type;
 }
 
+struct sk_buff *rpl___skb_gso_segment(struct sk_buff *skb,
+				      netdev_features_t features,
+				      bool tx_path)
+{
+	struct sk_buff *skb_gso;
+	__be16 type = skb->protocol;
+	bool mpls;
+
+	mpls = eth_p_mpls(type);
+	if (mpls)
+		skb->protocol = skb_network_protocol(skb);
+
+	/* this hack needed to get regular skb_gso_segment() */
+#ifdef HAVE___SKB_GSO_SEGMENT
+#undef __skb_gso_segment
+	skb_gso = __skb_gso_segment(skb, features, tx_path);
+#else
+#undef skb_gso_segment
+	skb_gso = skb_gso_segment(skb, features);
+#endif
+
+	if (!skb_gso || IS_ERR(skb_gso) || !mpls)
+	    return skb_gso;
+
+	skb = skb_gso;
+	while (skb) {
+		skb->protocol = type;
+		skb = skb->next;
+	}
+
+	return skb_gso;
+}
+
+struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features)
+{
+	return rpl___skb_gso_segment(skb, features, true);
+}
+#endif /* kernel version < 3.11.0 */
+
 static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
 					   netdev_features_t features,
 					   bool tx_path)
@@ -149,7 +240,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
 
 	/* setup whole inner packet to get protocol. */
 	__skb_pull(skb, mac_offset);
-	skb->protocol = __skb_network_protocol(skb);
+	skb->protocol = skb_network_protocol(skb);
 
 	/* setup l3 packet to gso, to get around segmentation bug on older kernel.*/
 	__skb_pull(skb, (pkt_hlen - mac_offset));
diff --git a/datapath/linux/compat/gso.h b/datapath/linux/compat/gso.h
index 44fd213..c6cd8fa 100644
--- a/datapath/linux/compat/gso.h
+++ b/datapath/linux/compat/gso.h
@@ -1,6 +1,7 @@
 #ifndef __LINUX_GSO_WRAPPER_H
 #define __LINUX_GSO_WRAPPER_H
 
+#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <net/protocol.h>
 
@@ -11,6 +12,9 @@ struct ovs_gso_cb {
 	sk_buff_data_t	inner_network_header;
 	sk_buff_data_t	inner_mac_header;
 	void (*fix_segment)(struct sk_buff *);
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
+	__be16			inner_protocol;
+#endif
 };
 #define OVS_GSO_CB(skb) ((struct ovs_gso_cb *)(skb)->cb)
 
@@ -69,4 +73,53 @@ static inline void skb_reset_inner_headers(struct sk_buff *skb)
 
 #define ip_local_out rpl_ip_local_out
 int ip_local_out(struct sk_buff *skb);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
+#define skb_network_protocol rpl_skb_network_protocol
+__be16 rpl_skb_network_protocol(struct sk_buff *skb);
+
+#define skb_gso_segment rpl_skb_gso_segment
+struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb,
+				    netdev_features_t features);
+
+#define __skb_gso_segment rpl___skb_gso_segment
+struct sk_buff *rpl___skb_gso_segment(struct sk_buff *skb,
+				      netdev_features_t features,
+				      bool tx_path);
+
+static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) {
+	OVS_GSO_CB(skb)->inner_protocol = htons(0);
+}
+
+static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb,
+					      __be16 ethertype) {
+	OVS_GSO_CB(skb)->inner_protocol = ethertype;
+}
+
+static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
+{
+	return OVS_GSO_CB(skb)->inner_protocol;
+}
+
+#else
+
+static inline void ovs_skb_init_inner_protocol(struct sk_buff *skb) {
+	/* Nothing to do. The inner_protocol is either zero or
+	 * has been set to a value by another user.
+	 * Either way it may be considered initialised.
+	 */
+}
+
+static inline void ovs_skb_set_inner_protocol(struct sk_buff *skb,
+					      __be16 ethertype)
+{
+	skb->inner_protocol = ethertype;
+}
+
+static inline __be16 ovs_skb_get_inner_protocol(struct sk_buff *skb)
+{
+	return skb->inner_protocol;
+}
+#endif
+
 #endif
diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h
index 2b2c855..958ea81 100644
--- a/datapath/linux/compat/include/linux/netdevice.h
+++ b/datapath/linux/compat/include/linux/netdevice.h
@@ -74,9 +74,6 @@ static inline struct net_device *dev_get_by_index_rcu(struct net *net, int ifind
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38)
-#define skb_gso_segment rpl_skb_gso_segment
-struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, u32 features);
-
 #define netif_skb_features rpl_netif_skb_features
 u32 rpl_netif_skb_features(struct sk_buff *skb);
 
@@ -92,15 +89,6 @@ static inline int rpl_netif_needs_gso(struct sk_buff *skb, int features)
 typedef u32 netdev_features_t;
 #endif
 
-#ifndef HAVE___SKB_GSO_SEGMENT
-static inline struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
-						netdev_features_t features,
-						bool tx_path)
-{
-	return skb_gso_segment(skb, features);
-}
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
 
 /* XEN dom0 networking assumes dev->master is bond device
@@ -120,7 +108,7 @@ static inline void netdev_upper_dev_unlink(struct net_device *dev,
 }
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
 #define dev_queue_xmit rpl_dev_queue_xmit
 int dev_queue_xmit(struct sk_buff *skb);
 #endif
diff --git a/datapath/linux/compat/netdevice.c b/datapath/linux/compat/netdevice.c
index 248066d..5f190b9 100644
--- a/datapath/linux/compat/netdevice.c
+++ b/datapath/linux/compat/netdevice.c
@@ -71,32 +71,4 @@ u32 rpl_netif_skb_features(struct sk_buff *skb)
 		return harmonize_features(skb, protocol, features);
 	}
 }
-
-struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, u32 features)
-{
-	int vlan_depth = ETH_HLEN;
-	__be16 type = skb->protocol;
-	__be16 skb_proto;
-	struct sk_buff *skb_gso;
-
-	while (type == htons(ETH_P_8021Q)) {
-		struct vlan_hdr *vh;
-
-		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
-			return ERR_PTR(-EINVAL);
-
-		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
-		type = vh->h_vlan_encapsulated_proto;
-		vlan_depth += VLAN_HLEN;
-	}
-
-	/* this hack needed to get regular skb_gso_segment() */
-#undef skb_gso_segment
-	skb_proto = skb->protocol;
-	skb->protocol = type;
-
-	skb_gso = skb_gso_segment(skb, features);
-	skb->protocol = skb_proto;
-	return skb_gso;
-}
 #endif	/* kernel version < 2.6.38 */
diff --git a/datapath/mpls.h b/datapath/mpls.h
new file mode 100644
index 0000000..7eab104
--- /dev/null
+++ b/datapath/mpls.h
@@ -0,0 +1,15 @@
+#ifndef MPLS_H
+#define MPLS_H 1
+
+#include <linux/if_ether.h>
+
+#define MPLS_BOS_MASK	0x00000100
+#define MPLS_HLEN 4
+
+static inline bool eth_p_mpls(__be16 eth_type)
+{
+	return eth_type == htons(ETH_P_MPLS_UC) ||
+		eth_type == htons(ETH_P_MPLS_MC);
+}
+
+#endif
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 09c26b5..1ef98a8 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -283,14 +283,13 @@ enum ovs_key_attr {
 	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
 	OVS_KEY_ATTR_TUNNEL,	/* Nested set of ovs_tunnel attributes */
 	OVS_KEY_ATTR_SCTP,      /* struct ovs_key_sctp */
+	OVS_KEY_ATTR_MPLS,      /* array of struct ovs_key_mpls.
+				 * The implementation may restrict
+				 * the accepted length of the array. */
 
 #ifdef __KERNEL__
 	OVS_KEY_ATTR_IPV4_TUNNEL,  /* struct ovs_key_ipv4_tunnel */
 #endif
-
-	OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls.
-				 * The implementation may restrict
-				 * the accepted length of the array. */
 	__OVS_KEY_ATTR_MAX
 };
 
-- 
1.8.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.