* [patch net-next v4 16/21] rocker: implement rocker ofdpa flow table manipulation
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
The rocker driver maintains 4 hash tables: flows, groups, FDB, and VLANs.
Flow and group tables track the entries installed to OF-DPA tables,
per the OF-DPA spec. See OF-DPA spec for full description of fields
in each flow and group table. New table entries are pushed to the
device with ADD cmd. Updated entries are pushed to the device with
MOD cmd. For flow table entries, a crc32 key is made from fields of
the particular field. For group table entries, the group_id is used
as the key.
The FDB table tracks fdb entries learned by the device or manually
pushed to the bridge by the user. A crc32 key is made from the
port/mac/vlan tuple for the fdb entry.
The VLAN table tracks the ifindex-to-internal-vlan mapping for
untagged pkts. On ingress, an untagged pkt is inserted with an
internal VLAN ID based on the input port's current internal VLAN ID.
The input port's internal VLAN will either be referenced by the port's
ifindex, if not bridged, or the containing bridge's ifindex, if
bridged. Since the ifindex space isn't within a fixed range, uses a
hash table (with ifindex as key) to track internal VLAN ID for a given
ifindex. The internal VLAN ID range is fixed and currently uses the
upper 255 VLAN IDs, starting at 0xf00.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
v1->v2->v3->v4:
-no change
---
drivers/net/ethernet/rocker/rocker.c | 1469 +++++++++++++++++++++++++++++++++-
1 file changed, 1467 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index a53011c..6345f60 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -16,6 +16,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/spinlock.h>
+#include <linux/hashtable.h>
#include <linux/crc32.h>
#include <linux/sort.h>
#include <linux/random.h>
@@ -27,6 +28,7 @@
#include <linux/ethtool.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
+#include <linux/bitops.h>
#include <net/switchdev.h>
#include <net/rtnetlink.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
@@ -41,6 +43,123 @@ static const struct pci_device_id rocker_pci_id_table[] = {
{0, }
};
+struct rocker_flow_tbl_key {
+ u32 priority;
+ enum rocker_of_dpa_table_id tbl_id;
+ union {
+ struct {
+ u32 in_lport;
+ u32 in_lport_mask;
+ enum rocker_of_dpa_table_id goto_tbl;
+ } ig_port;
+ struct {
+ u32 in_lport;
+ __be16 vlan_id;
+ __be16 vlan_id_mask;
+ enum rocker_of_dpa_table_id goto_tbl;
+ bool untagged;
+ __be16 new_vlan_id;
+ } vlan;
+ struct {
+ u32 in_lport;
+ u32 in_lport_mask;
+ __be16 eth_type;
+ u8 eth_dst[ETH_ALEN];
+ u8 eth_dst_mask[ETH_ALEN];
+ __be16 vlan_id;
+ __be16 vlan_id_mask;
+ enum rocker_of_dpa_table_id goto_tbl;
+ bool copy_to_cpu;
+ } term_mac;
+ struct {
+ __be16 eth_type;
+ __be32 dst4;
+ __be32 dst4_mask;
+ enum rocker_of_dpa_table_id goto_tbl;
+ u32 group_id;
+ } ucast_routing;
+ struct {
+ u8 eth_dst[ETH_ALEN];
+ u8 eth_dst_mask[ETH_ALEN];
+ int has_eth_dst;
+ int has_eth_dst_mask;
+ __be16 vlan_id;
+ u32 tunnel_id;
+ enum rocker_of_dpa_table_id goto_tbl;
+ u32 group_id;
+ bool copy_to_cpu;
+ } bridge;
+ struct {
+ u32 in_lport;
+ u32 in_lport_mask;
+ u8 eth_src[ETH_ALEN];
+ u8 eth_src_mask[ETH_ALEN];
+ u8 eth_dst[ETH_ALEN];
+ u8 eth_dst_mask[ETH_ALEN];
+ __be16 eth_type;
+ __be16 vlan_id;
+ __be16 vlan_id_mask;
+ u8 ip_proto;
+ u8 ip_proto_mask;
+ u8 ip_tos;
+ u8 ip_tos_mask;
+ u32 group_id;
+ } acl;
+ };
+};
+
+struct rocker_flow_tbl_entry {
+ struct hlist_node entry;
+ u32 ref_count;
+ u64 cookie;
+ struct rocker_flow_tbl_key key;
+ u32 key_crc32; /* key */
+};
+
+struct rocker_group_tbl_entry {
+ struct hlist_node entry;
+ u32 cmd;
+ u32 group_id; /* key */
+ u16 group_count;
+ u32 *group_ids;
+ union {
+ struct {
+ u8 pop_vlan;
+ } l2_interface;
+ struct {
+ u8 eth_src[ETH_ALEN];
+ u8 eth_dst[ETH_ALEN];
+ __be16 vlan_id;
+ u32 group_id;
+ } l2_rewrite;
+ struct {
+ u8 eth_src[ETH_ALEN];
+ u8 eth_dst[ETH_ALEN];
+ __be16 vlan_id;
+ bool ttl_check;
+ u32 group_id;
+ } l3_unicast;
+ };
+};
+
+struct rocker_fdb_tbl_entry {
+ struct hlist_node entry;
+ u32 key_crc32; /* key */
+ bool learned;
+ struct rocker_fdb_tbl_key {
+ u32 lport;
+ u8 addr[ETH_ALEN];
+ __be16 vlan_id;
+ } key;
+};
+
+struct rocker_internal_vlan_tbl_entry {
+ struct hlist_node entry;
+ int ifindex; /* key */
+ u32 ref_count;
+ __be16 vlan_id;
+};
+
struct rocker_desc_info {
char *data; /* mapped */
size_t data_size;
@@ -61,11 +180,28 @@ struct rocker_dma_ring_info {
struct rocker;
+enum {
+ ROCKER_CTRL_LINK_LOCAL_MCAST,
+ ROCKER_CTRL_LOCAL_ARP,
+ ROCKER_CTRL_IPV4_MCAST,
+ ROCKER_CTRL_IPV6_MCAST,
+ ROCKER_CTRL_DFLT_BRIDGING,
+ ROCKER_CTRL_MAX,
+};
+
+#define ROCKER_INTERNAL_VLAN_ID_BASE 0x0f00
+#define ROCKER_N_INTERNAL_VLANS 255
+#define ROCKER_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID)
+#define ROCKER_INTERNAL_VLAN_BITMAP_LEN BITS_TO_LONGS(ROCKER_N_INTERNAL_VLANS)
+
struct rocker_port {
struct net_device *dev;
struct rocker *rocker;
unsigned int port_number;
u32 lport;
+ __be16 internal_vlan_id;
+ bool ctrls[ROCKER_CTRL_MAX];
+ unsigned long vlan_bitmap[ROCKER_VLAN_BITMAP_LEN];
struct napi_struct napi_tx;
struct napi_struct napi_rx;
struct rocker_dma_ring_info tx_ring;
@@ -84,8 +220,76 @@ struct rocker {
spinlock_t cmd_ring_lock;
struct rocker_dma_ring_info cmd_ring;
struct rocker_dma_ring_info event_ring;
+ DECLARE_HASHTABLE(flow_tbl, 16);
+ spinlock_t flow_tbl_lock;
+ u64 flow_tbl_next_cookie;
+ DECLARE_HASHTABLE(group_tbl, 16);
+ spinlock_t group_tbl_lock;
+ DECLARE_HASHTABLE(fdb_tbl, 16);
+ spinlock_t fdb_tbl_lock;
+ unsigned long internal_vlan_bitmap[ROCKER_INTERNAL_VLAN_BITMAP_LEN];
+ DECLARE_HASHTABLE(internal_vlan_tbl, 8);
+ spinlock_t internal_vlan_tbl_lock;
+};
+
+static const u8 zero_mac[ETH_ALEN] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+static const u8 ff_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+static const u8 ll_mac[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
+static const u8 ll_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0 };
+static const u8 mcast_mac[ETH_ALEN] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 };
+static const u8 ipv4_mcast[ETH_ALEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 };
+static const u8 ipv4_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0x80, 0x00, 0x00 };
+static const u8 ipv6_mcast[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x00 };
+static const u8 ipv6_mask[ETH_ALEN] = { 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 };
+
+/* Rocker priority levels for flow table entries. Higher
+ * priority match takes precedence over lower priority match.
+ */
+
+enum {
+ ROCKER_PRIORITY_UNKNOWN = 0,
+ ROCKER_PRIORITY_IG_PORT = 1,
+ ROCKER_PRIORITY_VLAN = 1,
+ ROCKER_PRIORITY_TERM_MAC_UCAST = 0,
+ ROCKER_PRIORITY_TERM_MAC_MCAST = 1,
+ ROCKER_PRIORITY_UNICAST_ROUTING = 1,
+ ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT = 1,
+ ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD = 2,
+ ROCKER_PRIORITY_BRIDGING_VLAN = 3,
+ ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_EXACT = 1,
+ ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_WILD = 2,
+ ROCKER_PRIORITY_BRIDGING_TENANT = 3,
+ ROCKER_PRIORITY_ACL_CTRL = 3,
+ ROCKER_PRIORITY_ACL_NORMAL = 2,
+ ROCKER_PRIORITY_ACL_DFLT = 1,
};
+static bool rocker_vlan_id_is_internal(__be16 vlan_id)
+{
+ u16 start = ROCKER_INTERNAL_VLAN_ID_BASE;
+ u16 end = 0xffe;
+ u16 _vlan_id = ntohs(vlan_id);
+
+ return (_vlan_id >= start && _vlan_id <= end);
+}
+
+static __be16 rocker_port_vid_to_vlan(struct rocker_port *rocker_port,
+ u16 vid, bool *pop_vlan)
+{
+ __be16 vlan_id;
+
+ if (pop_vlan)
+ *pop_vlan = false;
+ vlan_id = htons(vid);
+ if (!vlan_id) {
+ vlan_id = rocker_port->internal_vlan_id;
+ if (pop_vlan)
+ *pop_vlan = true;
+ }
+
+ return vlan_id;
+}
+
struct rocker_wait {
wait_queue_head_t wait;
bool done;
@@ -1094,6 +1298,10 @@ static int rocker_event_link_change(struct rocker *rocker,
return 0;
}
+#define ROCKER_OP_FLAG_REMOVE BIT(0)
+#define ROCKER_OP_FLAG_NOWAIT BIT(1)
+#define ROCKER_OP_FLAG_LEARNED BIT(2)
+
static int rocker_event_process(struct rocker *rocker,
struct rocker_desc_info *desc_info)
{
@@ -1399,6 +1607,1240 @@ static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port,
macaddr, NULL, NULL, false);
}
+static int rocker_cmd_flow_tbl_add_ig_port(struct rocker_desc_info *desc_info,
+ struct rocker_flow_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT,
+ entry->key.ig_port.in_lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT_MASK,
+ entry->key.ig_port.in_lport_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID,
+ entry->key.ig_port.goto_tbl))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int rocker_cmd_flow_tbl_add_vlan(struct rocker_desc_info *desc_info,
+ struct rocker_flow_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT,
+ entry->key.vlan.in_lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID,
+ entry->key.vlan.vlan_id))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK,
+ entry->key.vlan.vlan_id_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID,
+ entry->key.vlan.goto_tbl))
+ return -EMSGSIZE;
+ if (entry->key.vlan.untagged &&
+ rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_NEW_VLAN_ID,
+ entry->key.vlan.new_vlan_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int rocker_cmd_flow_tbl_add_term_mac(struct rocker_desc_info *desc_info,
+ struct rocker_flow_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT,
+ entry->key.term_mac.in_lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT_MASK,
+ entry->key.term_mac.in_lport_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE,
+ entry->key.term_mac.eth_type))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC,
+ ETH_ALEN, entry->key.term_mac.eth_dst))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK,
+ ETH_ALEN, entry->key.term_mac.eth_dst_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID,
+ entry->key.term_mac.vlan_id))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK,
+ entry->key.term_mac.vlan_id_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID,
+ entry->key.term_mac.goto_tbl))
+ return -EMSGSIZE;
+ if (entry->key.term_mac.copy_to_cpu &&
+ rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_COPY_CPU_ACTION,
+ entry->key.term_mac.copy_to_cpu))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+rocker_cmd_flow_tbl_add_ucast_routing(struct rocker_desc_info *desc_info,
+ struct rocker_flow_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE,
+ entry->key.ucast_routing.eth_type))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_DST_IP,
+ entry->key.ucast_routing.dst4))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_DST_IP_MASK,
+ entry->key.ucast_routing.dst4_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID,
+ entry->key.ucast_routing.goto_tbl))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID,
+ entry->key.ucast_routing.group_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int rocker_cmd_flow_tbl_add_bridge(struct rocker_desc_info *desc_info,
+ struct rocker_flow_tbl_entry *entry)
+{
+ if (entry->key.bridge.has_eth_dst &&
+ rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC,
+ ETH_ALEN, entry->key.bridge.eth_dst))
+ return -EMSGSIZE;
+ if (entry->key.bridge.has_eth_dst_mask &&
+ rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK,
+ ETH_ALEN, entry->key.bridge.eth_dst_mask))
+ return -EMSGSIZE;
+ if (entry->key.bridge.vlan_id &&
+ rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID,
+ entry->key.bridge.vlan_id))
+ return -EMSGSIZE;
+ if (entry->key.bridge.tunnel_id &&
+ rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_TUNNEL_ID,
+ entry->key.bridge.tunnel_id))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GOTO_TABLE_ID,
+ entry->key.bridge.goto_tbl))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID,
+ entry->key.bridge.group_id))
+ return -EMSGSIZE;
+ if (entry->key.bridge.copy_to_cpu &&
+ rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_COPY_CPU_ACTION,
+ entry->key.bridge.copy_to_cpu))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int rocker_cmd_flow_tbl_add_acl(struct rocker_desc_info *desc_info,
+ struct rocker_flow_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT,
+ entry->key.acl.in_lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_IN_LPORT_MASK,
+ entry->key.acl.in_lport_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC,
+ ETH_ALEN, entry->key.acl.eth_src))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC_MASK,
+ ETH_ALEN, entry->key.acl.eth_src_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC,
+ ETH_ALEN, entry->key.acl.eth_dst))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC_MASK,
+ ETH_ALEN, entry->key.acl.eth_dst_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_ETHERTYPE,
+ entry->key.acl.eth_type))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID,
+ entry->key.acl.vlan_id))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID_MASK,
+ entry->key.acl.vlan_id_mask))
+ return -EMSGSIZE;
+
+ switch (ntohs(entry->key.acl.eth_type)) {
+ case ETH_P_IP:
+ case ETH_P_IPV6:
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_PROTO,
+ entry->key.acl.ip_proto))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info,
+ ROCKER_TLV_OF_DPA_IP_PROTO_MASK,
+ entry->key.acl.ip_proto_mask))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_DSCP,
+ entry->key.acl.ip_tos & 0x3f))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info,
+ ROCKER_TLV_OF_DPA_IP_DSCP_MASK,
+ entry->key.acl.ip_tos_mask & 0x3f))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_IP_ECN,
+ (entry->key.acl.ip_tos & 0xc0) >> 6))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info,
+ ROCKER_TLV_OF_DPA_IP_ECN_MASK,
+ (entry->key.acl.ip_tos_mask & 0xc0) >> 6))
+ return -EMSGSIZE;
+ break;
+ }
+
+ if (entry->key.acl.group_id != ROCKER_GROUP_NONE &&
+ rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID,
+ entry->key.acl.group_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int rocker_cmd_flow_tbl_add(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct rocker_flow_tbl_entry *entry = priv;
+ struct rocker_tlv *cmd_info;
+ int err = 0;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_TABLE_ID,
+ entry->key.tbl_id))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_PRIORITY,
+ entry->key.priority))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_HARDTIME, 0))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_OF_DPA_COOKIE,
+ entry->cookie))
+ return -EMSGSIZE;
+
+ switch (entry->key.tbl_id) {
+ case ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT:
+ err = rocker_cmd_flow_tbl_add_ig_port(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_VLAN:
+ err = rocker_cmd_flow_tbl_add_vlan(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC:
+ err = rocker_cmd_flow_tbl_add_term_mac(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING:
+ err = rocker_cmd_flow_tbl_add_ucast_routing(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_BRIDGING:
+ err = rocker_cmd_flow_tbl_add_bridge(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_TABLE_ID_ACL_POLICY:
+ err = rocker_cmd_flow_tbl_add_acl(desc_info, entry);
+ break;
+ default:
+ err = -ENOTSUPP;
+ break;
+ }
+
+ if (err)
+ return err;
+
+ rocker_tlv_nest_end(desc_info, cmd_info);
+
+ return 0;
+}
+
+static int rocker_cmd_flow_tbl_del(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ const struct rocker_flow_tbl_entry *entry = priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_OF_DPA_COOKIE,
+ entry->cookie))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+
+ return 0;
+}
+
+static int
+rocker_cmd_group_tbl_add_l2_interface(struct rocker_desc_info *desc_info,
+ struct rocker_group_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_OUT_LPORT,
+ ROCKER_GROUP_PORT_GET(entry->group_id)))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_POP_VLAN,
+ entry->l2_interface.pop_vlan))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+rocker_cmd_group_tbl_add_l2_rewrite(struct rocker_desc_info *desc_info,
+ struct rocker_group_tbl_entry *entry)
+{
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID_LOWER,
+ entry->l2_rewrite.group_id))
+ return -EMSGSIZE;
+ if (!is_zero_ether_addr(entry->l2_rewrite.eth_src) &&
+ rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC,
+ ETH_ALEN, entry->l2_rewrite.eth_src))
+ return -EMSGSIZE;
+ if (!is_zero_ether_addr(entry->l2_rewrite.eth_dst) &&
+ rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC,
+ ETH_ALEN, entry->l2_rewrite.eth_dst))
+ return -EMSGSIZE;
+ if (entry->l2_rewrite.vlan_id &&
+ rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID,
+ entry->l2_rewrite.vlan_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int
+rocker_cmd_group_tbl_add_group_ids(struct rocker_desc_info *desc_info,
+ struct rocker_group_tbl_entry *entry)
+{
+ int i;
+ struct rocker_tlv *group_ids;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_GROUP_COUNT,
+ entry->group_count))
+ return -EMSGSIZE;
+
+ group_ids = rocker_tlv_nest_start(desc_info,
+ ROCKER_TLV_OF_DPA_GROUP_IDS);
+ if (!group_ids)
+ return -EMSGSIZE;
+
+ for (i = 0; i < entry->group_count; i++)
+ /* Note TLV array is 1-based */
+ if (rocker_tlv_put_u32(desc_info, i + 1, entry->group_ids[i]))
+ return -EMSGSIZE;
+
+ rocker_tlv_nest_end(desc_info, group_ids);
+
+ return 0;
+}
+
+static int
+rocker_cmd_group_tbl_add_l3_unicast(struct rocker_desc_info *desc_info,
+ struct rocker_group_tbl_entry *entry)
+{
+ if (!is_zero_ether_addr(entry->l3_unicast.eth_src) &&
+ rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_SRC_MAC,
+ ETH_ALEN, entry->l3_unicast.eth_src))
+ return -EMSGSIZE;
+ if (!is_zero_ether_addr(entry->l3_unicast.eth_dst) &&
+ rocker_tlv_put(desc_info, ROCKER_TLV_OF_DPA_DST_MAC,
+ ETH_ALEN, entry->l3_unicast.eth_dst))
+ return -EMSGSIZE;
+ if (entry->l3_unicast.vlan_id &&
+ rocker_tlv_put_u16(desc_info, ROCKER_TLV_OF_DPA_VLAN_ID,
+ entry->l3_unicast.vlan_id))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_OF_DPA_TTL_CHECK,
+ entry->l3_unicast.ttl_check))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID_LOWER,
+ entry->l3_unicast.group_id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int rocker_cmd_group_tbl_add(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct rocker_group_tbl_entry *entry = priv;
+ struct rocker_tlv *cmd_info;
+ int err = 0;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID,
+ entry->group_id))
+ return -EMSGSIZE;
+
+ switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) {
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE:
+ err = rocker_cmd_group_tbl_add_l2_interface(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE:
+ err = rocker_cmd_group_tbl_add_l2_rewrite(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
+ err = rocker_cmd_group_tbl_add_group_ids(desc_info, entry);
+ break;
+ case ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST:
+ err = rocker_cmd_group_tbl_add_l3_unicast(desc_info, entry);
+ break;
+ default:
+ err = -ENOTSUPP;
+ break;
+ }
+
+ if (err)
+ return err;
+
+ rocker_tlv_nest_end(desc_info, cmd_info);
+
+ return 0;
+}
+
+static int rocker_cmd_group_tbl_del(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ const struct rocker_group_tbl_entry *entry = priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE, entry->cmd))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_OF_DPA_GROUP_ID,
+ entry->group_id))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+
+ return 0;
+}
+
+/*****************************************
+ * Flow, group, FDB, internal VLAN tables
+ *****************************************/
+
+static int rocker_init_tbls(struct rocker *rocker)
+{
+ hash_init(rocker->flow_tbl);
+ spin_lock_init(&rocker->flow_tbl_lock);
+
+ hash_init(rocker->group_tbl);
+ spin_lock_init(&rocker->group_tbl_lock);
+
+ hash_init(rocker->fdb_tbl);
+ spin_lock_init(&rocker->fdb_tbl_lock);
+
+ hash_init(rocker->internal_vlan_tbl);
+ spin_lock_init(&rocker->internal_vlan_tbl_lock);
+
+ return 0;
+}
+
+static void rocker_free_tbls(struct rocker *rocker)
+{
+ unsigned long flags;
+ struct rocker_flow_tbl_entry *flow_entry;
+ struct rocker_group_tbl_entry *group_entry;
+ struct rocker_fdb_tbl_entry *fdb_entry;
+ struct rocker_internal_vlan_tbl_entry *internal_vlan_entry;
+ struct hlist_node *tmp;
+ int bkt;
+
+ spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
+ hash_for_each_safe(rocker->flow_tbl, bkt, tmp, flow_entry, entry)
+ hash_del(&flow_entry->entry);
+ spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
+
+ spin_lock_irqsave(&rocker->group_tbl_lock, flags);
+ hash_for_each_safe(rocker->group_tbl, bkt, tmp, group_entry, entry)
+ hash_del(&group_entry->entry);
+ spin_unlock_irqrestore(&rocker->group_tbl_lock, flags);
+
+ spin_lock_irqsave(&rocker->fdb_tbl_lock, flags);
+ hash_for_each_safe(rocker->fdb_tbl, bkt, tmp, fdb_entry, entry)
+ hash_del(&fdb_entry->entry);
+ spin_unlock_irqrestore(&rocker->fdb_tbl_lock, flags);
+
+ spin_lock_irqsave(&rocker->internal_vlan_tbl_lock, flags);
+ hash_for_each_safe(rocker->internal_vlan_tbl, bkt,
+ tmp, internal_vlan_entry, entry)
+ hash_del(&internal_vlan_entry->entry);
+ spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, flags);
+}
+
+static struct rocker_flow_tbl_entry *
+rocker_flow_tbl_find(struct rocker *rocker, struct rocker_flow_tbl_entry *match)
+{
+ struct rocker_flow_tbl_entry *found;
+
+ hash_for_each_possible(rocker->flow_tbl, found,
+ entry, match->key_crc32) {
+ if (memcmp(&found->key, &match->key, sizeof(found->key)) == 0)
+ return found;
+ }
+
+ return NULL;
+}
+
+static int rocker_flow_tbl_add(struct rocker_port *rocker_port,
+ struct rocker_flow_tbl_entry *match,
+ bool nowait)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_flow_tbl_entry *found;
+ unsigned long flags;
+ bool add_to_hw = false;
+ int err = 0;
+
+ match->key_crc32 = crc32(~0, &match->key, sizeof(match->key));
+
+ spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
+
+ found = rocker_flow_tbl_find(rocker, match);
+
+ if (found) {
+ kfree(match);
+ } else {
+ found = match;
+ found->cookie = rocker->flow_tbl_next_cookie++;
+ hash_add(rocker->flow_tbl, &found->entry, found->key_crc32);
+ add_to_hw = true;
+ }
+
+ found->ref_count++;
+
+ spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
+
+ if (add_to_hw) {
+ err = rocker_cmd_exec(rocker, rocker_port,
+ rocker_cmd_flow_tbl_add,
+ found, NULL, NULL, nowait);
+ if (err) {
+ spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
+ hash_del(&found->entry);
+ spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
+ kfree(found);
+ }
+ }
+
+ return err;
+}
+
+static int rocker_flow_tbl_del(struct rocker_port *rocker_port,
+ struct rocker_flow_tbl_entry *match,
+ bool nowait)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_flow_tbl_entry *found;
+ unsigned long flags;
+ bool del_from_hw = false;
+ int err = 0;
+
+ match->key_crc32 = crc32(~0, &match->key, sizeof(match->key));
+
+ spin_lock_irqsave(&rocker->flow_tbl_lock, flags);
+
+ found = rocker_flow_tbl_find(rocker, match);
+
+ if (found) {
+ found->ref_count--;
+ if (found->ref_count == 0) {
+ hash_del(&found->entry);
+ del_from_hw = true;
+ }
+ }
+
+ spin_unlock_irqrestore(&rocker->flow_tbl_lock, flags);
+
+ kfree(match);
+
+ if (del_from_hw) {
+ err = rocker_cmd_exec(rocker, rocker_port,
+ rocker_cmd_flow_tbl_del,
+ found, NULL, NULL, nowait);
+ kfree(found);
+ }
+
+ return err;
+}
+
+static gfp_t rocker_op_flags_gfp(int flags)
+{
+ return flags & ROCKER_OP_FLAG_NOWAIT ? GFP_ATOMIC : GFP_KERNEL;
+}
+
+static int rocker_flow_tbl_do(struct rocker_port *rocker_port,
+ int flags, struct rocker_flow_tbl_entry *entry)
+{
+ bool nowait = flags & ROCKER_OP_FLAG_NOWAIT;
+
+ if (flags & ROCKER_OP_FLAG_REMOVE)
+ return rocker_flow_tbl_del(rocker_port, entry, nowait);
+ else
+ return rocker_flow_tbl_add(rocker_port, entry, nowait);
+}
+
+static int rocker_flow_tbl_ig_port(struct rocker_port *rocker_port,
+ int flags, u32 in_lport, u32 in_lport_mask,
+ enum rocker_of_dpa_table_id goto_tbl)
+{
+ struct rocker_flow_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key.priority = ROCKER_PRIORITY_IG_PORT;
+ entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT;
+ entry->key.ig_port.in_lport = in_lport;
+ entry->key.ig_port.in_lport_mask = in_lport_mask;
+ entry->key.ig_port.goto_tbl = goto_tbl;
+
+ return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
+static int rocker_flow_tbl_vlan(struct rocker_port *rocker_port,
+ int flags, u32 in_lport,
+ __be16 vlan_id, __be16 vlan_id_mask,
+ enum rocker_of_dpa_table_id goto_tbl,
+ bool untagged, __be16 new_vlan_id)
+{
+ struct rocker_flow_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key.priority = ROCKER_PRIORITY_VLAN;
+ entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_VLAN;
+ entry->key.vlan.in_lport = in_lport;
+ entry->key.vlan.vlan_id = vlan_id;
+ entry->key.vlan.vlan_id_mask = vlan_id_mask;
+ entry->key.vlan.goto_tbl = goto_tbl;
+
+ entry->key.vlan.untagged = untagged;
+ entry->key.vlan.new_vlan_id = new_vlan_id;
+
+ return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
+static int rocker_flow_tbl_term_mac(struct rocker_port *rocker_port,
+ u32 in_lport, u32 in_lport_mask,
+ __be16 eth_type, const u8 *eth_dst,
+ const u8 *eth_dst_mask, __be16 vlan_id,
+ __be16 vlan_id_mask, bool copy_to_cpu,
+ int flags)
+{
+ struct rocker_flow_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ if (is_multicast_ether_addr(eth_dst)) {
+ entry->key.priority = ROCKER_PRIORITY_TERM_MAC_MCAST;
+ entry->key.term_mac.goto_tbl =
+ ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING;
+ } else {
+ entry->key.priority = ROCKER_PRIORITY_TERM_MAC_UCAST;
+ entry->key.term_mac.goto_tbl =
+ ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING;
+ }
+
+ entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC;
+ entry->key.term_mac.in_lport = in_lport;
+ entry->key.term_mac.in_lport_mask = in_lport_mask;
+ entry->key.term_mac.eth_type = eth_type;
+ ether_addr_copy(entry->key.term_mac.eth_dst, eth_dst);
+ ether_addr_copy(entry->key.term_mac.eth_dst_mask, eth_dst_mask);
+ entry->key.term_mac.vlan_id = vlan_id;
+ entry->key.term_mac.vlan_id_mask = vlan_id_mask;
+ entry->key.term_mac.copy_to_cpu = copy_to_cpu;
+
+ return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
+static int rocker_flow_tbl_bridge(struct rocker_port *rocker_port,
+ int flags,
+ const u8 *eth_dst, const u8 *eth_dst_mask,
+ __be16 vlan_id, u32 tunnel_id,
+ enum rocker_of_dpa_table_id goto_tbl,
+ u32 group_id, bool copy_to_cpu)
+{
+ struct rocker_flow_tbl_entry *entry;
+ u32 priority;
+ bool vlan_bridging = !!vlan_id;
+ bool dflt = !eth_dst || (eth_dst && eth_dst_mask);
+ bool wild = false;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_BRIDGING;
+
+ if (eth_dst) {
+ entry->key.bridge.has_eth_dst = 1;
+ ether_addr_copy(entry->key.bridge.eth_dst, eth_dst);
+ }
+ if (eth_dst_mask) {
+ entry->key.bridge.has_eth_dst_mask = 1;
+ ether_addr_copy(entry->key.bridge.eth_dst_mask, eth_dst_mask);
+ if (memcmp(eth_dst_mask, ff_mac, ETH_ALEN))
+ wild = true;
+ }
+
+ priority = ROCKER_PRIORITY_UNKNOWN;
+ if (vlan_bridging & dflt & wild)
+ priority = ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_WILD;
+ else if (vlan_bridging & dflt & !wild)
+ priority = ROCKER_PRIORITY_BRIDGING_VLAN_DFLT_EXACT;
+ else if (vlan_bridging & !dflt)
+ priority = ROCKER_PRIORITY_BRIDGING_VLAN;
+ else if (!vlan_bridging & dflt & wild)
+ priority = ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_WILD;
+ else if (!vlan_bridging & dflt & !wild)
+ priority = ROCKER_PRIORITY_BRIDGING_TENANT_DFLT_EXACT;
+ else if (!vlan_bridging & !dflt)
+ priority = ROCKER_PRIORITY_BRIDGING_TENANT;
+
+ entry->key.priority = priority;
+ entry->key.bridge.vlan_id = vlan_id;
+ entry->key.bridge.tunnel_id = tunnel_id;
+ entry->key.bridge.goto_tbl = goto_tbl;
+ entry->key.bridge.group_id = group_id;
+ entry->key.bridge.copy_to_cpu = copy_to_cpu;
+
+ return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
+static int rocker_flow_tbl_acl(struct rocker_port *rocker_port,
+ int flags, u32 in_lport,
+ u32 in_lport_mask,
+ const u8 *eth_src, const u8 *eth_src_mask,
+ const u8 *eth_dst, const u8 *eth_dst_mask,
+ __be16 eth_type,
+ __be16 vlan_id, __be16 vlan_id_mask,
+ u8 ip_proto, u8 ip_proto_mask,
+ u8 ip_tos, u8 ip_tos_mask,
+ u32 group_id)
+{
+ u32 priority;
+ struct rocker_flow_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ priority = ROCKER_PRIORITY_ACL_NORMAL;
+ if (eth_dst && eth_dst_mask) {
+ if (memcmp(eth_dst_mask, mcast_mac, ETH_ALEN) == 0)
+ priority = ROCKER_PRIORITY_ACL_DFLT;
+ else if (is_link_local_ether_addr(eth_dst))
+ priority = ROCKER_PRIORITY_ACL_CTRL;
+ }
+
+ entry->key.priority = priority;
+ entry->key.tbl_id = ROCKER_OF_DPA_TABLE_ID_ACL_POLICY;
+ entry->key.acl.in_lport = in_lport;
+ entry->key.acl.in_lport_mask = in_lport_mask;
+
+ if (eth_src)
+ ether_addr_copy(entry->key.acl.eth_src, eth_src);
+ if (eth_src_mask)
+ ether_addr_copy(entry->key.acl.eth_src_mask, eth_src_mask);
+ if (eth_dst)
+ ether_addr_copy(entry->key.acl.eth_dst, eth_dst);
+ if (eth_dst_mask)
+ ether_addr_copy(entry->key.acl.eth_dst_mask, eth_dst_mask);
+
+ entry->key.acl.eth_type = eth_type;
+ entry->key.acl.vlan_id = vlan_id;
+ entry->key.acl.vlan_id_mask = vlan_id_mask;
+ entry->key.acl.ip_proto = ip_proto;
+ entry->key.acl.ip_proto_mask = ip_proto_mask;
+ entry->key.acl.ip_tos = ip_tos;
+ entry->key.acl.ip_tos_mask = ip_tos_mask;
+ entry->key.acl.group_id = group_id;
+
+ return rocker_flow_tbl_do(rocker_port, flags, entry);
+}
+
+static struct rocker_group_tbl_entry *
+rocker_group_tbl_find(struct rocker *rocker,
+ struct rocker_group_tbl_entry *match)
+{
+ struct rocker_group_tbl_entry *found;
+
+ hash_for_each_possible(rocker->group_tbl, found,
+ entry, match->group_id) {
+ if (found->group_id == match->group_id)
+ return found;
+ }
+
+ return NULL;
+}
+
+static void rocker_group_tbl_entry_free(struct rocker_group_tbl_entry *entry)
+{
+ switch (ROCKER_GROUP_TYPE_GET(entry->group_id)) {
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
+ case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
+ kfree(entry->group_ids);
+ break;
+ default:
+ break;
+ }
+ kfree(entry);
+}
+
+static int rocker_group_tbl_add(struct rocker_port *rocker_port,
+ struct rocker_group_tbl_entry *match,
+ bool nowait)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_group_tbl_entry *found;
+ unsigned long flags;
+ int err = 0;
+
+ spin_lock_irqsave(&rocker->group_tbl_lock, flags);
+
+ found = rocker_group_tbl_find(rocker, match);
+
+ if (found) {
+ hash_del(&found->entry);
+ rocker_group_tbl_entry_free(found);
+ found = match;
+ found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD;
+ } else {
+ found = match;
+ found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD;
+ }
+
+ hash_add(rocker->group_tbl, &found->entry, found->group_id);
+
+ spin_unlock_irqrestore(&rocker->group_tbl_lock, flags);
+
+ if (found->cmd)
+ err = rocker_cmd_exec(rocker, rocker_port,
+ rocker_cmd_group_tbl_add,
+ found, NULL, NULL, nowait);
+
+ return err;
+}
+
+static int rocker_group_tbl_del(struct rocker_port *rocker_port,
+ struct rocker_group_tbl_entry *match,
+ bool nowait)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_group_tbl_entry *found;
+ unsigned long flags;
+ int err = 0;
+
+ spin_lock_irqsave(&rocker->group_tbl_lock, flags);
+
+ found = rocker_group_tbl_find(rocker, match);
+
+ if (found) {
+ hash_del(&found->entry);
+ found->cmd = ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL;
+ }
+
+ spin_unlock_irqrestore(&rocker->group_tbl_lock, flags);
+
+ rocker_group_tbl_entry_free(match);
+
+ if (found) {
+ err = rocker_cmd_exec(rocker, rocker_port,
+ rocker_cmd_group_tbl_del,
+ found, NULL, NULL, nowait);
+ rocker_group_tbl_entry_free(found);
+ }
+
+ return err;
+}
+
+static int rocker_group_tbl_do(struct rocker_port *rocker_port,
+ int flags, struct rocker_group_tbl_entry *entry)
+{
+ bool nowait = flags & ROCKER_OP_FLAG_NOWAIT;
+
+ if (flags & ROCKER_OP_FLAG_REMOVE)
+ return rocker_group_tbl_del(rocker_port, entry, nowait);
+ else
+ return rocker_group_tbl_add(rocker_port, entry, nowait);
+}
+
+static int rocker_group_l2_interface(struct rocker_port *rocker_port,
+ int flags, __be16 vlan_id,
+ u32 out_lport, int pop_vlan)
+{
+ struct rocker_group_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_lport);
+ entry->l2_interface.pop_vlan = pop_vlan;
+
+ return rocker_group_tbl_do(rocker_port, flags, entry);
+}
+
+static int rocker_group_l2_fan_out(struct rocker_port *rocker_port,
+ int flags, u8 group_count,
+ u32 *group_ids, u32 group_id)
+{
+ struct rocker_group_tbl_entry *entry;
+
+ entry = kzalloc(sizeof(*entry), rocker_op_flags_gfp(flags));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->group_id = group_id;
+ entry->group_count = group_count;
+
+ entry->group_ids = kcalloc(group_count, sizeof(u32),
+ rocker_op_flags_gfp(flags));
+ if (!entry->group_ids) {
+ kfree(entry);
+ return -ENOMEM;
+ }
+ memcpy(entry->group_ids, group_ids, group_count * sizeof(u32));
+
+ return rocker_group_tbl_do(rocker_port, flags, entry);
+}
+
+static int rocker_group_l2_flood(struct rocker_port *rocker_port,
+ int flags, __be16 vlan_id,
+ u8 group_count, u32 *group_ids,
+ u32 group_id)
+{
+ return rocker_group_l2_fan_out(rocker_port, flags,
+ group_count, group_ids,
+ group_id);
+}
+
+static struct rocker_ctrl {
+ const u8 *eth_dst;
+ const u8 *eth_dst_mask;
+ u16 eth_type;
+ bool acl;
+ bool bridge;
+ bool term;
+ bool copy_to_cpu;
+} rocker_ctrls[] = {
+ [ROCKER_CTRL_LINK_LOCAL_MCAST] = {
+ /* pass link local multicast pkts up to CPU for filtering */
+ .eth_dst = ll_mac,
+ .eth_dst_mask = ll_mask,
+ .acl = true,
+ },
+ [ROCKER_CTRL_LOCAL_ARP] = {
+ /* pass local ARP pkts up to CPU */
+ .eth_dst = zero_mac,
+ .eth_dst_mask = zero_mac,
+ .eth_type = htons(ETH_P_ARP),
+ .acl = true,
+ },
+ [ROCKER_CTRL_IPV4_MCAST] = {
+ /* pass IPv4 mcast pkts up to CPU, RFC 1112 */
+ .eth_dst = ipv4_mcast,
+ .eth_dst_mask = ipv4_mask,
+ .eth_type = htons(ETH_P_IP),
+ .term = true,
+ .copy_to_cpu = true,
+ },
+ [ROCKER_CTRL_IPV6_MCAST] = {
+ /* pass IPv6 mcast pkts up to CPU, RFC 2464 */
+ .eth_dst = ipv6_mcast,
+ .eth_dst_mask = ipv6_mask,
+ .eth_type = htons(ETH_P_IPV6),
+ .term = true,
+ .copy_to_cpu = true,
+ },
+ [ROCKER_CTRL_DFLT_BRIDGING] = {
+ /* flood any pkts on vlan */
+ .bridge = true,
+ .copy_to_cpu = true,
+ },
+};
+
+static int rocker_port_ctrl_vlan_acl(struct rocker_port *rocker_port,
+ int flags, struct rocker_ctrl *ctrl,
+ __be16 vlan_id)
+{
+ u32 in_lport = rocker_port->lport;
+ u32 in_lport_mask = 0xffffffff;
+ u32 out_lport = 0;
+ u8 *eth_src = NULL;
+ u8 *eth_src_mask = NULL;
+ __be16 vlan_id_mask = htons(0xffff);
+ u8 ip_proto = 0;
+ u8 ip_proto_mask = 0;
+ u8 ip_tos = 0;
+ u8 ip_tos_mask = 0;
+ u32 group_id = ROCKER_GROUP_L2_INTERFACE(vlan_id, out_lport);
+ int err;
+
+ err = rocker_flow_tbl_acl(rocker_port, flags,
+ in_lport, in_lport_mask,
+ eth_src, eth_src_mask,
+ ctrl->eth_dst, ctrl->eth_dst_mask,
+ ctrl->eth_type,
+ vlan_id, vlan_id_mask,
+ ip_proto, ip_proto_mask,
+ ip_tos, ip_tos_mask,
+ group_id);
+
+ if (err)
+ netdev_err(rocker_port->dev, "Error (%d) ctrl ACL\n", err);
+
+ return err;
+}
+
+static int rocker_port_ctrl_vlan_term(struct rocker_port *rocker_port,
+ int flags, struct rocker_ctrl *ctrl,
+ __be16 vlan_id)
+{
+ u32 in_lport_mask = 0xffffffff;
+ __be16 vlan_id_mask = htons(0xffff);
+ int err;
+
+ if (ntohs(vlan_id) == 0)
+ vlan_id = rocker_port->internal_vlan_id;
+
+ err = rocker_flow_tbl_term_mac(rocker_port,
+ rocker_port->lport, in_lport_mask,
+ ctrl->eth_type, ctrl->eth_dst,
+ ctrl->eth_dst_mask, vlan_id,
+ vlan_id_mask, ctrl->copy_to_cpu,
+ flags);
+
+ if (err)
+ netdev_err(rocker_port->dev, "Error (%d) ctrl term\n", err);
+
+ return err;
+}
+
+static int rocker_port_ctrl_vlan(struct rocker_port *rocker_port, int flags,
+ struct rocker_ctrl *ctrl, __be16 vlan_id)
+{
+ if (ctrl->acl)
+ return rocker_port_ctrl_vlan_acl(rocker_port, flags,
+ ctrl, vlan_id);
+
+ if (ctrl->term)
+ return rocker_port_ctrl_vlan_term(rocker_port, flags,
+ ctrl, vlan_id);
+
+ return -EOPNOTSUPP;
+}
+
+static int rocker_port_ctrl_vlan_add(struct rocker_port *rocker_port,
+ int flags, __be16 vlan_id)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < ROCKER_CTRL_MAX; i++) {
+ if (rocker_port->ctrls[i]) {
+ err = rocker_port_ctrl_vlan(rocker_port, flags,
+ &rocker_ctrls[i], vlan_id);
+ if (err)
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static int rocker_port_ctrl(struct rocker_port *rocker_port, int flags,
+ struct rocker_ctrl *ctrl)
+{
+ u16 vid;
+ int err = 0;
+
+ for (vid = 1; vid < VLAN_N_VID; vid++) {
+ if (!test_bit(vid, rocker_port->vlan_bitmap))
+ continue;
+ err = rocker_port_ctrl_vlan(rocker_port, flags,
+ ctrl, htons(vid));
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int rocker_port_ig_tbl(struct rocker_port *rocker_port, int flags)
+{
+ enum rocker_of_dpa_table_id goto_tbl;
+ u32 in_lport;
+ u32 in_lport_mask;
+ int err;
+
+ /* Normal Ethernet Frames. Matches pkts from any local physical
+ * ports. Goto VLAN tbl.
+ */
+
+ in_lport = 0;
+ in_lport_mask = 0xffff0000;
+ goto_tbl = ROCKER_OF_DPA_TABLE_ID_VLAN;
+
+ err = rocker_flow_tbl_ig_port(rocker_port, flags,
+ in_lport, in_lport_mask,
+ goto_tbl);
+ if (err)
+ netdev_err(rocker_port->dev,
+ "Error (%d) ingress port table entry\n", err);
+
+ return err;
+}
+
+static int rocker_port_router_mac(struct rocker_port *rocker_port,
+ int flags, __be16 vlan_id)
+{
+ u32 in_lport_mask = 0xffffffff;
+ __be16 eth_type;
+ const u8 *dst_mac_mask = ff_mac;
+ __be16 vlan_id_mask = htons(0xffff);
+ bool copy_to_cpu = false;
+ int err;
+
+ if (ntohs(vlan_id) == 0)
+ vlan_id = rocker_port->internal_vlan_id;
+
+ eth_type = htons(ETH_P_IP);
+ err = rocker_flow_tbl_term_mac(rocker_port,
+ rocker_port->lport, in_lport_mask,
+ eth_type, rocker_port->dev->dev_addr,
+ dst_mac_mask, vlan_id, vlan_id_mask,
+ copy_to_cpu, flags);
+ if (err)
+ return err;
+
+ eth_type = htons(ETH_P_IPV6);
+ err = rocker_flow_tbl_term_mac(rocker_port,
+ rocker_port->lport, in_lport_mask,
+ eth_type, rocker_port->dev->dev_addr,
+ dst_mac_mask, vlan_id, vlan_id_mask,
+ copy_to_cpu, flags);
+
+ return err;
+}
+
+static struct rocker_internal_vlan_tbl_entry *
+rocker_internal_vlan_tbl_find(struct rocker *rocker, int ifindex)
+{
+ struct rocker_internal_vlan_tbl_entry *found;
+
+ hash_for_each_possible(rocker->internal_vlan_tbl, found,
+ entry, ifindex) {
+ if (found->ifindex == ifindex)
+ return found;
+ }
+
+ return NULL;
+}
+
+static __be16 rocker_port_internal_vlan_id_get(struct rocker_port *rocker_port,
+ int ifindex)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_internal_vlan_tbl_entry *entry;
+ struct rocker_internal_vlan_tbl_entry *found;
+ unsigned long lock_flags;
+ int i;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return 0;
+
+ entry->ifindex = ifindex;
+
+ spin_lock_irqsave(&rocker->internal_vlan_tbl_lock, lock_flags);
+
+ found = rocker_internal_vlan_tbl_find(rocker, ifindex);
+ if (found) {
+ kfree(entry);
+ goto found;
+ }
+
+ found = entry;
+ hash_add(rocker->internal_vlan_tbl, &found->entry, found->ifindex);
+
+ for (i = 0; i < ROCKER_N_INTERNAL_VLANS; i++) {
+ if (test_and_set_bit(i, rocker->internal_vlan_bitmap))
+ continue;
+ found->vlan_id = htons(ROCKER_INTERNAL_VLAN_ID_BASE + i);
+ goto found;
+ }
+
+ netdev_err(rocker_port->dev, "Out of internal VLAN IDs\n");
+
+found:
+ found->ref_count++;
+ spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags);
+
+ return found->vlan_id;
+}
+
+static void rocker_port_internal_vlan_id_put(struct rocker_port *rocker_port,
+ int ifindex)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_internal_vlan_tbl_entry *found;
+ unsigned long lock_flags;
+ unsigned long bit;
+
+ spin_lock_irqsave(&rocker->internal_vlan_tbl_lock, lock_flags);
+
+ found = rocker_internal_vlan_tbl_find(rocker, ifindex);
+ if (!found) {
+ netdev_err(rocker_port->dev,
+ "ifindex (%d) not found in internal VLAN tbl\n",
+ ifindex);
+ goto not_found;
+ }
+
+ if (--found->ref_count <= 0) {
+ bit = ntohs(found->vlan_id) - ROCKER_INTERNAL_VLAN_ID_BASE;
+ clear_bit(bit, rocker->internal_vlan_bitmap);
+ hash_del(&found->entry);
+ kfree(found);
+ }
+
+not_found:
+ spin_unlock_irqrestore(&rocker->internal_vlan_tbl_lock, lock_flags);
+}
+
/*****************
* Net device ops
*****************/
@@ -1768,10 +3210,14 @@ static void rocker_carrier_init(struct rocker_port *rocker_port)
static void rocker_remove_ports(struct rocker *rocker)
{
+ struct rocker_port *rocker_port;
int i;
- for (i = 0; i < rocker->port_count; i++)
- unregister_netdev(rocker->ports[i]->dev);
+ for (i = 0; i < rocker->port_count; i++) {
+ rocker_port = rocker->ports[i];
+ rocker_port_ig_tbl(rocker_port, ROCKER_OP_FLAG_REMOVE);
+ unregister_netdev(rocker_port->dev);
+ }
kfree(rocker->ports);
}
@@ -1823,8 +3269,18 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
}
rocker->ports[port_number] = rocker_port;
+ rocker_port->internal_vlan_id =
+ rocker_port_internal_vlan_id_get(rocker_port, dev->ifindex);
+ err = rocker_port_ig_tbl(rocker_port, 0);
+ if (err) {
+ dev_err(&pdev->dev, "install ig port table failed\n");
+ goto err_port_ig_tbl;
+ }
+
return 0;
+err_port_ig_tbl:
+ unregister_netdev(dev);
err_register_netdev:
free_netdev(dev);
return err;
@@ -1981,6 +3437,12 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
+ err = rocker_init_tbls(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "cannot init rocker tables\n");
+ goto err_init_tbls;
+ }
+
err = rocker_probe_ports(rocker);
if (err) {
dev_err(&pdev->dev, "failed to probe ports\n");
@@ -1992,6 +3454,8 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
err_probe_ports:
+ rocker_free_tbls(rocker);
+err_init_tbls:
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
err_request_event_irq:
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
@@ -2017,6 +3481,7 @@ static void rocker_remove(struct pci_dev *pdev)
{
struct rocker *rocker = pci_get_drvdata(pdev);
+ rocker_free_tbls(rocker);
rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
rocker_remove_ports(rocker);
free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 15/21] rocker: introduce rocker switch driver
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
This patch introduces the first driver to benefit from the switchdev
infrastructure and to implement newly introduced switch ndos. This is a
driver for emulated switch chip implemented in qemu:
https://github.com/sfeldma/qemu-rocker/
This patch is a result of joint work with Scott Feldman.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Thomas Graf <tgraf@suug.ch>
Reviewed-by: John Fastabend <john.r.fastabend@intel.com>
---
v3->v4:
-no change
v2->v3:
-changed "sw" string to "switch" to avoid confusion
v1->v2:
-fixed "unsigned" -> "unsigned int" as suggested by DaveM
-removed "ifdef CONFIG_NET_SWITCHDEV" in ndos as suggested by DaveM
-removed unnecessary "__packed" annotations as suggested by DaveM
---
MAINTAINERS | 7 +
drivers/net/ethernet/Kconfig | 1 +
drivers/net/ethernet/Makefile | 1 +
drivers/net/ethernet/rocker/Kconfig | 27 +
drivers/net/ethernet/rocker/Makefile | 5 +
drivers/net/ethernet/rocker/rocker.c | 2060 ++++++++++++++++++++++++++++++++++
drivers/net/ethernet/rocker/rocker.h | 427 +++++++
7 files changed, 2528 insertions(+)
create mode 100644 drivers/net/ethernet/rocker/Kconfig
create mode 100644 drivers/net/ethernet/rocker/Makefile
create mode 100644 drivers/net/ethernet/rocker/rocker.c
create mode 100644 drivers/net/ethernet/rocker/rocker.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 05addb6..9b1bd7b7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7864,6 +7864,13 @@ F: drivers/hid/hid-roccat*
F: include/linux/hid-roccat*
F: Documentation/ABI/*/sysfs-driver-hid-roccat*
+ROCKER DRIVER
+M: Jiri Pirko <jiri@resnulli.us>
+M: Scott Feldman <sfeldma@gmail.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/rocker/
+
ROCKETPORT DRIVER
P: Comtrol Corp.
W: http://www.comtrol.com
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1ed1fbb..df76050 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -155,6 +155,7 @@ source "drivers/net/ethernet/qualcomm/Kconfig"
source "drivers/net/ethernet/realtek/Kconfig"
source "drivers/net/ethernet/renesas/Kconfig"
source "drivers/net/ethernet/rdc/Kconfig"
+source "drivers/net/ethernet/rocker/Kconfig"
config S6GMAC
tristate "S6105 GMAC ethernet support"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 6e0b629..bf56f8b 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -65,6 +65,7 @@ obj-$(CONFIG_NET_VENDOR_QUALCOMM) += qualcomm/
obj-$(CONFIG_NET_VENDOR_REALTEK) += realtek/
obj-$(CONFIG_SH_ETH) += renesas/
obj-$(CONFIG_NET_VENDOR_RDC) += rdc/
+obj-$(CONFIG_NET_VENDOR_ROCKER) += rocker/
obj-$(CONFIG_S6GMAC) += s6gmac.o
obj-$(CONFIG_NET_VENDOR_SAMSUNG) += samsung/
obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
diff --git a/drivers/net/ethernet/rocker/Kconfig b/drivers/net/ethernet/rocker/Kconfig
new file mode 100644
index 0000000..11a850e
--- /dev/null
+++ b/drivers/net/ethernet/rocker/Kconfig
@@ -0,0 +1,27 @@
+#
+# Rocker device configuration
+#
+
+config NET_VENDOR_ROCKER
+ bool "Rocker devices"
+ default y
+ ---help---
+ If you have a network device belonging to this class, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Rocker devices. If you say Y, you will be asked for
+ your specific card in the following questions.
+
+if NET_VENDOR_ROCKER
+
+config ROCKER
+ tristate "Rocker switch driver (EXPERIMENTAL)"
+ depends on PCI && NET_SWITCHDEV
+ ---help---
+ This driver supports Rocker switch device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rocker.
+
+endif # NET_VENDOR_ROCKER
diff --git a/drivers/net/ethernet/rocker/Makefile b/drivers/net/ethernet/rocker/Makefile
new file mode 100644
index 0000000..f85fb12
--- /dev/null
+++ b/drivers/net/ethernet/rocker/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Rocker network device drivers.
+#
+
+obj-$(CONFIG_ROCKER) += rocker.o
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
new file mode 100644
index 0000000..a53011c
--- /dev/null
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -0,0 +1,2060 @@
+/*
+ * drivers/net/ethernet/rocker/rocker.c - Rocker switch device driver
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/crc32.h>
+#include <linux/sort.h>
+#include <linux/random.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/switchdev.h>
+#include <net/rtnetlink.h>
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+#include <generated/utsrelease.h>
+
+#include "rocker.h"
+
+static const char rocker_driver_name[] = "rocker";
+
+static const struct pci_device_id rocker_pci_id_table[] = {
+ {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_ROCKER), 0},
+ {0, }
+};
+
+struct rocker_desc_info {
+ char *data; /* mapped */
+ size_t data_size;
+ size_t tlv_size;
+ struct rocker_desc *desc;
+ DEFINE_DMA_UNMAP_ADDR(mapaddr);
+};
+
+struct rocker_dma_ring_info {
+ size_t size;
+ u32 head;
+ u32 tail;
+ struct rocker_desc *desc; /* mapped */
+ dma_addr_t mapaddr;
+ struct rocker_desc_info *desc_info;
+ unsigned int type;
+};
+
+struct rocker;
+
+struct rocker_port {
+ struct net_device *dev;
+ struct rocker *rocker;
+ unsigned int port_number;
+ u32 lport;
+ struct napi_struct napi_tx;
+ struct napi_struct napi_rx;
+ struct rocker_dma_ring_info tx_ring;
+ struct rocker_dma_ring_info rx_ring;
+};
+
+struct rocker {
+ struct pci_dev *pdev;
+ u8 __iomem *hw_addr;
+ struct msix_entry *msix_entries;
+ unsigned int port_count;
+ struct rocker_port **ports;
+ struct {
+ u64 id;
+ } hw;
+ spinlock_t cmd_ring_lock;
+ struct rocker_dma_ring_info cmd_ring;
+ struct rocker_dma_ring_info event_ring;
+};
+
+struct rocker_wait {
+ wait_queue_head_t wait;
+ bool done;
+ bool nowait;
+};
+
+static void rocker_wait_reset(struct rocker_wait *wait)
+{
+ wait->done = false;
+ wait->nowait = false;
+}
+
+static void rocker_wait_init(struct rocker_wait *wait)
+{
+ init_waitqueue_head(&wait->wait);
+ rocker_wait_reset(wait);
+}
+
+static struct rocker_wait *rocker_wait_create(gfp_t gfp)
+{
+ struct rocker_wait *wait;
+
+ wait = kmalloc(sizeof(*wait), gfp);
+ if (!wait)
+ return NULL;
+ rocker_wait_init(wait);
+ return wait;
+}
+
+static void rocker_wait_destroy(struct rocker_wait *work)
+{
+ kfree(work);
+}
+
+static bool rocker_wait_event_timeout(struct rocker_wait *wait,
+ unsigned long timeout)
+{
+ wait_event_timeout(wait->wait, wait->done, HZ / 10);
+ if (!wait->done)
+ return false;
+ return true;
+}
+
+static void rocker_wait_wake_up(struct rocker_wait *wait)
+{
+ wait->done = true;
+ wake_up(&wait->wait);
+}
+
+static u32 rocker_msix_vector(struct rocker *rocker, unsigned int vector)
+{
+ return rocker->msix_entries[vector].vector;
+}
+
+static u32 rocker_msix_tx_vector(struct rocker_port *rocker_port)
+{
+ return rocker_msix_vector(rocker_port->rocker,
+ ROCKER_MSIX_VEC_TX(rocker_port->port_number));
+}
+
+static u32 rocker_msix_rx_vector(struct rocker_port *rocker_port)
+{
+ return rocker_msix_vector(rocker_port->rocker,
+ ROCKER_MSIX_VEC_RX(rocker_port->port_number));
+}
+
+#define rocker_write32(rocker, reg, val) \
+ writel((val), (rocker)->hw_addr + (ROCKER_ ## reg))
+#define rocker_read32(rocker, reg) \
+ readl((rocker)->hw_addr + (ROCKER_ ## reg))
+#define rocker_write64(rocker, reg, val) \
+ writeq((val), (rocker)->hw_addr + (ROCKER_ ## reg))
+#define rocker_read64(rocker, reg) \
+ readq((rocker)->hw_addr + (ROCKER_ ## reg))
+
+/*****************************
+ * HW basic testing functions
+ *****************************/
+
+static int rocker_reg_test(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ u64 test_reg;
+ u64 rnd;
+
+ rnd = prandom_u32();
+ rnd >>= 1;
+ rocker_write32(rocker, TEST_REG, rnd);
+ test_reg = rocker_read32(rocker, TEST_REG);
+ if (test_reg != rnd * 2) {
+ dev_err(&pdev->dev, "unexpected 32bit register value %08llx, expected %08llx\n",
+ test_reg, rnd * 2);
+ return -EIO;
+ }
+
+ rnd = prandom_u32();
+ rnd <<= 31;
+ rnd |= prandom_u32();
+ rocker_write64(rocker, TEST_REG64, rnd);
+ test_reg = rocker_read64(rocker, TEST_REG64);
+ if (test_reg != rnd * 2) {
+ dev_err(&pdev->dev, "unexpected 64bit register value %16llx, expected %16llx\n",
+ test_reg, rnd * 2);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int rocker_dma_test_one(struct rocker *rocker, struct rocker_wait *wait,
+ u32 test_type, dma_addr_t dma_handle,
+ unsigned char *buf, unsigned char *expect,
+ size_t size)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int i;
+
+ rocker_wait_reset(wait);
+ rocker_write32(rocker, TEST_DMA_CTRL, test_type);
+
+ if (!rocker_wait_event_timeout(wait, HZ / 10)) {
+ dev_err(&pdev->dev, "no interrupt received within a timeout\n");
+ return -EIO;
+ }
+
+ for (i = 0; i < size; i++) {
+ if (buf[i] != expect[i]) {
+ dev_err(&pdev->dev, "unexpected memory content %02x at byte %x\n, %02x expected",
+ buf[i], i, expect[i]);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+#define ROCKER_TEST_DMA_BUF_SIZE (PAGE_SIZE * 4)
+#define ROCKER_TEST_DMA_FILL_PATTERN 0x96
+
+static int rocker_dma_test_offset(struct rocker *rocker,
+ struct rocker_wait *wait, int offset)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ unsigned char *alloc;
+ unsigned char *buf;
+ unsigned char *expect;
+ dma_addr_t dma_handle;
+ int i;
+ int err;
+
+ alloc = kzalloc(ROCKER_TEST_DMA_BUF_SIZE * 2 + offset,
+ GFP_KERNEL | GFP_DMA);
+ if (!alloc)
+ return -ENOMEM;
+ buf = alloc + offset;
+ expect = buf + ROCKER_TEST_DMA_BUF_SIZE;
+
+ dma_handle = pci_map_single(pdev, buf, ROCKER_TEST_DMA_BUF_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(pdev, dma_handle)) {
+ err = -EIO;
+ goto free_alloc;
+ }
+
+ rocker_write64(rocker, TEST_DMA_ADDR, dma_handle);
+ rocker_write32(rocker, TEST_DMA_SIZE, ROCKER_TEST_DMA_BUF_SIZE);
+
+ memset(expect, ROCKER_TEST_DMA_FILL_PATTERN, ROCKER_TEST_DMA_BUF_SIZE);
+ err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_FILL,
+ dma_handle, buf, expect,
+ ROCKER_TEST_DMA_BUF_SIZE);
+ if (err)
+ goto unmap;
+
+ memset(expect, 0, ROCKER_TEST_DMA_BUF_SIZE);
+ err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_CLEAR,
+ dma_handle, buf, expect,
+ ROCKER_TEST_DMA_BUF_SIZE);
+ if (err)
+ goto unmap;
+
+ prandom_bytes(buf, ROCKER_TEST_DMA_BUF_SIZE);
+ for (i = 0; i < ROCKER_TEST_DMA_BUF_SIZE; i++)
+ expect[i] = ~buf[i];
+ err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_INVERT,
+ dma_handle, buf, expect,
+ ROCKER_TEST_DMA_BUF_SIZE);
+ if (err)
+ goto unmap;
+
+unmap:
+ pci_unmap_single(pdev, dma_handle, ROCKER_TEST_DMA_BUF_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+free_alloc:
+ kfree(alloc);
+
+ return err;
+}
+
+static int rocker_dma_test(struct rocker *rocker, struct rocker_wait *wait)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < 8; i++) {
+ err = rocker_dma_test_offset(rocker, wait, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static irqreturn_t rocker_test_irq_handler(int irq, void *dev_id)
+{
+ struct rocker_wait *wait = dev_id;
+
+ rocker_wait_wake_up(wait);
+
+ return IRQ_HANDLED;
+}
+
+static int rocker_basic_hw_test(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_wait wait;
+ int err;
+
+ err = rocker_reg_test(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "reg test failed\n");
+ return err;
+ }
+
+ err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST),
+ rocker_test_irq_handler, 0,
+ rocker_driver_name, &wait);
+ if (err) {
+ dev_err(&pdev->dev, "cannot assign test irq\n");
+ return err;
+ }
+
+ rocker_wait_init(&wait);
+ rocker_write32(rocker, TEST_IRQ, ROCKER_MSIX_VEC_TEST);
+
+ if (!rocker_wait_event_timeout(&wait, HZ / 10)) {
+ dev_err(&pdev->dev, "no interrupt received within a timeout\n");
+ err = -EIO;
+ goto free_irq;
+ }
+
+ err = rocker_dma_test(rocker, &wait);
+ if (err)
+ dev_err(&pdev->dev, "dma test failed\n");
+
+free_irq:
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST), &wait);
+ return err;
+}
+
+/******
+ * TLV
+ ******/
+
+#define ROCKER_TLV_ALIGNTO 8U
+#define ROCKER_TLV_ALIGN(len) \
+ (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1))
+#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(struct rocker_tlv))
+
+/* <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) --->
+ * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct rocker_tlv) | ing | | ing |
+ * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+
+ * <--------------------------- tlv->len -------------------------->
+ */
+
+static struct rocker_tlv *rocker_tlv_next(const struct rocker_tlv *tlv,
+ int *remaining)
+{
+ int totlen = ROCKER_TLV_ALIGN(tlv->len);
+
+ *remaining -= totlen;
+ return (struct rocker_tlv *) ((char *) tlv + totlen);
+}
+
+static int rocker_tlv_ok(const struct rocker_tlv *tlv, int remaining)
+{
+ return remaining >= (int) ROCKER_TLV_HDRLEN &&
+ tlv->len >= ROCKER_TLV_HDRLEN &&
+ tlv->len <= remaining;
+}
+
+#define rocker_tlv_for_each(pos, head, len, rem) \
+ for (pos = head, rem = len; \
+ rocker_tlv_ok(pos, rem); \
+ pos = rocker_tlv_next(pos, &(rem)))
+
+#define rocker_tlv_for_each_nested(pos, tlv, rem) \
+ rocker_tlv_for_each(pos, rocker_tlv_data(tlv), \
+ rocker_tlv_len(tlv), rem)
+
+static int rocker_tlv_attr_size(int payload)
+{
+ return ROCKER_TLV_HDRLEN + payload;
+}
+
+static int rocker_tlv_total_size(int payload)
+{
+ return ROCKER_TLV_ALIGN(rocker_tlv_attr_size(payload));
+}
+
+static int rocker_tlv_padlen(int payload)
+{
+ return rocker_tlv_total_size(payload) - rocker_tlv_attr_size(payload);
+}
+
+static int rocker_tlv_type(const struct rocker_tlv *tlv)
+{
+ return tlv->type;
+}
+
+static void *rocker_tlv_data(const struct rocker_tlv *tlv)
+{
+ return (char *) tlv + ROCKER_TLV_HDRLEN;
+}
+
+static int rocker_tlv_len(const struct rocker_tlv *tlv)
+{
+ return tlv->len - ROCKER_TLV_HDRLEN;
+}
+
+static u8 rocker_tlv_get_u8(const struct rocker_tlv *tlv)
+{
+ return *(u8 *) rocker_tlv_data(tlv);
+}
+
+static u16 rocker_tlv_get_u16(const struct rocker_tlv *tlv)
+{
+ return *(u16 *) rocker_tlv_data(tlv);
+}
+
+static u32 rocker_tlv_get_u32(const struct rocker_tlv *tlv)
+{
+ return *(u32 *) rocker_tlv_data(tlv);
+}
+
+static u64 rocker_tlv_get_u64(const struct rocker_tlv *tlv)
+{
+ return *(u64 *) rocker_tlv_data(tlv);
+}
+
+static void rocker_tlv_parse(struct rocker_tlv **tb, int maxtype,
+ const char *buf, int buf_len)
+{
+ const struct rocker_tlv *tlv;
+ const struct rocker_tlv *head = (const struct rocker_tlv *) buf;
+ int rem;
+
+ memset(tb, 0, sizeof(struct rocker_tlv *) * (maxtype + 1));
+
+ rocker_tlv_for_each(tlv, head, buf_len, rem) {
+ u32 type = rocker_tlv_type(tlv);
+
+ if (type > 0 && type <= maxtype)
+ tb[type] = (struct rocker_tlv *) tlv;
+ }
+}
+
+static void rocker_tlv_parse_nested(struct rocker_tlv **tb, int maxtype,
+ const struct rocker_tlv *tlv)
+{
+ rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv),
+ rocker_tlv_len(tlv));
+}
+
+static void rocker_tlv_parse_desc(struct rocker_tlv **tb, int maxtype,
+ struct rocker_desc_info *desc_info)
+{
+ rocker_tlv_parse(tb, maxtype, desc_info->data,
+ desc_info->desc->tlv_size);
+}
+
+static struct rocker_tlv *rocker_tlv_start(struct rocker_desc_info *desc_info)
+{
+ return (struct rocker_tlv *) ((char *) desc_info->data +
+ desc_info->tlv_size);
+}
+
+static int rocker_tlv_put(struct rocker_desc_info *desc_info,
+ int attrtype, int attrlen, const void *data)
+{
+ int tail_room = desc_info->data_size - desc_info->tlv_size;
+ int total_size = rocker_tlv_total_size(attrlen);
+ struct rocker_tlv *tlv;
+
+ if (unlikely(tail_room < total_size))
+ return -EMSGSIZE;
+
+ tlv = rocker_tlv_start(desc_info);
+ desc_info->tlv_size += total_size;
+ tlv->type = attrtype;
+ tlv->len = rocker_tlv_attr_size(attrlen);
+ memcpy(rocker_tlv_data(tlv), data, attrlen);
+ memset((char *) tlv + tlv->len, 0, rocker_tlv_padlen(attrlen));
+ return 0;
+}
+
+static int rocker_tlv_put_u8(struct rocker_desc_info *desc_info,
+ int attrtype, u8 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value);
+}
+
+static int rocker_tlv_put_u16(struct rocker_desc_info *desc_info,
+ int attrtype, u16 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value);
+}
+
+static int rocker_tlv_put_u32(struct rocker_desc_info *desc_info,
+ int attrtype, u32 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value);
+}
+
+static int rocker_tlv_put_u64(struct rocker_desc_info *desc_info,
+ int attrtype, u64 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value);
+}
+
+static struct rocker_tlv *
+rocker_tlv_nest_start(struct rocker_desc_info *desc_info, int attrtype)
+{
+ struct rocker_tlv *start = rocker_tlv_start(desc_info);
+
+ if (rocker_tlv_put(desc_info, attrtype, 0, NULL) < 0)
+ return NULL;
+
+ return start;
+}
+
+static void rocker_tlv_nest_end(struct rocker_desc_info *desc_info,
+ struct rocker_tlv *start)
+{
+ start->len = (char *) rocker_tlv_start(desc_info) - (char *) start;
+}
+
+static void rocker_tlv_nest_cancel(struct rocker_desc_info *desc_info,
+ struct rocker_tlv *start)
+{
+ desc_info->tlv_size = (char *) start - desc_info->data;
+}
+
+/******************************************
+ * DMA rings and descriptors manipulations
+ ******************************************/
+
+static u32 __pos_inc(u32 pos, size_t limit)
+{
+ return ++pos == limit ? 0 : pos;
+}
+
+static int rocker_desc_err(struct rocker_desc_info *desc_info)
+{
+ return -(desc_info->desc->comp_err & ~ROCKER_DMA_DESC_COMP_ERR_GEN);
+}
+
+static void rocker_desc_gen_clear(struct rocker_desc_info *desc_info)
+{
+ desc_info->desc->comp_err &= ~ROCKER_DMA_DESC_COMP_ERR_GEN;
+}
+
+static bool rocker_desc_gen(struct rocker_desc_info *desc_info)
+{
+ u32 comp_err = desc_info->desc->comp_err;
+
+ return comp_err & ROCKER_DMA_DESC_COMP_ERR_GEN ? true : false;
+}
+
+static void *rocker_desc_cookie_ptr_get(struct rocker_desc_info *desc_info)
+{
+ return (void *) desc_info->desc->cookie;
+}
+
+static void rocker_desc_cookie_ptr_set(struct rocker_desc_info *desc_info,
+ void *ptr)
+{
+ desc_info->desc->cookie = (long) ptr;
+}
+
+static struct rocker_desc_info *
+rocker_desc_head_get(struct rocker_dma_ring_info *info)
+{
+ static struct rocker_desc_info *desc_info;
+ u32 head = __pos_inc(info->head, info->size);
+
+ desc_info = &info->desc_info[info->head];
+ if (head == info->tail)
+ return NULL; /* ring full */
+ desc_info->tlv_size = 0;
+ return desc_info;
+}
+
+static void rocker_desc_commit(struct rocker_desc_info *desc_info)
+{
+ desc_info->desc->buf_size = desc_info->data_size;
+ desc_info->desc->tlv_size = desc_info->tlv_size;
+}
+
+static void rocker_desc_head_set(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ struct rocker_desc_info *desc_info)
+{
+ u32 head = __pos_inc(info->head, info->size);
+
+ BUG_ON(head == info->tail);
+ rocker_desc_commit(desc_info);
+ info->head = head;
+ rocker_write32(rocker, DMA_DESC_HEAD(info->type), head);
+}
+
+static struct rocker_desc_info *
+rocker_desc_tail_get(struct rocker_dma_ring_info *info)
+{
+ static struct rocker_desc_info *desc_info;
+
+ if (info->tail == info->head)
+ return NULL; /* nothing to be done between head and tail */
+ desc_info = &info->desc_info[info->tail];
+ if (!rocker_desc_gen(desc_info))
+ return NULL; /* gen bit not set, desc is not ready yet */
+ info->tail = __pos_inc(info->tail, info->size);
+ desc_info->tlv_size = desc_info->desc->tlv_size;
+ return desc_info;
+}
+
+static void rocker_dma_ring_credits_set(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ u32 credits)
+{
+ if (credits)
+ rocker_write32(rocker, DMA_DESC_CREDITS(info->type), credits);
+}
+
+static unsigned long rocker_dma_ring_size_fix(size_t size)
+{
+ return max(ROCKER_DMA_SIZE_MIN,
+ min(roundup_pow_of_two(size), ROCKER_DMA_SIZE_MAX));
+}
+
+static int rocker_dma_ring_create(struct rocker *rocker,
+ unsigned int type,
+ size_t size,
+ struct rocker_dma_ring_info *info)
+{
+ int i;
+
+ BUG_ON(size != rocker_dma_ring_size_fix(size));
+ info->size = size;
+ info->type = type;
+ info->head = 0;
+ info->tail = 0;
+ info->desc_info = kcalloc(info->size, sizeof(*info->desc_info),
+ GFP_KERNEL);
+ if (!info->desc_info)
+ return -ENOMEM;
+
+ info->desc = pci_alloc_consistent(rocker->pdev,
+ info->size * sizeof(*info->desc),
+ &info->mapaddr);
+ if (!info->desc) {
+ kfree(info->desc_info);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < info->size; i++)
+ info->desc_info[i].desc = &info->desc[i];
+
+ rocker_write32(rocker, DMA_DESC_CTRL(info->type),
+ ROCKER_DMA_DESC_CTRL_RESET);
+ rocker_write64(rocker, DMA_DESC_ADDR(info->type), info->mapaddr);
+ rocker_write32(rocker, DMA_DESC_SIZE(info->type), info->size);
+
+ return 0;
+}
+
+static void rocker_dma_ring_destroy(struct rocker *rocker,
+ struct rocker_dma_ring_info *info)
+{
+ rocker_write64(rocker, DMA_DESC_ADDR(info->type), 0);
+
+ pci_free_consistent(rocker->pdev,
+ info->size * sizeof(struct rocker_desc),
+ info->desc, info->mapaddr);
+ kfree(info->desc_info);
+}
+
+static void rocker_dma_ring_pass_to_producer(struct rocker *rocker,
+ struct rocker_dma_ring_info *info)
+{
+ int i;
+
+ BUG_ON(info->head || info->tail);
+
+ /* When ring is consumer, we need to advance head for each desc.
+ * That tells hw that the desc is ready to be used by it.
+ */
+ for (i = 0; i < info->size - 1; i++)
+ rocker_desc_head_set(rocker, info, &info->desc_info[i]);
+ rocker_desc_commit(&info->desc_info[i]);
+}
+
+static int rocker_dma_ring_bufs_alloc(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ int direction, size_t buf_size)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int i;
+ int err;
+
+ for (i = 0; i < info->size; i++) {
+ struct rocker_desc_info *desc_info = &info->desc_info[i];
+ struct rocker_desc *desc = &info->desc[i];
+ dma_addr_t dma_handle;
+ char *buf;
+
+ buf = kzalloc(buf_size, GFP_KERNEL | GFP_DMA);
+ if (!buf) {
+ err = -ENOMEM;
+ goto rollback;
+ }
+
+ dma_handle = pci_map_single(pdev, buf, buf_size, direction);
+ if (pci_dma_mapping_error(pdev, dma_handle)) {
+ kfree(buf);
+ err = -EIO;
+ goto rollback;
+ }
+
+ desc_info->data = buf;
+ desc_info->data_size = buf_size;
+ dma_unmap_addr_set(desc_info, mapaddr, dma_handle);
+
+ desc->buf_addr = dma_handle;
+ desc->buf_size = buf_size;
+ }
+ return 0;
+
+rollback:
+ for (i--; i >= 0; i--) {
+ struct rocker_desc_info *desc_info = &info->desc_info[i];
+
+ pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr),
+ desc_info->data_size, direction);
+ kfree(desc_info->data);
+ }
+ return err;
+}
+
+static void rocker_dma_ring_bufs_free(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ int direction)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int i;
+
+ for (i = 0; i < info->size; i++) {
+ struct rocker_desc_info *desc_info = &info->desc_info[i];
+ struct rocker_desc *desc = &info->desc[i];
+
+ desc->buf_addr = 0;
+ desc->buf_size = 0;
+ pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr),
+ desc_info->data_size, direction);
+ kfree(desc_info->data);
+ }
+}
+
+static int rocker_dma_rings_init(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int err;
+
+ err = rocker_dma_ring_create(rocker, ROCKER_DMA_CMD,
+ ROCKER_DMA_CMD_DEFAULT_SIZE,
+ &rocker->cmd_ring);
+ if (err) {
+ dev_err(&pdev->dev, "failed to create command dma ring\n");
+ return err;
+ }
+
+ spin_lock_init(&rocker->cmd_ring_lock);
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker->cmd_ring,
+ PCI_DMA_BIDIRECTIONAL, PAGE_SIZE);
+ if (err) {
+ dev_err(&pdev->dev, "failed to alloc command dma ring buffers\n");
+ goto err_dma_cmd_ring_bufs_alloc;
+ }
+
+ err = rocker_dma_ring_create(rocker, ROCKER_DMA_EVENT,
+ ROCKER_DMA_EVENT_DEFAULT_SIZE,
+ &rocker->event_ring);
+ if (err) {
+ dev_err(&pdev->dev, "failed to create event dma ring\n");
+ goto err_dma_event_ring_create;
+ }
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker->event_ring,
+ PCI_DMA_FROMDEVICE, PAGE_SIZE);
+ if (err) {
+ dev_err(&pdev->dev, "failed to alloc event dma ring buffers\n");
+ goto err_dma_event_ring_bufs_alloc;
+ }
+ rocker_dma_ring_pass_to_producer(rocker, &rocker->event_ring);
+ return 0;
+
+err_dma_event_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker->event_ring);
+err_dma_event_ring_create:
+ rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring,
+ PCI_DMA_BIDIRECTIONAL);
+err_dma_cmd_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker->cmd_ring);
+ return err;
+}
+
+static void rocker_dma_rings_fini(struct rocker *rocker)
+{
+ rocker_dma_ring_bufs_free(rocker, &rocker->event_ring,
+ PCI_DMA_BIDIRECTIONAL);
+ rocker_dma_ring_destroy(rocker, &rocker->event_ring);
+ rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring,
+ PCI_DMA_BIDIRECTIONAL);
+ rocker_dma_ring_destroy(rocker, &rocker->cmd_ring);
+}
+
+static int rocker_dma_rx_ring_skb_map(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ struct sk_buff *skb, size_t buf_len)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ dma_addr_t dma_handle;
+
+ dma_handle = pci_map_single(pdev, skb->data, buf_len,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(pdev, dma_handle))
+ return -EIO;
+ if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_RX_FRAG_ADDR, dma_handle))
+ goto tlv_put_failure;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_RX_FRAG_MAX_LEN, buf_len))
+ goto tlv_put_failure;
+ return 0;
+
+tlv_put_failure:
+ pci_unmap_single(pdev, dma_handle, buf_len, PCI_DMA_FROMDEVICE);
+ desc_info->tlv_size = 0;
+ return -EMSGSIZE;
+}
+
+static size_t rocker_port_rx_buf_len(struct rocker_port *rocker_port)
+{
+ return rocker_port->dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+}
+
+static int rocker_dma_rx_ring_skb_alloc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info)
+{
+ struct net_device *dev = rocker_port->dev;
+ struct sk_buff *skb;
+ size_t buf_len = rocker_port_rx_buf_len(rocker_port);
+ int err;
+
+ /* Ensure that hw will see tlv_size zero in case of an error.
+ * That tells hw to use another descriptor.
+ */
+ rocker_desc_cookie_ptr_set(desc_info, NULL);
+ desc_info->tlv_size = 0;
+
+ skb = netdev_alloc_skb_ip_align(dev, buf_len);
+ if (!skb)
+ return -ENOMEM;
+ err = rocker_dma_rx_ring_skb_map(rocker, rocker_port, desc_info,
+ skb, buf_len);
+ if (err) {
+ dev_kfree_skb_any(skb);
+ return err;
+ }
+ rocker_desc_cookie_ptr_set(desc_info, skb);
+ return 0;
+}
+
+static void rocker_dma_rx_ring_skb_unmap(struct rocker *rocker,
+ struct rocker_tlv **attrs)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ dma_addr_t dma_handle;
+ size_t len;
+
+ if (!attrs[ROCKER_TLV_RX_FRAG_ADDR] ||
+ !attrs[ROCKER_TLV_RX_FRAG_MAX_LEN])
+ return;
+ dma_handle = rocker_tlv_get_u64(attrs[ROCKER_TLV_RX_FRAG_ADDR]);
+ len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
+ pci_unmap_single(pdev, dma_handle, len, PCI_DMA_FROMDEVICE);
+}
+
+static void rocker_dma_rx_ring_skb_free(struct rocker *rocker,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
+ struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
+
+ if (!skb)
+ return;
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
+ rocker_dma_rx_ring_skb_unmap(rocker, attrs);
+ dev_kfree_skb_any(skb);
+}
+
+static int rocker_dma_rx_ring_skbs_alloc(struct rocker *rocker,
+ struct rocker_port *rocker_port)
+{
+ struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring;
+ int i;
+ int err;
+
+ for (i = 0; i < rx_ring->size; i++) {
+ err = rocker_dma_rx_ring_skb_alloc(rocker, rocker_port,
+ &rx_ring->desc_info[i]);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ for (i--; i >= 0; i--)
+ rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]);
+ return err;
+}
+
+static void rocker_dma_rx_ring_skbs_free(struct rocker *rocker,
+ struct rocker_port *rocker_port)
+{
+ struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring;
+ int i;
+
+ for (i = 0; i < rx_ring->size; i++)
+ rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]);
+}
+
+static int rocker_port_dma_rings_init(struct rocker_port *rocker_port)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ int err;
+
+ err = rocker_dma_ring_create(rocker,
+ ROCKER_DMA_TX(rocker_port->port_number),
+ ROCKER_DMA_TX_DEFAULT_SIZE,
+ &rocker_port->tx_ring);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to create tx dma ring\n");
+ return err;
+ }
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->tx_ring,
+ PCI_DMA_TODEVICE,
+ ROCKER_DMA_TX_DESC_SIZE);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to alloc tx dma ring buffers\n");
+ goto err_dma_tx_ring_bufs_alloc;
+ }
+
+ err = rocker_dma_ring_create(rocker,
+ ROCKER_DMA_RX(rocker_port->port_number),
+ ROCKER_DMA_RX_DEFAULT_SIZE,
+ &rocker_port->rx_ring);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to create rx dma ring\n");
+ goto err_dma_rx_ring_create;
+ }
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->rx_ring,
+ PCI_DMA_BIDIRECTIONAL,
+ ROCKER_DMA_RX_DESC_SIZE);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to alloc rx dma ring buffers\n");
+ goto err_dma_rx_ring_bufs_alloc;
+ }
+
+ err = rocker_dma_rx_ring_skbs_alloc(rocker, rocker_port);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to alloc rx dma ring skbs\n");
+ goto err_dma_rx_ring_skbs_alloc;
+ }
+ rocker_dma_ring_pass_to_producer(rocker, &rocker_port->rx_ring);
+
+ return 0;
+
+err_dma_rx_ring_skbs_alloc:
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring,
+ PCI_DMA_BIDIRECTIONAL);
+err_dma_rx_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring);
+err_dma_rx_ring_create:
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring,
+ PCI_DMA_TODEVICE);
+err_dma_tx_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring);
+ return err;
+}
+
+static void rocker_port_dma_rings_fini(struct rocker_port *rocker_port)
+{
+ struct rocker *rocker = rocker_port->rocker;
+
+ rocker_dma_rx_ring_skbs_free(rocker, rocker_port);
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring,
+ PCI_DMA_BIDIRECTIONAL);
+ rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring);
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring,
+ PCI_DMA_TODEVICE);
+ rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring);
+}
+
+static void rocker_port_set_enable(struct rocker_port *rocker_port, bool enable)
+{
+ u64 val = rocker_read64(rocker_port->rocker, PORT_PHYS_ENABLE);
+
+ if (enable)
+ val |= 1 << rocker_port->lport;
+ else
+ val &= ~(1 << rocker_port->lport);
+ rocker_write64(rocker_port->rocker, PORT_PHYS_ENABLE, val);
+}
+
+/********************************
+ * Interrupt handler and helpers
+ ********************************/
+
+static irqreturn_t rocker_cmd_irq_handler(int irq, void *dev_id)
+{
+ struct rocker *rocker = dev_id;
+ struct rocker_desc_info *desc_info;
+ struct rocker_wait *wait;
+ u32 credits = 0;
+
+ spin_lock(&rocker->cmd_ring_lock);
+ while ((desc_info = rocker_desc_tail_get(&rocker->cmd_ring))) {
+ wait = rocker_desc_cookie_ptr_get(desc_info);
+ if (wait->nowait) {
+ rocker_desc_gen_clear(desc_info);
+ rocker_wait_destroy(wait);
+ } else {
+ rocker_wait_wake_up(wait);
+ }
+ credits++;
+ }
+ spin_unlock(&rocker->cmd_ring_lock);
+ rocker_dma_ring_credits_set(rocker, &rocker->cmd_ring, credits);
+
+ return IRQ_HANDLED;
+}
+
+static void rocker_port_link_up(struct rocker_port *rocker_port)
+{
+ netif_carrier_on(rocker_port->dev);
+ netdev_info(rocker_port->dev, "Link is up\n");
+}
+
+static void rocker_port_link_down(struct rocker_port *rocker_port)
+{
+ netif_carrier_off(rocker_port->dev);
+ netdev_info(rocker_port->dev, "Link is down\n");
+}
+
+static int rocker_event_link_change(struct rocker *rocker,
+ const struct rocker_tlv *info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_EVENT_LINK_CHANGED_MAX + 1];
+ unsigned int port_number;
+ bool link_up;
+ struct rocker_port *rocker_port;
+
+ rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_LINK_CHANGED_MAX, info);
+ if (!attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LPORT] ||
+ !attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP])
+ return -EIO;
+ port_number =
+ rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LPORT]) - 1;
+ link_up = rocker_tlv_get_u8(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP]);
+
+ if (port_number >= rocker->port_count)
+ return -EINVAL;
+
+ rocker_port = rocker->ports[port_number];
+ if (netif_carrier_ok(rocker_port->dev) != link_up) {
+ if (link_up)
+ rocker_port_link_up(rocker_port);
+ else
+ rocker_port_link_down(rocker_port);
+ }
+
+ return 0;
+}
+
+static int rocker_event_process(struct rocker *rocker,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAX + 1];
+ struct rocker_tlv *info;
+ u16 type;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_EVENT_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_EVENT_TYPE] ||
+ !attrs[ROCKER_TLV_EVENT_INFO])
+ return -EIO;
+
+ type = rocker_tlv_get_u16(attrs[ROCKER_TLV_EVENT_TYPE]);
+ info = attrs[ROCKER_TLV_EVENT_INFO];
+
+ switch (type) {
+ case ROCKER_TLV_EVENT_TYPE_LINK_CHANGED:
+ return rocker_event_link_change(rocker, info);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static irqreturn_t rocker_event_irq_handler(int irq, void *dev_id)
+{
+ struct rocker *rocker = dev_id;
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_desc_info *desc_info;
+ u32 credits = 0;
+ int err;
+
+ while ((desc_info = rocker_desc_tail_get(&rocker->event_ring))) {
+ err = rocker_desc_err(desc_info);
+ if (err) {
+ dev_err(&pdev->dev, "event desc received with err %d\n",
+ err);
+ } else {
+ err = rocker_event_process(rocker, desc_info);
+ if (err)
+ dev_err(&pdev->dev, "event processing failed with err %d\n",
+ err);
+ }
+ rocker_desc_gen_clear(desc_info);
+ rocker_desc_head_set(rocker, &rocker->event_ring, desc_info);
+ credits++;
+ }
+ rocker_dma_ring_credits_set(rocker, &rocker->event_ring, credits);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rocker_tx_irq_handler(int irq, void *dev_id)
+{
+ struct rocker_port *rocker_port = dev_id;
+
+ napi_schedule(&rocker_port->napi_tx);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rocker_rx_irq_handler(int irq, void *dev_id)
+{
+ struct rocker_port *rocker_port = dev_id;
+
+ napi_schedule(&rocker_port->napi_rx);
+ return IRQ_HANDLED;
+}
+
+/********************
+ * Command interface
+ ********************/
+
+typedef int (*rocker_cmd_cb_t)(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv);
+
+static int rocker_cmd_exec(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ rocker_cmd_cb_t prepare, void *prepare_priv,
+ rocker_cmd_cb_t process, void *process_priv,
+ bool nowait)
+{
+ struct rocker_desc_info *desc_info;
+ struct rocker_wait *wait;
+ unsigned long flags;
+ int err;
+
+ wait = rocker_wait_create(nowait ? GFP_ATOMIC : GFP_KERNEL);
+ if (!wait)
+ return -ENOMEM;
+ wait->nowait = nowait;
+
+ spin_lock_irqsave(&rocker->cmd_ring_lock, flags);
+ desc_info = rocker_desc_head_get(&rocker->cmd_ring);
+ if (!desc_info) {
+ spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags);
+ err = -EAGAIN;
+ goto out;
+ }
+ err = prepare(rocker, rocker_port, desc_info, prepare_priv);
+ if (err) {
+ spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags);
+ goto out;
+ }
+ rocker_desc_cookie_ptr_set(desc_info, wait);
+ rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info);
+ spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags);
+
+ if (nowait)
+ return 0;
+
+ if (!rocker_wait_event_timeout(wait, HZ / 10))
+ return -EIO;
+
+ err = rocker_desc_err(desc_info);
+ if (err)
+ return err;
+
+ if (process)
+ err = process(rocker, rocker_port, desc_info, process_priv);
+
+ rocker_desc_gen_clear(desc_info);
+out:
+ rocker_wait_destroy(wait);
+ return err;
+}
+
+static int
+rocker_cmd_get_port_settings_prep(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LPORT,
+ rocker_port->lport))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int
+rocker_cmd_get_port_settings_ethtool_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct ethtool_cmd *ecmd = priv;
+ struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1];
+ struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ u32 speed;
+ u8 duplex;
+ u8 autoneg;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_CMD_INFO])
+ return -EIO;
+
+ rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ attrs[ROCKER_TLV_CMD_INFO]);
+ if (!info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] ||
+ !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] ||
+ !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG])
+ return -EIO;
+
+ speed = rocker_tlv_get_u32(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
+ duplex = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
+ autoneg = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
+
+ ecmd->transceiver = XCVR_INTERNAL;
+ ecmd->supported = SUPPORTED_TP;
+ ecmd->phy_address = 0xff;
+ ecmd->port = PORT_TP;
+ ethtool_cmd_speed_set(ecmd, speed);
+ ecmd->duplex = duplex ? DUPLEX_FULL : DUPLEX_HALF;
+ ecmd->autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+ return 0;
+}
+
+static int
+rocker_cmd_get_port_settings_macaddr_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ unsigned char *macaddr = priv;
+ struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1];
+ struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ struct rocker_tlv *attr;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_CMD_INFO])
+ return -EIO;
+
+ rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ attrs[ROCKER_TLV_CMD_INFO]);
+ attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR];
+ if (!attr)
+ return -EIO;
+
+ if (rocker_tlv_len(attr) != ETH_ALEN)
+ return -EINVAL;
+
+ ether_addr_copy(macaddr, rocker_tlv_data(attr));
+ return 0;
+}
+
+static int
+rocker_cmd_set_port_settings_ethtool_prep(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct ethtool_cmd *ecmd = priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LPORT,
+ rocker_port->lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED,
+ ethtool_cmd_speed(ecmd)))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX,
+ ecmd->duplex))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG,
+ ecmd->autoneg))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int
+rocker_cmd_set_port_settings_macaddr_prep(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ unsigned char *macaddr = priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LPORT,
+ rocker_port->lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
+ ETH_ALEN, macaddr))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port,
+ struct ethtool_cmd *ecmd)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_get_port_settings_prep, NULL,
+ rocker_cmd_get_port_settings_ethtool_proc,
+ ecmd, false);
+}
+
+static int rocker_cmd_get_port_settings_macaddr(struct rocker_port *rocker_port,
+ unsigned char *macaddr)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_get_port_settings_prep, NULL,
+ rocker_cmd_get_port_settings_macaddr_proc,
+ macaddr, false);
+}
+
+static int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port,
+ struct ethtool_cmd *ecmd)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_set_port_settings_ethtool_prep,
+ ecmd, NULL, NULL, false);
+}
+
+static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port,
+ unsigned char *macaddr)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_set_port_settings_macaddr_prep,
+ macaddr, NULL, NULL, false);
+}
+
+/*****************
+ * Net device ops
+ *****************/
+
+static int rocker_port_open(struct net_device *dev)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int err;
+
+ err = rocker_port_dma_rings_init(rocker_port);
+ if (err)
+ return err;
+
+ err = request_irq(rocker_msix_tx_vector(rocker_port),
+ rocker_tx_irq_handler, 0,
+ rocker_driver_name, rocker_port);
+ if (err) {
+ netdev_err(rocker_port->dev, "cannot assign tx irq\n");
+ goto err_request_tx_irq;
+ }
+
+ err = request_irq(rocker_msix_rx_vector(rocker_port),
+ rocker_rx_irq_handler, 0,
+ rocker_driver_name, rocker_port);
+ if (err) {
+ netdev_err(rocker_port->dev, "cannot assign rx irq\n");
+ goto err_request_rx_irq;
+ }
+
+ napi_enable(&rocker_port->napi_tx);
+ napi_enable(&rocker_port->napi_rx);
+ rocker_port_set_enable(rocker_port, true);
+ netif_start_queue(dev);
+ return 0;
+
+err_request_rx_irq:
+ free_irq(rocker_msix_tx_vector(rocker_port), rocker_port);
+err_request_tx_irq:
+ rocker_port_dma_rings_fini(rocker_port);
+ return err;
+}
+
+static int rocker_port_stop(struct net_device *dev)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+
+ netif_stop_queue(dev);
+ rocker_port_set_enable(rocker_port, false);
+ napi_disable(&rocker_port->napi_rx);
+ napi_disable(&rocker_port->napi_tx);
+ free_irq(rocker_msix_rx_vector(rocker_port), rocker_port);
+ free_irq(rocker_msix_tx_vector(rocker_port), rocker_port);
+ rocker_port_dma_rings_fini(rocker_port);
+
+ return 0;
+}
+
+static void rocker_tx_desc_frags_unmap(struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_tlv *attrs[ROCKER_TLV_TX_MAX + 1];
+ struct rocker_tlv *attr;
+ int rem;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_TX_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_TX_FRAGS])
+ return;
+ rocker_tlv_for_each_nested(attr, attrs[ROCKER_TLV_TX_FRAGS], rem) {
+ struct rocker_tlv *frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_MAX + 1];
+ dma_addr_t dma_handle;
+ size_t len;
+
+ if (rocker_tlv_type(attr) != ROCKER_TLV_TX_FRAG)
+ continue;
+ rocker_tlv_parse_nested(frag_attrs, ROCKER_TLV_TX_FRAG_ATTR_MAX,
+ attr);
+ if (!frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
+ !frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN])
+ continue;
+ dma_handle = rocker_tlv_get_u64(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
+ len = rocker_tlv_get_u16(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
+ pci_unmap_single(pdev, dma_handle, len, DMA_TO_DEVICE);
+ }
+}
+
+static int rocker_tx_desc_frag_map_put(struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ char *buf, size_t buf_len)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct pci_dev *pdev = rocker->pdev;
+ dma_addr_t dma_handle;
+ struct rocker_tlv *frag;
+
+ dma_handle = pci_map_single(pdev, buf, buf_len, DMA_TO_DEVICE);
+ if (unlikely(pci_dma_mapping_error(pdev, dma_handle))) {
+ if (net_ratelimit())
+ netdev_err(rocker_port->dev, "failed to dma map tx frag\n");
+ return -EIO;
+ }
+ frag = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAG);
+ if (!frag)
+ goto unmap_frag;
+ if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_TX_FRAG_ATTR_ADDR,
+ dma_handle))
+ goto nest_cancel;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_TX_FRAG_ATTR_LEN,
+ buf_len))
+ goto nest_cancel;
+ rocker_tlv_nest_end(desc_info, frag);
+ return 0;
+
+nest_cancel:
+ rocker_tlv_nest_cancel(desc_info, frag);
+unmap_frag:
+ pci_unmap_single(pdev, dma_handle, buf_len, DMA_TO_DEVICE);
+ return -EMSGSIZE;
+}
+
+static netdev_tx_t rocker_port_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_desc_info *desc_info;
+ struct rocker_tlv *frags;
+ int i;
+ int err;
+
+ desc_info = rocker_desc_head_get(&rocker_port->tx_ring);
+ if (unlikely(!desc_info)) {
+ if (net_ratelimit())
+ netdev_err(dev, "tx ring full when queue awake\n");
+ return NETDEV_TX_BUSY;
+ }
+
+ rocker_desc_cookie_ptr_set(desc_info, skb);
+
+ frags = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAGS);
+ if (!frags)
+ goto out;
+ err = rocker_tx_desc_frag_map_put(rocker_port, desc_info,
+ skb->data, skb_headlen(skb));
+ if (err)
+ goto nest_cancel;
+ if (skb_shinfo(skb)->nr_frags > ROCKER_TX_FRAGS_MAX)
+ goto nest_cancel;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ err = rocker_tx_desc_frag_map_put(rocker_port, desc_info,
+ skb_frag_address(frag),
+ skb_frag_size(frag));
+ if (err)
+ goto unmap_frags;
+ }
+ rocker_tlv_nest_end(desc_info, frags);
+
+ rocker_desc_gen_clear(desc_info);
+ rocker_desc_head_set(rocker, &rocker_port->tx_ring, desc_info);
+
+ desc_info = rocker_desc_head_get(&rocker_port->tx_ring);
+ if (!desc_info)
+ netif_stop_queue(dev);
+
+ return NETDEV_TX_OK;
+
+unmap_frags:
+ rocker_tx_desc_frags_unmap(rocker_port, desc_info);
+nest_cancel:
+ rocker_tlv_nest_cancel(desc_info, frags);
+out:
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int rocker_port_set_mac_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int err;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ err = rocker_cmd_set_port_settings_macaddr(rocker_port, addr->sa_data);
+ if (err)
+ return err;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+static int rocker_port_switch_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ struct rocker *rocker = rocker_port->rocker;
+
+ psid->id_len = sizeof(rocker->hw.id);
+ memcpy(&psid->id, &rocker->hw.id, psid->id_len);
+ return 0;
+}
+
+static const struct net_device_ops rocker_port_netdev_ops = {
+ .ndo_open = rocker_port_open,
+ .ndo_stop = rocker_port_stop,
+ .ndo_start_xmit = rocker_port_xmit,
+ .ndo_set_mac_address = rocker_port_set_mac_address,
+ .ndo_switch_parent_id_get = rocker_port_switch_parent_id_get,
+};
+
+/********************
+ * ethtool interface
+ ********************/
+
+static int rocker_port_get_settings(struct net_device *dev,
+ struct ethtool_cmd *ecmd)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+
+ return rocker_cmd_get_port_settings_ethtool(rocker_port, ecmd);
+}
+
+static int rocker_port_set_settings(struct net_device *dev,
+ struct ethtool_cmd *ecmd)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+
+ return rocker_cmd_set_port_settings_ethtool(rocker_port, ecmd);
+}
+
+static void rocker_port_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, rocker_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+}
+
+static const struct ethtool_ops rocker_port_ethtool_ops = {
+ .get_settings = rocker_port_get_settings,
+ .set_settings = rocker_port_set_settings,
+ .get_drvinfo = rocker_port_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+};
+
+/*****************
+ * NAPI interface
+ *****************/
+
+static struct rocker_port *rocker_port_napi_tx_get(struct napi_struct *napi)
+{
+ return container_of(napi, struct rocker_port, napi_tx);
+}
+
+static int rocker_port_poll_tx(struct napi_struct *napi, int budget)
+{
+ struct rocker_port *rocker_port = rocker_port_napi_tx_get(napi);
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_desc_info *desc_info;
+ u32 credits = 0;
+ int err;
+
+ /* Cleanup tx descriptors */
+ while ((desc_info = rocker_desc_tail_get(&rocker_port->tx_ring))) {
+ err = rocker_desc_err(desc_info);
+ if (err && net_ratelimit())
+ netdev_err(rocker_port->dev, "tx desc received with err %d\n",
+ err);
+ rocker_tx_desc_frags_unmap(rocker_port, desc_info);
+ dev_kfree_skb_any(rocker_desc_cookie_ptr_get(desc_info));
+ credits++;
+ }
+
+ if (credits && netif_queue_stopped(rocker_port->dev))
+ netif_wake_queue(rocker_port->dev);
+
+ napi_complete(napi);
+ rocker_dma_ring_credits_set(rocker, &rocker_port->tx_ring, credits);
+
+ return 0;
+}
+
+static int rocker_port_rx_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
+ struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
+ size_t rx_len;
+
+ if (!skb)
+ return -ENOENT;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
+ return -EINVAL;
+
+ rocker_dma_rx_ring_skb_unmap(rocker, attrs);
+
+ rx_len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_LEN]);
+ skb_put(skb, rx_len);
+ skb->protocol = eth_type_trans(skb, rocker_port->dev);
+ netif_receive_skb(skb);
+
+ return rocker_dma_rx_ring_skb_alloc(rocker, rocker_port, desc_info);
+}
+
+static struct rocker_port *rocker_port_napi_rx_get(struct napi_struct *napi)
+{
+ return container_of(napi, struct rocker_port, napi_rx);
+}
+
+static int rocker_port_poll_rx(struct napi_struct *napi, int budget)
+{
+ struct rocker_port *rocker_port = rocker_port_napi_rx_get(napi);
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_desc_info *desc_info;
+ u32 credits = 0;
+ int err;
+
+ /* Process rx descriptors */
+ while (credits < budget &&
+ (desc_info = rocker_desc_tail_get(&rocker_port->rx_ring))) {
+ err = rocker_desc_err(desc_info);
+ if (err) {
+ if (net_ratelimit())
+ netdev_err(rocker_port->dev, "rx desc received with err %d\n",
+ err);
+ } else {
+ err = rocker_port_rx_proc(rocker, rocker_port,
+ desc_info);
+ if (err && net_ratelimit())
+ netdev_err(rocker_port->dev, "rx processing failed with err %d\n",
+ err);
+ }
+ rocker_desc_gen_clear(desc_info);
+ rocker_desc_head_set(rocker, &rocker_port->rx_ring, desc_info);
+ credits++;
+ }
+
+ if (credits < budget)
+ napi_complete(napi);
+
+ rocker_dma_ring_credits_set(rocker, &rocker_port->rx_ring, credits);
+
+ return credits;
+}
+
+/*****************
+ * PCI driver ops
+ *****************/
+
+static void rocker_carrier_init(struct rocker_port *rocker_port)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ u64 link_status = rocker_read64(rocker, PORT_PHYS_LINK_STATUS);
+ bool link_up;
+
+ link_up = link_status & (1 << rocker_port->lport);
+ if (link_up)
+ netif_carrier_on(rocker_port->dev);
+ else
+ netif_carrier_off(rocker_port->dev);
+}
+
+static void rocker_remove_ports(struct rocker *rocker)
+{
+ int i;
+
+ for (i = 0; i < rocker->port_count; i++)
+ unregister_netdev(rocker->ports[i]->dev);
+ kfree(rocker->ports);
+}
+
+static void rocker_port_dev_addr_init(struct rocker *rocker,
+ struct rocker_port *rocker_port)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int err;
+
+ err = rocker_cmd_get_port_settings_macaddr(rocker_port,
+ rocker_port->dev->dev_addr);
+ if (err) {
+ dev_warn(&pdev->dev, "failed to get mac address, using random\n");
+ eth_hw_addr_random(rocker_port->dev);
+ }
+}
+
+static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_port *rocker_port;
+ struct net_device *dev;
+ int err;
+
+ dev = alloc_etherdev(sizeof(struct rocker_port));
+ if (!dev)
+ return -ENOMEM;
+ rocker_port = netdev_priv(dev);
+ rocker_port->dev = dev;
+ rocker_port->rocker = rocker;
+ rocker_port->port_number = port_number;
+ rocker_port->lport = port_number + 1;
+
+ rocker_port_dev_addr_init(rocker, rocker_port);
+ dev->netdev_ops = &rocker_port_netdev_ops;
+ dev->ethtool_ops = &rocker_port_ethtool_ops;
+ netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
+ NAPI_POLL_WEIGHT);
+ netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
+ NAPI_POLL_WEIGHT);
+ rocker_carrier_init(rocker_port);
+
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ err = register_netdev(dev);
+ if (err) {
+ dev_err(&pdev->dev, "register_netdev failed\n");
+ goto err_register_netdev;
+ }
+ rocker->ports[port_number] = rocker_port;
+
+ return 0;
+
+err_register_netdev:
+ free_netdev(dev);
+ return err;
+}
+
+static int rocker_probe_ports(struct rocker *rocker)
+{
+ int i;
+ size_t alloc_size;
+ int err;
+
+ alloc_size = sizeof(struct rocker_port *) * rocker->port_count;
+ rocker->ports = kmalloc(alloc_size, GFP_KERNEL);
+ for (i = 0; i < rocker->port_count; i++) {
+ err = rocker_probe_port(rocker, i);
+ if (err)
+ goto remove_ports;
+ }
+ return 0;
+
+remove_ports:
+ rocker_remove_ports(rocker);
+ return err;
+}
+
+static int rocker_msix_init(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int msix_entries;
+ int i;
+ int err;
+
+ msix_entries = pci_msix_vec_count(pdev);
+ if (msix_entries < 0)
+ return msix_entries;
+
+ if (msix_entries != ROCKER_MSIX_VEC_COUNT(rocker->port_count))
+ return -EINVAL;
+
+ rocker->msix_entries = kmalloc_array(msix_entries,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!rocker->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < msix_entries; i++)
+ rocker->msix_entries[i].entry = i;
+
+ err = pci_enable_msix_exact(pdev, rocker->msix_entries, msix_entries);
+ if (err < 0)
+ goto err_enable_msix;
+
+ return 0;
+
+err_enable_msix:
+ kfree(rocker->msix_entries);
+ return err;
+}
+
+static void rocker_msix_fini(struct rocker *rocker)
+{
+ pci_disable_msix(rocker->pdev);
+ kfree(rocker->msix_entries);
+}
+
+static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct rocker *rocker;
+ int err;
+
+ rocker = kzalloc(sizeof(*rocker), GFP_KERNEL);
+ if (!rocker)
+ return -ENOMEM;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device failed\n");
+ goto err_pci_enable_device;
+ }
+
+ err = pci_request_regions(pdev, rocker_driver_name);
+ if (err) {
+ dev_err(&pdev->dev, "pci_request_regions failed\n");
+ goto err_pci_request_regions;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (!err) {
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n");
+ goto err_pci_set_dma_mask;
+ }
+ } else {
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_dma_mask failed\n");
+ goto err_pci_set_dma_mask;
+ }
+ }
+
+ if (pci_resource_len(pdev, 0) < ROCKER_PCI_BAR0_SIZE) {
+ dev_err(&pdev->dev, "invalid PCI region size\n");
+ goto err_pci_resource_len_check;
+ }
+
+ rocker->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!rocker->hw_addr) {
+ dev_err(&pdev->dev, "ioremap failed\n");
+ err = -EIO;
+ goto err_ioremap;
+ }
+ pci_set_master(pdev);
+
+ rocker->pdev = pdev;
+ pci_set_drvdata(pdev, rocker);
+
+ rocker->port_count = rocker_read32(rocker, PORT_PHYS_COUNT);
+
+ err = rocker_msix_init(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "MSI-X init failed\n");
+ goto err_msix_init;
+ }
+
+ err = rocker_basic_hw_test(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "basic hw test failed\n");
+ goto err_basic_hw_test;
+ }
+
+ rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
+
+ err = rocker_dma_rings_init(rocker);
+ if (err)
+ goto err_dma_rings_init;
+
+ err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD),
+ rocker_cmd_irq_handler, 0,
+ rocker_driver_name, rocker);
+ if (err) {
+ dev_err(&pdev->dev, "cannot assign cmd irq\n");
+ goto err_request_cmd_irq;
+ }
+
+ err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT),
+ rocker_event_irq_handler, 0,
+ rocker_driver_name, rocker);
+ if (err) {
+ dev_err(&pdev->dev, "cannot assign event irq\n");
+ goto err_request_event_irq;
+ }
+
+ rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
+
+ err = rocker_probe_ports(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "failed to probe ports\n");
+ goto err_probe_ports;
+ }
+
+ dev_info(&pdev->dev, "Rocker switch with id %016llx\n", rocker->hw.id);
+
+ return 0;
+
+err_probe_ports:
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
+err_request_event_irq:
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
+err_request_cmd_irq:
+ rocker_dma_rings_fini(rocker);
+err_dma_rings_init:
+err_basic_hw_test:
+ rocker_msix_fini(rocker);
+err_msix_init:
+ iounmap(rocker->hw_addr);
+err_ioremap:
+err_pci_resource_len_check:
+err_pci_set_dma_mask:
+ pci_release_regions(pdev);
+err_pci_request_regions:
+ pci_disable_device(pdev);
+err_pci_enable_device:
+ kfree(rocker);
+ return err;
+}
+
+static void rocker_remove(struct pci_dev *pdev)
+{
+ struct rocker *rocker = pci_get_drvdata(pdev);
+
+ rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
+ rocker_remove_ports(rocker);
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
+ rocker_dma_rings_fini(rocker);
+ rocker_msix_fini(rocker);
+ iounmap(rocker->hw_addr);
+ pci_release_regions(rocker->pdev);
+ pci_disable_device(rocker->pdev);
+ kfree(rocker);
+}
+
+static struct pci_driver rocker_pci_driver = {
+ .name = rocker_driver_name,
+ .id_table = rocker_pci_id_table,
+ .probe = rocker_probe,
+ .remove = rocker_remove,
+};
+
+/***********************
+ * Module init and exit
+ ***********************/
+
+static int __init rocker_module_init(void)
+{
+ return pci_register_driver(&rocker_pci_driver);
+}
+
+static void __exit rocker_module_exit(void)
+{
+ pci_unregister_driver(&rocker_pci_driver);
+}
+
+module_init(rocker_module_init);
+module_exit(rocker_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_AUTHOR("Scott Feldman <sfeldma@gmail.com>");
+MODULE_DESCRIPTION("Rocker switch device driver");
+MODULE_DEVICE_TABLE(pci, rocker_pci_id_table);
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
new file mode 100644
index 0000000..5251cf8
--- /dev/null
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -0,0 +1,427 @@
+/*
+ * drivers/net/ethernet/rocker/rocker.h - Rocker switch device driver
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ROCKER_H
+#define _ROCKER_H
+
+#include <linux/types.h>
+
+#define PCI_VENDOR_ID_REDHAT 0x1b36
+#define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006
+
+#define ROCKER_PCI_BAR0_SIZE 0x2000
+
+/* MSI-X vectors */
+enum {
+ ROCKER_MSIX_VEC_CMD,
+ ROCKER_MSIX_VEC_EVENT,
+ ROCKER_MSIX_VEC_TEST,
+ ROCKER_MSIX_VEC_RESERVED0,
+ __ROCKER_MSIX_VEC_TX,
+ __ROCKER_MSIX_VEC_RX,
+#define ROCKER_MSIX_VEC_TX(port) \
+ (__ROCKER_MSIX_VEC_TX + ((port) * 2))
+#define ROCKER_MSIX_VEC_RX(port) \
+ (__ROCKER_MSIX_VEC_RX + ((port) * 2))
+#define ROCKER_MSIX_VEC_COUNT(portcnt) \
+ (ROCKER_MSIX_VEC_RX((portcnt - 1)) + 1)
+};
+
+/* Rocker bogus registers */
+#define ROCKER_BOGUS_REG0 0x0000
+#define ROCKER_BOGUS_REG1 0x0004
+#define ROCKER_BOGUS_REG2 0x0008
+#define ROCKER_BOGUS_REG3 0x000c
+
+/* Rocker test registers */
+#define ROCKER_TEST_REG 0x0010
+#define ROCKER_TEST_REG64 0x0018 /* 8-byte */
+#define ROCKER_TEST_IRQ 0x0020
+#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */
+#define ROCKER_TEST_DMA_SIZE 0x0030
+#define ROCKER_TEST_DMA_CTRL 0x0034
+
+/* Rocker test register ctrl */
+#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0)
+#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1)
+#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2)
+
+/* Rocker DMA ring register offsets */
+#define ROCKER_DMA_DESC_ADDR(x) (0x1000 + (x) * 32) /* 8-byte */
+#define ROCKER_DMA_DESC_SIZE(x) (0x1008 + (x) * 32)
+#define ROCKER_DMA_DESC_HEAD(x) (0x100c + (x) * 32)
+#define ROCKER_DMA_DESC_TAIL(x) (0x1010 + (x) * 32)
+#define ROCKER_DMA_DESC_CTRL(x) (0x1014 + (x) * 32)
+#define ROCKER_DMA_DESC_CREDITS(x) (0x1018 + (x) * 32)
+#define ROCKER_DMA_DESC_RES1(x) (0x101c + (x) * 32)
+
+/* Rocker dma ctrl register bits */
+#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0)
+
+/* Rocker DMA ring types */
+enum rocker_dma_type {
+ ROCKER_DMA_CMD,
+ ROCKER_DMA_EVENT,
+ __ROCKER_DMA_TX,
+ __ROCKER_DMA_RX,
+#define ROCKER_DMA_TX(port) (__ROCKER_DMA_TX + (port) * 2)
+#define ROCKER_DMA_RX(port) (__ROCKER_DMA_RX + (port) * 2)
+};
+
+/* Rocker DMA ring size limits and default sizes */
+#define ROCKER_DMA_SIZE_MIN 2ul
+#define ROCKER_DMA_SIZE_MAX 65536ul
+#define ROCKER_DMA_CMD_DEFAULT_SIZE 32ul
+#define ROCKER_DMA_EVENT_DEFAULT_SIZE 32ul
+#define ROCKER_DMA_TX_DEFAULT_SIZE 64ul
+#define ROCKER_DMA_TX_DESC_SIZE 256
+#define ROCKER_DMA_RX_DEFAULT_SIZE 64ul
+#define ROCKER_DMA_RX_DESC_SIZE 256
+
+/* Rocker DMA descriptor struct */
+struct rocker_desc {
+ u64 buf_addr;
+ u64 cookie;
+ u16 buf_size;
+ u16 tlv_size;
+ u16 resv[5];
+ u16 comp_err;
+};
+
+#define ROCKER_DMA_DESC_COMP_ERR_GEN (1 << 15)
+
+/* Rocker DMA TLV struct */
+struct rocker_tlv {
+ u32 type;
+ u16 len;
+};
+
+/* TLVs */
+enum {
+ ROCKER_TLV_CMD_UNSPEC,
+ ROCKER_TLV_CMD_TYPE, /* u16 */
+ ROCKER_TLV_CMD_INFO, /* nest */
+
+ __ROCKER_TLV_CMD_MAX,
+ ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_CMD_TYPE_UNSPEC,
+ ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS,
+
+ __ROCKER_TLV_CMD_TYPE_MAX,
+ ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC,
+ ROCKER_TLV_CMD_PORT_SETTINGS_LPORT, /* u32 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */
+
+ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ ROCKER_TLV_CMD_PORT_SETTINGS_MAX =
+ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1,
+};
+
+enum rocker_port_mode {
+ ROCKER_PORT_MODE_OF_DPA,
+};
+
+enum {
+ ROCKER_TLV_EVENT_UNSPEC,
+ ROCKER_TLV_EVENT_TYPE, /* u16 */
+ ROCKER_TLV_EVENT_INFO, /* nest */
+
+ __ROCKER_TLV_EVENT_MAX,
+ ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_TYPE_UNSPEC,
+ ROCKER_TLV_EVENT_TYPE_LINK_CHANGED,
+ ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN,
+
+ __ROCKER_TLV_EVENT_TYPE_MAX,
+ ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC,
+ ROCKER_TLV_EVENT_LINK_CHANGED_LPORT, /* u32 */
+ ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */
+
+ __ROCKER_TLV_EVENT_LINK_CHANGED_MAX,
+ ROCKER_TLV_EVENT_LINK_CHANGED_MAX =
+ __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC,
+ ROCKER_TLV_EVENT_MAC_VLAN_LPORT, /* u32 */
+ ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */
+ ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */
+
+ __ROCKER_TLV_EVENT_MAC_VLAN_MAX,
+ ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_RX_UNSPEC,
+ ROCKER_TLV_RX_FLAGS, /* u16, see ROCKER_RX_FLAGS_ */
+ ROCKER_TLV_RX_CSUM, /* u16 */
+ ROCKER_TLV_RX_FRAG_ADDR, /* u64 */
+ ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */
+ ROCKER_TLV_RX_FRAG_LEN, /* u16 */
+
+ __ROCKER_TLV_RX_MAX,
+ ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1,
+};
+
+#define ROCKER_RX_FLAGS_IPV4 (1 << 0)
+#define ROCKER_RX_FLAGS_IPV6 (1 << 1)
+#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2)
+#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3)
+#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4)
+#define ROCKER_RX_FLAGS_TCP (1 << 5)
+#define ROCKER_RX_FLAGS_UDP (1 << 6)
+#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7)
+
+enum {
+ ROCKER_TLV_TX_UNSPEC,
+ ROCKER_TLV_TX_OFFLOAD, /* u8, see ROCKER_TX_OFFLOAD_ */
+ ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */
+ ROCKER_TLV_TX_TSO_MSS, /* u16 */
+ ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */
+ ROCKER_TLV_TX_FRAGS, /* array */
+
+ __ROCKER_TLV_TX_MAX,
+ ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1,
+};
+
+#define ROCKER_TX_OFFLOAD_NONE 0
+#define ROCKER_TX_OFFLOAD_IP_CSUM 1
+#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2
+#define ROCKER_TX_OFFLOAD_L3_CSUM 3
+#define ROCKER_TX_OFFLOAD_TSO 4
+
+#define ROCKER_TX_FRAGS_MAX 16
+
+enum {
+ ROCKER_TLV_TX_FRAG_UNSPEC,
+ ROCKER_TLV_TX_FRAG, /* nest */
+
+ __ROCKER_TLV_TX_FRAG_MAX,
+ ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_TX_FRAG_ATTR_UNSPEC,
+ ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */
+ ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */
+
+ __ROCKER_TLV_TX_FRAG_ATTR_MAX,
+ ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1,
+};
+
+/* cmd info nested for OF-DPA msgs */
+enum {
+ ROCKER_TLV_OF_DPA_UNSPEC,
+ ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */
+ ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */
+ ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */
+ ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */
+ ROCKER_TLV_OF_DPA_COOKIE, /* u64 */
+ ROCKER_TLV_OF_DPA_IN_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_IN_LPORT_MASK, /* u32 */
+ ROCKER_TLV_OF_DPA_OUT_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */
+ ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */
+ ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */
+ ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */
+ ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */
+ ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */
+ ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */
+ ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */
+ ROCKER_TLV_OF_DPA_TUN_LOG_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */
+ ROCKER_TLV_OF_DPA_DST_MAC, /* binary */
+ ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */
+ ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */
+ ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */
+ ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */
+ ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */
+ ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */
+ ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */
+
+ __ROCKER_TLV_OF_DPA_MAX,
+ ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1,
+};
+
+/* OF-DPA table IDs */
+
+enum rocker_of_dpa_table_id {
+ ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0,
+ ROCKER_OF_DPA_TABLE_ID_VLAN = 10,
+ ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20,
+ ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30,
+ ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40,
+ ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50,
+ ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60,
+};
+
+/* OF-DPA flow stats */
+enum {
+ ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC,
+ ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */
+ ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */
+ ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */
+
+ __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX,
+ ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1,
+};
+
+/* OF-DPA group types */
+enum rocker_of_dpa_group_type {
+ ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY,
+};
+
+/* OF-DPA group L2 overlay types */
+enum rocker_of_dpa_overlay_type {
+ ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0,
+ ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST,
+ ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST,
+ ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST,
+};
+
+/* OF-DPA group ID encoding */
+#define ROCKER_GROUP_TYPE_SHIFT 28
+#define ROCKER_GROUP_TYPE_MASK 0xf0000000
+#define ROCKER_GROUP_VLAN_SHIFT 16
+#define ROCKER_GROUP_VLAN_MASK 0x0fff0000
+#define ROCKER_GROUP_PORT_SHIFT 0
+#define ROCKER_GROUP_PORT_MASK 0x0000ffff
+#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12
+#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000
+#define ROCKER_GROUP_SUBTYPE_SHIFT 10
+#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00
+#define ROCKER_GROUP_INDEX_SHIFT 0
+#define ROCKER_GROUP_INDEX_MASK 0x0000ffff
+#define ROCKER_GROUP_INDEX_LONG_SHIFT 0
+#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff
+
+#define ROCKER_GROUP_TYPE_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT)
+#define ROCKER_GROUP_TYPE_SET(type) \
+ (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK)
+#define ROCKER_GROUP_VLAN_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT)
+#define ROCKER_GROUP_VLAN_SET(vlan_id) \
+ (((vlan_id) << ROCKER_GROUP_VLAN_SHIFT) & ROCKER_GROUP_VLAN_MASK)
+#define ROCKER_GROUP_PORT_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT)
+#define ROCKER_GROUP_PORT_SET(port) \
+ (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK)
+#define ROCKER_GROUP_INDEX_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT)
+#define ROCKER_GROUP_INDEX_SET(index) \
+ (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK)
+#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \
+ ROCKER_GROUP_INDEX_LONG_SHIFT)
+#define ROCKER_GROUP_INDEX_LONG_SET(index) \
+ (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \
+ ROCKER_GROUP_INDEX_LONG_MASK)
+
+#define ROCKER_GROUP_NONE 0
+#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port))
+#define ROCKER_GROUP_L2_REWRITE(index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\
+ ROCKER_GROUP_INDEX_LONG_SET(index))
+#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index))
+#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index))
+#define ROCKER_GROUP_L3_UNICAST(index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\
+ ROCKER_GROUP_INDEX_LONG_SET(index))
+
+/* Rocker general purpose registers */
+#define ROCKER_CONTROL 0x0300
+#define ROCKER_PORT_PHYS_COUNT 0x0304
+#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */
+#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */
+#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */
+
+/* Rocker control bits */
+#define ROCKER_CONTROL_RESET (1 << 0)
+
+#endif
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 14/21] bridge: add brport flags to dflt bridge_getlink
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
To allow brport device to return current brport flags set on port. Add
returned flags to nested IFLA_PROTINFO netlink msg built in dflt getlink.
With this change, netlink msg returned for bridge_getlink contains the port's
offloaded flag settings (the port's SELF settings).
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
---
v3->v4:
-no change
new in v3
---
drivers/net/ethernet/emulex/benet/be_main.c | 3 ++-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +-
include/linux/rtnetlink.h | 3 ++-
net/core/rtnetlink.c | 39 ++++++++++++++++++++++++++-
4 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 9070b98..6510ec8 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4362,7 +4362,8 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
hsw_mode == PORT_FWD_TYPE_VEPA ?
- BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB);
+ BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
+ 0, 0);
}
#ifdef CONFIG_BE2NET_VXLAN
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1bad9f4..eb2a04b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7773,7 +7773,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
else
mode = BRIDGE_MODE_VEPA;
- return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode);
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0);
}
static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 063f0f5..3b04190 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -103,5 +103,6 @@ extern int ndo_dflt_fdb_del(struct ndmsg *ndm,
u16 vid);
extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u16 mode);
+ struct net_device *dev, u16 mode,
+ u32 flags, u32 mask);
#endif /* __LINUX_RTNETLINK_H */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b6541f2..7525ba3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2687,12 +2687,22 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
+ unsigned int attrnum, unsigned int flag)
+{
+ if (mask & flag)
+ return nla_put_u8(skb, attrnum, !!(flags & flag));
+ return 0;
+}
+
int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u16 mode)
+ struct net_device *dev, u16 mode,
+ u32 flags, u32 mask)
{
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
struct nlattr *br_afspec;
+ struct nlattr *protinfo;
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
@@ -2731,6 +2741,33 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
}
nla_nest_end(skb, br_afspec);
+ protinfo = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED);
+ if (!protinfo)
+ goto nla_put_failure;
+
+ if (brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_MODE, BR_HAIRPIN_MODE) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_GUARD, BR_BPDU_GUARD) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_FAST_LEAVE,
+ BR_MULTICAST_FAST_LEAVE) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_LEARNING, BR_LEARNING) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_LEARNING_SYNC, BR_LEARNING_SYNC) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD) ||
+ brport_nla_put_flag(skb, flags, mask,
+ IFLA_BRPORT_PROXYARP, BR_PROXYARP)) {
+ nla_nest_cancel(skb, protinfo);
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, protinfo);
+
return nlmsg_end(skb, nlh);
nla_put_failure:
nlmsg_cancel(skb, nlh);
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 13/21] bridge: add new hwmode swdev
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
Current hwmode settings are "vepa" or "veb". These are for NIC interfaces
with basic bridging function offloaded to HW. Add new "swdev" for full
switch device offloads.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
v3->v4:
-no change
new in v3
---
include/uapi/linux/if_bridge.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index da17e45..60425ca 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -105,6 +105,7 @@ struct __fdb_entry {
#define BRIDGE_MODE_VEB 0 /* Default loopback mode */
#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */
+#define BRIDGE_MODE_SWDEV 2 /* Full switch device offload */
/* Bridge management nested attributes
* [IFLA_AF_SPEC] = {
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 12/21] bridge: add new brport flag LEARNING_SYNC
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
This policy flag controls syncing of learned FDB entries to bridge's FDB. If
on, FDB entries learned on bridge port device will be synced. If off, device
may still learn new FDB entries but they will not be synced with bridge's FDB.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
---
v3->v4:
-just resolved conflict caused by BIT change of previous patch
new in v3
---
include/linux/if_bridge.h | 1 +
include/uapi/linux/if_link.h | 1 +
2 files changed, 2 insertions(+)
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 2c81a8e..0a8ce76 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -43,6 +43,7 @@ struct br_ip_list {
#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
#define BR_PROMISC BIT(7)
#define BR_PROXYARP BIT(8)
+#define BR_LEARNING_SYNC BIT(9)
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 623f1a7..f7d0d2d 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -245,6 +245,7 @@ enum {
IFLA_BRPORT_LEARNING, /* mac learning */
IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
IFLA_BRPORT_PROXYARP, /* proxy ARP */
+ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 11/21] bridge: move private brport flags to if_bridge.h so port drivers can use flags
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com
---
v3->v4:
-used BIT macro as suggested by Thomas
new in v3
---
include/linux/if_bridge.h | 12 ++++++++++++
net/bridge/br_private.h | 10 ----------
2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index fa2eca6..2c81a8e 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -15,6 +15,7 @@
#include <linux/netdevice.h>
#include <uapi/linux/if_bridge.h>
+#include <linux/bitops.h>
struct br_ip {
union {
@@ -32,6 +33,17 @@ struct br_ip_list {
struct br_ip addr;
};
+#define BR_HAIRPIN_MODE BIT(0)
+#define BR_BPDU_GUARD BIT(1)
+#define BR_ROOT_BLOCK BIT(2)
+#define BR_MULTICAST_FAST_LEAVE BIT(3)
+#define BR_ADMIN_COST BIT(4)
+#define BR_LEARNING BIT(5)
+#define BR_FLOOD BIT(6)
+#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
+#define BR_PROMISC BIT(7)
+#define BR_PROXYARP BIT(8)
+
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
typedef int br_should_route_hook_t(struct sk_buff *skb);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cc36fb3..aea3d13 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -164,16 +164,6 @@ struct net_bridge_port
struct rcu_head rcu;
unsigned long flags;
-#define BR_HAIRPIN_MODE 0x00000001
-#define BR_BPDU_GUARD 0x00000002
-#define BR_ROOT_BLOCK 0x00000004
-#define BR_MULTICAST_FAST_LEAVE 0x00000008
-#define BR_ADMIN_COST 0x00000010
-#define BR_LEARNING 0x00000020
-#define BR_FLOOD 0x00000040
-#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
-#define BR_PROMISC 0x00000080
-#define BR_PROXYARP 0x00000100
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
struct bridge_mcast_own_query ip4_own_query;
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 10/21] bridge: add API to notify bridge driver of learned FBD on offloaded device
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
When the swdev device learns a new mac/vlan on a port, it sends some async
notification to the driver and the driver installs an FDB in the device.
To give a holistic system view, the learned mac/vlan should be reflected
in the bridge's FBD table, so the user, using normal iproute2 cmds, can view
what is currently learned by the device. This API on the bridge driver gives
a way for the swdev driver to install an FBD entry in the bridge FBD table.
(And remove one).
This is equivalent to the device running these cmds:
bridge fdb [add|del] <mac> dev <dev> vid <vlan id> master
This patch needs some extra eyeballs for review, in paricular around the
locking and contexts.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
v3->v4:
-cosmetic changes to resolve conflicts
-call bh version of spinlock for hash_lock to avoid deadlock
v2->v3:
-added "external" word into function names to emphasize fdbs are learned
externally
-added "added_by_external_learn" to fbd entry struct indicate the entry
was learned externaly and build some logic around that
-expose the fact that fdb entry was learned externally to userspace
v1->v2:
-no change
x
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
include/linux/if_bridge.h | 18 +++++++++
include/uapi/linux/neighbour.h | 1 +
net/bridge/br_fdb.c | 91 +++++++++++++++++++++++++++++++++++++++++-
net/bridge/br_private.h | 3 +-
4 files changed, 111 insertions(+), 2 deletions(-)
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 808dcb8..fa2eca6 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -37,6 +37,24 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use
typedef int br_should_route_hook_t(struct sk_buff *skb);
extern br_should_route_hook_t __rcu *br_should_route_hook;
+#if IS_ENABLED(CONFIG_BRIDGE)
+int br_fdb_external_learn_add(struct net_device *dev,
+ const unsigned char *addr, u16 vid);
+int br_fdb_external_learn_del(struct net_device *dev,
+ const unsigned char *addr, u16 vid);
+#else
+static inline int br_fdb_external_learn_add(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ return 0;
+}
+static inline int br_fdb_external_learn_del(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ return 0;
+}
+#endif
+
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING)
int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list);
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 2f043af..f3d77f9 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -38,6 +38,7 @@ enum {
#define NTF_SELF 0x02
#define NTF_MASTER 0x04
#define NTF_PROXY 0x08 /* == ATF_PUBL */
+#define NTF_EXT_LEARNED 0x10
#define NTF_ROUTER 0x80
/*
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b1be971..cc36e59 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -481,6 +481,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb->is_local = 0;
fdb->is_static = 0;
fdb->added_by_user = 0;
+ fdb->added_by_external_learn = 0;
fdb->updated = fdb->used = jiffies;
hlist_add_head_rcu(&fdb->hlist, head);
}
@@ -613,7 +614,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_family = AF_BRIDGE;
ndm->ndm_pad1 = 0;
ndm->ndm_pad2 = 0;
- ndm->ndm_flags = 0;
+ ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0;
ndm->ndm_type = 0;
ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(fdb);
@@ -983,3 +984,91 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
}
}
}
+
+int br_fdb_external_learn_add(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ struct hlist_head *head;
+ struct net_bridge_fdb_entry *fdb;
+ int err = 0;
+
+ rtnl_lock();
+
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ pr_info("bridge: %s not a bridge port\n", dev->name);
+ err = -EINVAL;
+ goto err_rtnl_unlock;
+ }
+
+ br = p->br;
+
+ spin_lock_bh(&br->hash_lock);
+
+ head = &br->hash[br_mac_hash(addr, vid)];
+ fdb = fdb_find(head, addr, vid);
+ if (!fdb) {
+ fdb = fdb_create(head, p, addr, vid);
+ if (!fdb) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
+ fdb->added_by_external_learn = 1;
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
+ } else if (fdb->added_by_external_learn) {
+ /* Refresh entry */
+ fdb->updated = fdb->used = jiffies;
+ } else if (!fdb->added_by_user) {
+ /* Take over SW learned entry */
+ fdb->added_by_external_learn = 1;
+ fdb->updated = jiffies;
+ fdb_notify(br, fdb, RTM_NEWNEIGH);
+ }
+
+err_unlock:
+ spin_unlock_bh(&br->hash_lock);
+err_rtnl_unlock:
+ rtnl_unlock();
+
+ return err;
+}
+EXPORT_SYMBOL(br_fdb_external_learn_add);
+
+int br_fdb_external_learn_del(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ struct hlist_head *head;
+ struct net_bridge_fdb_entry *fdb;
+ int err = 0;
+
+ rtnl_lock();
+
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ pr_info("bridge: %s not a bridge port\n", dev->name);
+ err = -EINVAL;
+ goto err_rtnl_unlock;
+ }
+
+ br = p->br;
+
+ spin_lock_bh(&br->hash_lock);
+
+ head = &br->hash[br_mac_hash(addr, vid)];
+ fdb = fdb_find(head, addr, vid);
+ if (fdb && fdb->added_by_external_learn)
+ fdb_delete(br, fdb);
+ else
+ err = -ENOENT;
+
+ spin_unlock_bh(&br->hash_lock);
+err_rtnl_unlock:
+ rtnl_unlock();
+
+ return err;
+}
+EXPORT_SYMBOL(br_fdb_external_learn_del);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1b529da..cc36fb3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -100,7 +100,8 @@ struct net_bridge_fdb_entry
mac_addr addr;
unsigned char is_local:1,
is_static:1,
- added_by_user:1;
+ added_by_user:1,
+ added_by_external_learn:1;
__u16 vlan_id;
};
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 09/21] bridge: call netdev_sw_port_stp_update when bridge port STP status changes
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
To notify switch driver of change in STP state of bridge port, add new
.ndo op and provide switchdev wrapper func to call ndo op. Use it in bridge
code then.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
v3->v4:
-added warning when call to netdev_sw_port_stp_update was not successful
suggested by Andy
v2->v3:
-changed "sw" string to "switch" to avoid confusion
v1->v2:
-no change
---
include/linux/netdevice.h | 5 +++++
include/net/switchdev.h | 7 +++++++
net/bridge/br_stp.c | 7 +++++++
net/switchdev/switchdev.c | 19 +++++++++++++++++++
4 files changed, 38 insertions(+)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3603f31..29c92ee 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1024,6 +1024,9 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* Called to get an ID of the switch chip this port is part of.
* If driver implements this, it indicates that it represents a port
* of a switch chip.
+ * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
+ * Called to notify switch device port of bridge port STP
+ * state change.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1180,6 +1183,8 @@ struct net_device_ops {
#ifdef CONFIG_NET_SWITCHDEV
int (*ndo_switch_parent_id_get)(struct net_device *dev,
struct netdev_phys_item_id *psid);
+ int (*ndo_switch_port_stp_update)(struct net_device *dev,
+ u8 state);
#endif
};
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 7a52360..8a6d164 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -16,6 +16,7 @@
int netdev_switch_parent_id_get(struct net_device *dev,
struct netdev_phys_item_id *psid);
+int netdev_switch_port_stp_update(struct net_device *dev, u8 state);
#else
@@ -25,6 +26,12 @@ static inline int netdev_switch_parent_id_get(struct net_device *dev,
return -EOPNOTSUPP;
}
+static inline int netdev_switch_port_stp_update(struct net_device *dev,
+ u8 state)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 2b047bc..fb3ebe6 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -12,6 +12,7 @@
*/
#include <linux/kernel.h>
#include <linux/rculist.h>
+#include <net/switchdev.h>
#include "br_private.h"
#include "br_private_stp.h"
@@ -38,7 +39,13 @@ void br_log_state(const struct net_bridge_port *p)
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
+ int err;
+
p->state = state;
+ err = netdev_switch_port_stp_update(p->dev, state);
+ if (err && err != -EOPNOTSUPP)
+ br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
+ (unsigned int) p->port_no, p->dev->name);
}
/* called under bridge lock */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 66973de..d162b21 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -31,3 +31,22 @@ int netdev_switch_parent_id_get(struct net_device *dev,
return ops->ndo_switch_parent_id_get(dev, psid);
}
EXPORT_SYMBOL(netdev_switch_parent_id_get);
+
+/**
+ * netdev_switch_port_stp_update - Notify switch device port of STP
+ * state change
+ * @dev: port device
+ * @state: port STP state
+ *
+ * Notify switch device port of bridge port STP state change.
+ */
+int netdev_switch_port_stp_update(struct net_device *dev, u8 state)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_switch_port_stp_update)
+ return -EOPNOTSUPP;
+ WARN_ON(!ops->ndo_switch_parent_id_get);
+ return ops->ndo_switch_port_stp_update(dev, state);
+}
+EXPORT_SYMBOL(netdev_switch_port_stp_update);
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 08/21] net-sysfs: expose physical switch id for particular device
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Thomas Graf <tgraf@suug.ch>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
v3->v4:
-added entry to Documentation/ABI/testing/sysfs-class-net as suggested
by Florian
v2->v3:
-changed "sw" string to "switch" to avoid confusion
v1->v2:
-no change
---
Documentation/ABI/testing/sysfs-class-net | 8 ++++++++
net/core/net-sysfs.c | 24 ++++++++++++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index e1b2e78..beb8ec4 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -216,3 +216,11 @@ Contact: netdev@vger.kernel.org
Description:
Indicates the interface protocol type as a decimal value. See
include/uapi/linux/if_arp.h for all possible values.
+
+What: /sys/class/net/<iface>/phys_switch_id
+Date: November 2014
+KernelVersion: 3.19
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the unique physical switch identifier of a switch this
+ port belongs to, as a string.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 26c46f4..9993412 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -12,6 +12,7 @@
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <net/switchdev.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
@@ -416,6 +417,28 @@ static ssize_t phys_port_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_port_id);
+static ssize_t phys_switch_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ struct netdev_phys_item_id ppid;
+
+ ret = netdev_switch_parent_id_get(netdev, &ppid);
+ if (!ret)
+ ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(phys_switch_id);
+
static struct attribute *net_class_attrs[] = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
@@ -441,6 +464,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_tx_queue_len.attr,
&dev_attr_gro_flush_timeout.attr,
&dev_attr_phys_port_id.attr,
+ &dev_attr_phys_switch_id.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 07/21] rtnl: expose physical switch id for particular device
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
The netdevice represents a port in a switch, it will expose
IFLA_PHYS_SWITCH_ID value via rtnl. Two netdevices with the same value
belong to one physical switch.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Thomas Graf <tgraf@suug.ch>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
---
v3->v4:
-no change
v2->v3:
-changed "sw" string to "switch" to avoid confusion
v1->v2:
-no change
---
include/uapi/linux/if_link.h | 1 +
net/core/rtnetlink.c | 26 +++++++++++++++++++++++++-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 36bddc2..623f1a7 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -145,6 +145,7 @@ enum {
IFLA_CARRIER,
IFLA_PHYS_PORT_ID,
IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
__IFLA_MAX
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7c59c02..b6541f2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -44,6 +44,7 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
+#include <net/switchdev.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
@@ -869,7 +870,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
- + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -968,6 +970,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ int err;
+ struct netdev_phys_item_id psid;
+
+ err = netdev_switch_parent_id_get(dev, &psid);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
@@ -1040,6 +1060,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_port_id_fill(skb, dev))
goto nla_put_failure;
+ if (rtnl_phys_switch_id_fill(skb, dev))
+ goto nla_put_failure;
+
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (attr == NULL)
@@ -1199,6 +1222,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
+ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 06/21] net: introduce generic switch devices support
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
The goal of this is to provide a possibility to support various switch
chips. Drivers should implement relevant ndos to do so. Now there is
only one ndo defined:
- for getting physical switch id is in place.
Note that user can use random port netdevice to access the switch.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
---
v3->v4:
-no change
v2->v3:
-fixed documentation typo pointed out by M. Braun
-changed "sw" string to "switch" to avoid confusion
v1->v2:
-no change
---
Documentation/networking/switchdev.txt | 59 ++++++++++++++++++++++++++++++++++
MAINTAINERS | 7 ++++
include/linux/netdevice.h | 10 ++++++
include/net/switchdev.h | 30 +++++++++++++++++
net/Kconfig | 1 +
net/Makefile | 3 ++
net/switchdev/Kconfig | 13 ++++++++
net/switchdev/Makefile | 5 +++
net/switchdev/switchdev.c | 33 +++++++++++++++++++
9 files changed, 161 insertions(+)
create mode 100644 Documentation/networking/switchdev.txt
create mode 100644 include/net/switchdev.h
create mode 100644 net/switchdev/Kconfig
create mode 100644 net/switchdev/Makefile
create mode 100644 net/switchdev/switchdev.c
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
new file mode 100644
index 0000000..f981a92
--- /dev/null
+++ b/Documentation/networking/switchdev.txt
@@ -0,0 +1,59 @@
+Switch (and switch-ish) device drivers HOWTO
+===========================
+
+Please note that the word "switch" is here used in very generic meaning.
+This include devices supporting L2/L3 but also various flow offloading chips,
+including switches embedded into SR-IOV NICs.
+
+Lets describe a topology a bit. Imagine the following example:
+
+ +----------------------------+ +---------------+
+ | SOME switch chip | | CPU |
+ +----------------------------+ +---------------+
+ port1 port2 port3 port4 MNGMNT | PCI-E |
+ | | | | | +---------------+
+ PHY PHY | | | | NIC0 NIC1
+ | | | | | |
+ | | +- PCI-E -+ | |
+ | +------- MII -------+ |
+ +------------- MII ------------+
+
+In this example, there are two independent lines between the switch silicon
+and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are
+separate from the switch driver. SOME switch chip is by managed by a driver
+via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be
+connected to some other type of bus.
+
+Now, for the previous example show the representation in kernel:
+
+ +----------------------------+ +---------------+
+ | SOME switch chip | | CPU |
+ +----------------------------+ +---------------+
+ sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT | PCI-E |
+ | | | | | +---------------+
+ PHY PHY | | | | eth0 eth1
+ | | | | | |
+ | | +- PCI-E -+ | |
+ | +------- MII -------+ |
+ +------------- MII ------------+
+
+Lets call the example switch driver for SOME switch chip "SOMEswitch". This
+driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX
+created for each port of a switch. These netdevices are instances
+of "SOMEswitch" driver. sw0pX netdevices serve as a "representation"
+of the switch chip. eth0 and eth1 are instances of some other existing driver.
+
+The only difference of the switch-port netdevice from the ordinary netdevice
+is that is implements couple more NDOs:
+
+ ndo_switch_parent_id_get - This returns the same ID for two port netdevices
+ of the same physical switch chip. This is
+ mandatory to be implemented by all switch drivers
+ and serves the caller for recognition of a port
+ netdevice.
+ ndo_switch_parent_* - Functions that serve for a manipulation of the switch
+ chip itself (it can be though of as a "parent" of the
+ port, therefore the name). They are not port-specific.
+ Caller might use arbitrary port netdevice of the same
+ switch and it will make no difference.
+ ndo_switch_port_* - Functions that serve for a port-specific manipulation.
diff --git a/MAINTAINERS b/MAINTAINERS
index a545d68..05addb6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9058,6 +9058,13 @@ F: lib/swiotlb.c
F: arch/*/kernel/pci-swiotlb.c
F: include/linux/swiotlb.h
+SWITCHDEV
+M: Jiri Pirko <jiri@resnulli.us>
+L: netdev@vger.kernel.org
+S: Supported
+F: net/switchdev/
+F: include/net/switchdev.h
+
SYNOPSYS ARC ARCHITECTURE
M: Vineet Gupta <vgupta@synopsys.com>
S: Supported
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4bd41d7..3603f31 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1018,6 +1018,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* performing GSO on a packet. The device returns true if it is
* able to GSO the packet, false otherwise. If the return value is
* false the stack will do software GSO.
+ *
+ * int (*ndo_switch_parent_id_get)(struct net_device *dev,
+ * struct netdev_phys_item_id *psid);
+ * Called to get an ID of the switch chip this port is part of.
+ * If driver implements this, it indicates that it represents a port
+ * of a switch chip.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1171,6 +1177,10 @@ struct net_device_ops {
int (*ndo_get_lock_subclass)(struct net_device *dev);
bool (*ndo_gso_check) (struct sk_buff *skb,
struct net_device *dev);
+#ifdef CONFIG_NET_SWITCHDEV
+ int (*ndo_switch_parent_id_get)(struct net_device *dev,
+ struct netdev_phys_item_id *psid);
+#endif
};
/**
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
new file mode 100644
index 0000000..7a52360
--- /dev/null
+++ b/include/net/switchdev.h
@@ -0,0 +1,30 @@
+/*
+ * include/net/switchdev.h - Switch device API
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _LINUX_SWITCHDEV_H_
+#define _LINUX_SWITCHDEV_H_
+
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NET_SWITCHDEV
+
+int netdev_switch_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid);
+
+#else
+
+static inline int netdev_switch_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/Kconfig b/net/Kconfig
index 99815b5..ff9ffc1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -228,6 +228,7 @@ source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
+source "net/switchdev/Kconfig"
config RPS
boolean
diff --git a/net/Makefile b/net/Makefile
index 7ed1970..95fc694 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -73,3 +73,6 @@ obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_NET_MPLS_GSO) += mpls/
obj-$(CONFIG_HSR) += hsr/
+ifneq ($(CONFIG_NET_SWITCHDEV),)
+obj-y += switchdev/
+endif
diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig
new file mode 100644
index 0000000..1557545
--- /dev/null
+++ b/net/switchdev/Kconfig
@@ -0,0 +1,13 @@
+#
+# Configuration for Switch device support
+#
+
+config NET_SWITCHDEV
+ boolean "Switch (and switch-ish) device support (EXPERIMENTAL)"
+ depends on INET
+ ---help---
+ This module provides glue between core networking code and device
+ drivers in order to support hardware switch chips in very generic
+ meaning of the word "switch". This include devices supporting L2/L3 but
+ also various flow offloading chips, including switches embedded into
+ SR-IOV NICs.
diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile
new file mode 100644
index 0000000..5ed63ed
--- /dev/null
+++ b/net/switchdev/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Switch device API
+#
+
+obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
new file mode 100644
index 0000000..66973de
--- /dev/null
+++ b/net/switchdev/switchdev.c
@@ -0,0 +1,33 @@
+/*
+ * net/switchdev/switchdev.c - Switch device API
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <net/switchdev.h>
+
+/**
+ * netdev_switch_parent_id_get - Get ID of a switch
+ * @dev: port device
+ * @psid: switch ID
+ *
+ * Get ID of a switch this port is part of.
+ */
+int netdev_switch_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_switch_parent_id_get)
+ return -EOPNOTSUPP;
+ return ops->ndo_switch_parent_id_get(dev, psid);
+}
+EXPORT_SYMBOL(netdev_switch_parent_id_get);
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 05/21] net: rename netdev_phys_port_id to more generic name
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
So this can be reused for identification of other "items" as well.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Thomas Graf <tgraf@suug.ch>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
v1->v2->v3->v4:
-no change
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +-
include/linux/netdevice.h | 16 ++++++++--------
net/core/dev.c | 2 +-
net/core/net-sysfs.c | 2 +-
net/core/rtnetlink.c | 6 +++---
8 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c4bd025..336ef3c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12537,7 +12537,7 @@ static int bnx2x_validate_addr(struct net_device *dev)
}
static int bnx2x_get_phys_port_id(struct net_device *netdev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct bnx2x *bp = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5ed5e40..9ae4270 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7511,7 +7511,7 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
#endif
static int i40e_get_phys_port_id(struct net_device *netdev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_pf *pf = np->vsi->back;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index b7c9978..1597fb0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2259,7 +2259,7 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st
#define PORT_ID_BYTE_LEN 8
static int mlx4_en_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_dev *mdev = priv->mdev->dev;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 3227c80..1aa25b1 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -461,7 +461,7 @@ static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter)
}
static int qlcnic_get_phys_port_id(struct net_device *netdev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
struct qlcnic_hardware_context *ahw = adapter->ahw;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 589929c..4bd41d7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -754,13 +754,13 @@ struct netdev_fcoe_hbainfo {
};
#endif
-#define MAX_PHYS_PORT_ID_LEN 32
+#define MAX_PHYS_ITEM_ID_LEN 32
-/* This structure holds a unique identifier to identify the
- * physical port used by a netdevice.
+/* This structure holds a unique identifier to identify some
+ * physical item (port for example) used by a netdevice.
*/
-struct netdev_phys_port_id {
- unsigned char id[MAX_PHYS_PORT_ID_LEN];
+struct netdev_phys_item_id {
+ unsigned char id[MAX_PHYS_ITEM_ID_LEN];
unsigned char id_len;
};
@@ -976,7 +976,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
*
* int (*ndo_get_phys_port_id)(struct net_device *dev,
- * struct netdev_phys_port_id *ppid);
+ * struct netdev_phys_item_id *ppid);
* Called to get ID of physical port of this device. If driver does
* not implement this, it is assumed that the hw is not able to have
* multiple net devices on single physical port.
@@ -1152,7 +1152,7 @@ struct net_device_ops {
int (*ndo_change_carrier)(struct net_device *dev,
bool new_carrier);
int (*ndo_get_phys_port_id)(struct net_device *dev,
- struct netdev_phys_port_id *ppid);
+ struct netdev_phys_item_id *ppid);
void (*ndo_add_vxlan_port)(struct net_device *dev,
sa_family_t sa_family,
__be16 port);
@@ -2870,7 +2870,7 @@ void dev_set_group(struct net_device *, int);
int dev_set_mac_address(struct net_device *, struct sockaddr *);
int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid);
+ struct netdev_phys_item_id *ppid);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
diff --git a/net/core/dev.c b/net/core/dev.c
index ac48362..0814a56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5846,7 +5846,7 @@ EXPORT_SYMBOL(dev_change_carrier);
* Get device physical port ID
*/
int dev_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
const struct net_device_ops *ops = dev->netdev_ops;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1a24602..26c46f4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -404,7 +404,7 @@ static ssize_t phys_port_id_show(struct device *dev,
return restart_syscall();
if (dev_isalive(netdev)) {
- struct netdev_phys_port_id ppid;
+ struct netdev_phys_item_id ppid;
ret = dev_get_phys_port_id(netdev, &ppid);
if (!ret)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 85d1d76..7c59c02 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -869,7 +869,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
- + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -953,7 +953,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
- struct netdev_phys_port_id ppid;
+ struct netdev_phys_item_id ppid;
err = dev_get_phys_port_id(dev, &ppid);
if (err) {
@@ -1197,7 +1197,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
- [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
+ [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
};
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 04/21] net: make vid as a parameter for ndo_fdb_add/ndo_fdb_del
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
Do the work of parsing NDA_VLAN directly in rtnetlink code, pass simple
u16 vid to drivers from there.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
---
v3->v4:
-fixed spelling of "fdb" pointed out by Andy
new in v3
---
drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 9 +++--
drivers/net/macvlan.c | 4 +-
drivers/net/vxlan.c | 4 +-
include/linux/netdevice.h | 8 ++--
include/linux/rtnetlink.h | 6 ++-
net/bridge/br_fdb.c | 39 ++----------------
net/bridge/br_private.h | 4 +-
net/core/rtnetlink.c | 50 ++++++++++++++++++++----
10 files changed, 70 insertions(+), 60 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7262077..5ed5e40 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7536,7 +7536,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
*/
static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr,
+ const unsigned char *addr, u16 vid,
u16 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 932f779..1bad9f4 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7708,7 +7708,7 @@ static int ixgbe_set_features(struct net_device *netdev,
static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr,
+ const unsigned char *addr, u16 vid,
u16 flags)
{
/* guarantee we can provide a unique filter for the unicast address */
@@ -7717,7 +7717,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -ENOMEM;
}
- return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags);
+ return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
}
static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index a913b3a..3227c80 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -376,13 +376,14 @@ static int qlcnic_set_mac(struct net_device *netdev, void *p)
}
static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *netdev, const unsigned char *addr)
+ struct net_device *netdev,
+ const unsigned char *addr, u16 vid)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
int err = -EOPNOTSUPP;
if (!adapter->fdb_mac_learn)
- return ndo_dflt_fdb_del(ndm, tb, netdev, addr);
+ return ndo_dflt_fdb_del(ndm, tb, netdev, addr, vid);
if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
qlcnic_sriov_check(adapter)) {
@@ -401,13 +402,13 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *netdev,
- const unsigned char *addr, u16 flags)
+ const unsigned char *addr, u16 vid, u16 flags)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
int err = 0;
if (!adapter->fdb_mac_learn)
- return ndo_dflt_fdb_add(ndm, tb, netdev, addr, flags);
+ return ndo_dflt_fdb_add(ndm, tb, netdev, addr, vid, flags);
if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED) &&
!qlcnic_sriov_check(adapter)) {
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index bfb0b6e..a1a3e3e 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -872,7 +872,7 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr,
+ const unsigned char *addr, u16 vid,
u16 flags)
{
struct macvlan_dev *vlan = netdev_priv(dev);
@@ -897,7 +897,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
static int macvlan_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr)
+ const unsigned char *addr, u16 vid)
{
struct macvlan_dev *vlan = netdev_priv(dev);
int err = -EINVAL;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e9f81d4..7d8013d 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -849,7 +849,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
/* Add static entry (via netlink) */
static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 flags)
+ const unsigned char *addr, u16 vid, u16 flags)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
/* struct net *net = dev_net(vxlan->dev); */
@@ -885,7 +885,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* Delete entry (via netlink) */
static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr)
+ const unsigned char *addr, u16 vid)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2cb7724..589929c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -951,11 +951,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
*
* int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
* struct net_device *dev,
- * const unsigned char *addr, u16 flags)
+ * const unsigned char *addr, u16 vid, u16 flags)
* Adds an FDB entry to dev for addr.
* int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
* struct net_device *dev,
- * const unsigned char *addr)
+ * const unsigned char *addr, u16 vid)
* Deletes the FDB entry from dev coresponding to addr.
* int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
* struct net_device *dev, struct net_device *filter_dev,
@@ -1128,11 +1128,13 @@ struct net_device_ops {
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr,
+ u16 vid,
u16 flags);
int (*ndo_fdb_del)(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr);
+ const unsigned char *addr,
+ u16 vid);
int (*ndo_fdb_dump)(struct sk_buff *skb,
struct netlink_callback *cb,
struct net_device *dev,
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 6cacbce..063f0f5 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -94,11 +94,13 @@ extern int ndo_dflt_fdb_add(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr,
- u16 flags);
+ u16 vid,
+ u16 flags);
extern int ndo_dflt_fdb_del(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr);
+ const unsigned char *addr,
+ u16 vid);
extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u16 mode);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 08ef4e7..b1be971 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -805,33 +805,17 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
/* Add new permanent fdb entry with RTM_NEWNEIGH */
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr, u16 nlh_flags)
+ const unsigned char *addr, u16 vid, u16 nlh_flags)
{
struct net_bridge_port *p;
int err = 0;
struct net_port_vlans *pv;
- unsigned short vid = VLAN_N_VID;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
return -EINVAL;
}
- if (tb[NDA_VLAN]) {
- if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
- pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
- return -EINVAL;
- }
-
- vid = nla_get_u16(tb[NDA_VLAN]);
-
- if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
- return -EINVAL;
- }
- }
-
if (is_zero_ether_addr(addr)) {
pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
return -EINVAL;
@@ -845,7 +829,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
}
pv = nbp_get_vlan_info(p);
- if (vid != VLAN_N_VID) {
+ if (vid) {
if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
pr_info("bridge: RTM_NEWNEIGH with unconfigured "
"vlan %d on port %s\n", vid, dev->name);
@@ -903,27 +887,12 @@ static int __br_fdb_delete(struct net_bridge_port *p,
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr)
+ const unsigned char *addr, u16 vid)
{
struct net_bridge_port *p;
int err;
struct net_port_vlans *pv;
- unsigned short vid = VLAN_N_VID;
-
- if (tb[NDA_VLAN]) {
- if (nla_len(tb[NDA_VLAN]) != sizeof(unsigned short)) {
- pr_info("bridge: RTM_NEWNEIGH with invalid vlan\n");
- return -EINVAL;
- }
- vid = nla_get_u16(tb[NDA_VLAN]);
-
- if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
- return -EINVAL;
- }
- }
p = br_port_get_rtnl(dev);
if (p == NULL) {
pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
@@ -932,7 +901,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
}
pv = nbp_get_vlan_info(p);
- if (vid != VLAN_N_VID) {
+ if (vid) {
if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
pr_info("bridge: RTM_DELNEIGH with unconfigured "
"vlan %d on port %s\n", vid, dev->name);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 5b37077..1b529da 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -404,9 +404,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid, bool added_by_user);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev, const unsigned char *addr);
+ struct net_device *dev, const unsigned char *addr, u16 vid);
int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
- const unsigned char *addr, u16 nlh_flags);
+ const unsigned char *addr, u16 vid, u16 nlh_flags);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *fdev, int idx);
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a688268..85d1d76 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -36,6 +36,7 @@
#include <linux/mutex.h>
#include <linux/if_addr.h>
#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/etherdevice.h>
@@ -2312,7 +2313,7 @@ errout:
int ndo_dflt_fdb_add(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr,
+ const unsigned char *addr, u16 vid,
u16 flags)
{
int err = -EINVAL;
@@ -2338,6 +2339,28 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+{
+ u16 vid = 0;
+
+ if (vlan_attr) {
+ if (nla_len(vlan_attr) != sizeof(u16)) {
+ pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+ return -EINVAL;
+ }
+
+ vid = nla_get_u16(vlan_attr);
+
+ if (!vid || vid >= VLAN_VID_MASK) {
+ pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
+ vid);
+ return -EINVAL;
+ }
+ }
+ *p_vid = vid;
+ return 0;
+}
+
static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
@@ -2345,6 +2368,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
u8 *addr;
+ u16 vid;
int err;
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
@@ -2370,6 +2394,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
addr = nla_data(tb[NDA_LLADDR]);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ if (err)
+ return err;
+
err = -EOPNOTSUPP;
/* Support fdb on master device the net/bridge default case */
@@ -2378,7 +2406,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops = br_dev->netdev_ops;
- err = ops->ndo_fdb_add(ndm, tb, dev, addr, nlh->nlmsg_flags);
+ err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
+ nlh->nlmsg_flags);
if (err)
goto out;
else
@@ -2389,9 +2418,10 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
if ((ndm->ndm_flags & NTF_SELF)) {
if (dev->netdev_ops->ndo_fdb_add)
err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
+ vid,
nlh->nlmsg_flags);
else
- err = ndo_dflt_fdb_add(ndm, tb, dev, addr,
+ err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
nlh->nlmsg_flags);
if (!err) {
@@ -2409,7 +2439,7 @@ out:
int ndo_dflt_fdb_del(struct ndmsg *ndm,
struct nlattr *tb[],
struct net_device *dev,
- const unsigned char *addr)
+ const unsigned char *addr, u16 vid)
{
int err = -EINVAL;
@@ -2438,6 +2468,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
struct net_device *dev;
int err = -EINVAL;
__u8 *addr;
+ u16 vid;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
@@ -2465,6 +2496,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
addr = nla_data(tb[NDA_LLADDR]);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ if (err)
+ return err;
+
err = -EOPNOTSUPP;
/* Support fdb on master device the net/bridge default case */
@@ -2474,7 +2509,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
const struct net_device_ops *ops = br_dev->netdev_ops;
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
if (err)
goto out;
@@ -2485,9 +2520,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
/* Embedded bridge, macvlan, and any other device support */
if (ndm->ndm_flags & NTF_SELF) {
if (dev->netdev_ops->ndo_fdb_del)
- err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
+ err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr,
+ vid);
else
- err = ndo_dflt_fdb_del(ndm, tb, dev, addr);
+ err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
if (!err) {
rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 03/21] bridge: convert flags in fbd entry into bitfields
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
new in v4
---
net/bridge/br_private.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8f3f081..5b37077 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -98,9 +98,9 @@ struct net_bridge_fdb_entry
unsigned long updated;
unsigned long used;
mac_addr addr;
- unsigned char is_local;
- unsigned char is_static;
- unsigned char added_by_user;
+ unsigned char is_local:1,
+ is_static:1,
+ added_by_user:1;
__u16 vlan_id;
};
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 02/21] neigh: sort Neighbor Cache Entry Flags
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
new in v4
---
include/uapi/linux/neighbour.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 4a1d7e9..2f043af 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -35,11 +35,10 @@ enum {
*/
#define NTF_USE 0x01
-#define NTF_PROXY 0x08 /* == ATF_PUBL */
-#define NTF_ROUTER 0x80
-
#define NTF_SELF 0x02
#define NTF_MASTER 0x04
+#define NTF_PROXY 0x08 /* == ATF_PUBL */
+#define NTF_ROUTER 0x80
/*
* Neighbor Cache Entry States.
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 01/21] bridge: rename fdb_*_hw to fdb_*_hw_addr to avoid confusion
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417084826-9875-1-git-send-email-jiri@resnulli.us>
The current name might seem that this actually offloads the fdb entry to
hw. So rename it to clearly present that this for hardware address
addition/removal.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
v3->v4:
-no change
v2->v3:
-moved the patch to the patchset head
new in v2 as suggested by DaveM
---
net/bridge/br_fdb.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6f6c95c..08ef4e7 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -90,7 +90,7 @@ static void fdb_rcu_free(struct rcu_head *head)
* are then updated with the new information.
* Called under RTNL.
*/
-static void fdb_add_hw(struct net_bridge *br, const unsigned char *addr)
+static void fdb_add_hw_addr(struct net_bridge *br, const unsigned char *addr)
{
int err;
struct net_bridge_port *p;
@@ -118,7 +118,7 @@ undo:
* the ports with needed information.
* Called under RTNL.
*/
-static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
+static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
{
struct net_bridge_port *p;
@@ -133,7 +133,7 @@ static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
if (f->is_static)
- fdb_del_hw(br, f->addr.addr);
+ fdb_del_hw_addr(br, f->addr.addr);
hlist_del_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
@@ -514,7 +514,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
return -ENOMEM;
fdb->is_local = fdb->is_static = 1;
- fdb_add_hw(br, addr);
+ fdb_add_hw_addr(br, addr);
fdb_notify(br, fdb, RTM_NEWNEIGH);
return 0;
}
@@ -754,19 +754,19 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
fdb->is_local = 1;
if (!fdb->is_static) {
fdb->is_static = 1;
- fdb_add_hw(br, addr);
+ fdb_add_hw_addr(br, addr);
}
} else if (state & NUD_NOARP) {
fdb->is_local = 0;
if (!fdb->is_static) {
fdb->is_static = 1;
- fdb_add_hw(br, addr);
+ fdb_add_hw_addr(br, addr);
}
} else {
fdb->is_local = 0;
if (fdb->is_static) {
fdb->is_static = 0;
- fdb_del_hw(br, addr);
+ fdb_del_hw_addr(br, addr);
}
}
--
1.9.3
^ permalink raw reply related
* [patch net-next v4 00/21] introduce rocker switch driver with hardware accelerated datapath api - phase 1: bridge fdb offload
From: Jiri Pirko @ 2014-11-27 10:40 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
Hi all.
This patchset is just the first phase of switch and switch-ish device
support api in kernel. Note that the api will extend.
So what this patchset includes:
- introduce switchdev api skeleton for implementing switch drivers
- introduce rocker switch driver which implements switchdev api fdb and
bridge set/get link ndos
As to the discussion if there is need to have specific class of device
representing the switch itself, so far we found no need to introduce that.
But we are generally ok with the idea and when the time comes and it will
be needed, it can be easily introduced without any disturbance.
This patchset introduces switch id export through rtnetlink and sysfs,
which is similar to what we have for port id in SR-IOV. I will send iproute2
patchset for showing the switch id for port netdevs once this is applied.
This applies also for the PF_BRIDGE and fdb iproute2 patches.
iproute2 patches are now available here:
https://github.com/jpirko/iproute2-rocker
For detailed description and version history, please see individual patches.
In v4 I reordered the patches leaving rocker patches on the end of the patchset.
Jiri Pirko (10):
bridge: rename fdb_*_hw to fdb_*_hw_addr to avoid confusion
neigh: sort Neighbor Cache Entry Flags
bridge: convert flags in fbd entry into bitfields
net: make vid as a parameter for ndo_fdb_add/ndo_fdb_del
net: rename netdev_phys_port_id to more generic name
net: introduce generic switch devices support
rtnl: expose physical switch id for particular device
net-sysfs: expose physical switch id for particular device
rocker: introduce rocker switch driver
rocker: implement ndo_fdb_dump
Scott Feldman (9):
bridge: call netdev_sw_port_stp_update when bridge port STP status
changes
bridge: add API to notify bridge driver of learned FBD on offloaded
device
bridge: move private brport flags to if_bridge.h so port drivers can
use flags
bridge: add new brport flag LEARNING_SYNC
bridge: add new hwmode swdev
bridge: add brport flags to dflt bridge_getlink
rocker: implement rocker ofdpa flow table manipulation
rocker: implement L2 bridge offloading
rocker: add ndo_bridge_setlink/getlink support for learning policy
Thomas Graf (2):
rocker: Add proper validation of Netlink attributes
rocker: Use logical operators on booleans
Documentation/ABI/testing/sysfs-class-net | 8 +
Documentation/networking/switchdev.txt | 59 +
MAINTAINERS | 14 +
drivers/net/ethernet/Kconfig | 1 +
drivers/net/ethernet/Makefile | 1 +
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
drivers/net/ethernet/emulex/benet/be_main.c | 3 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 +-
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 11 +-
drivers/net/ethernet/rocker/Kconfig | 27 +
drivers/net/ethernet/rocker/Makefile | 5 +
drivers/net/ethernet/rocker/rocker.c | 4374 ++++++++++++++++++++++
drivers/net/ethernet/rocker/rocker.h | 428 +++
drivers/net/macvlan.c | 4 +-
drivers/net/vxlan.c | 4 +-
include/linux/if_bridge.h | 31 +
include/linux/netdevice.h | 39 +-
include/linux/rtnetlink.h | 9 +-
include/net/switchdev.h | 37 +
include/uapi/linux/if_bridge.h | 1 +
include/uapi/linux/if_link.h | 2 +
include/uapi/linux/neighbour.h | 6 +-
net/Kconfig | 1 +
net/Makefile | 3 +
net/bridge/br_fdb.c | 144 +-
net/bridge/br_private.h | 21 +-
net/bridge/br_stp.c | 7 +
net/core/dev.c | 2 +-
net/core/net-sysfs.c | 26 +-
net/core/rtnetlink.c | 119 +-
net/switchdev/Kconfig | 13 +
net/switchdev/Makefile | 5 +
net/switchdev/switchdev.c | 52 +
35 files changed, 5366 insertions(+), 105 deletions(-)
create mode 100644 Documentation/networking/switchdev.txt
create mode 100644 drivers/net/ethernet/rocker/Kconfig
create mode 100644 drivers/net/ethernet/rocker/Makefile
create mode 100644 drivers/net/ethernet/rocker/rocker.c
create mode 100644 drivers/net/ethernet/rocker/rocker.h
create mode 100644 include/net/switchdev.h
create mode 100644 net/switchdev/Kconfig
create mode 100644 net/switchdev/Makefile
create mode 100644 net/switchdev/switchdev.c
--
1.9.3
^ permalink raw reply
* [PATCH] ixgbe, ixgbevf: Add new mbox API to enable MC promiscuous mode
From: Hiroshi Shimamoto @ 2014-11-27 10:39 UTC (permalink / raw)
To: e1000-devel@lists.sourceforge.net
Cc: netdev@vger.kernel.org, Choi, Sy Jong, Hayato Momma,
linux-kernel@vger.kernel.org
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
The limitation of the number of multicast address for VF is not enough
for the large scale server with SR-IOV feature.
IPv6 requires the multicast MAC address for each IP address to handle
the Neighbor Solicitation message.
We couldn't assign over 30 IPv6 addresses to a single VF interface.
The easy way to solve this is enabling multicast promiscuous mode.
It is good to have a functionality to enable multicast promiscuous mode
for each VF from VF driver.
This patch introduces the new mbox API, IXGBE_VF_SET_MC_PROMISC, to
enable/disable multicast promiscuous mode in VF. If multicast promiscuous
mode is enabled the VF can receive all multicast packets.
With this patch, the ixgbevf driver automatically enable multicast
promiscuous mode when the number of multicast addresses is over than 30
if possible.
This also bump the API version up to 1.2 to check whether the API,
IXGBE_VF_SET_MC_PROMISC is available.
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
CC: Choi, Sy Jong <sy.jong.choi@intel.com>
Reviewed-by: Hayato Momma <h-momma@ce.jp.nec.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 +
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 4 ++
drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 80 +++++++++++++++++++++++
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 13 +++-
drivers/net/ethernet/intel/ixgbevf/mbx.h | 4 ++
drivers/net/ethernet/intel/ixgbevf/vf.c | 29 +++++++-
drivers/net/ethernet/intel/ixgbevf/vf.h | 1 +
7 files changed, 130 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 5032a60..2a5e3d3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -144,6 +144,7 @@ struct vf_data_storage {
u16 vlans_enabled;
bool clear_to_send;
bool pf_set_mac;
+ bool vf_mc_promisc;
u16 pf_vlan; /* When set, guest VLAN config not allowed. */
u16 pf_qos;
u16 tx_rate;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index a5cb755..2963557 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -73,6 +73,7 @@ enum ixgbe_pfvf_api_rev {
ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */
ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */
ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */
+ ixgbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */
/* This value should always be last */
ixgbe_mbox_api_unknown, /* indicates that API version is not known */
};
@@ -91,6 +92,9 @@ enum ixgbe_pfvf_api_rev {
/* mailbox API, version 1.1 VF requests */
#define IXGBE_VF_GET_QUEUES 0x09 /* get queue configuration */
+/* mailbox API, version 1.2 VF requests */
+#define IXGBE_VF_SET_MC_PROMISC 0x0a /* VF requests PF to set MC promiscuous */
+
/* GET_QUEUES return data indices within the mailbox */
#define IXGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */
#define IXGBE_VF_RX_QUEUES 2 /* number of Rx queues supported */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 01f7081..427993c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -108,6 +108,10 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter)
adapter->flags2 &= ~(IXGBE_FLAG2_RSC_CAPABLE |
IXGBE_FLAG2_RSC_ENABLED);
+ /* Disable multicast promiscuous mode of each VF at first */
+ for (i = 0; i < adapter->num_vfs; i++)
+ adapter->vfinfo[i].vf_mc_promisc = false;
+
/* enable spoof checking for all VFs */
for (i = 0; i < adapter->num_vfs; i++)
adapter->vfinfo[i].spoofchk_enabled = true;
@@ -310,6 +314,46 @@ int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
return ixgbe_pci_sriov_enable(dev, num_vfs);
}
+static int ixgbe_enable_vf_mc_promisc(struct ixgbe_adapter * adapter, u32 vf)
+{
+ struct ixgbe_hw *hw;
+ u32 vmolr;
+
+ if (adapter->vfinfo[vf].vf_mc_promisc)
+ return 0;
+
+ hw = &adapter->hw;
+ vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+
+ e_info(drv, "VF %u: enabling multicast promiscuous\n", vf);
+
+ vmolr |= IXGBE_VMOLR_MPE;
+
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
+ return 0;
+}
+
+static int ixgbe_disable_vf_mc_promisc(struct ixgbe_adapter * adapter, u32 vf)
+{
+ struct ixgbe_hw *hw;
+ u32 vmolr;
+
+ if (!adapter->vfinfo[vf].vf_mc_promisc)
+ return 0;
+
+ hw = &adapter->hw;
+ vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+
+ e_info(drv, "VF %u: disabling multicast promiscuous\n", vf);
+
+ vmolr &= ~IXGBE_VMOLR_MPE;
+
+ IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
+ return 0;
+}
+
static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
u32 *msgbuf, u32 vf)
{
@@ -324,6 +368,9 @@ static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
u32 mta_reg;
u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+ /* Disable multicast promiscuous first */
+ ixgbe_disable_vf_mc_promisc(adapter, vf);
+
/* only so many hash values supported */
entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES);
@@ -426,6 +473,7 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
#endif /* CONFIG_FCOE */
switch (adapter->vfinfo[vf].vf_api) {
case ixgbe_mbox_api_11:
+ case ixgbe_mbox_api_12:
/*
* Version 1.1 supports jumbo frames on VFs if PF has
* jumbo frames enabled which means legacy VFs are
@@ -696,6 +744,9 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
IXGBE_WRITE_REG(hw, IXGBE_PVFTDWBALn(q_per_pool, vf, i), 0);
}
+ /* Disable multicast promiscuous at reset */
+ ixgbe_disable_vf_mc_promisc(adapter, vf);
+
/* reply to reset with ack and vf mac address */
msgbuf[0] = IXGBE_VF_RESET;
if (!is_zero_ether_addr(vf_mac)) {
@@ -884,6 +935,12 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
switch (api) {
case ixgbe_mbox_api_10:
case ixgbe_mbox_api_11:
+ case ixgbe_mbox_api_12:
+ e_info(drv, "VF %d requested api_version %s\n", vf,
+ (api == ixgbe_mbox_api_12) ? "ixgbe_mbox_api_12" :
+ (api == ixgbe_mbox_api_11) ? "ixgbe_mbox_api_11" :
+ (api == ixgbe_mbox_api_10) ? "ixgbe_mbox_api_10" :
+ "unknown");
adapter->vfinfo[vf].vf_api = api;
return 0;
default:
@@ -907,6 +964,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
switch (adapter->vfinfo[vf].vf_api) {
case ixgbe_mbox_api_20:
case ixgbe_mbox_api_11:
+ case ixgbe_mbox_api_12:
break;
default:
return -1;
@@ -934,6 +992,25 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
return 0;
}
+static int ixgbe_set_vf_mc_promisc(struct ixgbe_adapter *adapter,
+ u32 *msgbuf, u32 vf)
+{
+ bool enable = !!msgbuf[1]; /* msgbuf contains the flag to enable */
+
+ switch (adapter->vfinfo[vf].vf_api) {
+ case ixgbe_mbox_api_12:
+ break;
+ default:
+ return -1;
+ }
+
+ if (enable)
+ return ixgbe_enable_vf_mc_promisc(adapter, vf);
+ else
+ return ixgbe_disable_vf_mc_promisc(adapter, vf);
+}
+
+
static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
{
u32 mbx_size = IXGBE_VFMAILBOX_SIZE;
@@ -990,6 +1067,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
case IXGBE_VF_GET_QUEUES:
retval = ixgbe_get_vf_queues(adapter, msgbuf, vf);
break;
+ case IXGBE_VF_SET_MC_PROMISC:
+ retval = ixgbe_set_vf_mc_promisc(adapter, msgbuf, vf);
+ break;
default:
e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]);
retval = IXGBE_ERR_MBX;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 030a219..3615bdb 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1635,7 +1635,8 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
- int api[] = { ixgbe_mbox_api_11,
+ int api[] = { ixgbe_mbox_api_12,
+ ixgbe_mbox_api_11,
ixgbe_mbox_api_10,
ixgbe_mbox_api_unknown };
int err = 0, idx = 0;
@@ -1649,6 +1650,12 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
idx++;
}
+ dev_info(&adapter->pdev->dev, "mbox api_version = %s\n",
+ (hw->api_version == ixgbe_mbox_api_12) ? "ixgbe_mbox_api_12" :
+ (hw->api_version == ixgbe_mbox_api_11) ? "ixgbe_mbox_api_11" :
+ (hw->api_version == ixgbe_mbox_api_10) ? "ixgbe_mbox_api_10" :
+ "unknown");
+
spin_unlock_bh(&adapter->mbx_lock);
}
@@ -1827,6 +1834,9 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter)
netif_carrier_off(netdev);
+ /* drop multicast promiscuous mode flag */
+ adapter->hw.mac.mc_promisc = false;
+
if (!pci_channel_offline(adapter->pdev))
ixgbevf_reset(adapter);
@@ -3279,6 +3289,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
switch (adapter->hw.api_version) {
case ixgbe_mbox_api_11:
+ case ixgbe_mbox_api_12:
max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
break;
default:
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index 0bc3005..62ef0d8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -86,6 +86,7 @@ enum ixgbe_pfvf_api_rev {
ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */
ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */
ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */
+ ixgbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */
/* This value should always be last */
ixgbe_mbox_api_unknown, /* indicates that API version is not known */
};
@@ -104,6 +105,9 @@ enum ixgbe_pfvf_api_rev {
/* mailbox API, version 1.1 VF requests */
#define IXGBE_VF_GET_QUEUE 0x09 /* get queue configuration */
+/* mailbox API, version 1.2 VF requests */
+#define IXGBE_VF_SET_MC_PROMISC 0x0a /* VF requests PF to set MC promiscuous */
+
/* GET_QUEUES return data indices within the mailbox */
#define IXGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */
#define IXGBE_VF_RX_QUEUES 2 /* number of Rx queues supported */
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 9cddd56..43c1ee3 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -120,6 +120,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
memcpy(hw->mac.perm_addr, addr, ETH_ALEN);
hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD];
+ /* after reset, MC promiscuous mode is disabled */
+ hw->mac.mc_promisc = false;
+
return 0;
}
@@ -327,8 +330,29 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
*/
cnt = netdev_mc_count(netdev);
- if (cnt > 30)
+ if (cnt > 30) {
+ /*
+ * If the API has the capability to handle MC promiscuous
+ * mode, turn it on.
+ */
+ if (hw->api_version == ixgbe_mbox_api_12) {
+ if (!hw->mac.mc_promisc) {
+ struct ixgbevf_adapter *adapter = hw->back;
+
+ dev_info(&adapter->pdev->dev, "Request MC PROMISC\n");
+
+ /* enabling multicast promiscuous */
+ msgbuf[0] = IXGBE_VF_SET_MC_PROMISC;
+ msgbuf[1] = 1;
+ ixgbevf_write_msg_read_ack(hw, msgbuf, 2);
+
+ hw->mac.mc_promisc = true;
+ }
+
+ return 0;
+ }
cnt = 30;
+ }
msgbuf[0] = IXGBE_VF_SET_MULTICAST;
msgbuf[0] |= cnt << IXGBE_VT_MSGINFO_SHIFT;
@@ -344,6 +368,8 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
ixgbevf_write_msg_read_ack(hw, msgbuf, IXGBE_VFMAILBOX_SIZE);
+ hw->mac.mc_promisc = false;
+
return 0;
}
@@ -545,6 +571,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
/* do nothing if API doesn't support ixgbevf_get_queues */
switch (hw->api_version) {
case ixgbe_mbox_api_11:
+ case ixgbe_mbox_api_12:
break;
default:
return 0;
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index aa8cc8d..59d8cf3 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -85,6 +85,7 @@ struct ixgbe_mac_info {
enum ixgbe_mac_type type;
s32 mc_filter_type;
+ bool mc_promisc;
bool get_link_status;
u32 max_tx_queues;
--
1.9.0
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=157005751&iu=/4140/ostg.clktrk
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired
^ permalink raw reply related
* Re: Is this 32-bit NCM?
From: Bjørn Mork @ 2014-11-27 10:03 UTC (permalink / raw)
To: Enrico Mioso; +Cc: youtux, alexxst, linux-usb, netdev
In-Reply-To: <alpine.LNX.2.03.1411271037050.9464@gmail.com>
Enrico Mioso <mrkiko.rs@gmail.com> writes:
> Ok - we can arrive to some ocnclusions regarding the E3272.
> First of all - the modem seems buggy enough to not be able to handle requests
> for different formats. You need to unplug and re-plug it, but this is onlyan
> impression and is reasonable.
>
> Then - the modem will accept to ndisdup the connection with
> at^ndisdup=1,1,"internet"
> but - if we use huawei_cdc_ncm + cdc_ncm we have no flow handling messages and
> the modem stops here.
> If we use the cdc_ncm 32-bit driver (modified) we get lotfs of
> ^dsflorpt
> that's how it should be.
> So I think we can say that something is changing.
> Then there's the alignment problem you mentioned in your previous reply. And
> this is hard to solve.
> could you try to help me understand where the problem is?
> I feel like we are very close to the solution but something isn't working.
> Or might be just try to change the 16 bit driver?
If you use a recent version of the driver as a basis, then you get the
CDC NCM NTB parameters in sysfs (if not, then you need to enable
debugging and look in the log for these values). For example:
bjorn@nemi:~$ grep . /sys/class/net/wwan0/cdc_ncm/*
/sys/class/net/wwan0/cdc_ncm/bmNtbFormatsSupported:0x0001
/sys/class/net/wwan0/cdc_ncm/dwNtbInMaxSize:15360
/sys/class/net/wwan0/cdc_ncm/dwNtbOutMaxSize:15360
/sys/class/net/wwan0/cdc_ncm/min_tx_pkt:13824
/sys/class/net/wwan0/cdc_ncm/rx_max:15360
/sys/class/net/wwan0/cdc_ncm/tx_max:15360
/sys/class/net/wwan0/cdc_ncm/tx_timer_usecs:400
/sys/class/net/wwan0/cdc_ncm/wNdpInAlignment:4
/sys/class/net/wwan0/cdc_ncm/wNdpInDivisor:1
/sys/class/net/wwan0/cdc_ncm/wNdpInPayloadRemainder:0
/sys/class/net/wwan0/cdc_ncm/wNdpOutAlignment:4
/sys/class/net/wwan0/cdc_ncm/wNdpOutDivisor:32
/sys/class/net/wwan0/cdc_ncm/wNdpOutPayloadRemainder:0
/sys/class/net/wwan0/cdc_ncm/wNtbOutMaxDatagrams:32
The possible problem I am thinking of is proper handling of the
wNdp*PayloadRemainder values. See section 3.3.4 "NCM Ethernet Frame
Alignment" in the spec. Which is confusing as hell, but if I understand
it correctly then we are supposed to align the start of the IP packets
(the "payload", _not_ the ethernet frame) to a whole wNdp*Divisor number
as long as the wNdp*PayloadRemainder is 0.
Bjørn
^ permalink raw reply
* Re: [PATCH net] bpf: x86: fix epilogue generation for eBPF programs
From: Daniel Borkmann @ 2014-11-27 9:52 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: David S. Miller, Zi Shen Lim, Eric Dumazet, H. Peter Anvin,
Thomas Gleixner, Ingo Molnar, netdev, linux-kernel
In-Reply-To: <1417064546-4129-1-git-send-email-ast@plumgrid.com>
On 11/27/2014 06:02 AM, Alexei Starovoitov wrote:
> classic BPF has a restriction that last insn is always BPF_RET.
> eBPF doesn't have BPF_RET instruction and this restriction.
> It has BPF_EXIT insn which can appear anywhere in the program
> one or more times and it doesn't have to be last insn.
> Fix eBPF JIT to emit epilogue when first BPF_EXIT is seen
> and all other BPF_EXIT instructions will be emitted as jump.
>
> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
> ---
> Note, this bug is applicable only to native eBPF programs
> which first were introduced in 3.18, so no need to send it
> to stable and therefore no 'Fixes' tag.
Btw, even if it's not sent to -stable, a 'Fixes:' tag is useful
information for backporting and regression tracking, preferably
always mentioned where it can clearly be identified.
> arm64 JIT has the same problem, but the fix is not as trivial,
> so will be done as separate patch.
>
> Since 3.18 can only load eBPF programs and cannot execute them,
> this patch can even be done in net-next only, but I think it's worth
> to apply it to 3.18(net), so that JITed output for native eBPF
> programs is correct when bpf syscall loads it with net.core.bpf_jit_enable=2
Yes, sounds good to me, the condition insn_cnt - 1 is still held
with BPF to eBPF transformations.
> arch/x86/net/bpf_jit_comp.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 3f62734..7e90244 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -178,7 +178,7 @@ static void jit_fill_hole(void *area, unsigned int size)
> }
>
> struct jit_context {
> - unsigned int cleanup_addr; /* epilogue code offset */
> + int cleanup_addr; /* epilogue code offset */
Why this type change here? This seems a bit out of context (I would
have expected a mention of this in the commit message, otherwise).
> bool seen_ld_abs;
> };
>
> @@ -192,6 +192,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
> struct bpf_insn *insn = bpf_prog->insnsi;
> int insn_cnt = bpf_prog->len;
> bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
> + bool seen_exit = false;
> u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
> int i;
> int proglen = 0;
> @@ -854,10 +855,11 @@ common_load:
> goto common_load;
>
> case BPF_JMP | BPF_EXIT:
> - if (i != insn_cnt - 1) {
> + if (seen_exit) {
> jmp_offset = ctx->cleanup_addr - addrs[i];
> goto emit_jmp;
> }
> + seen_exit = true;
> /* update cleanup_addr */
> ctx->cleanup_addr = proglen;
> /* mov rbx, qword ptr [rbp-X] */
>
^ permalink raw reply
* Re: Is this 32-bit NCM?
From: Enrico Mioso @ 2014-11-27 9:45 UTC (permalink / raw)
To: Bjørn Mork; +Cc: youtux, alexxst, linux-usb, netdev
In-Reply-To: <87ppc957h1.fsf@nemi.mork.no>
Ok - we can arrive to some ocnclusions regarding the E3272.
First of all - the modem seems buggy enough to not be able to handle requests
for different formats. You need to unplug and re-plug it, but this is onlyan
impression and is reasonable.
Then - the modem will accept to ndisdup the connection with
at^ndisdup=1,1,"internet"
but - if we use huawei_cdc_ncm + cdc_ncm we have no flow handling messages and
the modem stops here.
If we use the cdc_ncm 32-bit driver (modified) we get lotfs of
^dsflorpt
that's how it should be.
So I think we can say that something is changing.
Then there's the alignment problem you mentioned in your previous reply. And
this is hard to solve.
could you try to help me understand where the problem is?
I feel like we are very close to the solution but something isn't working.
Or might be just try to change the 16 bit driver?
http://www.gstorm.eu/cdc_ncm.c
^ permalink raw reply
* Re: Is this 32-bit NCM?
From: Enrico Mioso @ 2014-11-27 9:22 UTC (permalink / raw)
To: Bjørn Mork; +Cc: youtux, linux-usb, netdev, Alex Strizhevsky
In-Reply-To: <87ppc957h1.fsf@nemi.mork.no>
[-- Attachment #1: Type: TEXT/PLAIN, Size: 4772 bytes --]
So ... might be this is happening even in 16 bit mode?
With our test device we are having no luck: the firmware now at least prints
dsflowrpt messages, but we seem to not go farther than this.
so - what should we try in your opinion? Go try with the 16 bit mode and try to
fix the alignment issues?
Any help would be greatly apreciated ... this is becoming a personal challenge,
even facing a possibly growing number of devices.
On Thu, 27 Nov 2014, Bjørn Mork wrote:
==Date: Thu, 27 Nov 2014 10:16:26
==From: Bjørn Mork <bjorn@mork.no>
==To: Enrico Mioso <mrkiko.rs@gmail.com>
==Cc: youtux@gmail.com, linux-usb@vger.kernel.org, netdev@vger.kernel.org
==Subject: Re: Is this 32-bit NCM?
==
==Enrico Mioso <mrkiko.rs@gmail.com> writes:
==
==> I am sorry for the indecent capture - but I didn't manage to get a
==> better one till now.
==> We modified the driver: but I wanted to be sure ... is this 32-bit NCM?
==
==
==Yes, it looks OK to me. I have only looked at it manually, so I could
==have missed something. But the signatures and header field lengths seem
==fine.
==
==I did notice one issue though, which might be related to more generic
==alignment bugs in the driver?
==
==The modem sends this. Notice how the single ethernet frame inside starts
==at 0072, skipping 2 unused bytes, making the IP packet start at a 4-byte
==aligned 0080:
==
==0040 6e 63 6d 68 10 00 c7 00 cd 00 00 00 10 00 00 00 ncmh............
==0050 6e 63 6d 30 20 00 00 00 00 00 00 00 00 00 00 00 ncm0 ...........
==0060 32 00 00 00 9b 00 00 00 00 00 00 00 00 00 00 00 2...............
==0070 00 00 0c 5b 8f 27 9a 64 4c 54 99 45 e5 d5 08 00 ...[.'.dLT.E....
==0080 45 00 00 8d 05 7f 40 00 33 06 af d8 95 9a a7 5b E.....@.3......[
==0090 0a 1f 4a ff 01 bb ce 5c c0 d5 a0 ba 8c 57 85 63 ..J....\.....W.c
==00a0 80 18 09 e6 35 0b 00 00 01 01 08 0a 4d 8b 3f fe ....5.......M.?.
==00b0 00 78 a8 a7 16 1a 21 17 60 0e 73 21 c7 d6 77 cd .x....!.`.s!..w.
==00c0 92 00 69 26 54 28 cb 19 25 2a 16 6b 82 6b bf ba ..i&T(..%*.k.k..
==00d0 46 a7 8a 6e eb 36 59 25 eb 9b e2 bb 4b 53 f2 4d F..n.6Y%....KS.M
==00e0 ce 11 dd 88 06 e8 23 24 8a 96 03 d9 61 31 34 9b ......#$....a14.
==00f0 68 f3 e9 5c fd d1 77 5f 18 75 fd 10 4d cb e7 29 h..\..w_.u..M..)
==0100 96 89 3d 75 fe 11 15 82 28 88 5b ba b0 ..=u....(.[..
==
==
==
==While we, after a large number of unused 0 bytes due to the fixed size
==NDP allocation, start the ethernet frame at 01a8, making the IP packet
==start at a 2-byte aligned offset at 01b6:
==
==0040 6e 63 6d 68 10 00 d5 00 aa 01 00 00 10 00 00 00 ncmh............
==0050 6e 63 6d 30 20 00 00 00 00 00 00 00 00 00 00 00 ncm0 ...........
==0060 68 01 00 00 42 00 00 00 00 00 00 00 00 00 00 00 h...B...........
==0070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0080 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==00a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==00b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==00c0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==00d0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==00e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==00f0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0100 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0110 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0120 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0130 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0140 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0150 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0160 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0170 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0180 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==0190 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
==01a0 00 00 00 00 00 00 00 00 4c 54 99 45 e5 d5 0c 5b ........LT.E...[
==01b0 8f 27 9a 64 08 00 45 00 00 34 84 0b 40 00 40 06 .'.d..E..4..@.@.
==01c0 24 a5 0a 1f 4a ff 95 9a a7 5b ce 5c 01 bb 8c 57 $...J....[.\...W
==01d0 85 63 c0 d5 a1 13 80 10 03 59 0f 23 00 00 01 01 .c.......Y.#....
==01e0 08 0a 00 79 00 6f 4d 8b 3f fe ...y.oM.?.
==
==
==This is not so nice, and I do wonder if it is according to the requested
==alignment from the modem? If not, then that is more likely to break
==stuff than the 16-bit vs 32-bit header issue.
==
==
==
==Bjørn
==
^ permalink raw reply
* [PATCH net] rtnetlink: release net refcnt on error in do_setlink()
From: Nicolas Dichtel @ 2014-11-27 9:16 UTC (permalink / raw)
To: davem; +Cc: netdev, Nicolas Dichtel, Eric W. Biederman
rtnl_link_get_net() holds a reference on the 'struct net', we need to release
it in case of error.
CC: Eric W. Biederman <ebiederm@xmission.com>
Fixes: b51642f6d77b ("net: Enable a userns root rtnl calls that are safe for unprivilged users")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
net/core/rtnetlink.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b9b7dfaf202b..76321ea442c3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1498,6 +1498,7 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
err = -EPERM;
goto errout;
}
--
2.1.0
^ permalink raw reply related
* Re: Is this 32-bit NCM?
From: Bjørn Mork @ 2014-11-27 9:16 UTC (permalink / raw)
To: Enrico Mioso; +Cc: youtux, linux-usb, netdev
In-Reply-To: <alpine.LNX.2.03.1411262250160.4356@gmail.com>
Enrico Mioso <mrkiko.rs@gmail.com> writes:
> I am sorry for the indecent capture - but I didn't manage to get a
> better one till now.
> We modified the driver: but I wanted to be sure ... is this 32-bit NCM?
Yes, it looks OK to me. I have only looked at it manually, so I could
have missed something. But the signatures and header field lengths seem
fine.
I did notice one issue though, which might be related to more generic
alignment bugs in the driver?
The modem sends this. Notice how the single ethernet frame inside starts
at 0072, skipping 2 unused bytes, making the IP packet start at a 4-byte
aligned 0080:
0040 6e 63 6d 68 10 00 c7 00 cd 00 00 00 10 00 00 00 ncmh............
0050 6e 63 6d 30 20 00 00 00 00 00 00 00 00 00 00 00 ncm0 ...........
0060 32 00 00 00 9b 00 00 00 00 00 00 00 00 00 00 00 2...............
0070 00 00 0c 5b 8f 27 9a 64 4c 54 99 45 e5 d5 08 00 ...[.'.dLT.E....
0080 45 00 00 8d 05 7f 40 00 33 06 af d8 95 9a a7 5b E.....@.3......[
0090 0a 1f 4a ff 01 bb ce 5c c0 d5 a0 ba 8c 57 85 63 ..J....\.....W.c
00a0 80 18 09 e6 35 0b 00 00 01 01 08 0a 4d 8b 3f fe ....5.......M.?.
00b0 00 78 a8 a7 16 1a 21 17 60 0e 73 21 c7 d6 77 cd .x....!.`.s!..w.
00c0 92 00 69 26 54 28 cb 19 25 2a 16 6b 82 6b bf ba ..i&T(..%*.k.k..
00d0 46 a7 8a 6e eb 36 59 25 eb 9b e2 bb 4b 53 f2 4d F..n.6Y%....KS.M
00e0 ce 11 dd 88 06 e8 23 24 8a 96 03 d9 61 31 34 9b ......#$....a14.
00f0 68 f3 e9 5c fd d1 77 5f 18 75 fd 10 4d cb e7 29 h..\..w_.u..M..)
0100 96 89 3d 75 fe 11 15 82 28 88 5b ba b0 ..=u....(.[..
While we, after a large number of unused 0 bytes due to the fixed size
NDP allocation, start the ethernet frame at 01a8, making the IP packet
start at a 2-byte aligned offset at 01b6:
0040 6e 63 6d 68 10 00 d5 00 aa 01 00 00 10 00 00 00 ncmh............
0050 6e 63 6d 30 20 00 00 00 00 00 00 00 00 00 00 00 ncm0 ...........
0060 68 01 00 00 42 00 00 00 00 00 00 00 00 00 00 00 h...B...........
0070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0080 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0090 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00a0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00b0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00c0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00d0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00e0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00f0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0100 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0110 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0120 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0130 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0140 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0150 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0160 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0170 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0180 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0190 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
01a0 00 00 00 00 00 00 00 00 4c 54 99 45 e5 d5 0c 5b ........LT.E...[
01b0 8f 27 9a 64 08 00 45 00 00 34 84 0b 40 00 40 06 .'.d..E..4..@.@.
01c0 24 a5 0a 1f 4a ff 95 9a a7 5b ce 5c 01 bb 8c 57 $...J....[.\...W
01d0 85 63 c0 d5 a1 13 80 10 03 59 0f 23 00 00 01 01 .c.......Y.#....
01e0 08 0a 00 79 00 6f 4d 8b 3f fe ...y.oM.?.
This is not so nice, and I do wonder if it is according to the requested
alignment from the modem? If not, then that is more likely to break
stuff than the 16-bit vs 32-bit header issue.
Bjørn
^ permalink raw reply
* Re: [PATCH rfc] packet: zerocopy packet_snd
From: Jason Wang @ 2014-11-27 9:10 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: Willem de Bruijn, Network Development, David Miller, Eric Dumazet,
Daniel Borkmann
In-Reply-To: <20141126211748.GA11904@redhat.com>
On Thu, Nov 27, 2014 at 5:17 AM, Michael S. Tsirkin <mst@redhat.com>
wrote:
> On Wed, Nov 26, 2014 at 02:59:34PM -0500, Willem de Bruijn wrote:
>> > The main problem with zero copy ATM is with queueing disciplines
>> > which might keep the socket around essentially forever.
>> > The case was described here:
>> > https://lkml.org/lkml/2014/1/17/105
>> > and of course this will make it more serious now that
>> > more applications will be able to do this, so
>> > chances that an administrator enables this
>> > are higher.
>>
>> The denial of service issue raised there, that a single queue can
>> block an entire virtio-net device, is less problematic in the case
>> of
>> packet sockets. A socket can run out of sk_wmem_alloc, but a prudent
>> application can increase the limit or use separate sockets for
>> separate flows.
>
> Socket per flow? Maybe just use TCP then? increasing the limit
> sounds like a wrong solution, it hurts security.
>
>> > One possible solution is some kind of timer orphaning frags
>> > for skbs that have been around for too long.
>>
>> Perhaps this can be approximated without an explicit timer by
>> calling
>> skb_copy_ubufs on enqueue whenever qlen exceeds a threshold value?
>
> Hard to say. Will have to see that patch to judge how robust this is.
This could not work, consider if the threshold is greater than vring
size
or vhost_net pending limit, transmission may still be blocked.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox