* [patch net-next v2 05/10] rocker: introduce rocker switch driver
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>
This patch introduces the first driver to benefit from the switchdev
infrastructure and to implement newly introduced switch ndos. This is a
driver for emulated switch chip implemented in qemu:
https://github.com/sfeldma/qemu-rocker/
This patch is a result of joint work with Scott Feldman.
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
MAINTAINERS | 7 +
drivers/net/ethernet/Kconfig | 1 +
drivers/net/ethernet/Makefile | 1 +
drivers/net/ethernet/rocker/Kconfig | 27 +
drivers/net/ethernet/rocker/Makefile | 5 +
drivers/net/ethernet/rocker/rocker.c | 2060 ++++++++++++++++++++++++++++++++++
drivers/net/ethernet/rocker/rocker.h | 427 +++++++
7 files changed, 2528 insertions(+)
create mode 100644 drivers/net/ethernet/rocker/Kconfig
create mode 100644 drivers/net/ethernet/rocker/Makefile
create mode 100644 drivers/net/ethernet/rocker/rocker.c
create mode 100644 drivers/net/ethernet/rocker/rocker.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 776e078..7e15b50 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7807,6 +7807,13 @@ F: drivers/hid/hid-roccat*
F: include/linux/hid-roccat*
F: Documentation/ABI/*/sysfs-driver-hid-roccat*
+ROCKER DRIVER
+M: Jiri Pirko <jiri@resnulli.us>
+M: Scott Feldman <sfeldma@gmail.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/rocker/
+
ROCKETPORT DRIVER
P: Comtrol Corp.
W: http://www.comtrol.com
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1ed1fbb..df76050 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -155,6 +155,7 @@ source "drivers/net/ethernet/qualcomm/Kconfig"
source "drivers/net/ethernet/realtek/Kconfig"
source "drivers/net/ethernet/renesas/Kconfig"
source "drivers/net/ethernet/rdc/Kconfig"
+source "drivers/net/ethernet/rocker/Kconfig"
config S6GMAC
tristate "S6105 GMAC ethernet support"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 6e0b629..bf56f8b 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -65,6 +65,7 @@ obj-$(CONFIG_NET_VENDOR_QUALCOMM) += qualcomm/
obj-$(CONFIG_NET_VENDOR_REALTEK) += realtek/
obj-$(CONFIG_SH_ETH) += renesas/
obj-$(CONFIG_NET_VENDOR_RDC) += rdc/
+obj-$(CONFIG_NET_VENDOR_ROCKER) += rocker/
obj-$(CONFIG_S6GMAC) += s6gmac.o
obj-$(CONFIG_NET_VENDOR_SAMSUNG) += samsung/
obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
diff --git a/drivers/net/ethernet/rocker/Kconfig b/drivers/net/ethernet/rocker/Kconfig
new file mode 100644
index 0000000..11a850e
--- /dev/null
+++ b/drivers/net/ethernet/rocker/Kconfig
@@ -0,0 +1,27 @@
+#
+# Rocker device configuration
+#
+
+config NET_VENDOR_ROCKER
+ bool "Rocker devices"
+ default y
+ ---help---
+ If you have a network device belonging to this class, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Rocker devices. If you say Y, you will be asked for
+ your specific card in the following questions.
+
+if NET_VENDOR_ROCKER
+
+config ROCKER
+ tristate "Rocker switch driver (EXPERIMENTAL)"
+ depends on PCI && NET_SWITCHDEV
+ ---help---
+ This driver supports Rocker switch device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rocker.
+
+endif # NET_VENDOR_ROCKER
diff --git a/drivers/net/ethernet/rocker/Makefile b/drivers/net/ethernet/rocker/Makefile
new file mode 100644
index 0000000..f85fb12
--- /dev/null
+++ b/drivers/net/ethernet/rocker/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Rocker network device drivers.
+#
+
+obj-$(CONFIG_ROCKER) += rocker.o
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
new file mode 100644
index 0000000..ebad09c
--- /dev/null
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -0,0 +1,2060 @@
+/*
+ * drivers/net/ethernet/rocker/rocker.c - Rocker switch device driver
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/spinlock.h>
+#include <linux/crc32.h>
+#include <linux/sort.h>
+#include <linux/random.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/switchdev.h>
+#include <net/rtnetlink.h>
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+#include <generated/utsrelease.h>
+
+#include "rocker.h"
+
+static const char rocker_driver_name[] = "rocker";
+
+static const struct pci_device_id rocker_pci_id_table[] = {
+ {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_ROCKER), 0},
+ {0, }
+};
+
+struct rocker_desc_info {
+ char *data; /* mapped */
+ size_t data_size;
+ size_t tlv_size;
+ struct rocker_desc *desc;
+ DEFINE_DMA_UNMAP_ADDR(mapaddr);
+};
+
+struct rocker_dma_ring_info {
+ size_t size;
+ u32 head;
+ u32 tail;
+ struct rocker_desc *desc; /* mapped */
+ dma_addr_t mapaddr;
+ struct rocker_desc_info *desc_info;
+ unsigned int type;
+};
+
+struct rocker;
+
+struct rocker_port {
+ struct net_device *dev;
+ struct rocker *rocker;
+ unsigned int port_number;
+ u32 lport;
+ struct napi_struct napi_tx;
+ struct napi_struct napi_rx;
+ struct rocker_dma_ring_info tx_ring;
+ struct rocker_dma_ring_info rx_ring;
+};
+
+struct rocker {
+ struct pci_dev *pdev;
+ u8 __iomem *hw_addr;
+ struct msix_entry *msix_entries;
+ unsigned int port_count;
+ struct rocker_port **ports;
+ struct {
+ u64 id;
+ } hw;
+ spinlock_t cmd_ring_lock;
+ struct rocker_dma_ring_info cmd_ring;
+ struct rocker_dma_ring_info event_ring;
+};
+
+struct rocker_wait {
+ wait_queue_head_t wait;
+ bool done;
+ bool nowait;
+};
+
+static void rocker_wait_reset(struct rocker_wait *wait)
+{
+ wait->done = false;
+ wait->nowait = false;
+}
+
+static void rocker_wait_init(struct rocker_wait *wait)
+{
+ init_waitqueue_head(&wait->wait);
+ rocker_wait_reset(wait);
+}
+
+static struct rocker_wait *rocker_wait_create(gfp_t gfp)
+{
+ struct rocker_wait *wait;
+
+ wait = kmalloc(sizeof(*wait), gfp);
+ if (!wait)
+ return NULL;
+ rocker_wait_init(wait);
+ return wait;
+}
+
+static void rocker_wait_destroy(struct rocker_wait *work)
+{
+ kfree(work);
+}
+
+static bool rocker_wait_event_timeout(struct rocker_wait *wait,
+ unsigned long timeout)
+{
+ wait_event_timeout(wait->wait, wait->done, HZ / 10);
+ if (!wait->done)
+ return false;
+ return true;
+}
+
+static void rocker_wait_wake_up(struct rocker_wait *wait)
+{
+ wait->done = true;
+ wake_up(&wait->wait);
+}
+
+static u32 rocker_msix_vector(struct rocker *rocker, unsigned int vector)
+{
+ return rocker->msix_entries[vector].vector;
+}
+
+static u32 rocker_msix_tx_vector(struct rocker_port *rocker_port)
+{
+ return rocker_msix_vector(rocker_port->rocker,
+ ROCKER_MSIX_VEC_TX(rocker_port->port_number));
+}
+
+static u32 rocker_msix_rx_vector(struct rocker_port *rocker_port)
+{
+ return rocker_msix_vector(rocker_port->rocker,
+ ROCKER_MSIX_VEC_RX(rocker_port->port_number));
+}
+
+#define rocker_write32(rocker, reg, val) \
+ writel((val), (rocker)->hw_addr + (ROCKER_ ## reg))
+#define rocker_read32(rocker, reg) \
+ readl((rocker)->hw_addr + (ROCKER_ ## reg))
+#define rocker_write64(rocker, reg, val) \
+ writeq((val), (rocker)->hw_addr + (ROCKER_ ## reg))
+#define rocker_read64(rocker, reg) \
+ readq((rocker)->hw_addr + (ROCKER_ ## reg))
+
+/*****************************
+ * HW basic testing functions
+ *****************************/
+
+static int rocker_reg_test(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ u64 test_reg;
+ u64 rnd;
+
+ rnd = prandom_u32();
+ rnd >>= 1;
+ rocker_write32(rocker, TEST_REG, rnd);
+ test_reg = rocker_read32(rocker, TEST_REG);
+ if (test_reg != rnd * 2) {
+ dev_err(&pdev->dev, "unexpected 32bit register value %08llx, expected %08llx\n",
+ test_reg, rnd * 2);
+ return -EIO;
+ }
+
+ rnd = prandom_u32();
+ rnd <<= 31;
+ rnd |= prandom_u32();
+ rocker_write64(rocker, TEST_REG64, rnd);
+ test_reg = rocker_read64(rocker, TEST_REG64);
+ if (test_reg != rnd * 2) {
+ dev_err(&pdev->dev, "unexpected 64bit register value %16llx, expected %16llx\n",
+ test_reg, rnd * 2);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int rocker_dma_test_one(struct rocker *rocker, struct rocker_wait *wait,
+ u32 test_type, dma_addr_t dma_handle,
+ unsigned char *buf, unsigned char *expect,
+ size_t size)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int i;
+
+ rocker_wait_reset(wait);
+ rocker_write32(rocker, TEST_DMA_CTRL, test_type);
+
+ if (!rocker_wait_event_timeout(wait, HZ / 10)) {
+ dev_err(&pdev->dev, "no interrupt received within a timeout\n");
+ return -EIO;
+ }
+
+ for (i = 0; i < size; i++) {
+ if (buf[i] != expect[i]) {
+ dev_err(&pdev->dev, "unexpected memory content %02x at byte %x\n, %02x expected",
+ buf[i], i, expect[i]);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+#define ROCKER_TEST_DMA_BUF_SIZE (PAGE_SIZE * 4)
+#define ROCKER_TEST_DMA_FILL_PATTERN 0x96
+
+static int rocker_dma_test_offset(struct rocker *rocker,
+ struct rocker_wait *wait, int offset)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ unsigned char *alloc;
+ unsigned char *buf;
+ unsigned char *expect;
+ dma_addr_t dma_handle;
+ int i;
+ int err;
+
+ alloc = kzalloc(ROCKER_TEST_DMA_BUF_SIZE * 2 + offset,
+ GFP_KERNEL | GFP_DMA);
+ if (!alloc)
+ return -ENOMEM;
+ buf = alloc + offset;
+ expect = buf + ROCKER_TEST_DMA_BUF_SIZE;
+
+ dma_handle = pci_map_single(pdev, buf, ROCKER_TEST_DMA_BUF_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(pdev, dma_handle)) {
+ err = -EIO;
+ goto free_alloc;
+ }
+
+ rocker_write64(rocker, TEST_DMA_ADDR, dma_handle);
+ rocker_write32(rocker, TEST_DMA_SIZE, ROCKER_TEST_DMA_BUF_SIZE);
+
+ memset(expect, ROCKER_TEST_DMA_FILL_PATTERN, ROCKER_TEST_DMA_BUF_SIZE);
+ err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_FILL,
+ dma_handle, buf, expect,
+ ROCKER_TEST_DMA_BUF_SIZE);
+ if (err)
+ goto unmap;
+
+ memset(expect, 0, ROCKER_TEST_DMA_BUF_SIZE);
+ err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_CLEAR,
+ dma_handle, buf, expect,
+ ROCKER_TEST_DMA_BUF_SIZE);
+ if (err)
+ goto unmap;
+
+ prandom_bytes(buf, ROCKER_TEST_DMA_BUF_SIZE);
+ for (i = 0; i < ROCKER_TEST_DMA_BUF_SIZE; i++)
+ expect[i] = ~buf[i];
+ err = rocker_dma_test_one(rocker, wait, ROCKER_TEST_DMA_CTRL_INVERT,
+ dma_handle, buf, expect,
+ ROCKER_TEST_DMA_BUF_SIZE);
+ if (err)
+ goto unmap;
+
+unmap:
+ pci_unmap_single(pdev, dma_handle, ROCKER_TEST_DMA_BUF_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+free_alloc:
+ kfree(alloc);
+
+ return err;
+}
+
+static int rocker_dma_test(struct rocker *rocker, struct rocker_wait *wait)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < 8; i++) {
+ err = rocker_dma_test_offset(rocker, wait, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static irqreturn_t rocker_test_irq_handler(int irq, void *dev_id)
+{
+ struct rocker_wait *wait = dev_id;
+
+ rocker_wait_wake_up(wait);
+
+ return IRQ_HANDLED;
+}
+
+static int rocker_basic_hw_test(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_wait wait;
+ int err;
+
+ err = rocker_reg_test(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "reg test failed\n");
+ return err;
+ }
+
+ err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST),
+ rocker_test_irq_handler, 0,
+ rocker_driver_name, &wait);
+ if (err) {
+ dev_err(&pdev->dev, "cannot assign test irq\n");
+ return err;
+ }
+
+ rocker_wait_init(&wait);
+ rocker_write32(rocker, TEST_IRQ, ROCKER_MSIX_VEC_TEST);
+
+ if (!rocker_wait_event_timeout(&wait, HZ / 10)) {
+ dev_err(&pdev->dev, "no interrupt received within a timeout\n");
+ err = -EIO;
+ goto free_irq;
+ }
+
+ err = rocker_dma_test(rocker, &wait);
+ if (err)
+ dev_err(&pdev->dev, "dma test failed\n");
+
+free_irq:
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_TEST), &wait);
+ return err;
+}
+
+/******
+ * TLV
+ ******/
+
+#define ROCKER_TLV_ALIGNTO 8U
+#define ROCKER_TLV_ALIGN(len) \
+ (((len) + ROCKER_TLV_ALIGNTO - 1) & ~(ROCKER_TLV_ALIGNTO - 1))
+#define ROCKER_TLV_HDRLEN ROCKER_TLV_ALIGN(sizeof(struct rocker_tlv))
+
+/* <------- ROCKER_TLV_HDRLEN -------> <--- ROCKER_TLV_ALIGN(payload) --->
+ * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct rocker_tlv) | ing | | ing |
+ * +-----------------------------+- - -+- - - - - - - - - - - - - - -+- - -+
+ * <--------------------------- tlv->len -------------------------->
+ */
+
+static struct rocker_tlv *rocker_tlv_next(const struct rocker_tlv *tlv,
+ int *remaining)
+{
+ int totlen = ROCKER_TLV_ALIGN(tlv->len);
+
+ *remaining -= totlen;
+ return (struct rocker_tlv *) ((char *) tlv + totlen);
+}
+
+static int rocker_tlv_ok(const struct rocker_tlv *tlv, int remaining)
+{
+ return remaining >= (int) ROCKER_TLV_HDRLEN &&
+ tlv->len >= ROCKER_TLV_HDRLEN &&
+ tlv->len <= remaining;
+}
+
+#define rocker_tlv_for_each(pos, head, len, rem) \
+ for (pos = head, rem = len; \
+ rocker_tlv_ok(pos, rem); \
+ pos = rocker_tlv_next(pos, &(rem)))
+
+#define rocker_tlv_for_each_nested(pos, tlv, rem) \
+ rocker_tlv_for_each(pos, rocker_tlv_data(tlv), \
+ rocker_tlv_len(tlv), rem)
+
+static int rocker_tlv_attr_size(int payload)
+{
+ return ROCKER_TLV_HDRLEN + payload;
+}
+
+static int rocker_tlv_total_size(int payload)
+{
+ return ROCKER_TLV_ALIGN(rocker_tlv_attr_size(payload));
+}
+
+static int rocker_tlv_padlen(int payload)
+{
+ return rocker_tlv_total_size(payload) - rocker_tlv_attr_size(payload);
+}
+
+static int rocker_tlv_type(const struct rocker_tlv *tlv)
+{
+ return tlv->type;
+}
+
+static void *rocker_tlv_data(const struct rocker_tlv *tlv)
+{
+ return (char *) tlv + ROCKER_TLV_HDRLEN;
+}
+
+static int rocker_tlv_len(const struct rocker_tlv *tlv)
+{
+ return tlv->len - ROCKER_TLV_HDRLEN;
+}
+
+static u8 rocker_tlv_get_u8(const struct rocker_tlv *tlv)
+{
+ return *(u8 *) rocker_tlv_data(tlv);
+}
+
+static u16 rocker_tlv_get_u16(const struct rocker_tlv *tlv)
+{
+ return *(u16 *) rocker_tlv_data(tlv);
+}
+
+static u32 rocker_tlv_get_u32(const struct rocker_tlv *tlv)
+{
+ return *(u32 *) rocker_tlv_data(tlv);
+}
+
+static u64 rocker_tlv_get_u64(const struct rocker_tlv *tlv)
+{
+ return *(u64 *) rocker_tlv_data(tlv);
+}
+
+static void rocker_tlv_parse(struct rocker_tlv **tb, int maxtype,
+ const char *buf, int buf_len)
+{
+ const struct rocker_tlv *tlv;
+ const struct rocker_tlv *head = (const struct rocker_tlv *) buf;
+ int rem;
+
+ memset(tb, 0, sizeof(struct rocker_tlv *) * (maxtype + 1));
+
+ rocker_tlv_for_each(tlv, head, buf_len, rem) {
+ u32 type = rocker_tlv_type(tlv);
+
+ if (type > 0 && type <= maxtype)
+ tb[type] = (struct rocker_tlv *) tlv;
+ }
+}
+
+static void rocker_tlv_parse_nested(struct rocker_tlv **tb, int maxtype,
+ const struct rocker_tlv *tlv)
+{
+ rocker_tlv_parse(tb, maxtype, rocker_tlv_data(tlv),
+ rocker_tlv_len(tlv));
+}
+
+static void rocker_tlv_parse_desc(struct rocker_tlv **tb, int maxtype,
+ struct rocker_desc_info *desc_info)
+{
+ rocker_tlv_parse(tb, maxtype, desc_info->data,
+ desc_info->desc->tlv_size);
+}
+
+static struct rocker_tlv *rocker_tlv_start(struct rocker_desc_info *desc_info)
+{
+ return (struct rocker_tlv *) ((char *) desc_info->data +
+ desc_info->tlv_size);
+}
+
+static int rocker_tlv_put(struct rocker_desc_info *desc_info,
+ int attrtype, int attrlen, const void *data)
+{
+ int tail_room = desc_info->data_size - desc_info->tlv_size;
+ int total_size = rocker_tlv_total_size(attrlen);
+ struct rocker_tlv *tlv;
+
+ if (unlikely(tail_room < total_size))
+ return -EMSGSIZE;
+
+ tlv = rocker_tlv_start(desc_info);
+ desc_info->tlv_size += total_size;
+ tlv->type = attrtype;
+ tlv->len = rocker_tlv_attr_size(attrlen);
+ memcpy(rocker_tlv_data(tlv), data, attrlen);
+ memset((char *) tlv + tlv->len, 0, rocker_tlv_padlen(attrlen));
+ return 0;
+}
+
+static int rocker_tlv_put_u8(struct rocker_desc_info *desc_info,
+ int attrtype, u8 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u8), &value);
+}
+
+static int rocker_tlv_put_u16(struct rocker_desc_info *desc_info,
+ int attrtype, u16 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u16), &value);
+}
+
+static int rocker_tlv_put_u32(struct rocker_desc_info *desc_info,
+ int attrtype, u32 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u32), &value);
+}
+
+static int rocker_tlv_put_u64(struct rocker_desc_info *desc_info,
+ int attrtype, u64 value)
+{
+ return rocker_tlv_put(desc_info, attrtype, sizeof(u64), &value);
+}
+
+static struct rocker_tlv *
+rocker_tlv_nest_start(struct rocker_desc_info *desc_info, int attrtype)
+{
+ struct rocker_tlv *start = rocker_tlv_start(desc_info);
+
+ if (rocker_tlv_put(desc_info, attrtype, 0, NULL) < 0)
+ return NULL;
+
+ return start;
+}
+
+static void rocker_tlv_nest_end(struct rocker_desc_info *desc_info,
+ struct rocker_tlv *start)
+{
+ start->len = (char *) rocker_tlv_start(desc_info) - (char *) start;
+}
+
+static void rocker_tlv_nest_cancel(struct rocker_desc_info *desc_info,
+ struct rocker_tlv *start)
+{
+ desc_info->tlv_size = (char *) start - desc_info->data;
+}
+
+/******************************************
+ * DMA rings and descriptors manipulations
+ ******************************************/
+
+static u32 __pos_inc(u32 pos, size_t limit)
+{
+ return ++pos == limit ? 0 : pos;
+}
+
+static int rocker_desc_err(struct rocker_desc_info *desc_info)
+{
+ return -(desc_info->desc->comp_err & ~ROCKER_DMA_DESC_COMP_ERR_GEN);
+}
+
+static void rocker_desc_gen_clear(struct rocker_desc_info *desc_info)
+{
+ desc_info->desc->comp_err &= ~ROCKER_DMA_DESC_COMP_ERR_GEN;
+}
+
+static bool rocker_desc_gen(struct rocker_desc_info *desc_info)
+{
+ u32 comp_err = desc_info->desc->comp_err;
+
+ return comp_err & ROCKER_DMA_DESC_COMP_ERR_GEN ? true : false;
+}
+
+static void *rocker_desc_cookie_ptr_get(struct rocker_desc_info *desc_info)
+{
+ return (void *) desc_info->desc->cookie;
+}
+
+static void rocker_desc_cookie_ptr_set(struct rocker_desc_info *desc_info,
+ void *ptr)
+{
+ desc_info->desc->cookie = (long) ptr;
+}
+
+static struct rocker_desc_info *
+rocker_desc_head_get(struct rocker_dma_ring_info *info)
+{
+ static struct rocker_desc_info *desc_info;
+ u32 head = __pos_inc(info->head, info->size);
+
+ desc_info = &info->desc_info[info->head];
+ if (head == info->tail)
+ return NULL; /* ring full */
+ desc_info->tlv_size = 0;
+ return desc_info;
+}
+
+static void rocker_desc_commit(struct rocker_desc_info *desc_info)
+{
+ desc_info->desc->buf_size = desc_info->data_size;
+ desc_info->desc->tlv_size = desc_info->tlv_size;
+}
+
+static void rocker_desc_head_set(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ struct rocker_desc_info *desc_info)
+{
+ u32 head = __pos_inc(info->head, info->size);
+
+ BUG_ON(head == info->tail);
+ rocker_desc_commit(desc_info);
+ info->head = head;
+ rocker_write32(rocker, DMA_DESC_HEAD(info->type), head);
+}
+
+static struct rocker_desc_info *
+rocker_desc_tail_get(struct rocker_dma_ring_info *info)
+{
+ static struct rocker_desc_info *desc_info;
+
+ if (info->tail == info->head)
+ return NULL; /* nothing to be done between head and tail */
+ desc_info = &info->desc_info[info->tail];
+ if (!rocker_desc_gen(desc_info))
+ return NULL; /* gen bit not set, desc is not ready yet */
+ info->tail = __pos_inc(info->tail, info->size);
+ desc_info->tlv_size = desc_info->desc->tlv_size;
+ return desc_info;
+}
+
+static void rocker_dma_ring_credits_set(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ u32 credits)
+{
+ if (credits)
+ rocker_write32(rocker, DMA_DESC_CREDITS(info->type), credits);
+}
+
+static unsigned long rocker_dma_ring_size_fix(size_t size)
+{
+ return max(ROCKER_DMA_SIZE_MIN,
+ min(roundup_pow_of_two(size), ROCKER_DMA_SIZE_MAX));
+}
+
+static int rocker_dma_ring_create(struct rocker *rocker,
+ unsigned int type,
+ size_t size,
+ struct rocker_dma_ring_info *info)
+{
+ int i;
+
+ BUG_ON(size != rocker_dma_ring_size_fix(size));
+ info->size = size;
+ info->type = type;
+ info->head = 0;
+ info->tail = 0;
+ info->desc_info = kcalloc(info->size, sizeof(*info->desc_info),
+ GFP_KERNEL);
+ if (!info->desc_info)
+ return -ENOMEM;
+
+ info->desc = pci_alloc_consistent(rocker->pdev,
+ info->size * sizeof(*info->desc),
+ &info->mapaddr);
+ if (!info->desc) {
+ kfree(info->desc_info);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < info->size; i++)
+ info->desc_info[i].desc = &info->desc[i];
+
+ rocker_write32(rocker, DMA_DESC_CTRL(info->type),
+ ROCKER_DMA_DESC_CTRL_RESET);
+ rocker_write64(rocker, DMA_DESC_ADDR(info->type), info->mapaddr);
+ rocker_write32(rocker, DMA_DESC_SIZE(info->type), info->size);
+
+ return 0;
+}
+
+static void rocker_dma_ring_destroy(struct rocker *rocker,
+ struct rocker_dma_ring_info *info)
+{
+ rocker_write64(rocker, DMA_DESC_ADDR(info->type), 0);
+
+ pci_free_consistent(rocker->pdev,
+ info->size * sizeof(struct rocker_desc),
+ info->desc, info->mapaddr);
+ kfree(info->desc_info);
+}
+
+static void rocker_dma_ring_pass_to_producer(struct rocker *rocker,
+ struct rocker_dma_ring_info *info)
+{
+ int i;
+
+ BUG_ON(info->head || info->tail);
+
+ /* When ring is consumer, we need to advance head for each desc.
+ * That tells hw that the desc is ready to be used by it.
+ */
+ for (i = 0; i < info->size - 1; i++)
+ rocker_desc_head_set(rocker, info, &info->desc_info[i]);
+ rocker_desc_commit(&info->desc_info[i]);
+}
+
+static int rocker_dma_ring_bufs_alloc(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ int direction, size_t buf_size)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int i;
+ int err;
+
+ for (i = 0; i < info->size; i++) {
+ struct rocker_desc_info *desc_info = &info->desc_info[i];
+ struct rocker_desc *desc = &info->desc[i];
+ dma_addr_t dma_handle;
+ char *buf;
+
+ buf = kzalloc(buf_size, GFP_KERNEL | GFP_DMA);
+ if (!buf) {
+ err = -ENOMEM;
+ goto rollback;
+ }
+
+ dma_handle = pci_map_single(pdev, buf, buf_size, direction);
+ if (pci_dma_mapping_error(pdev, dma_handle)) {
+ kfree(buf);
+ err = -EIO;
+ goto rollback;
+ }
+
+ desc_info->data = buf;
+ desc_info->data_size = buf_size;
+ dma_unmap_addr_set(desc_info, mapaddr, dma_handle);
+
+ desc->buf_addr = dma_handle;
+ desc->buf_size = buf_size;
+ }
+ return 0;
+
+rollback:
+ for (i--; i >= 0; i--) {
+ struct rocker_desc_info *desc_info = &info->desc_info[i];
+
+ pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr),
+ desc_info->data_size, direction);
+ kfree(desc_info->data);
+ }
+ return err;
+}
+
+static void rocker_dma_ring_bufs_free(struct rocker *rocker,
+ struct rocker_dma_ring_info *info,
+ int direction)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int i;
+
+ for (i = 0; i < info->size; i++) {
+ struct rocker_desc_info *desc_info = &info->desc_info[i];
+ struct rocker_desc *desc = &info->desc[i];
+
+ desc->buf_addr = 0;
+ desc->buf_size = 0;
+ pci_unmap_single(pdev, dma_unmap_addr(desc_info, mapaddr),
+ desc_info->data_size, direction);
+ kfree(desc_info->data);
+ }
+}
+
+static int rocker_dma_rings_init(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int err;
+
+ err = rocker_dma_ring_create(rocker, ROCKER_DMA_CMD,
+ ROCKER_DMA_CMD_DEFAULT_SIZE,
+ &rocker->cmd_ring);
+ if (err) {
+ dev_err(&pdev->dev, "failed to create command dma ring\n");
+ return err;
+ }
+
+ spin_lock_init(&rocker->cmd_ring_lock);
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker->cmd_ring,
+ PCI_DMA_BIDIRECTIONAL, PAGE_SIZE);
+ if (err) {
+ dev_err(&pdev->dev, "failed to alloc command dma ring buffers\n");
+ goto err_dma_cmd_ring_bufs_alloc;
+ }
+
+ err = rocker_dma_ring_create(rocker, ROCKER_DMA_EVENT,
+ ROCKER_DMA_EVENT_DEFAULT_SIZE,
+ &rocker->event_ring);
+ if (err) {
+ dev_err(&pdev->dev, "failed to create event dma ring\n");
+ goto err_dma_event_ring_create;
+ }
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker->event_ring,
+ PCI_DMA_FROMDEVICE, PAGE_SIZE);
+ if (err) {
+ dev_err(&pdev->dev, "failed to alloc event dma ring buffers\n");
+ goto err_dma_event_ring_bufs_alloc;
+ }
+ rocker_dma_ring_pass_to_producer(rocker, &rocker->event_ring);
+ return 0;
+
+err_dma_event_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker->event_ring);
+err_dma_event_ring_create:
+ rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring,
+ PCI_DMA_BIDIRECTIONAL);
+err_dma_cmd_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker->cmd_ring);
+ return err;
+}
+
+static void rocker_dma_rings_fini(struct rocker *rocker)
+{
+ rocker_dma_ring_bufs_free(rocker, &rocker->event_ring,
+ PCI_DMA_BIDIRECTIONAL);
+ rocker_dma_ring_destroy(rocker, &rocker->event_ring);
+ rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring,
+ PCI_DMA_BIDIRECTIONAL);
+ rocker_dma_ring_destroy(rocker, &rocker->cmd_ring);
+}
+
+static int rocker_dma_rx_ring_skb_map(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ struct sk_buff *skb, size_t buf_len)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ dma_addr_t dma_handle;
+
+ dma_handle = pci_map_single(pdev, skb->data, buf_len,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(pdev, dma_handle))
+ return -EIO;
+ if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_RX_FRAG_ADDR, dma_handle))
+ goto tlv_put_failure;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_RX_FRAG_MAX_LEN, buf_len))
+ goto tlv_put_failure;
+ return 0;
+
+tlv_put_failure:
+ pci_unmap_single(pdev, dma_handle, buf_len, PCI_DMA_FROMDEVICE);
+ desc_info->tlv_size = 0;
+ return -EMSGSIZE;
+}
+
+static size_t rocker_port_rx_buf_len(struct rocker_port *rocker_port)
+{
+ return rocker_port->dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+}
+
+static int rocker_dma_rx_ring_skb_alloc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info)
+{
+ struct net_device *dev = rocker_port->dev;
+ struct sk_buff *skb;
+ size_t buf_len = rocker_port_rx_buf_len(rocker_port);
+ int err;
+
+ /* Ensure that hw will see tlv_size zero in case of an error.
+ * That tells hw to use another descriptor.
+ */
+ rocker_desc_cookie_ptr_set(desc_info, NULL);
+ desc_info->tlv_size = 0;
+
+ skb = netdev_alloc_skb_ip_align(dev, buf_len);
+ if (!skb)
+ return -ENOMEM;
+ err = rocker_dma_rx_ring_skb_map(rocker, rocker_port, desc_info,
+ skb, buf_len);
+ if (err) {
+ dev_kfree_skb_any(skb);
+ return err;
+ }
+ rocker_desc_cookie_ptr_set(desc_info, skb);
+ return 0;
+}
+
+static void rocker_dma_rx_ring_skb_unmap(struct rocker *rocker,
+ struct rocker_tlv **attrs)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ dma_addr_t dma_handle;
+ size_t len;
+
+ if (!attrs[ROCKER_TLV_RX_FRAG_ADDR] ||
+ !attrs[ROCKER_TLV_RX_FRAG_MAX_LEN])
+ return;
+ dma_handle = rocker_tlv_get_u64(attrs[ROCKER_TLV_RX_FRAG_ADDR]);
+ len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_MAX_LEN]);
+ pci_unmap_single(pdev, dma_handle, len, PCI_DMA_FROMDEVICE);
+}
+
+static void rocker_dma_rx_ring_skb_free(struct rocker *rocker,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
+ struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
+
+ if (!skb)
+ return;
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
+ rocker_dma_rx_ring_skb_unmap(rocker, attrs);
+ dev_kfree_skb_any(skb);
+}
+
+static int rocker_dma_rx_ring_skbs_alloc(struct rocker *rocker,
+ struct rocker_port *rocker_port)
+{
+ struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring;
+ int i;
+ int err;
+
+ for (i = 0; i < rx_ring->size; i++) {
+ err = rocker_dma_rx_ring_skb_alloc(rocker, rocker_port,
+ &rx_ring->desc_info[i]);
+ if (err)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ for (i--; i >= 0; i--)
+ rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]);
+ return err;
+}
+
+static void rocker_dma_rx_ring_skbs_free(struct rocker *rocker,
+ struct rocker_port *rocker_port)
+{
+ struct rocker_dma_ring_info *rx_ring = &rocker_port->rx_ring;
+ int i;
+
+ for (i = 0; i < rx_ring->size; i++)
+ rocker_dma_rx_ring_skb_free(rocker, &rx_ring->desc_info[i]);
+}
+
+static int rocker_port_dma_rings_init(struct rocker_port *rocker_port)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ int err;
+
+ err = rocker_dma_ring_create(rocker,
+ ROCKER_DMA_TX(rocker_port->port_number),
+ ROCKER_DMA_TX_DEFAULT_SIZE,
+ &rocker_port->tx_ring);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to create tx dma ring\n");
+ return err;
+ }
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->tx_ring,
+ PCI_DMA_TODEVICE,
+ ROCKER_DMA_TX_DESC_SIZE);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to alloc tx dma ring buffers\n");
+ goto err_dma_tx_ring_bufs_alloc;
+ }
+
+ err = rocker_dma_ring_create(rocker,
+ ROCKER_DMA_RX(rocker_port->port_number),
+ ROCKER_DMA_RX_DEFAULT_SIZE,
+ &rocker_port->rx_ring);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to create rx dma ring\n");
+ goto err_dma_rx_ring_create;
+ }
+
+ err = rocker_dma_ring_bufs_alloc(rocker, &rocker_port->rx_ring,
+ PCI_DMA_BIDIRECTIONAL,
+ ROCKER_DMA_RX_DESC_SIZE);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to alloc rx dma ring buffers\n");
+ goto err_dma_rx_ring_bufs_alloc;
+ }
+
+ err = rocker_dma_rx_ring_skbs_alloc(rocker, rocker_port);
+ if (err) {
+ netdev_err(rocker_port->dev, "failed to alloc rx dma ring skbs\n");
+ goto err_dma_rx_ring_skbs_alloc;
+ }
+ rocker_dma_ring_pass_to_producer(rocker, &rocker_port->rx_ring);
+
+ return 0;
+
+err_dma_rx_ring_skbs_alloc:
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring,
+ PCI_DMA_BIDIRECTIONAL);
+err_dma_rx_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring);
+err_dma_rx_ring_create:
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring,
+ PCI_DMA_TODEVICE);
+err_dma_tx_ring_bufs_alloc:
+ rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring);
+ return err;
+}
+
+static void rocker_port_dma_rings_fini(struct rocker_port *rocker_port)
+{
+ struct rocker *rocker = rocker_port->rocker;
+
+ rocker_dma_rx_ring_skbs_free(rocker, rocker_port);
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->rx_ring,
+ PCI_DMA_BIDIRECTIONAL);
+ rocker_dma_ring_destroy(rocker, &rocker_port->rx_ring);
+ rocker_dma_ring_bufs_free(rocker, &rocker_port->tx_ring,
+ PCI_DMA_TODEVICE);
+ rocker_dma_ring_destroy(rocker, &rocker_port->tx_ring);
+}
+
+static void rocker_port_set_enable(struct rocker_port *rocker_port, bool enable)
+{
+ u64 val = rocker_read64(rocker_port->rocker, PORT_PHYS_ENABLE);
+
+ if (enable)
+ val |= 1 << rocker_port->lport;
+ else
+ val &= ~(1 << rocker_port->lport);
+ rocker_write64(rocker_port->rocker, PORT_PHYS_ENABLE, val);
+}
+
+/********************************
+ * Interrupt handler and helpers
+ ********************************/
+
+static irqreturn_t rocker_cmd_irq_handler(int irq, void *dev_id)
+{
+ struct rocker *rocker = dev_id;
+ struct rocker_desc_info *desc_info;
+ struct rocker_wait *wait;
+ u32 credits = 0;
+
+ spin_lock(&rocker->cmd_ring_lock);
+ while ((desc_info = rocker_desc_tail_get(&rocker->cmd_ring))) {
+ wait = rocker_desc_cookie_ptr_get(desc_info);
+ if (wait->nowait) {
+ rocker_desc_gen_clear(desc_info);
+ rocker_wait_destroy(wait);
+ } else {
+ rocker_wait_wake_up(wait);
+ }
+ credits++;
+ }
+ spin_unlock(&rocker->cmd_ring_lock);
+ rocker_dma_ring_credits_set(rocker, &rocker->cmd_ring, credits);
+
+ return IRQ_HANDLED;
+}
+
+static void rocker_port_link_up(struct rocker_port *rocker_port)
+{
+ netif_carrier_on(rocker_port->dev);
+ netdev_info(rocker_port->dev, "Link is up\n");
+}
+
+static void rocker_port_link_down(struct rocker_port *rocker_port)
+{
+ netif_carrier_off(rocker_port->dev);
+ netdev_info(rocker_port->dev, "Link is down\n");
+}
+
+static int rocker_event_link_change(struct rocker *rocker,
+ const struct rocker_tlv *info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_EVENT_LINK_CHANGED_MAX + 1];
+ unsigned int port_number;
+ bool link_up;
+ struct rocker_port *rocker_port;
+
+ rocker_tlv_parse_nested(attrs, ROCKER_TLV_EVENT_LINK_CHANGED_MAX, info);
+ if (!attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LPORT] ||
+ !attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP])
+ return -EIO;
+ port_number =
+ rocker_tlv_get_u32(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LPORT]) - 1;
+ link_up = rocker_tlv_get_u8(attrs[ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP]);
+
+ if (port_number >= rocker->port_count)
+ return -EINVAL;
+
+ rocker_port = rocker->ports[port_number];
+ if (netif_carrier_ok(rocker_port->dev) != link_up) {
+ if (link_up)
+ rocker_port_link_up(rocker_port);
+ else
+ rocker_port_link_down(rocker_port);
+ }
+
+ return 0;
+}
+
+static int rocker_event_process(struct rocker *rocker,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_EVENT_MAX + 1];
+ struct rocker_tlv *info;
+ u16 type;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_EVENT_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_EVENT_TYPE] ||
+ !attrs[ROCKER_TLV_EVENT_INFO])
+ return -EIO;
+
+ type = rocker_tlv_get_u16(attrs[ROCKER_TLV_EVENT_TYPE]);
+ info = attrs[ROCKER_TLV_EVENT_INFO];
+
+ switch (type) {
+ case ROCKER_TLV_EVENT_TYPE_LINK_CHANGED:
+ return rocker_event_link_change(rocker, info);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static irqreturn_t rocker_event_irq_handler(int irq, void *dev_id)
+{
+ struct rocker *rocker = dev_id;
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_desc_info *desc_info;
+ u32 credits = 0;
+ int err;
+
+ while ((desc_info = rocker_desc_tail_get(&rocker->event_ring))) {
+ err = rocker_desc_err(desc_info);
+ if (err) {
+ dev_err(&pdev->dev, "event desc received with err %d\n",
+ err);
+ } else {
+ err = rocker_event_process(rocker, desc_info);
+ if (err)
+ dev_err(&pdev->dev, "event processing failed with err %d\n",
+ err);
+ }
+ rocker_desc_gen_clear(desc_info);
+ rocker_desc_head_set(rocker, &rocker->event_ring, desc_info);
+ credits++;
+ }
+ rocker_dma_ring_credits_set(rocker, &rocker->event_ring, credits);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rocker_tx_irq_handler(int irq, void *dev_id)
+{
+ struct rocker_port *rocker_port = dev_id;
+
+ napi_schedule(&rocker_port->napi_tx);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t rocker_rx_irq_handler(int irq, void *dev_id)
+{
+ struct rocker_port *rocker_port = dev_id;
+
+ napi_schedule(&rocker_port->napi_rx);
+ return IRQ_HANDLED;
+}
+
+/********************
+ * Command interface
+ ********************/
+
+typedef int (*rocker_cmd_cb_t)(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv);
+
+static int rocker_cmd_exec(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ rocker_cmd_cb_t prepare, void *prepare_priv,
+ rocker_cmd_cb_t process, void *process_priv,
+ bool nowait)
+{
+ struct rocker_desc_info *desc_info;
+ struct rocker_wait *wait;
+ unsigned long flags;
+ int err;
+
+ wait = rocker_wait_create(nowait ? GFP_ATOMIC : GFP_KERNEL);
+ if (!wait)
+ return -ENOMEM;
+ wait->nowait = nowait;
+
+ spin_lock_irqsave(&rocker->cmd_ring_lock, flags);
+ desc_info = rocker_desc_head_get(&rocker->cmd_ring);
+ if (!desc_info) {
+ spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags);
+ err = -EAGAIN;
+ goto out;
+ }
+ err = prepare(rocker, rocker_port, desc_info, prepare_priv);
+ if (err) {
+ spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags);
+ goto out;
+ }
+ rocker_desc_cookie_ptr_set(desc_info, wait);
+ rocker_desc_head_set(rocker, &rocker->cmd_ring, desc_info);
+ spin_unlock_irqrestore(&rocker->cmd_ring_lock, flags);
+
+ if (nowait)
+ return 0;
+
+ if (!rocker_wait_event_timeout(wait, HZ / 10))
+ return -EIO;
+
+ err = rocker_desc_err(desc_info);
+ if (err)
+ return err;
+
+ if (process)
+ err = process(rocker, rocker_port, desc_info, process_priv);
+
+ rocker_desc_gen_clear(desc_info);
+out:
+ rocker_wait_destroy(wait);
+ return err;
+}
+
+static int
+rocker_cmd_get_port_settings_prep(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LPORT,
+ rocker_port->lport))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int
+rocker_cmd_get_port_settings_ethtool_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct ethtool_cmd *ecmd = priv;
+ struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1];
+ struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ u32 speed;
+ u8 duplex;
+ u8 autoneg;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_CMD_INFO])
+ return -EIO;
+
+ rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ attrs[ROCKER_TLV_CMD_INFO]);
+ if (!info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED] ||
+ !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX] ||
+ !info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG])
+ return -EIO;
+
+ speed = rocker_tlv_get_u32(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_SPEED]);
+ duplex = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX]);
+ autoneg = rocker_tlv_get_u8(info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG]);
+
+ ecmd->transceiver = XCVR_INTERNAL;
+ ecmd->supported = SUPPORTED_TP;
+ ecmd->phy_address = 0xff;
+ ecmd->port = PORT_TP;
+ ethtool_cmd_speed_set(ecmd, speed);
+ ecmd->duplex = duplex ? DUPLEX_FULL : DUPLEX_HALF;
+ ecmd->autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+ return 0;
+}
+
+static int
+rocker_cmd_get_port_settings_macaddr_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ unsigned char *macaddr = priv;
+ struct rocker_tlv *attrs[ROCKER_TLV_CMD_MAX + 1];
+ struct rocker_tlv *info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MAX + 1];
+ struct rocker_tlv *attr;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_CMD_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_CMD_INFO])
+ return -EIO;
+
+ rocker_tlv_parse_nested(info_attrs, ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ attrs[ROCKER_TLV_CMD_INFO]);
+ attr = info_attrs[ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR];
+ if (!attr)
+ return -EIO;
+
+ if (rocker_tlv_len(attr) != ETH_ALEN)
+ return -EINVAL;
+
+ ether_addr_copy(macaddr, rocker_tlv_data(attr));
+ return 0;
+}
+
+static int
+rocker_cmd_set_port_settings_ethtool_prep(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ struct ethtool_cmd *ecmd = priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LPORT,
+ rocker_port->lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_SPEED,
+ ethtool_cmd_speed(ecmd)))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX,
+ ecmd->duplex))
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u8(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG,
+ ecmd->autoneg))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int
+rocker_cmd_set_port_settings_macaddr_prep(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ void *priv)
+{
+ unsigned char *macaddr = priv;
+ struct rocker_tlv *cmd_info;
+
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+ return -EMSGSIZE;
+ cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+ if (!cmd_info)
+ return -EMSGSIZE;
+ if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_LPORT,
+ rocker_port->lport))
+ return -EMSGSIZE;
+ if (rocker_tlv_put(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR,
+ ETH_ALEN, macaddr))
+ return -EMSGSIZE;
+ rocker_tlv_nest_end(desc_info, cmd_info);
+ return 0;
+}
+
+static int rocker_cmd_get_port_settings_ethtool(struct rocker_port *rocker_port,
+ struct ethtool_cmd *ecmd)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_get_port_settings_prep, NULL,
+ rocker_cmd_get_port_settings_ethtool_proc,
+ ecmd, false);
+}
+
+static int rocker_cmd_get_port_settings_macaddr(struct rocker_port *rocker_port,
+ unsigned char *macaddr)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_get_port_settings_prep, NULL,
+ rocker_cmd_get_port_settings_macaddr_proc,
+ macaddr, false);
+}
+
+static int rocker_cmd_set_port_settings_ethtool(struct rocker_port *rocker_port,
+ struct ethtool_cmd *ecmd)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_set_port_settings_ethtool_prep,
+ ecmd, NULL, NULL, false);
+}
+
+static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port,
+ unsigned char *macaddr)
+{
+ return rocker_cmd_exec(rocker_port->rocker, rocker_port,
+ rocker_cmd_set_port_settings_macaddr_prep,
+ macaddr, NULL, NULL, false);
+}
+
+/*****************
+ * Net device ops
+ *****************/
+
+static int rocker_port_open(struct net_device *dev)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int err;
+
+ err = rocker_port_dma_rings_init(rocker_port);
+ if (err)
+ return err;
+
+ err = request_irq(rocker_msix_tx_vector(rocker_port),
+ rocker_tx_irq_handler, 0,
+ rocker_driver_name, rocker_port);
+ if (err) {
+ netdev_err(rocker_port->dev, "cannot assign tx irq\n");
+ goto err_request_tx_irq;
+ }
+
+ err = request_irq(rocker_msix_rx_vector(rocker_port),
+ rocker_rx_irq_handler, 0,
+ rocker_driver_name, rocker_port);
+ if (err) {
+ netdev_err(rocker_port->dev, "cannot assign rx irq\n");
+ goto err_request_rx_irq;
+ }
+
+ napi_enable(&rocker_port->napi_tx);
+ napi_enable(&rocker_port->napi_rx);
+ rocker_port_set_enable(rocker_port, true);
+ netif_start_queue(dev);
+ return 0;
+
+err_request_rx_irq:
+ free_irq(rocker_msix_tx_vector(rocker_port), rocker_port);
+err_request_tx_irq:
+ rocker_port_dma_rings_fini(rocker_port);
+ return err;
+}
+
+static int rocker_port_stop(struct net_device *dev)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+
+ netif_stop_queue(dev);
+ rocker_port_set_enable(rocker_port, false);
+ napi_disable(&rocker_port->napi_rx);
+ napi_disable(&rocker_port->napi_tx);
+ free_irq(rocker_msix_rx_vector(rocker_port), rocker_port);
+ free_irq(rocker_msix_tx_vector(rocker_port), rocker_port);
+ rocker_port_dma_rings_fini(rocker_port);
+
+ return 0;
+}
+
+static void rocker_tx_desc_frags_unmap(struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_tlv *attrs[ROCKER_TLV_TX_MAX + 1];
+ struct rocker_tlv *attr;
+ int rem;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_TX_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_TX_FRAGS])
+ return;
+ rocker_tlv_for_each_nested(attr, attrs[ROCKER_TLV_TX_FRAGS], rem) {
+ struct rocker_tlv *frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_MAX + 1];
+ dma_addr_t dma_handle;
+ size_t len;
+
+ if (rocker_tlv_type(attr) != ROCKER_TLV_TX_FRAG)
+ continue;
+ rocker_tlv_parse_nested(frag_attrs, ROCKER_TLV_TX_FRAG_ATTR_MAX,
+ attr);
+ if (!frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR] ||
+ !frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN])
+ continue;
+ dma_handle = rocker_tlv_get_u64(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_ADDR]);
+ len = rocker_tlv_get_u16(frag_attrs[ROCKER_TLV_TX_FRAG_ATTR_LEN]);
+ pci_unmap_single(pdev, dma_handle, len, DMA_TO_DEVICE);
+ }
+}
+
+static int rocker_tx_desc_frag_map_put(struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info,
+ char *buf, size_t buf_len)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ struct pci_dev *pdev = rocker->pdev;
+ dma_addr_t dma_handle;
+ struct rocker_tlv *frag;
+
+ dma_handle = pci_map_single(pdev, buf, buf_len, DMA_TO_DEVICE);
+ if (unlikely(pci_dma_mapping_error(pdev, dma_handle))) {
+ if (net_ratelimit())
+ netdev_err(rocker_port->dev, "failed to dma map tx frag\n");
+ return -EIO;
+ }
+ frag = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAG);
+ if (!frag)
+ goto unmap_frag;
+ if (rocker_tlv_put_u64(desc_info, ROCKER_TLV_TX_FRAG_ATTR_ADDR,
+ dma_handle))
+ goto nest_cancel;
+ if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_TX_FRAG_ATTR_LEN,
+ buf_len))
+ goto nest_cancel;
+ rocker_tlv_nest_end(desc_info, frag);
+ return 0;
+
+nest_cancel:
+ rocker_tlv_nest_cancel(desc_info, frag);
+unmap_frag:
+ pci_unmap_single(pdev, dma_handle, buf_len, DMA_TO_DEVICE);
+ return -EMSGSIZE;
+}
+
+static netdev_tx_t rocker_port_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_desc_info *desc_info;
+ struct rocker_tlv *frags;
+ int i;
+ int err;
+
+ desc_info = rocker_desc_head_get(&rocker_port->tx_ring);
+ if (unlikely(!desc_info)) {
+ if (net_ratelimit())
+ netdev_err(dev, "tx ring full when queue awake\n");
+ return NETDEV_TX_BUSY;
+ }
+
+ rocker_desc_cookie_ptr_set(desc_info, skb);
+
+ frags = rocker_tlv_nest_start(desc_info, ROCKER_TLV_TX_FRAGS);
+ if (!frags)
+ goto out;
+ err = rocker_tx_desc_frag_map_put(rocker_port, desc_info,
+ skb->data, skb_headlen(skb));
+ if (err)
+ goto nest_cancel;
+ if (skb_shinfo(skb)->nr_frags > ROCKER_TX_FRAGS_MAX)
+ goto nest_cancel;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ err = rocker_tx_desc_frag_map_put(rocker_port, desc_info,
+ skb_frag_address(frag),
+ skb_frag_size(frag));
+ if (err)
+ goto unmap_frags;
+ }
+ rocker_tlv_nest_end(desc_info, frags);
+
+ rocker_desc_gen_clear(desc_info);
+ rocker_desc_head_set(rocker, &rocker_port->tx_ring, desc_info);
+
+ desc_info = rocker_desc_head_get(&rocker_port->tx_ring);
+ if (!desc_info)
+ netif_stop_queue(dev);
+
+ return NETDEV_TX_OK;
+
+unmap_frags:
+ rocker_tx_desc_frags_unmap(rocker_port, desc_info);
+nest_cancel:
+ rocker_tlv_nest_cancel(desc_info, frags);
+out:
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int rocker_port_set_mac_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ int err;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ err = rocker_cmd_set_port_settings_macaddr(rocker_port, addr->sa_data);
+ if (err)
+ return err;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+static int rocker_port_sw_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+ struct rocker *rocker = rocker_port->rocker;
+
+ psid->id_len = sizeof(rocker->hw.id);
+ memcpy(&psid->id, &rocker->hw.id, psid->id_len);
+ return 0;
+}
+
+static const struct net_device_ops rocker_port_netdev_ops = {
+ .ndo_open = rocker_port_open,
+ .ndo_stop = rocker_port_stop,
+ .ndo_start_xmit = rocker_port_xmit,
+ .ndo_set_mac_address = rocker_port_set_mac_address,
+ .ndo_sw_parent_id_get = rocker_port_sw_parent_id_get,
+};
+
+/********************
+ * ethtool interface
+ ********************/
+
+static int rocker_port_get_settings(struct net_device *dev,
+ struct ethtool_cmd *ecmd)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+
+ return rocker_cmd_get_port_settings_ethtool(rocker_port, ecmd);
+}
+
+static int rocker_port_set_settings(struct net_device *dev,
+ struct ethtool_cmd *ecmd)
+{
+ struct rocker_port *rocker_port = netdev_priv(dev);
+
+ return rocker_cmd_set_port_settings_ethtool(rocker_port, ecmd);
+}
+
+static void rocker_port_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strlcpy(drvinfo->driver, rocker_driver_name, sizeof(drvinfo->driver));
+ strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version));
+}
+
+static const struct ethtool_ops rocker_port_ethtool_ops = {
+ .get_settings = rocker_port_get_settings,
+ .set_settings = rocker_port_set_settings,
+ .get_drvinfo = rocker_port_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+};
+
+/*****************
+ * NAPI interface
+ *****************/
+
+static struct rocker_port *rocker_port_napi_tx_get(struct napi_struct *napi)
+{
+ return container_of(napi, struct rocker_port, napi_tx);
+}
+
+static int rocker_port_poll_tx(struct napi_struct *napi, int budget)
+{
+ struct rocker_port *rocker_port = rocker_port_napi_tx_get(napi);
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_desc_info *desc_info;
+ u32 credits = 0;
+ int err;
+
+ /* Cleanup tx descriptors */
+ while ((desc_info = rocker_desc_tail_get(&rocker_port->tx_ring))) {
+ err = rocker_desc_err(desc_info);
+ if (err && net_ratelimit())
+ netdev_err(rocker_port->dev, "tx desc received with err %d\n",
+ err);
+ rocker_tx_desc_frags_unmap(rocker_port, desc_info);
+ dev_kfree_skb_any(rocker_desc_cookie_ptr_get(desc_info));
+ credits++;
+ }
+
+ if (credits && netif_queue_stopped(rocker_port->dev))
+ netif_wake_queue(rocker_port->dev);
+
+ napi_complete(napi);
+ rocker_dma_ring_credits_set(rocker, &rocker_port->tx_ring, credits);
+
+ return 0;
+}
+
+static int rocker_port_rx_proc(struct rocker *rocker,
+ struct rocker_port *rocker_port,
+ struct rocker_desc_info *desc_info)
+{
+ struct rocker_tlv *attrs[ROCKER_TLV_RX_MAX + 1];
+ struct sk_buff *skb = rocker_desc_cookie_ptr_get(desc_info);
+ size_t rx_len;
+
+ if (!skb)
+ return -ENOENT;
+
+ rocker_tlv_parse_desc(attrs, ROCKER_TLV_RX_MAX, desc_info);
+ if (!attrs[ROCKER_TLV_RX_FRAG_LEN])
+ return -EINVAL;
+
+ rocker_dma_rx_ring_skb_unmap(rocker, attrs);
+
+ rx_len = rocker_tlv_get_u16(attrs[ROCKER_TLV_RX_FRAG_LEN]);
+ skb_put(skb, rx_len);
+ skb->protocol = eth_type_trans(skb, rocker_port->dev);
+ netif_receive_skb(skb);
+
+ return rocker_dma_rx_ring_skb_alloc(rocker, rocker_port, desc_info);
+}
+
+static struct rocker_port *rocker_port_napi_rx_get(struct napi_struct *napi)
+{
+ return container_of(napi, struct rocker_port, napi_rx);
+}
+
+static int rocker_port_poll_rx(struct napi_struct *napi, int budget)
+{
+ struct rocker_port *rocker_port = rocker_port_napi_rx_get(napi);
+ struct rocker *rocker = rocker_port->rocker;
+ struct rocker_desc_info *desc_info;
+ u32 credits = 0;
+ int err;
+
+ /* Process rx descriptors */
+ while (credits < budget &&
+ (desc_info = rocker_desc_tail_get(&rocker_port->rx_ring))) {
+ err = rocker_desc_err(desc_info);
+ if (err) {
+ if (net_ratelimit())
+ netdev_err(rocker_port->dev, "rx desc received with err %d\n",
+ err);
+ } else {
+ err = rocker_port_rx_proc(rocker, rocker_port,
+ desc_info);
+ if (err && net_ratelimit())
+ netdev_err(rocker_port->dev, "rx processing failed with err %d\n",
+ err);
+ }
+ rocker_desc_gen_clear(desc_info);
+ rocker_desc_head_set(rocker, &rocker_port->rx_ring, desc_info);
+ credits++;
+ }
+
+ if (credits < budget)
+ napi_complete(napi);
+
+ rocker_dma_ring_credits_set(rocker, &rocker_port->rx_ring, credits);
+
+ return credits;
+}
+
+/*****************
+ * PCI driver ops
+ *****************/
+
+static void rocker_carrier_init(struct rocker_port *rocker_port)
+{
+ struct rocker *rocker = rocker_port->rocker;
+ u64 link_status = rocker_read64(rocker, PORT_PHYS_LINK_STATUS);
+ bool link_up;
+
+ link_up = link_status & (1 << rocker_port->lport);
+ if (link_up)
+ netif_carrier_on(rocker_port->dev);
+ else
+ netif_carrier_off(rocker_port->dev);
+}
+
+static void rocker_remove_ports(struct rocker *rocker)
+{
+ int i;
+
+ for (i = 0; i < rocker->port_count; i++)
+ unregister_netdev(rocker->ports[i]->dev);
+ kfree(rocker->ports);
+}
+
+static void rocker_port_dev_addr_init(struct rocker *rocker,
+ struct rocker_port *rocker_port)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int err;
+
+ err = rocker_cmd_get_port_settings_macaddr(rocker_port,
+ rocker_port->dev->dev_addr);
+ if (err) {
+ dev_warn(&pdev->dev, "failed to get mac address, using random\n");
+ eth_hw_addr_random(rocker_port->dev);
+ }
+}
+
+static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ struct rocker_port *rocker_port;
+ struct net_device *dev;
+ int err;
+
+ dev = alloc_etherdev(sizeof(struct rocker_port));
+ if (!dev)
+ return -ENOMEM;
+ rocker_port = netdev_priv(dev);
+ rocker_port->dev = dev;
+ rocker_port->rocker = rocker;
+ rocker_port->port_number = port_number;
+ rocker_port->lport = port_number + 1;
+
+ rocker_port_dev_addr_init(rocker, rocker_port);
+ dev->netdev_ops = &rocker_port_netdev_ops;
+ dev->ethtool_ops = &rocker_port_ethtool_ops;
+ netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
+ NAPI_POLL_WEIGHT);
+ netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
+ NAPI_POLL_WEIGHT);
+ rocker_carrier_init(rocker_port);
+
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ err = register_netdev(dev);
+ if (err) {
+ dev_err(&pdev->dev, "register_netdev failed\n");
+ goto err_register_netdev;
+ }
+ rocker->ports[port_number] = rocker_port;
+
+ return 0;
+
+err_register_netdev:
+ free_netdev(dev);
+ return err;
+}
+
+static int rocker_probe_ports(struct rocker *rocker)
+{
+ int i;
+ size_t alloc_size;
+ int err;
+
+ alloc_size = sizeof(struct rocker_port *) * rocker->port_count;
+ rocker->ports = kmalloc(alloc_size, GFP_KERNEL);
+ for (i = 0; i < rocker->port_count; i++) {
+ err = rocker_probe_port(rocker, i);
+ if (err)
+ goto remove_ports;
+ }
+ return 0;
+
+remove_ports:
+ rocker_remove_ports(rocker);
+ return err;
+}
+
+static int rocker_msix_init(struct rocker *rocker)
+{
+ struct pci_dev *pdev = rocker->pdev;
+ int msix_entries;
+ int i;
+ int err;
+
+ msix_entries = pci_msix_vec_count(pdev);
+ if (msix_entries < 0)
+ return msix_entries;
+
+ if (msix_entries != ROCKER_MSIX_VEC_COUNT(rocker->port_count))
+ return -EINVAL;
+
+ rocker->msix_entries = kmalloc_array(msix_entries,
+ sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (!rocker->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < msix_entries; i++)
+ rocker->msix_entries[i].entry = i;
+
+ err = pci_enable_msix_exact(pdev, rocker->msix_entries, msix_entries);
+ if (err < 0)
+ goto err_enable_msix;
+
+ return 0;
+
+err_enable_msix:
+ kfree(rocker->msix_entries);
+ return err;
+}
+
+static void rocker_msix_fini(struct rocker *rocker)
+{
+ pci_disable_msix(rocker->pdev);
+ kfree(rocker->msix_entries);
+}
+
+static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct rocker *rocker;
+ int err;
+
+ rocker = kzalloc(sizeof(*rocker), GFP_KERNEL);
+ if (!rocker)
+ return -ENOMEM;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device failed\n");
+ goto err_pci_enable_device;
+ }
+
+ err = pci_request_regions(pdev, rocker_driver_name);
+ if (err) {
+ dev_err(&pdev->dev, "pci_request_regions failed\n");
+ goto err_pci_request_regions;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (!err) {
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n");
+ goto err_pci_set_dma_mask;
+ }
+ } else {
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "pci_set_dma_mask failed\n");
+ goto err_pci_set_dma_mask;
+ }
+ }
+
+ if (pci_resource_len(pdev, 0) < ROCKER_PCI_BAR0_SIZE) {
+ dev_err(&pdev->dev, "invalid PCI region size\n");
+ goto err_pci_resource_len_check;
+ }
+
+ rocker->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!rocker->hw_addr) {
+ dev_err(&pdev->dev, "ioremap failed\n");
+ err = -EIO;
+ goto err_ioremap;
+ }
+ pci_set_master(pdev);
+
+ rocker->pdev = pdev;
+ pci_set_drvdata(pdev, rocker);
+
+ rocker->port_count = rocker_read32(rocker, PORT_PHYS_COUNT);
+
+ err = rocker_msix_init(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "MSI-X init failed\n");
+ goto err_msix_init;
+ }
+
+ err = rocker_basic_hw_test(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "basic hw test failed\n");
+ goto err_basic_hw_test;
+ }
+
+ rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
+
+ err = rocker_dma_rings_init(rocker);
+ if (err)
+ goto err_dma_rings_init;
+
+ err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD),
+ rocker_cmd_irq_handler, 0,
+ rocker_driver_name, rocker);
+ if (err) {
+ dev_err(&pdev->dev, "cannot assign cmd irq\n");
+ goto err_request_cmd_irq;
+ }
+
+ err = request_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT),
+ rocker_event_irq_handler, 0,
+ rocker_driver_name, rocker);
+ if (err) {
+ dev_err(&pdev->dev, "cannot assign event irq\n");
+ goto err_request_event_irq;
+ }
+
+ rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
+
+ err = rocker_probe_ports(rocker);
+ if (err) {
+ dev_err(&pdev->dev, "failed to probe ports\n");
+ goto err_probe_ports;
+ }
+
+ dev_info(&pdev->dev, "Rocker switch with id %016llx\n", rocker->hw.id);
+
+ return 0;
+
+err_probe_ports:
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
+err_request_event_irq:
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
+err_request_cmd_irq:
+ rocker_dma_rings_fini(rocker);
+err_dma_rings_init:
+err_basic_hw_test:
+ rocker_msix_fini(rocker);
+err_msix_init:
+ iounmap(rocker->hw_addr);
+err_ioremap:
+err_pci_resource_len_check:
+err_pci_set_dma_mask:
+ pci_release_regions(pdev);
+err_pci_request_regions:
+ pci_disable_device(pdev);
+err_pci_enable_device:
+ kfree(rocker);
+ return err;
+}
+
+static void rocker_remove(struct pci_dev *pdev)
+{
+ struct rocker *rocker = pci_get_drvdata(pdev);
+
+ rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
+ rocker_remove_ports(rocker);
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
+ free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
+ rocker_dma_rings_fini(rocker);
+ rocker_msix_fini(rocker);
+ iounmap(rocker->hw_addr);
+ pci_release_regions(rocker->pdev);
+ pci_disable_device(rocker->pdev);
+ kfree(rocker);
+}
+
+static struct pci_driver rocker_pci_driver = {
+ .name = rocker_driver_name,
+ .id_table = rocker_pci_id_table,
+ .probe = rocker_probe,
+ .remove = rocker_remove,
+};
+
+/***********************
+ * Module init and exit
+ ***********************/
+
+static int __init rocker_module_init(void)
+{
+ return pci_register_driver(&rocker_pci_driver);
+}
+
+static void __exit rocker_module_exit(void)
+{
+ pci_unregister_driver(&rocker_pci_driver);
+}
+
+module_init(rocker_module_init);
+module_exit(rocker_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_AUTHOR("Scott Feldman <sfeldma@gmail.com>");
+MODULE_DESCRIPTION("Rocker switch device driver");
+MODULE_DEVICE_TABLE(pci, rocker_pci_id_table);
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
new file mode 100644
index 0000000..5251cf8
--- /dev/null
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -0,0 +1,427 @@
+/*
+ * drivers/net/ethernet/rocker/rocker.h - Rocker switch device driver
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ROCKER_H
+#define _ROCKER_H
+
+#include <linux/types.h>
+
+#define PCI_VENDOR_ID_REDHAT 0x1b36
+#define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006
+
+#define ROCKER_PCI_BAR0_SIZE 0x2000
+
+/* MSI-X vectors */
+enum {
+ ROCKER_MSIX_VEC_CMD,
+ ROCKER_MSIX_VEC_EVENT,
+ ROCKER_MSIX_VEC_TEST,
+ ROCKER_MSIX_VEC_RESERVED0,
+ __ROCKER_MSIX_VEC_TX,
+ __ROCKER_MSIX_VEC_RX,
+#define ROCKER_MSIX_VEC_TX(port) \
+ (__ROCKER_MSIX_VEC_TX + ((port) * 2))
+#define ROCKER_MSIX_VEC_RX(port) \
+ (__ROCKER_MSIX_VEC_RX + ((port) * 2))
+#define ROCKER_MSIX_VEC_COUNT(portcnt) \
+ (ROCKER_MSIX_VEC_RX((portcnt - 1)) + 1)
+};
+
+/* Rocker bogus registers */
+#define ROCKER_BOGUS_REG0 0x0000
+#define ROCKER_BOGUS_REG1 0x0004
+#define ROCKER_BOGUS_REG2 0x0008
+#define ROCKER_BOGUS_REG3 0x000c
+
+/* Rocker test registers */
+#define ROCKER_TEST_REG 0x0010
+#define ROCKER_TEST_REG64 0x0018 /* 8-byte */
+#define ROCKER_TEST_IRQ 0x0020
+#define ROCKER_TEST_DMA_ADDR 0x0028 /* 8-byte */
+#define ROCKER_TEST_DMA_SIZE 0x0030
+#define ROCKER_TEST_DMA_CTRL 0x0034
+
+/* Rocker test register ctrl */
+#define ROCKER_TEST_DMA_CTRL_CLEAR (1 << 0)
+#define ROCKER_TEST_DMA_CTRL_FILL (1 << 1)
+#define ROCKER_TEST_DMA_CTRL_INVERT (1 << 2)
+
+/* Rocker DMA ring register offsets */
+#define ROCKER_DMA_DESC_ADDR(x) (0x1000 + (x) * 32) /* 8-byte */
+#define ROCKER_DMA_DESC_SIZE(x) (0x1008 + (x) * 32)
+#define ROCKER_DMA_DESC_HEAD(x) (0x100c + (x) * 32)
+#define ROCKER_DMA_DESC_TAIL(x) (0x1010 + (x) * 32)
+#define ROCKER_DMA_DESC_CTRL(x) (0x1014 + (x) * 32)
+#define ROCKER_DMA_DESC_CREDITS(x) (0x1018 + (x) * 32)
+#define ROCKER_DMA_DESC_RES1(x) (0x101c + (x) * 32)
+
+/* Rocker dma ctrl register bits */
+#define ROCKER_DMA_DESC_CTRL_RESET (1 << 0)
+
+/* Rocker DMA ring types */
+enum rocker_dma_type {
+ ROCKER_DMA_CMD,
+ ROCKER_DMA_EVENT,
+ __ROCKER_DMA_TX,
+ __ROCKER_DMA_RX,
+#define ROCKER_DMA_TX(port) (__ROCKER_DMA_TX + (port) * 2)
+#define ROCKER_DMA_RX(port) (__ROCKER_DMA_RX + (port) * 2)
+};
+
+/* Rocker DMA ring size limits and default sizes */
+#define ROCKER_DMA_SIZE_MIN 2ul
+#define ROCKER_DMA_SIZE_MAX 65536ul
+#define ROCKER_DMA_CMD_DEFAULT_SIZE 32ul
+#define ROCKER_DMA_EVENT_DEFAULT_SIZE 32ul
+#define ROCKER_DMA_TX_DEFAULT_SIZE 64ul
+#define ROCKER_DMA_TX_DESC_SIZE 256
+#define ROCKER_DMA_RX_DEFAULT_SIZE 64ul
+#define ROCKER_DMA_RX_DESC_SIZE 256
+
+/* Rocker DMA descriptor struct */
+struct rocker_desc {
+ u64 buf_addr;
+ u64 cookie;
+ u16 buf_size;
+ u16 tlv_size;
+ u16 resv[5];
+ u16 comp_err;
+};
+
+#define ROCKER_DMA_DESC_COMP_ERR_GEN (1 << 15)
+
+/* Rocker DMA TLV struct */
+struct rocker_tlv {
+ u32 type;
+ u16 len;
+};
+
+/* TLVs */
+enum {
+ ROCKER_TLV_CMD_UNSPEC,
+ ROCKER_TLV_CMD_TYPE, /* u16 */
+ ROCKER_TLV_CMD_INFO, /* nest */
+
+ __ROCKER_TLV_CMD_MAX,
+ ROCKER_TLV_CMD_MAX = __ROCKER_TLV_CMD_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_CMD_TYPE_UNSPEC,
+ ROCKER_TLV_CMD_TYPE_GET_PORT_SETTINGS,
+ ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_ADD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_MOD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_DEL,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_FLOW_GET_STATS,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_ADD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_MOD,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_DEL,
+ ROCKER_TLV_CMD_TYPE_OF_DPA_GROUP_GET_STATS,
+
+ __ROCKER_TLV_CMD_TYPE_MAX,
+ ROCKER_TLV_CMD_TYPE_MAX = __ROCKER_TLV_CMD_TYPE_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_CMD_PORT_SETTINGS_UNSPEC,
+ ROCKER_TLV_CMD_PORT_SETTINGS_LPORT, /* u32 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_SPEED, /* u32 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_DUPLEX, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_AUTONEG, /* u8 */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MACADDR, /* binary */
+ ROCKER_TLV_CMD_PORT_SETTINGS_MODE, /* u8 */
+
+ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
+ ROCKER_TLV_CMD_PORT_SETTINGS_MAX =
+ __ROCKER_TLV_CMD_PORT_SETTINGS_MAX - 1,
+};
+
+enum rocker_port_mode {
+ ROCKER_PORT_MODE_OF_DPA,
+};
+
+enum {
+ ROCKER_TLV_EVENT_UNSPEC,
+ ROCKER_TLV_EVENT_TYPE, /* u16 */
+ ROCKER_TLV_EVENT_INFO, /* nest */
+
+ __ROCKER_TLV_EVENT_MAX,
+ ROCKER_TLV_EVENT_MAX = __ROCKER_TLV_EVENT_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_TYPE_UNSPEC,
+ ROCKER_TLV_EVENT_TYPE_LINK_CHANGED,
+ ROCKER_TLV_EVENT_TYPE_MAC_VLAN_SEEN,
+
+ __ROCKER_TLV_EVENT_TYPE_MAX,
+ ROCKER_TLV_EVENT_TYPE_MAX = __ROCKER_TLV_EVENT_TYPE_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_LINK_CHANGED_UNSPEC,
+ ROCKER_TLV_EVENT_LINK_CHANGED_LPORT, /* u32 */
+ ROCKER_TLV_EVENT_LINK_CHANGED_LINKUP, /* u8 */
+
+ __ROCKER_TLV_EVENT_LINK_CHANGED_MAX,
+ ROCKER_TLV_EVENT_LINK_CHANGED_MAX =
+ __ROCKER_TLV_EVENT_LINK_CHANGED_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_EVENT_MAC_VLAN_UNSPEC,
+ ROCKER_TLV_EVENT_MAC_VLAN_LPORT, /* u32 */
+ ROCKER_TLV_EVENT_MAC_VLAN_MAC, /* binary */
+ ROCKER_TLV_EVENT_MAC_VLAN_VLAN_ID, /* __be16 */
+
+ __ROCKER_TLV_EVENT_MAC_VLAN_MAX,
+ ROCKER_TLV_EVENT_MAC_VLAN_MAX = __ROCKER_TLV_EVENT_MAC_VLAN_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_RX_UNSPEC,
+ ROCKER_TLV_RX_FLAGS, /* u16, see ROCKER_RX_FLAGS_ */
+ ROCKER_TLV_RX_CSUM, /* u16 */
+ ROCKER_TLV_RX_FRAG_ADDR, /* u64 */
+ ROCKER_TLV_RX_FRAG_MAX_LEN, /* u16 */
+ ROCKER_TLV_RX_FRAG_LEN, /* u16 */
+
+ __ROCKER_TLV_RX_MAX,
+ ROCKER_TLV_RX_MAX = __ROCKER_TLV_RX_MAX - 1,
+};
+
+#define ROCKER_RX_FLAGS_IPV4 (1 << 0)
+#define ROCKER_RX_FLAGS_IPV6 (1 << 1)
+#define ROCKER_RX_FLAGS_CSUM_CALC (1 << 2)
+#define ROCKER_RX_FLAGS_IPV4_CSUM_GOOD (1 << 3)
+#define ROCKER_RX_FLAGS_IP_FRAG (1 << 4)
+#define ROCKER_RX_FLAGS_TCP (1 << 5)
+#define ROCKER_RX_FLAGS_UDP (1 << 6)
+#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7)
+
+enum {
+ ROCKER_TLV_TX_UNSPEC,
+ ROCKER_TLV_TX_OFFLOAD, /* u8, see ROCKER_TX_OFFLOAD_ */
+ ROCKER_TLV_TX_L3_CSUM_OFF, /* u16 */
+ ROCKER_TLV_TX_TSO_MSS, /* u16 */
+ ROCKER_TLV_TX_TSO_HDR_LEN, /* u16 */
+ ROCKER_TLV_TX_FRAGS, /* array */
+
+ __ROCKER_TLV_TX_MAX,
+ ROCKER_TLV_TX_MAX = __ROCKER_TLV_TX_MAX - 1,
+};
+
+#define ROCKER_TX_OFFLOAD_NONE 0
+#define ROCKER_TX_OFFLOAD_IP_CSUM 1
+#define ROCKER_TX_OFFLOAD_TCP_UDP_CSUM 2
+#define ROCKER_TX_OFFLOAD_L3_CSUM 3
+#define ROCKER_TX_OFFLOAD_TSO 4
+
+#define ROCKER_TX_FRAGS_MAX 16
+
+enum {
+ ROCKER_TLV_TX_FRAG_UNSPEC,
+ ROCKER_TLV_TX_FRAG, /* nest */
+
+ __ROCKER_TLV_TX_FRAG_MAX,
+ ROCKER_TLV_TX_FRAG_MAX = __ROCKER_TLV_TX_FRAG_MAX - 1,
+};
+
+enum {
+ ROCKER_TLV_TX_FRAG_ATTR_UNSPEC,
+ ROCKER_TLV_TX_FRAG_ATTR_ADDR, /* u64 */
+ ROCKER_TLV_TX_FRAG_ATTR_LEN, /* u16 */
+
+ __ROCKER_TLV_TX_FRAG_ATTR_MAX,
+ ROCKER_TLV_TX_FRAG_ATTR_MAX = __ROCKER_TLV_TX_FRAG_ATTR_MAX - 1,
+};
+
+/* cmd info nested for OF-DPA msgs */
+enum {
+ ROCKER_TLV_OF_DPA_UNSPEC,
+ ROCKER_TLV_OF_DPA_TABLE_ID, /* u16 */
+ ROCKER_TLV_OF_DPA_PRIORITY, /* u32 */
+ ROCKER_TLV_OF_DPA_HARDTIME, /* u32 */
+ ROCKER_TLV_OF_DPA_IDLETIME, /* u32 */
+ ROCKER_TLV_OF_DPA_COOKIE, /* u64 */
+ ROCKER_TLV_OF_DPA_IN_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_IN_LPORT_MASK, /* u32 */
+ ROCKER_TLV_OF_DPA_OUT_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_GOTO_TABLE_ID, /* u16 */
+ ROCKER_TLV_OF_DPA_GROUP_ID, /* u32 */
+ ROCKER_TLV_OF_DPA_GROUP_ID_LOWER, /* u32 */
+ ROCKER_TLV_OF_DPA_GROUP_COUNT, /* u16 */
+ ROCKER_TLV_OF_DPA_GROUP_IDS, /* u32 array */
+ ROCKER_TLV_OF_DPA_VLAN_ID, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_ID_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_VLAN_PCP_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_VLAN_ID, /* __be16 */
+ ROCKER_TLV_OF_DPA_NEW_VLAN_PCP, /* u8 */
+ ROCKER_TLV_OF_DPA_TUNNEL_ID, /* u32 */
+ ROCKER_TLV_OF_DPA_TUN_LOG_LPORT, /* u32 */
+ ROCKER_TLV_OF_DPA_ETHERTYPE, /* __be16 */
+ ROCKER_TLV_OF_DPA_DST_MAC, /* binary */
+ ROCKER_TLV_OF_DPA_DST_MAC_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_MAC, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_MAC_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_IP_PROTO, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_PROTO_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_DSCP_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_IP_DSCP, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_ECN, /* u8 */
+ ROCKER_TLV_OF_DPA_IP_ECN_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_DST_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_DST_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_DST_IPV6, /* binary */
+ ROCKER_TLV_OF_DPA_DST_IPV6_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_IPV6, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_IPV6_MASK, /* binary */
+ ROCKER_TLV_OF_DPA_SRC_ARP_IP, /* __be32 */
+ ROCKER_TLV_OF_DPA_SRC_ARP_IP_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_L4_DST_PORT, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_DST_PORT_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_SRC_PORT, /* __be16 */
+ ROCKER_TLV_OF_DPA_L4_SRC_PORT_MASK, /* __be16 */
+ ROCKER_TLV_OF_DPA_ICMP_TYPE, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_TYPE_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_CODE, /* u8 */
+ ROCKER_TLV_OF_DPA_ICMP_CODE_MASK, /* u8 */
+ ROCKER_TLV_OF_DPA_IPV6_LABEL, /* __be32 */
+ ROCKER_TLV_OF_DPA_IPV6_LABEL_MASK, /* __be32 */
+ ROCKER_TLV_OF_DPA_QUEUE_ID_ACTION, /* u8 */
+ ROCKER_TLV_OF_DPA_NEW_QUEUE_ID, /* u8 */
+ ROCKER_TLV_OF_DPA_CLEAR_ACTIONS, /* u32 */
+ ROCKER_TLV_OF_DPA_POP_VLAN, /* u8 */
+ ROCKER_TLV_OF_DPA_TTL_CHECK, /* u8 */
+ ROCKER_TLV_OF_DPA_COPY_CPU_ACTION, /* u8 */
+
+ __ROCKER_TLV_OF_DPA_MAX,
+ ROCKER_TLV_OF_DPA_MAX = __ROCKER_TLV_OF_DPA_MAX - 1,
+};
+
+/* OF-DPA table IDs */
+
+enum rocker_of_dpa_table_id {
+ ROCKER_OF_DPA_TABLE_ID_INGRESS_PORT = 0,
+ ROCKER_OF_DPA_TABLE_ID_VLAN = 10,
+ ROCKER_OF_DPA_TABLE_ID_TERMINATION_MAC = 20,
+ ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING = 30,
+ ROCKER_OF_DPA_TABLE_ID_MULTICAST_ROUTING = 40,
+ ROCKER_OF_DPA_TABLE_ID_BRIDGING = 50,
+ ROCKER_OF_DPA_TABLE_ID_ACL_POLICY = 60,
+};
+
+/* OF-DPA flow stats */
+enum {
+ ROCKER_TLV_OF_DPA_FLOW_STAT_UNSPEC,
+ ROCKER_TLV_OF_DPA_FLOW_STAT_DURATION, /* u32 */
+ ROCKER_TLV_OF_DPA_FLOW_STAT_RX_PKTS, /* u64 */
+ ROCKER_TLV_OF_DPA_FLOW_STAT_TX_PKTS, /* u64 */
+
+ __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX,
+ ROCKER_TLV_OF_DPA_FLOW_STAT_MAX = __ROCKER_TLV_OF_DPA_FLOW_STAT_MAX - 1,
+};
+
+/* OF-DPA group types */
+enum rocker_of_dpa_group_type {
+ ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE = 0,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_INTERFACE,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_MCAST,
+ ROCKER_OF_DPA_GROUP_TYPE_L3_ECMP,
+ ROCKER_OF_DPA_GROUP_TYPE_L2_OVERLAY,
+};
+
+/* OF-DPA group L2 overlay types */
+enum rocker_of_dpa_overlay_type {
+ ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_UCAST = 0,
+ ROCKER_OF_DPA_OVERLAY_TYPE_FLOOD_MCAST,
+ ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_UCAST,
+ ROCKER_OF_DPA_OVERLAY_TYPE_MCAST_MCAST,
+};
+
+/* OF-DPA group ID encoding */
+#define ROCKER_GROUP_TYPE_SHIFT 28
+#define ROCKER_GROUP_TYPE_MASK 0xf0000000
+#define ROCKER_GROUP_VLAN_SHIFT 16
+#define ROCKER_GROUP_VLAN_MASK 0x0fff0000
+#define ROCKER_GROUP_PORT_SHIFT 0
+#define ROCKER_GROUP_PORT_MASK 0x0000ffff
+#define ROCKER_GROUP_TUNNEL_ID_SHIFT 12
+#define ROCKER_GROUP_TUNNEL_ID_MASK 0x0ffff000
+#define ROCKER_GROUP_SUBTYPE_SHIFT 10
+#define ROCKER_GROUP_SUBTYPE_MASK 0x00000c00
+#define ROCKER_GROUP_INDEX_SHIFT 0
+#define ROCKER_GROUP_INDEX_MASK 0x0000ffff
+#define ROCKER_GROUP_INDEX_LONG_SHIFT 0
+#define ROCKER_GROUP_INDEX_LONG_MASK 0x0fffffff
+
+#define ROCKER_GROUP_TYPE_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_TYPE_MASK) >> ROCKER_GROUP_TYPE_SHIFT)
+#define ROCKER_GROUP_TYPE_SET(type) \
+ (((type) << ROCKER_GROUP_TYPE_SHIFT) & ROCKER_GROUP_TYPE_MASK)
+#define ROCKER_GROUP_VLAN_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_VLAN_ID_MASK) >> ROCKER_GROUP_VLAN_ID_SHIFT)
+#define ROCKER_GROUP_VLAN_SET(vlan_id) \
+ (((vlan_id) << ROCKER_GROUP_VLAN_SHIFT) & ROCKER_GROUP_VLAN_MASK)
+#define ROCKER_GROUP_PORT_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_PORT_MASK) >> ROCKER_GROUP_PORT_SHIFT)
+#define ROCKER_GROUP_PORT_SET(port) \
+ (((port) << ROCKER_GROUP_PORT_SHIFT) & ROCKER_GROUP_PORT_MASK)
+#define ROCKER_GROUP_INDEX_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_INDEX_MASK) >> ROCKER_GROUP_INDEX_SHIFT)
+#define ROCKER_GROUP_INDEX_SET(index) \
+ (((index) << ROCKER_GROUP_INDEX_SHIFT) & ROCKER_GROUP_INDEX_MASK)
+#define ROCKER_GROUP_INDEX_LONG_GET(group_id) \
+ (((group_id) & ROCKER_GROUP_INDEX_LONG_MASK) >> \
+ ROCKER_GROUP_INDEX_LONG_SHIFT)
+#define ROCKER_GROUP_INDEX_LONG_SET(index) \
+ (((index) << ROCKER_GROUP_INDEX_LONG_SHIFT) & \
+ ROCKER_GROUP_INDEX_LONG_MASK)
+
+#define ROCKER_GROUP_NONE 0
+#define ROCKER_GROUP_L2_INTERFACE(vlan_id, port) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_INTERFACE) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_PORT_SET(port))
+#define ROCKER_GROUP_L2_REWRITE(index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_REWRITE) |\
+ ROCKER_GROUP_INDEX_LONG_SET(index))
+#define ROCKER_GROUP_L2_MCAST(vlan_id, index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index))
+#define ROCKER_GROUP_L2_FLOOD(vlan_id, index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD) |\
+ ROCKER_GROUP_VLAN_SET(ntohs(vlan_id)) | ROCKER_GROUP_INDEX_SET(index))
+#define ROCKER_GROUP_L3_UNICAST(index) \
+ (ROCKER_GROUP_TYPE_SET(ROCKER_OF_DPA_GROUP_TYPE_L3_UCAST) |\
+ ROCKER_GROUP_INDEX_LONG_SET(index))
+
+/* Rocker general purpose registers */
+#define ROCKER_CONTROL 0x0300
+#define ROCKER_PORT_PHYS_COUNT 0x0304
+#define ROCKER_PORT_PHYS_LINK_STATUS 0x0310 /* 8-byte */
+#define ROCKER_PORT_PHYS_ENABLE 0x0318 /* 8-byte */
+#define ROCKER_SWITCH_ID 0x0320 /* 8-byte */
+
+/* Rocker control bits */
+#define ROCKER_CONTROL_RESET (1 << 0)
+
+#endif
--
1.9.3
^ permalink raw reply related
* [patch net-next v2 06/10] bridge: introduce fdb offloading via switchdev
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>
From: Scott Feldman <sfeldma@gmail.com>
Add two new ndos: ndo_sw_port_fdb_add/del to offload static bridge
fdb entries. Static bridge FDB entries are installed, for example,
using iproute2 bridge cmd:
bridge fdb add ADDR dev DEV master vlan VID
This would install ADDR into the bridge's FDB for port DEV on vlan VID. The
switch driver implements two ndo_swdev ops to add/delete FDB entries in the
switch device:
int ndo_sw_port_fdb_add(struct net_device *dev,
const unsigned char *addr,
u16 vid);
int ndo_sw_port_fdb_del(struct net_device *dev,
const unsigned char *addr,
u16 vid);
The driver returns 0 on success, negative error code on failure.
Note: the switch driver would not implement ndo_fdb_add/del/dump on a port
netdev as these are intended for devices maintaining their own FDB. In our
case, we want the Linux bridge to own the FBD.
Note: by default, the bridge does not filter on VLAN and only bridges untagged
traffic. To enable VLAN support, turn on VLAN filtering:
echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering
Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
include/linux/netdevice.h | 16 ++++++++++++++++
include/net/switchdev.h | 17 +++++++++++++++++
net/bridge/br_fdb.c | 10 +++++++++-
net/switchdev/switchdev.c | 41 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97eade9..116a19d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1023,6 +1023,16 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* Called to get an ID of the switch chip this port is part of.
* If driver implements this, it indicates that it represents a port
* of a switch chip.
+ *
+ * int (*ndo_sw_port_fdb_add)(struct net_device *dev,
+ * const unsigned char *addr,
+ * u16 vid);
+ * Called to add a fdb to switch device port.
+ *
+ * int (*ndo_sw_port_fdb_del)(struct net_device *dev,
+ * const unsigned char *addr,
+ * u16 vid);
+ * Called to delete a fdb from switch device port.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1177,6 +1187,12 @@ struct net_device_ops {
#ifdef CONFIG_NET_SWITCHDEV
int (*ndo_sw_parent_id_get)(struct net_device *dev,
struct netdev_phys_item_id *psid);
+ int (*ndo_sw_port_fdb_add)(struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid);
+ int (*ndo_sw_port_fdb_del)(struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid);
#endif
};
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 79bf9bd..130cef7 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -1,6 +1,7 @@
/*
* include/net/switchdev.h - Switch device API
* Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,6 +17,10 @@
int netdev_sw_parent_id_get(struct net_device *dev,
struct netdev_phys_item_id *psid);
+int netdev_sw_port_fdb_add(struct net_device *dev,
+ const unsigned char *addr, u16 vid);
+int netdev_sw_port_fdb_del(struct net_device *dev,
+ const unsigned char *addr, u16 vid);
#else
@@ -25,6 +30,18 @@ static inline int netdev_sw_parent_id_get(struct net_device *dev,
return -EOPNOTSUPP;
}
+static inline int netdev_sw_port_fdb_add(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int netdev_sw_port_fdb_del(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ return -EOPNOTSUPP;
+}
+
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6f6c95c..f6f8bb5 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -24,6 +24,7 @@
#include <linux/atomic.h>
#include <asm/unaligned.h>
#include <linux/if_vlan.h>
+#include <net/switchdev.h>
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
@@ -132,8 +133,12 @@ static void fdb_del_hw(struct net_bridge *br, const unsigned char *addr)
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
{
- if (f->is_static)
+ if (f->is_static) {
fdb_del_hw(br, f->addr.addr);
+ if (f->dst)
+ netdev_sw_port_fdb_del(f->dst->dev,
+ f->addr.addr, f->vlan_id);
+ }
hlist_del_rcu(&f->hlist);
fdb_notify(br, f, RTM_DELNEIGH);
@@ -755,18 +760,21 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
if (!fdb->is_static) {
fdb->is_static = 1;
fdb_add_hw(br, addr);
+ netdev_sw_port_fdb_add(source->dev, addr, vid);
}
} else if (state & NUD_NOARP) {
fdb->is_local = 0;
if (!fdb->is_static) {
fdb->is_static = 1;
fdb_add_hw(br, addr);
+ netdev_sw_port_fdb_add(source->dev, addr, vid);
}
} else {
fdb->is_local = 0;
if (fdb->is_static) {
fdb->is_static = 0;
fdb_del_hw(br, addr);
+ netdev_sw_port_fdb_del(source->dev, addr, vid);
}
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5010f646..93d47b7 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1,6 +1,7 @@
/*
* net/switchdev/switchdev.c - Switch device API
* Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014 Scott Feldman <sfeldma@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -31,3 +32,43 @@ int netdev_sw_parent_id_get(struct net_device *dev,
return ops->ndo_sw_parent_id_get(dev, psid);
}
EXPORT_SYMBOL(netdev_sw_parent_id_get);
+
+/**
+ * netdev_sw_port_fdb_add - Add a fdb into switch port
+ * @dev: port device
+ * @addr: mac address
+ * @vid: vlan id
+ *
+ * Add a fdb into switch port.
+ */
+int netdev_sw_port_fdb_add(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_sw_port_fdb_add)
+ return -EOPNOTSUPP;
+ WARN_ON(!ops->ndo_sw_parent_id_get);
+ return ops->ndo_sw_port_fdb_add(dev, addr, vid);
+}
+EXPORT_SYMBOL(netdev_sw_port_fdb_add);
+
+/**
+ * netdev_sw_port_fdb_del - Delete a fdb from switch port
+ * @dev: port device
+ * @addr: mac address
+ * @vid: vlan id
+ *
+ * Delete a fdb from switch port.
+ */
+int netdev_sw_port_fdb_del(struct net_device *dev,
+ const unsigned char *addr, u16 vid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_sw_port_fdb_del)
+ return -EOPNOTSUPP;
+ WARN_ON(!ops->ndo_sw_parent_id_get);
+ return ops->ndo_sw_port_fdb_del(dev, addr, vid);
+}
+EXPORT_SYMBOL(netdev_sw_port_fdb_del);
--
1.9.3
^ permalink raw reply related
* [patch net-next v2 04/10] net-sysfs: expose physical switch id for particular device
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
net/core/net-sysfs.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 55dc4da..8e6603c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -12,6 +12,7 @@
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <net/switchdev.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>
@@ -399,6 +400,28 @@ static ssize_t phys_port_id_show(struct device *dev,
}
static DEVICE_ATTR_RO(phys_port_id);
+static ssize_t phys_switch_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ struct netdev_phys_item_id ppid;
+
+ ret = netdev_sw_parent_id_get(netdev, &ppid);
+ if (!ret)
+ ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(phys_switch_id);
+
static struct attribute *net_class_attrs[] = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
@@ -423,6 +446,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_flags.attr,
&dev_attr_tx_queue_len.attr,
&dev_attr_phys_port_id.attr,
+ &dev_attr_phys_switch_id.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
--
1.9.3
^ permalink raw reply related
* [patch net-next v2 03/10] rtnl: expose physical switch id for particular device
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>
The netdevice represents a port in a switch, it will expose
IFLA_PHYS_SWITCH_ID value via rtnl. Two netdevices with the same value
belong to one physical switch.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
include/uapi/linux/if_link.h | 1 +
net/core/rtnetlink.c | 26 +++++++++++++++++++++++++-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7072d83..4163753 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -145,6 +145,7 @@ enum {
IFLA_CARRIER,
IFLA_PHYS_PORT_ID,
IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
__IFLA_MAX
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1087c6d..f839354 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -43,6 +43,7 @@
#include <linux/inet.h>
#include <linux/netdevice.h>
+#include <net/switchdev.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
@@ -868,7 +869,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
- + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -967,6 +969,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ int err;
+ struct netdev_phys_item_id psid;
+
+ err = netdev_sw_parent_id_get(dev, &psid);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
@@ -1039,6 +1059,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_phys_port_id_fill(skb, dev))
goto nla_put_failure;
+ if (rtnl_phys_switch_id_fill(skb, dev))
+ goto nla_put_failure;
+
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (attr == NULL)
@@ -1198,6 +1221,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
+ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
--
1.9.3
^ permalink raw reply related
* [patch net-next v2 02/10] net: introduce generic switch devices support
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>
The goal of this is to provide a possibility to support various switch
chips. Drivers should implement relevant ndos to do so. Now there is
only one ndo defined:
- for getting physical switch id is in place.
Note that user can use random port netdevice to access the switch.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
Documentation/networking/switchdev.txt | 59 ++++++++++++++++++++++++++++++++++
MAINTAINERS | 7 ++++
include/linux/netdevice.h | 10 ++++++
include/net/switchdev.h | 30 +++++++++++++++++
net/Kconfig | 1 +
net/Makefile | 3 ++
net/switchdev/Kconfig | 13 ++++++++
net/switchdev/Makefile | 5 +++
net/switchdev/switchdev.c | 33 +++++++++++++++++++
9 files changed, 161 insertions(+)
create mode 100644 Documentation/networking/switchdev.txt
create mode 100644 include/net/switchdev.h
create mode 100644 net/switchdev/Kconfig
create mode 100644 net/switchdev/Makefile
create mode 100644 net/switchdev/switchdev.c
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
new file mode 100644
index 0000000..98be76c
--- /dev/null
+++ b/Documentation/networking/switchdev.txt
@@ -0,0 +1,59 @@
+Switch (and switch-ish) device drivers HOWTO
+===========================
+
+Please note that the word "switch" is here used in very generic meaning.
+This include devices supporting L2/L3 but also various flow offloading chips,
+including switches embedded into SR-IOV NICs.
+
+Lets describe a topology a bit. Imagine the following example:
+
+ +----------------------------+ +---------------+
+ | SOME switch chip | | CPU |
+ +----------------------------+ +---------------+
+ port1 port2 port3 port4 MNGMNT | PCI-E |
+ | | | | | +---------------+
+ PHY PHY | | | | NIC0 NIC1
+ | | | | | |
+ | | +- PCI-E -+ | |
+ | +------- MII -------+ |
+ +------------- MII ------------+
+
+In this example, there are two independent lines between the switch silicon
+and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are
+separate from the switch driver. SOME switch chip is by managed by a driver
+via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be
+connected to some other type of bus.
+
+Now, for the previous example show the representation in kernel:
+
+ +----------------------------+ +---------------+
+ | SOME switch chip | | CPU |
+ +----------------------------+ +---------------+
+ sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT | PCI-E |
+ | | | | | +---------------+
+ PHY PHY | | | | eth0 eth1
+ | | | | | |
+ | | +- PCI-E -+ | |
+ | +------- MII -------+ |
+ +------------- MII ------------+
+
+Lets call the example switch driver for SOME switch chip "SOMEswitch". This
+driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX
+created for each port of a switch. These netdevices are instances
+of "SOMEswitch" driver. sw0pX netdevices serve as a "representation"
+of the switch chip. eth0 and eth1 are instances of some other existing driver.
+
+The only difference of the switch-port netdevice from the ordinary netdevice
+is that is implements couple more NDOs:
+
+ ndo_sw_parent_get_id - This returns the same ID for two port netdevices
+ of the same physical switch chip. This is
+ mandatory to be implemented by all switch drivers
+ and serves the caller for recognition of a port
+ netdevice.
+ ndo_sw_parent_* - Functions that serve for a manipulation of the switch
+ chip itself (it can be though of as a "parent" of the
+ port, therefore the name). They are not port-specific.
+ Caller might use arbitrary port netdevice of the same
+ switch and it will make no difference.
+ ndo_sw_port_* - Functions that serve for a port-specific manipulation.
diff --git a/MAINTAINERS b/MAINTAINERS
index 3a41fb0..776e078 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9003,6 +9003,13 @@ F: lib/swiotlb.c
F: arch/*/kernel/pci-swiotlb.c
F: include/linux/swiotlb.h
+SWITCHDEV
+M: Jiri Pirko <jiri@resnulli.us>
+L: netdev@vger.kernel.org
+S: Supported
+F: net/switchdev/
+F: include/net/switchdev.h
+
SYNOPSYS ARC ARCHITECTURE
M: Vineet Gupta <vgupta@synopsys.com>
S: Supported
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 71922e0..97eade9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1017,6 +1017,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* performing GSO on a packet. The device returns true if it is
* able to GSO the packet, false otherwise. If the return value is
* false the stack will do software GSO.
+ *
+ * int (*ndo_sw_parent_id_get)(struct net_device *dev,
+ * struct netdev_phys_item_id *psid);
+ * Called to get an ID of the switch chip this port is part of.
+ * If driver implements this, it indicates that it represents a port
+ * of a switch chip.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1168,6 +1174,10 @@ struct net_device_ops {
int (*ndo_get_lock_subclass)(struct net_device *dev);
bool (*ndo_gso_check) (struct sk_buff *skb,
struct net_device *dev);
+#ifdef CONFIG_NET_SWITCHDEV
+ int (*ndo_sw_parent_id_get)(struct net_device *dev,
+ struct netdev_phys_item_id *psid);
+#endif
};
/**
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
new file mode 100644
index 0000000..79bf9bd
--- /dev/null
+++ b/include/net/switchdev.h
@@ -0,0 +1,30 @@
+/*
+ * include/net/switchdev.h - Switch device API
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _LINUX_SWITCHDEV_H_
+#define _LINUX_SWITCHDEV_H_
+
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NET_SWITCHDEV
+
+int netdev_sw_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid);
+
+#else
+
+static inline int netdev_sw_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif
+
+#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/Kconfig b/net/Kconfig
index 99815b5..ff9ffc1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -228,6 +228,7 @@ source "net/vmw_vsock/Kconfig"
source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
+source "net/switchdev/Kconfig"
config RPS
boolean
diff --git a/net/Makefile b/net/Makefile
index 7ed1970..95fc694 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -73,3 +73,6 @@ obj-$(CONFIG_OPENVSWITCH) += openvswitch/
obj-$(CONFIG_VSOCKETS) += vmw_vsock/
obj-$(CONFIG_NET_MPLS_GSO) += mpls/
obj-$(CONFIG_HSR) += hsr/
+ifneq ($(CONFIG_NET_SWITCHDEV),)
+obj-y += switchdev/
+endif
diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig
new file mode 100644
index 0000000..1557545
--- /dev/null
+++ b/net/switchdev/Kconfig
@@ -0,0 +1,13 @@
+#
+# Configuration for Switch device support
+#
+
+config NET_SWITCHDEV
+ boolean "Switch (and switch-ish) device support (EXPERIMENTAL)"
+ depends on INET
+ ---help---
+ This module provides glue between core networking code and device
+ drivers in order to support hardware switch chips in very generic
+ meaning of the word "switch". This include devices supporting L2/L3 but
+ also various flow offloading chips, including switches embedded into
+ SR-IOV NICs.
diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile
new file mode 100644
index 0000000..5ed63ed
--- /dev/null
+++ b/net/switchdev/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Switch device API
+#
+
+obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
new file mode 100644
index 0000000..5010f646
--- /dev/null
+++ b/net/switchdev/switchdev.c
@@ -0,0 +1,33 @@
+/*
+ * net/switchdev/switchdev.c - Switch device API
+ * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <net/switchdev.h>
+
+/**
+ * netdev_sw_parent_id_get - Get ID of a switch
+ * @dev: port device
+ * @psid: switch ID
+ *
+ * Get ID of a switch this port is part of.
+ */
+int netdev_sw_parent_id_get(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_sw_parent_id_get)
+ return -EOPNOTSUPP;
+ return ops->ndo_sw_parent_id_get(dev, psid);
+}
+EXPORT_SYMBOL(netdev_sw_parent_id_get);
--
1.9.3
^ permalink raw reply related
* [patch net-next v2 01/10] net: rename netdev_phys_port_id to more generic name
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
In-Reply-To: <1415530280-9190-1-git-send-email-jiri@resnulli.us>
So this can be reused for identification of other "items" as well.
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +-
include/linux/netdevice.h | 16 ++++++++--------
net/core/dev.c | 2 +-
net/core/net-sysfs.c | 2 +-
net/core/rtnetlink.c | 6 +++---
8 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c4bd025..336ef3c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12537,7 +12537,7 @@ static int bnx2x_validate_addr(struct net_device *dev)
}
static int bnx2x_get_phys_port_id(struct net_device *netdev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct bnx2x *bp = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1a98e23..d749165 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7373,7 +7373,7 @@ static void i40e_del_vxlan_port(struct net_device *netdev,
#endif
static int i40e_get_phys_port_id(struct net_device *netdev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_pf *pf = np->vsi->back;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 0efbae9..bd007c3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2258,7 +2258,7 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st
#define PORT_ID_BYTE_LEN 8
static int mlx4_en_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_dev *mdev = priv->mdev->dev;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index f5e29f7..6e514d2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -460,7 +460,7 @@ static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter)
}
static int qlcnic_get_phys_port_id(struct net_device *netdev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
struct qlcnic_hardware_context *ahw = adapter->ahw;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 90ac959..71922e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -753,13 +753,13 @@ struct netdev_fcoe_hbainfo {
};
#endif
-#define MAX_PHYS_PORT_ID_LEN 32
+#define MAX_PHYS_ITEM_ID_LEN 32
-/* This structure holds a unique identifier to identify the
- * physical port used by a netdevice.
+/* This structure holds a unique identifier to identify some
+ * physical item (port for example) used by a netdevice.
*/
-struct netdev_phys_port_id {
- unsigned char id[MAX_PHYS_PORT_ID_LEN];
+struct netdev_phys_item_id {
+ unsigned char id[MAX_PHYS_ITEM_ID_LEN];
unsigned char id_len;
};
@@ -975,7 +975,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function.
*
* int (*ndo_get_phys_port_id)(struct net_device *dev,
- * struct netdev_phys_port_id *ppid);
+ * struct netdev_phys_item_id *ppid);
* Called to get ID of physical port of this device. If driver does
* not implement this, it is assumed that the hw is not able to have
* multiple net devices on single physical port.
@@ -1149,7 +1149,7 @@ struct net_device_ops {
int (*ndo_change_carrier)(struct net_device *dev,
bool new_carrier);
int (*ndo_get_phys_port_id)(struct net_device *dev,
- struct netdev_phys_port_id *ppid);
+ struct netdev_phys_item_id *ppid);
void (*ndo_add_vxlan_port)(struct net_device *dev,
sa_family_t sa_family,
__be16 port);
@@ -2878,7 +2878,7 @@ void dev_set_group(struct net_device *, int);
int dev_set_mac_address(struct net_device *, struct sockaddr *);
int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid);
+ struct netdev_phys_item_id *ppid);
struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
diff --git a/net/core/dev.c b/net/core/dev.c
index 70bb609..9c67f36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5818,7 +5818,7 @@ EXPORT_SYMBOL(dev_change_carrier);
* Get device physical port ID
*/
int dev_get_phys_port_id(struct net_device *dev,
- struct netdev_phys_port_id *ppid)
+ struct netdev_phys_item_id *ppid)
{
const struct net_device_ops *ops = dev->netdev_ops;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9dd0669..55dc4da 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -387,7 +387,7 @@ static ssize_t phys_port_id_show(struct device *dev,
return restart_syscall();
if (dev_isalive(netdev)) {
- struct netdev_phys_port_id ppid;
+ struct netdev_phys_item_id ppid;
ret = dev_get_phys_port_id(netdev, &ppid);
if (!ret)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a688268..1087c6d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,7 +868,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
- + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -952,7 +952,7 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
- struct netdev_phys_port_id ppid;
+ struct netdev_phys_item_id ppid;
err = dev_get_phys_port_id(dev, &ppid);
if (err) {
@@ -1196,7 +1196,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
- [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
+ [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
};
--
1.9.3
^ permalink raw reply related
* [patch net-next v2 00/10] introduce rocker switch driver with hardware accelerated datapath api - phase 1: bridge fdb offload
From: Jiri Pirko @ 2014-11-09 10:51 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse, pshelar,
azhou, ben, stephen, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl
Hi all.
This patchset is just the first phase of switch and switch-ish device
support api in kernel. Note that the api will extend (our complete work
can be pulled from https://github.com/jpirko/net-next-rocker).
So what this patchset includes:
- introduce switchdev api for implementing switch drivers (so far
only linux bridge fdb offload is covered)
- introduce rocker switch driver which implements switchdev api
As to the discussion if there is need to have specific class of device
representing the switch itself, so far we found no need to introduce that.
But we are generally ok with the idea and when the time comes and it will
be needed, it can be easily introduced without any disturbance.
This patchset introduces switch id export through rtnetlink and sysfs,
which is similar to what we have for port id in SR-IOV. I will send iproute2
patchset for showing the switch id for port netdevs once this is applied.
For detailed description, please see individual patches.
v1->v2:
- addressed all DaveM's comments
Jiri Pirko (5):
net: rename netdev_phys_port_id to more generic name
net: introduce generic switch devices support
rtnl: expose physical switch id for particular device
net-sysfs: expose physical switch id for particular device
rocker: introduce rocker switch driver
Scott Feldman (5):
bridge: introduce fdb offloading via switchdev
bridge: call netdev_sw_port_stp_update when bridge port STP status
changes
bridge: add API to notify bridge driver of learned FBD on offloaded
device
rocker: implement rocker ofdpa flow table manipulation
rocker: implement L2 bridge offloading
Documentation/networking/switchdev.txt | 59 +
MAINTAINERS | 14 +
drivers/net/ethernet/Kconfig | 1 +
drivers/net/ethernet/Makefile | 1 +
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +-
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 2 +-
drivers/net/ethernet/rocker/Kconfig | 27 +
drivers/net/ethernet/rocker/Makefile | 5 +
drivers/net/ethernet/rocker/rocker.c | 4182 ++++++++++++++++++++++
drivers/net/ethernet/rocker/rocker.h | 427 +++
include/linux/if_bridge.h | 18 +
include/linux/netdevice.h | 48 +-
include/net/switchdev.h | 53 +
include/uapi/linux/if_link.h | 1 +
net/Kconfig | 1 +
net/Makefile | 3 +
net/bridge/br_fdb.c | 94 +-
net/bridge/br_netlink.c | 2 +
net/bridge/br_stp.c | 4 +
net/bridge/br_stp_if.c | 3 +
net/bridge/br_stp_timer.c | 2 +
net/core/dev.c | 2 +-
net/core/net-sysfs.c | 26 +-
net/core/rtnetlink.c | 30 +-
net/switchdev/Kconfig | 13 +
net/switchdev/Makefile | 5 +
net/switchdev/switchdev.c | 93 +
29 files changed, 5104 insertions(+), 18 deletions(-)
create mode 100644 Documentation/networking/switchdev.txt
create mode 100644 drivers/net/ethernet/rocker/Kconfig
create mode 100644 drivers/net/ethernet/rocker/Makefile
create mode 100644 drivers/net/ethernet/rocker/rocker.c
create mode 100644 drivers/net/ethernet/rocker/rocker.h
create mode 100644 include/net/switchdev.h
create mode 100644 net/switchdev/Kconfig
create mode 100644 net/switchdev/Makefile
create mode 100644 net/switchdev/switchdev.c
--
1.9.3
^ permalink raw reply
* Re: [PATCH] checkpatch: Add --strict preference for #defines using BIT(foo)
From: Jiri Pirko @ 2014-11-09 9:50 UTC (permalink / raw)
To: Joe Perches; +Cc: Andrew Morton, David Miller, netdev, LKML
In-Reply-To: <1415394939.23530.20.camel@perches.com>
Fri, Nov 07, 2014 at 10:15:39PM CET, joe@perches.com wrote:
>Using BIT(foo) and BIT_ULL(bar) is more common now.
>Suggest using these macros over #defines with 1<<value.
Joe, regarding the other Dave's comment, the multiline one, that is also
not covered by checkpatch. Would please you take care of that as well?
Thanks.
>
>Add a --fix option too.
>
>Signed-off-by: Joe Perches <joe@perches.com>
>---
> scripts/checkpatch.pl | 11 +++++++++++
> 1 file changed, 11 insertions(+)
>
>diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
>index 893cbd5..b5dc3f4 100755
>--- a/scripts/checkpatch.pl
>+++ b/scripts/checkpatch.pl
>@@ -4973,6 +4973,17 @@ sub process {
> }
> }
>
>+# check for #defines like: 1 << <digit> that could be BIT(digit)
>+ if ($line =~ /#\s*define\s+\w+\s+\(?\s*1\s*([ulUL]*)\s*\<\<\s*(?:\d+|$Ident)\s*\)?/) {
>+ my $ull = "";
>+ $ull = "_ULL" if (defined($1) && $1 =~ /ll/i);
>+ if (CHK("BIT_MACRO",
>+ "Prefer using the BIT$ull macro\n" . $herecurr) &&
>+ $fix) {
>+ $fixed[$fixlinenr] =~ s/\(?\s*1\s*[ulUL]*\s*<<\s*(\d+|$Ident)\s*\)?/BIT${ull}($1)/;
>+ }
>+ }
>+
> # check for case / default statements not preceded by break/fallthrough/switch
> if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
> my $has_break = 0;
>
>
^ permalink raw reply
* Re: [PATCH V2 net-next 2/2] net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE
From: Or Gerlitz @ 2014-11-09 8:24 UTC (permalink / raw)
To: Amir Vadai, Ido Shamay
Cc: David S. Miller, Linux Netdev List, Matan Barak, Saeed Mahameed,
Shani Michaeli, Jerry Chu
In-Reply-To: <545C8582.4070408@dev.mellanox.co.il>
On Fri, Nov 7, 2014 at 10:40 AM, Ido Shamay <idos@dev.mellanox.co.il> wrote:
> On 11/6/2014 6:28 PM, Or Gerlitz wrote:
>>
>> if (likely(dev->features & NETIF_F_RXCSUM)) {
>> - if ((cqe->status &
>> cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
>> - (cqe->checksum == cpu_to_be16(0xffff))) {
>> - ring->csum_ok++;
>> - ip_summed = CHECKSUM_UNNECESSARY;
>> + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP
>> |
>> +
>> MLX4_CQE_STATUS_UDP)) {
>> + if ((cqe->status &
>> cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
>> + cqe->checksum == cpu_to_be16(0xffff))
>> {
>> + ip_summed = CHECKSUM_UNNECESSARY;
>> + ring->csum_ok++;
>> + } else {
>> + ip_summed = CHECKSUM_NONE;
>> + ring->csum_none++;
>> + }
>> } else {
>> - ip_summed = CHECKSUM_NONE;
>> - ring->csum_none++;
>> + if (priv->flags &
>> MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
>> + (cqe->status &
>> cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
>> +
>> MLX4_CQE_STATUS_IPV6))) {
>> + ip_summed = CHECKSUM_COMPLETE;
>> + ring->csum_complete++;
>> + } else {
>> + ip_summed = CHECKSUM_NONE;
>> + ring->csum_none++;
>> + }
>> }
>> } else {
>> ip_summed = CHECKSUM_NONE;
>> @@ -776,6 +874,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
>> struct mlx4_en_cq *cq, int bud
>> if (!nr)
>> goto next;
>>
>> + if (ip_summed == CHECKSUM_COMPLETE) {
>> + va =
>> skb_frag_address(skb_shinfo(gro_skb)->frags);
>> + if (check_csum(cqe, gro_skb, va,
>> ring->hwtstamp_rx_filter)) {
>> + ip_summed = CHECKSUM_NONE;
>> + ring->csum_none++;
>
>
> When check_csum returns non zero value, then ring->csum_complete counter was
> already incremented, and now we are incrementing ring->csum_none (for the
> same packet), so need to --ring->csum_complete.
Thanks Ido, I'll fix that.
Amir, I saw the little noise you just reported on too but didn't want
to re-spin V1 just for that, thanks for catching this up.
>
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH net-next] net/fm10k: Avoid double setting of NETIF_F_SG for the HW encapsulation feature mask
From: Or Gerlitz @ 2014-11-09 8:15 UTC (permalink / raw)
To: Vick, Matthew; +Cc: Jeff Kirsher, netdev@vger.kernel.org
In-Reply-To: <D0827E2E.5ED4E%matthew.vick@intel.com>
On 11/7/2014 11:57 PM, Vick, Matthew wrote:
> Good catch, Or! Thank you for taking care of this!
sure
> Someone can correct me if I'm mistaken (I thought checkpatch would
> complain about this, but it doesn't seem to be), but I believe the start
> of the lines should match up like they are for the dev->features. Would
> you like to submit a V2 with this change or would you like me to do it
> (giving you credit via your Reported-by)?
I just submitted V1 along your request
^ permalink raw reply
* Re: [PATCH V2 net-next 2/2] net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE
From: Amir Vadai @ 2014-11-09 8:14 UTC (permalink / raw)
To: Or Gerlitz, David S. Miller
Cc: netdev, Matan Barak, Saeed Mahameed, Shani Michaeli, Ido Shamay,
Jerry Chu
In-Reply-To: <1415291324-22069-3-git-send-email-ogerlitz@mellanox.com>
On 11/6/2014 6:28 PM, Or Gerlitz wrote:
> From: Shani Michaeli <shanim@mellanox.com>
>
> When processing received traffic, pass CHECKSUM_COMPLETE status to the
> stack, with calculated checksum for non TCP/UDP packets (such
> as GRE or ICMP).
>
> Although the stack expects checksum which doesn't include the pseudo
> header, the HW adds it. To address that, we are subtracting the pseudo
> header checksum from the checksum value provided by the HW.
>
> In the IPv6 case, we also compute/add the IP header checksum which
> is not added by the HW for such packets.
>
> Cc: Jerry Chu <hkchu@google.com>
> Signed-off-by: Shani Michaeli <shanim@mellanox.com>
> Signed-off-by: Matan Barak <matanb@mellanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> ---
> drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +-
> drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 +
> drivers/net/ethernet/mellanox/mlx4/en_port.c | 2 +
> drivers/net/ethernet/mellanox/mlx4/en_rx.c | 127 +++++++++++++++++++++--
> drivers/net/ethernet/mellanox/mlx4/main.c | 9 ++
> drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 5 +-
> include/linux/mlx4/device.h | 1 +
> 7 files changed, 142 insertions(+), 9 deletions(-)
>
[...]
> @@ -702,8 +787,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> * and not performing the selftest or flb disabled
> */
> if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) {
> - struct ethhdr *ethh;
> dma_addr_t dma;
> + struct ethhdr *ethh;
> /* Get pointer to first fragment since we haven't
> * skb yet and cast it to ethhdr struct
> */
This hunk is not needed
> @@ -744,13 +829,26 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
> (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
[...]
Amir
^ permalink raw reply
* [PATCH V1 net-next] net/fm10k: Avoid double setting of NETIF_F_SG for the HW encapsulation feature mask
From: Or Gerlitz @ 2014-11-09 8:13 UTC (permalink / raw)
To: Jeff Kirsher; +Cc: netdev, Or Gerlitz
The networking core does it for the driver during registration time.
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
---
changes from v0:
- set alignment to be per the request of Matthew Vick
drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 13 ++++++-------
1 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 8811364..14882dc 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -1414,13 +1414,12 @@ struct net_device *fm10k_alloc_netdev(void)
dev->vlan_features |= dev->features;
/* configure tunnel offloads */
- dev->hw_enc_features = NETIF_F_IP_CSUM |
- NETIF_F_TSO |
- NETIF_F_TSO6 |
- NETIF_F_TSO_ECN |
- NETIF_F_GSO_UDP_TUNNEL |
- NETIF_F_IPV6_CSUM |
- NETIF_F_SG;
+ dev->hw_enc_features |= NETIF_F_IP_CSUM |
+ NETIF_F_TSO |
+ NETIF_F_TSO6 |
+ NETIF_F_TSO_ECN |
+ NETIF_F_GSO_UDP_TUNNEL |
+ NETIF_F_IPV6_CSUM;
/* we want to leave these both on as we cannot disable VLAN tag
* insertion or stripping on the hardware since it is contained
--
1.7.1
^ permalink raw reply related
* [PATCH 3/3 net-next] sunvnet: Add missing rcu_read_unlock() in vnet_start_xmit
From: Sowmini Varadhan @ 2014-11-09 1:42 UTC (permalink / raw)
To: davem, sowmini.varadhan; +Cc: netdev, david.stevens
The out_dropped label will only do rcu_read_unlock for non-null port.
So add the missing rcu_read_unlock() when bailing due to non-null port.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
drivers/net/ethernet/sun/sunvnet.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 826b385..55d66c9 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -981,8 +981,10 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
port = __tx_port_find(vp, skb);
- if (unlikely(!port))
+ if (unlikely(!port)) {
+ rcu_read_unlock();
goto out_dropped;
+ }
if (skb->len > port->rmtu) {
unsigned long localmtu = port->rmtu - ETH_HLEN;
--
1.8.4.2
^ permalink raw reply related
* [PATCH 2/3 net-next] sunvnet: vnet_ack() should check if !start_cons to send a missed trigger
From: Sowmini Varadhan @ 2014-11-09 1:42 UTC (permalink / raw)
To: davem, sowmini.varadhan; +Cc: netdev, david.stevens
As per comments in vnet_start_xmit, for the edge case
when outgoing vnet_start_xmit() data and an incoming STOPPED
ACK cross each other in flight, we may need to send the missed
START trigger from maybe_tx_wakeup() after checking for a
false value of start_cons
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
drivers/net/ethernet/sun/sunvnet.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index deb395a..826b385 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -572,7 +572,7 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf)
*/
dr->cons = next_idx(end, dr);
desc = vio_dring_entry(dr, dr->cons);
- if (desc->hdr.state == VIO_DESC_READY && port->start_cons) {
+ if (desc->hdr.state == VIO_DESC_READY && !port->start_cons) {
/* vnet_start_xmit() just populated this dring but missed
* sending the "start" LDC message to the consumer.
* Send a "start" trigger on its behalf.
--
1.8.4.2
^ permalink raw reply related
* Re: [PATCH net-next] dccp: Convert DCCP_WARN to net_warn_ratelimited
From: David Miller @ 2014-11-09 1:42 UTC (permalink / raw)
To: joe; +Cc: netdev, linux-kernel, gerrit
In-Reply-To: <1415307221.21526.0.camel@perches.com>
From: Joe Perches <joe@perches.com>
Date: Thu, 06 Nov 2014 12:53:41 -0800
> Remove the dependency on the "warning" sysctl (net_msg_warn)
> which is only used by the LIMIT_NETDEBUG macro.
>
> Convert the LIMIT_NETDEBUG use in DCCP_WARN to the more
> common net_warn_ratelimited mechanism.
>
> This still ratelimits based on the net_ratelimit()
> function, but removes the check for the sysctl.
>
> Signed-off-by: Joe Perches <joe@perches.com>
Applied, thanks Joe.
> What about the LIMIT_NETDEBUG stuff now?
>
> Maybe all of them might as well be net_dbg_ratelimited
> even if it changes some of the logging levels.
I think a straight conversion to net_dbg_ratelimited would
be fine.
^ permalink raw reply
* [PATCH 1/3 net-next] sunvnet: Fix race between vnet_start_xmit() and vnet_ack()
From: Sowmini Varadhan @ 2014-11-09 1:41 UTC (permalink / raw)
To: davem, sowmini.varadhan; +Cc: netdev, david.stevens
When vnet_start_xmit() is concurrent with vnet_ack(), we may
have a race that looks like:
thread 1 thread 2
vnet_start_xmit vnet_event_napi -> vnet_rx
__vnet_tx_trigger for some desc X
at this point dr->prod == X
peer sends back a stopped ack for X
we process X, but X == dr->prod
so we bail out in vnet_ack with
!idx_is_pending
update dr->prod
As a result of the fact that we never processed the stopped ack for X,
the Tx path is led to incorrectly believe that the peer is still
"started" and reading, but the peer has stopped reading, which will
ultimately end in flow-control assertions.
The fix is to synchronize the above 2 paths on the netif_tx_lock.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
drivers/net/ethernet/sun/sunvnet.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 5c5fb59..deb395a 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -559,15 +559,17 @@ static int vnet_ack(struct vnet_port *port, void *msgbuf)
return 0;
end = pkt->end_idx;
- if (unlikely(!idx_is_pending(dr, end)))
- return 0;
-
vp = port->vp;
dev = vp->dev;
+ netif_tx_lock(dev);
+ if (unlikely(!idx_is_pending(dr, end))) {
+ netif_tx_unlock(dev);
+ return 0;
+ }
+
/* sync for race conditions with vnet_start_xmit() and tell xmit it
* is time to send a trigger.
*/
- netif_tx_lock(dev);
dr->cons = next_idx(end, dr);
desc = vio_dring_entry(dr, dr->cons);
if (desc->hdr.state == VIO_DESC_READY && port->start_cons) {
--
1.8.4.2
^ permalink raw reply related
* [PATCH 0/3 net-next] sunvnet: edge-case/race-conditions bug fixes
From: Sowmini Varadhan @ 2014-11-09 1:41 UTC (permalink / raw)
To: davem, sowmini.varadhan; +Cc: netdev, david.stevens
This patch series contains fixes for race-conditions in sunvnet,
that can encountered when there is a difference in latency between
producer and consumer.
Patch 1 addresses a case when the STOPPED LDC ack from a peer is
processed before vnet_start_xmit can finish updating the dr->prod
state.
Patch 2 fixes the edge-case when outgoing data and incoming
stopped-ack cross each other in flight.
Patch 3 adds a missing rcu_read_unlock(), found by code-inspection.
Sowmini Varadhan (3):
Fix race between vnet_start_xmit() and vnet_ack()
vnet_ack() should check if !start_cons to send a missed trigger
Add missing rcu_read_unlock() in vnet_start_xmit
drivers/net/ethernet/sun/sunvnet.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
--
1.8.4.2
^ permalink raw reply
* e100: Laptop battery drain and WoL settings from EEPROM
From: Ondrej Zary @ 2014-11-08 23:15 UTC (permalink / raw)
To: e1000-devel; +Cc: netdev, Kernel development list
Hello,
there is long-standing problem with battery drain after turning off at least
some Toshiba laptops, see
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/110784
I have the same problem with Toshiba Portege R100. When I shut it down in
Linux, the battery is drained to zero in a couple of days. I noticed that the
LAN port is still active, even when AC disconnected.
The WoL is enabled by default by e100 driver:
# ethtool eth0
Settings for eth0:
Supported ports: [ TP MII ]
Supported link modes: 10baseT/Half 10baseT/Full
100baseT/Half 100baseT/Full
Supported pause frame use: No
Supports auto-negotiation: Yes
Advertised link modes: 10baseT/Half 10baseT/Full
100baseT/Half 100baseT/Full
Advertised pause frame use: Symmetric
Advertised auto-negotiation: Yes
Link partner advertised link modes: 10baseT/Half
Link partner advertised pause frame use: No
Link partner advertised auto-negotiation: No
Speed: 10Mb/s
Duplex: Half
Port: MII
PHYAD: 1
Transceiver: internal
Auto-negotiation: on
Supports Wake-on: g
Wake-on: g
Current message level: 0x00000007 (7)
drv probe link
Link detected: yes
By this code:
/* Wol magic packet can be enabled from eeprom */
if ((nic->mac >= mac_82558_D101_A4) &&
(nic->eeprom[eeprom_id] & eeprom_id_wol)) {
nic->flags |= wol_magic;
device_set_wakeup_enable(&pdev->dev, true);
}
because the WoL bit is set in EEPROM ID word:
# ethtool -e eth0
Offset Values
------ ------
0x0000: xx xx xx xx xx xx 03 1b 00 00 01 02 01 47 00 00
0x0010: 00 00 00 00 a2 49 01 00 79 11 7f 00 00 00 00 00
^^ bit 5 here
0x0020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0040: 00 00 00 00 00 00 3d 10 00 00 00 00 00 00 00 00
0x0050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0060: e4 00 3f 40 09 41 00 00 00 00 00 00 00 00 00 00
0x0070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f7 fe
Looks like this laptop is probably WoL-capable even on battery.
Measured the current from AC adapter:
around 20mA with WoL inactive (shut down from Windows or by power button in GRUB)
around 40mA with WoL active (shut down from Linux)
So to work-around this problem, users must disable WoL manually on each boot.
Maybe the driver should ignore the EEPROM WoL bit on Toshiba susbsystem IDs?
Or completely, like Windows driver does?
Ethernet controller details:
02:08.0 Ethernet controller [0200]: Intel Corporation 82801DB PRO/100 VE (MOB) Ethernet Controller [8086:103d] (rev 83)
Subsystem: Toshiba America Info Systems Device [1179:0001]
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 64 (2000ns min, 14000ns max), Cache Line Size: 32 bytes
Interrupt: pin A routed to IRQ 11
Region 0: Memory at dfdff000 (32-bit, non-prefetchable) [size=4K]
Region 1: I/O ports at cf40 [size=64]
Capabilities: [dc] Power Management version 2
Flags: PMEClk- DSI+ D1+ D2+ AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 NoSoftRst- PME-Enable- DSel=0 DScale=2 PME-
Kernel driver in use: e100
--
Ondrej Zary
^ permalink raw reply
* Re: [PATCH net-next v2 2/3] r8152: clear theflagofSCHEDULE_TASKLETin tasklet
From: Francois Romieu @ 2014-11-08 22:11 UTC (permalink / raw)
To: Hayes Wang
Cc: David Miller, netdev@vger.kernel.org, nic_swsd,
linux-kernel@vger.kernel.org, linux-usb@vger.kernel.org
In-Reply-To: <0835B3720019904CB8F7AA43166CEEB2ECDFC2@RTITMBSV03.realtek.com.tw>
Hayes Wang <hayeswang@realtek.com> :
> Francois Romieu [mailto:romieu@fr.zoreil.com]
> > Sent: Monday, November 03, 2014 6:53 AM
> [...]
> > test_and_clear_bit (dense) or clear_bit would be more idiomatic.
>
> Excuse me. Any suggestion?
> Should I use clear_bit directly, or something else?
> Or, do I have to remove this patch?
The performance explanation leaves me a bit unconvinced. Without any
figure one could simply go for the always locked clear_bit because of:
1. the "I'm racy" message that the open-coded test + set sends
2. the extra work needed to avoid 1 (comment, explain, ...).
The extra time could thus be used to see what happens when napi is
shoehorned in this tasklet machinery. I'd naively expect it to be
relevant for efficiency.
I won't mind if your agenda is completely different. :o)
--
Ueimor
^ permalink raw reply
* Re: [PATCH net-next] PPC: bpf_jit_comp: add SKF_AD_HATYPE instruction
From: Alexei Starovoitov @ 2014-11-08 17:59 UTC (permalink / raw)
To: Denis Kirjanov
Cc: netdev@vger.kernel.org, linuxppc-dev, Daniel Borkmann,
Philippe Bergheaud
In-Reply-To: <1415253755-4001-1-git-send-email-kda@linux-powerpc.org>
On Wed, Nov 5, 2014 at 10:02 PM, Denis Kirjanov <kda@linux-powerpc.org> wrote:
> Add BPF extension SKF_AD_HATYPE to ppc JIT to check
> the hw type of the interface
>
> JIT off:
> [ 69.106783] test_bpf: #20 LD_HATYPE 48 48 PASS
> JIT on:
> [ 64.721757] test_bpf: #20 LD_HATYPE 7 6 PASS
>
> CC: Alexei Starovoitov<alexei.starovoitov@gmail.com>
> CC: Daniel Borkmann<dborkman@redhat.com>
> CC: Philippe Bergheaud<felix@linux.vnet.ibm.com>
> Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
> ---
> arch/powerpc/net/bpf_jit_comp.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index d110e28..8bf4fc2 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -412,6 +412,22 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
> PPC_ANDI(r_A, r_A, PKT_TYPE_MAX);
> PPC_SRWI(r_A, r_A, 5);
> break;
> + case BPF_ANC | SKF_AD_HATYPE:
> + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
> + PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
> + dev));
> + PPC_CMPDI(r_scratch1, 0);
> + if (ctx->pc_ret0 != -1) {
> + PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
> + } else {
> + /* Exit, returning 0; first pass hits here. */
> + PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
please use canonical formatting ctx->idx * 4 + 12
> + PPC_LI(r_ret, 0);
> + PPC_JMP(exit_addr);
> + }
> + PPC_LHZ_OFFS(r_A, r_scratch1,
> + offsetof(struct net_device, type));
the whole thing looks like copy paste from 'case ifindex'.
Would be nice to handle them together to reduce
duplicated code, sine only last load is different.
Also in commit log please do _both_ runs with JIT on.
You should see a difference before/after applying this patch.
^ permalink raw reply
* [PATCH] drivers: atm: eni: Add pci_dma_mapping_error() call
From: Tina Johnson @ 2014-11-08 16:18 UTC (permalink / raw)
To: chas; +Cc: linux-atm-general, netdev, linux-kernel, julia.lawall,
Tina Johnson
Added a pci_dma_mapping_error() call to check for mapping errors before
further using the dma handle. Unchecked dma handles were found using
Coccinelle:
@rule1@
expression e1;
identifier x;
@@
*x = pci_map_single(...);
... when != pci_dma_mapping_error(e1,x)
Signed-off-by: Tina Johnson <tinajohnson.1234@gmail.com>
Acked-by: Julia Lawall <julia.lawall@lip6.fr>
---
drivers/atm/eni.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index d65975a..6450a36 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -356,6 +356,8 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
if (skb) {
paddr = pci_map_single(eni_dev->pci_dev,skb->data,skb->len,
PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(eni_dev->pci_dev, paddr))
+ goto trouble;
ENI_PRV_PADDR(skb) = paddr;
if (paddr & 3)
printk(KERN_CRIT DEV_LABEL "(itf %d): VCI %d has "
--
1.7.10.4
^ permalink raw reply related
* alx can't send packet larger than 750 bytes
From: Konrad Mosoń @ 2014-11-08 16:06 UTC (permalink / raw)
To: netdev, nic-devel
Hello there,
I just submitted bug to bugzilla.redhat.com (not to mail kernel, cause i
didn't have problem on ArchLinux).
https://bugzilla.redhat.com/show_bug.cgi?id=1161861
Could be nice if you could look into it. It's kinda critical — network
just doesn't works.
Thanks for help,
Konrad Mosoń
PS
nic-devel@qca.qualcomm — sorry for doublemail but netdev@vger rejected
my mail at first time cause i used html.
^ permalink raw reply
* [PATCH 2/2] 8139too: Allow using the largest possible MTU
From: Alban Bedel @ 2014-11-08 11:48 UTC (permalink / raw)
To: linux-kernel
Cc: netdev, Bjorn Helgaas, Benoit Taine, Eric W. Biederman,
David S. Miller, Alban Bedel
In-Reply-To: <1415447316-12424-1-git-send-email-albeu@free.fr>
This driver allows MTU up to 1518 bytes which is not enought to run
batman-adv. Simply raise the maximum packet size up to the maximum
allowed by the transmit descriptor, 1792 bytes, giving a maximum MTU
of 1774 bytes.
Signed-off-by: Alban Bedel <albeu@free.fr>
---
drivers/net/ethernet/realtek/8139too.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 8387de9..e157541 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -183,7 +183,7 @@ static int debug = -1;
#define NUM_TX_DESC 4
/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/
-#define MAX_ETH_FRAME_SIZE 1536
+#define MAX_ETH_FRAME_SIZE 1792
/* max supported payload size */
#define MAX_ETH_DATA_SIZE (MAX_ETH_FRAME_SIZE - ETH_HLEN - ETH_FCS_LEN)
--
2.0.0
^ permalink raw reply related
* [PATCH 1/2] 8139too: Allow setting MTU larger than 1500
From: Alban Bedel @ 2014-11-08 11:48 UTC (permalink / raw)
To: linux-kernel
Cc: netdev, Bjorn Helgaas, Benoit Taine, Eric W. Biederman,
David S. Miller, Alban Bedel
Replace the default ndo_change_mtu callback with one that allow
setting MTU that the driver can handle.
Signed-off-by: Alban Bedel <albeu@free.fr>
---
drivers/net/ethernet/realtek/8139too.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 007b38c..8387de9 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -185,6 +185,9 @@ static int debug = -1;
/* max supported ethernet frame size -- must be at least (dev->mtu+14+4).*/
#define MAX_ETH_FRAME_SIZE 1536
+/* max supported payload size */
+#define MAX_ETH_DATA_SIZE (MAX_ETH_FRAME_SIZE - ETH_HLEN - ETH_FCS_LEN)
+
/* Size of the Tx bounce buffers -- must be at least (dev->mtu+14+4). */
#define TX_BUF_SIZE MAX_ETH_FRAME_SIZE
#define TX_BUF_TOT_LEN (TX_BUF_SIZE * NUM_TX_DESC)
@@ -920,11 +923,19 @@ static int rtl8139_set_features(struct net_device *dev, netdev_features_t featur
return 0;
}
+static int rtl8139_change_mtu(struct net_device *dev, int new_mtu)
+{
+ if (new_mtu < 68 || new_mtu > MAX_ETH_DATA_SIZE)
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
static const struct net_device_ops rtl8139_netdev_ops = {
.ndo_open = rtl8139_open,
.ndo_stop = rtl8139_close,
.ndo_get_stats64 = rtl8139_get_stats64,
- .ndo_change_mtu = eth_change_mtu,
+ .ndo_change_mtu = rtl8139_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = rtl8139_set_mac_address,
.ndo_start_xmit = rtl8139_start_xmit,
--
2.0.0
^ permalink raw reply related
* Re: [PATCH] ipvs: Keep skb->sk when allocating headroom on tunnel xmit
From: Julian Anastasov @ 2014-11-08 6:16 UTC (permalink / raw)
To: Calvin Owens
Cc: Simon Horman, Wensong Zhang, lvs-devel, linux-kernel, netdev,
agartrell, kernel-team
In-Reply-To: <545D43BD.8030203@fb.com>
Hello,
On Fri, 7 Nov 2014, Calvin Owens wrote:
> On 11/05/2014 01:21 AM, Julian Anastasov wrote:
> >
> > Hello,
> >
> > On Tue, 4 Nov 2014, Calvin Owens wrote:
> >
> > > ip_vs_prepare_tunneled_skb() ignores ->sk when allocating a new
> > > skb, either unconditionally setting ->sk to NULL or allowing
> > > the uninitialized ->sk from a newly allocated skb to leak through
> > > to the caller.
> > >
> > > This patch properly copies ->sk and increments its reference count.
> > >
> > > Signed-off-by: Calvin Owens <calvinowens@fb.com>
> >
> > Good catch. Please, extend your patch to
> > fix also the second place that has such error,
> > ip_vs_tunnel_xmit_v6. This call is missing from long time,
> > it was not needed. But commits that allow skb->sk (local
> > clients) already need it, eg.
>
> I'm not sure where exactly you mean: ip_vs_tunnel_xmit_v6() calls
> ip_vs_prepare_tunneled_skb() to do the allocation, so this patch covers that
> case.
>
> In older versions of the kernel, ip_vs_tunnel_xmit_v6() does it directly,
> could that be what you're looking at?
Sorry, it seems I was checking old branch.
Simon, please apply.
Acked-by: Julian Anastasov <ja@ssi.bg>
> > - f2428ed5e7bc89c7 ("ipvs: load balance ipv6 connections from a local
> > process"), 2.6.28
> > - 4856c84c1358b798 ("ipvs: load balance IPv4 connections from a local
> > process"), 2.6.28
> >
> > > ---
> > > net/netfilter/ipvs/ip_vs_xmit.c | 2 ++
> > > 1 file changed, 2 insertions(+)
> > >
> > > diff --git a/net/netfilter/ipvs/ip_vs_xmit.c
> > > b/net/netfilter/ipvs/ip_vs_xmit.c
> > > index 437a366..bd90bf8 100644
> > > --- a/net/netfilter/ipvs/ip_vs_xmit.c
> > > +++ b/net/netfilter/ipvs/ip_vs_xmit.c
> > > @@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int
> > > skb_af,
> > > new_skb = skb_realloc_headroom(skb, max_headroom);
> > > if (!new_skb)
> > > goto error;
> > > + if (skb->sk)
> > > + skb_set_owner_w(new_skb, skb->sk);
> > > consume_skb(skb);
> > > skb = new_skb;
> > > }
Regards
--
Julian Anastasov <ja@ssi.bg>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox