* [PATCH v9 5/8] thunderbolt: Networking transmit and receive
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
This patch provides the handling interface for sending and receiving
network packets between the hosts over the full communication route
(using the communication path established in the previous patch).
The Thunderbolt Network driver interfaces between the Linux network stack
and the hardware controller configuration to handle packet transmission and reception:
+----------------+ +----------------+
|Host 1 | |Host 2 |
| | | |
| +-------+ | | +-------+ |
| |Network| | | |Network| |
| |Stack | | | |Stack | |
| +-------+ | | +-------+ |
| ^ | | ^ |
| | | | | |
| v | | v |
| +-----------+ | | +-----------+ |
| |Thunderbolt| | | |Thunderbolt| |
| |Networking | | | |Networking | |
| |Driver | | | |Driver | |
| +-----------+ | | +-----------+ |
| ^ | | ^ |
| | | | | |
| v | | v |
| +-----------+ | | +-----------+ |
| |Thunderbolt| | | |Thunderbolt| |
| |Controller |<-+------------+->|Controller | |
| +-----------+ | | +-----------+ |
+----------------+ +----------------+
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
---
drivers/thunderbolt/icm/icm_nhi.c | 15 +
drivers/thunderbolt/icm/net.c | 1471 +++++++++++++++++++++++++++++++++++++
2 files changed, 1486 insertions(+)
diff --git a/drivers/thunderbolt/icm/icm_nhi.c b/drivers/thunderbolt/icm/icm_nhi.c
index edc910b..b1cc347 100644
--- a/drivers/thunderbolt/icm/icm_nhi.c
+++ b/drivers/thunderbolt/icm/icm_nhi.c
@@ -928,6 +928,7 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data)
{
struct tbt_nhi_ctxt *nhi_ctxt = data;
u32 isr0, isr1, imr0, imr1;
+ int i;
/* clear on read */
isr0 = ioread32(nhi_ctxt->iobase + REG_RING_NOTIFY_BASE);
@@ -950,6 +951,20 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data)
spin_unlock(&nhi_ctxt->lock);
+ for (i = 0; i < nhi_ctxt->num_ports; ++i) {
+ struct net_device *net_dev =
+ nhi_ctxt->net_devices[i].net_dev;
+ if (net_dev) {
+ u8 path = PATH_FROM_PORT(nhi_ctxt->num_paths, i);
+
+ if (isr0 & REG_RING_INT_RX_PROCESSED(
+ path, nhi_ctxt->num_paths))
+ tbt_net_rx_msi(net_dev);
+ if (isr0 & REG_RING_INT_TX_PROCESSED(path))
+ tbt_net_tx_msi(net_dev);
+ }
+ }
+
if (isr0 & REG_RING_INT_RX_PROCESSED(TBT_ICM_RING_NUM,
nhi_ctxt->num_paths))
schedule_work(&nhi_ctxt->icm_msgs_work);
diff --git a/drivers/thunderbolt/icm/net.c b/drivers/thunderbolt/icm/net.c
index beeafb3..cf985dd 100644
--- a/drivers/thunderbolt/icm/net.c
+++ b/drivers/thunderbolt/icm/net.c
@@ -124,6 +124,17 @@ struct approve_inter_domain_connection_cmd {
};
+/*
+ * Header located at the start of every frame buffer; the frame data
+ * follows it directly. All fields are little-endian.
+ */
+struct tbt_frame_header {
+ /* size of the data in the frame (the header itself is not counted) */
+ __le32 frame_size;
+ /* running index of the frame within its packet, the first one is 0 */
+ __le16 frame_index;
+ /* ID of the frame to match frames to specific packet */
+ __le16 frame_id;
+ /* how many frames assemble a full packet */
+ __le32 frame_count;
+};
+
enum neg_event {
RECEIVE_LOGOUT = NUM_MEDIUM_STATUSES,
RECEIVE_LOGIN_RESPONSE,
@@ -131,15 +142,81 @@ enum neg_event {
NUM_NEG_EVENTS
};
+/* Result of validating one received frame, see tbt_net_check_frame() */
+enum frame_status {
+ /* valid frame that continues the packet currently being assembled */
+ GOOD_FRAME,
+ /* valid frame that starts a packet */
+ GOOD_AS_FIRST_FRAME,
+ /* valid first frame addressed to a non-broadcast multicast MAC */
+ GOOD_AS_FIRST_MULTICAST_FRAME,
+ /* the descriptor is not marked as done yet */
+ FRAME_NOT_READY,
+ /* the frame failed validation or filtering and is skipped */
+ FRAME_ERROR,
+};
+
+/*
+ * Bit positions inside tbt_port.packet_filters; the receive path tests
+ * them with BIT() when deciding whether a packet is accepted.
+ */
+enum packet_filter {
+ /* all multicast MAC addresses */
+ PACKET_TYPE_ALL_MULTICAST,
+ /* all types of MAC addresses: multicast, unicast and broadcast */
+ PACKET_TYPE_PROMISCUOUS,
+ /* all unicast MAC addresses */
+ PACKET_TYPE_UNICAST_PROMISCUOUS,
+};
+
enum disconnect_path_stage {
STAGE_1 = BIT(0),
STAGE_2 = BIT(1)
};
+/*
+ * Per-port packet counters.
+ * The field order matches the names in tbt_net_gstrings_stats.
+ */
+struct tbt_net_stats {
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_errors;
+ u64 rx_packets;
+ u64 rx_bytes;
+ /* frames dropped because of an invalid length, size or frame count */
+ u64 rx_length_errors;
+ /* frames whose descriptor reported a buffer overrun */
+ u64 rx_over_errors;
+ /* frames whose descriptor reported a CRC error */
+ u64 rx_crc_errors;
+ /* frames with an unexpected frame index or frame ID */
+ u64 rx_missed_errors;
+ /* received multicast (non-broadcast) packets */
+ u64 multicast;
+};
+
+/*
+ * Counter names, listed in the same order as the fields of
+ * struct tbt_net_stats.
+ * NOTE(review): presumably exported through ethtool (ETH_GSTRING_LEN) -
+ * the user is outside this chunk, confirm.
+ */
+static const char tbt_net_gstrings_stats[][ETH_GSTRING_LEN] = {
+ "tx_packets",
+ "tx_bytes",
+ "tx_errors",
+ "rx_packets",
+ "rx_bytes",
+ "rx_length_errors",
+ "rx_over_errors",
+ "rx_crc_errors",
+ "rx_missed_errors",
+ "multicast",
+};
+
+/* Bookkeeping for the memory that backs one ring descriptor */
+struct tbt_buffer {
+ /*
+ * DMA address: of the mapped page for RX buffers,
+ * of the frame itself for TX buffers
+ */
+ dma_addr_t dma;
+ union {
+ /* TX: CPU address of the frame header at the start of the frame */
+ struct tbt_frame_header *hdr;
+ /* RX: page that receives the frame */
+ struct page *page;
+ };
+ /*
+ * offset of the frame inside its page (RX) or inside its coherent
+ * allocation (TX, where 0 marks the buffer that owns the allocation)
+ */
+ u32 page_offset;
+};
+
+/* State of one descriptor ring; a port has one for TX and one for RX */
+struct tbt_desc_ring {
+ /* pointer to the descriptor ring memory */
+ struct tbt_buf_desc *desc;
+ /* physical address of the descriptor ring */
+ dma_addr_t dma;
+ /* array of buffer structs, one entry per descriptor */
+ struct tbt_buffer *buffers;
+ /* last descriptor that was associated with a buffer */
+ u16 last_allocated;
+ /* next descriptor to check for DD status bit */
+ u16 next_to_clean;
+};
+
/**
* struct tbt_port - the basic tbt_port structure
* @tbt_nhi_ctxt: context of the nhi controller.
* @net_dev: networking device object.
+ * @napi: network API
* @login_retry_work: work queue for sending login requests.
* @login_response_work: work queue for sending login responses.
* @work_struct logout_work: work queue for sending logout requests.
@@ -155,6 +232,11 @@ enum disconnect_path_stage {
* @login_retry_count: counts number of login retries sent.
* @local_depth: depth of the remote peer in the chain.
* @transmit_path: routing parameter for the icm.
+ * @tx_ring: transmit ring from where the packets are sent.
+ * @rx_ring: receive ring where the packets are received.
+ * @stats: network statistics of the rx/tx packets.
+ * @packet_filters: defines filters for the received packets.
+ * @multicast_hash_table: hash table of multicast addresses.
* @frame_id: counting ID of frames.
* @num: port number.
* @local_path: routing parameter for the icm.
@@ -164,6 +246,7 @@ enum disconnect_path_stage {
struct tbt_port {
struct tbt_nhi_ctxt *nhi_ctxt;
struct net_device *net_dev;
+ struct napi_struct napi;
struct delayed_work login_retry_work;
struct work_struct login_response_work;
struct work_struct logout_work;
@@ -179,6 +262,17 @@ struct tbt_port {
u8 login_retry_count;
u8 local_depth;
u8 transmit_path;
+ struct tbt_desc_ring tx_ring ____cacheline_aligned_in_smp;
+ struct tbt_desc_ring rx_ring;
+ struct tbt_net_stats stats;
+ u32 packet_filters;
+ /*
+ * hash table of 1024 boolean entries with hashing of
+ * the multicast address
+ */
+ u32 multicast_hash_table[DIV_ROUND_UP(
+ TBT_NET_MULTICAST_HASH_TABLE_SIZE,
+ BITS_PER_U32)];
u16 frame_id;
u8 num;
u8 local_path;
@@ -225,6 +319,8 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
(port->local_path * REG_OPTS_STEP);
u32 rx_reg_val = ioread32(rx_reg) & ~REG_OPTS_E2E_EN;
+ napi_disable(&port->napi);
+
tx_reg = iobase + REG_TX_OPTIONS_BASE +
(port->local_path * REG_OPTS_STEP);
tx_reg_val = ioread32(tx_reg) & ~REG_OPTS_E2E_EN;
@@ -266,8 +362,1336 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
port->nhi_ctxt->num_paths);
spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
}
+
+ port->rx_ring.next_to_clean = 0;
+ port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1;
+
+}
+
+/*
+ * TX notification handling for the MSI case (called from nhi_msi()).
+ *
+ * Reads the producer/consumer indices of the port's TX ring. Indices that
+ * are out of range are ignored. When at least TX_WAKE_THRESHOLD buffers
+ * are counted between producer and consumer the transmit queue is woken,
+ * otherwise the TX interrupt is enabled again.
+ */
+void tbt_net_tx_msi(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	void __iomem *iobase = port->nhi_ctxt->iobase;
+	u32 ring_reg, producer, consumer;
+
+	ring_reg = ioread32(TBT_RING_CONS_PROD_REG(iobase, REG_TX_RING_BASE,
+						   port->local_path));
+	producer = TBT_REG_RING_PROD_EXTRACT(ring_reg);
+	consumer = TBT_REG_RING_CONS_EXTRACT(ring_reg);
+
+	/* nothing sensible can be derived from out-of-range indices */
+	if (producer >= TBT_NET_NUM_TX_BUFS || consumer >= TBT_NET_NUM_TX_BUFS)
+		return;
+
+	if (TBT_NUM_BUFS_BETWEEN(producer, consumer, TBT_NET_NUM_TX_BUFS) <
+	    TX_WAKE_THRESHOLD) {
+		/* still below the threshold: ask for another TX interrupt */
+		spin_lock(&port->nhi_ctxt->lock);
+		RING_INT_ENABLE_TX(iobase, port->local_path);
+		spin_unlock(&port->nhi_ctxt->lock);
+		return;
+	}
+
+	netif_wake_queue(port->net_dev);
+}
+
+/*
+ * MSI-X handler of the port's TX ring.
+ *
+ * When the ring indices are in range and at least TX_WAKE_THRESHOLD
+ * buffers are counted between producer and consumer, the TX interrupt is
+ * disabled and the transmit queue is woken. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t tbt_net_tx_msix(int __always_unused irq, void *data)
+{
+	struct tbt_port *port = data;
+	void __iomem *iobase = port->nhi_ctxt->iobase;
+	u32 ring_reg, producer, consumer;
+
+	ring_reg = ioread32(TBT_RING_CONS_PROD_REG(iobase,
+						   REG_TX_RING_BASE,
+						   port->local_path));
+	producer = TBT_REG_RING_PROD_EXTRACT(ring_reg);
+	consumer = TBT_REG_RING_CONS_EXTRACT(ring_reg);
+
+	if (producer >= TBT_NET_NUM_TX_BUFS || consumer >= TBT_NET_NUM_TX_BUFS)
+		return IRQ_HANDLED;
+
+	if (TBT_NUM_BUFS_BETWEEN(producer, consumer, TBT_NET_NUM_TX_BUFS) <
+	    TX_WAKE_THRESHOLD)
+		return IRQ_HANDLED;
+
+	/* enough room again: stop TX interrupts and restart the queue */
+	spin_lock(&port->nhi_ctxt->lock);
+	RING_INT_DISABLE_TX(iobase, port->local_path);
+	spin_unlock(&port->nhi_ctxt->lock);
+
+	netif_wake_queue(port->net_dev);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * RX notification handling for the MSI case (called from nhi_msi()):
+ * schedules the port's NAPI context so tbt_net_poll() processes the ring.
+ */
+void tbt_net_rx_msi(struct net_device *net_dev)
+{
+	struct tbt_port *rx_port = netdev_priv(net_dev);
+	struct napi_struct *napi = &rx_port->napi;
+
+	napi_schedule_irqoff(napi);
+}
+
+/*
+ * MSI-X handler of the port's RX ring.
+ *
+ * If NAPI can be scheduled, the RX interrupt of the ring is disabled and
+ * the NAPI context is scheduled; tbt_net_poll() enables the interrupt
+ * again once it is done. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t tbt_net_rx_msix(int __always_unused irq, void *data)
+{
+	struct tbt_port *port = data;
+	struct tbt_nhi_ctxt *nhi_ctx;
+
+	if (unlikely(!napi_schedule_prep(&port->napi)))
+		return IRQ_HANDLED;
+
+	nhi_ctx = port->nhi_ctxt;
+
+	/* no RX interrupts are needed while polling */
+	spin_lock(&nhi_ctx->lock);
+	RING_INT_DISABLE_RX(nhi_ctx->iobase, port->local_path,
+			    nhi_ctx->num_paths);
+	spin_unlock(&nhi_ctx->lock);
+
+	__napi_schedule_irqoff(&port->napi);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Copy the packet headers found at the start of the first page fragment
+ * into the linear area of the skb and shrink the fragment accordingly.
+ */
+static void tbt_net_pull_tail(struct sk_buff *skb)
+{
+	skb_frag_t *first_frag = &skb_shinfo(skb)->frags[0];
+	/*
+	 * the fragment can be addressed directly, without kmap, since the
+	 * pages are allocated out of the lowmem pool
+	 */
+	unsigned char *frag_data = skb_frag_address(first_frag);
+	unsigned int hdr_len = eth_get_headlen(frag_data, TBT_NET_RX_HDR_SIZE);
+
+	/* the copy length is rounded up to sizeof(long) to help memcpy */
+	skb_copy_to_linear_data(skb, frag_data, ALIGN(hdr_len, sizeof(long)));
+
+	/* the headers now live in the linear area: drop them from the frag */
+	skb_frag_size_sub(first_frag, hdr_len);
+	first_frag->page_offset += hdr_len;
+	skb->data_len -= hdr_len;
+	skb->tail += hdr_len;
+}
+
+/*
+ * Make sure @buf owns a page that is DMA mapped for receive.
+ *
+ * A buffer that already has a page is left untouched. Otherwise a page is
+ * allocated with @gfp, mapped for DMA_FROM_DEVICE and the page offset is
+ * reset to 0.
+ *
+ * Returns true when the buffer has a mapped page, false when the
+ * allocation or the mapping failed (buf->page is NULL in that case).
+ */
+static inline bool tbt_net_alloc_mapped_page(struct device *dev,
+					     struct tbt_buffer *buf, gfp_t gfp)
+{
+	/* page is still attached from a previous round, reuse it */
+	if (buf->page)
+		return true;
+
+	buf->page = alloc_page(gfp | __GFP_COLD);
+	if (unlikely(!buf->page))
+		return false;
+
+	buf->dma = dma_map_page(dev, buf->page, 0, PAGE_SIZE,
+				DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, buf->dma)) {
+		__free_page(buf->page);
+		buf->page = NULL;
+		return false;
+	}
+
+	buf->page_offset = 0;
+	return true;
+}
+
+/*
+ * Hand up to @cleaned_count RX descriptors back to the hardware.
+ *
+ * Starting after rx_ring->last_allocated, each descriptor gets its
+ * attributes reset and a mapped page attached. The loop stops at the first
+ * page that cannot be provided. If at least one descriptor was refilled,
+ * the index of the last one is written to @reg.
+ *
+ * Returns true if at least one descriptor was refilled.
+ */
+static bool tbt_net_alloc_rx_buffers(struct device *dev,
+				     struct tbt_desc_ring *rx_ring,
+				     u16 cleaned_count, void __iomem *reg,
+				     gfp_t gfp)
+{
+	const u16 ring_mask = TBT_NET_NUM_RX_BUFS - 1;
+	u16 idx = (rx_ring->last_allocated + 1) & ring_mask;
+	bool refilled = false;
+
+	for (; cleaned_count; cleaned_count--) {
+		struct tbt_buf_desc *desc = &rx_ring->desc[idx];
+		struct tbt_buffer *buf = &rx_ring->buffers[idx];
+
+		/* making sure next_to_clean won't get old buffer */
+		desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS |
+					       DESC_ATTR_INT_EN);
+
+		if (!tbt_net_alloc_mapped_page(dev, buf, gfp))
+			break;
+
+		desc->phys = cpu_to_le64(buf->dma + buf->page_offset);
+		rx_ring->last_allocated = idx;
+		idx = (idx + 1) & ring_mask;
+		refilled = true;
+	}
+
+	/* tell the hardware how far the ring was refilled */
+	if (refilled)
+		iowrite32((rx_ring->last_allocated << REG_RING_CONS_SHIFT) &
+			  REG_RING_CONS_MASK, reg);
+
+	return refilled;
+}
+
+/*
+ * Check whether the bit that @ether_addr hashes to is set in
+ * @multicast_hash_table (a bitmap stored as an array of u32 words).
+ */
+static inline bool tbt_net_multicast_mac_set(const u32 *multicast_hash_table,
+					     const u8 *ether_addr)
+{
+	u16 hash = TBT_NET_ETHER_ADDR_HASH(ether_addr);
+	u32 word = multicast_hash_table[hash / BITS_PER_U32];
+
+	return (word & BIT(hash % BITS_PER_U32)) != 0;
+}
+
+/*
+ * tbt_net_check_frame - validate one received frame of the RX ring
+ * @port: port that owns the RX ring
+ * @frame_num: index of the descriptor/buffer to check
+ * @count: in: frame count of the packet being assembled, 0 when the frame
+ * is expected to start a packet; out: set from a valid first frame
+ * @index: frame index expected for a frame that continues a packet
+ * @id: in: frame ID of the packet being assembled; out: set from a valid
+ * first frame
+ * @size: out: data size of a valid first frame; the data size of every
+ * valid continuation frame is added to it
+ *
+ * A frame that does not match the packet being assembled is validated
+ * again as the first frame of a new packet; rx_ring->next_to_clean is
+ * moved to @frame_num in that case.
+ *
+ * Returns FRAME_NOT_READY if the descriptor is not done yet, FRAME_ERROR
+ * if the frame is rejected (rx_ring->next_to_clean is moved past
+ * @frame_num and, except for filtered packets, an error counter is
+ * incremented), otherwise one of the GOOD_* values.
+ */
+static enum frame_status tbt_net_check_frame(struct tbt_port *port,
+ u16 frame_num, u32 *count,
+ u16 index, u16 *id, u32 *size)
+{
+ struct tbt_desc_ring *rx_ring = &port->rx_ring;
+ __le32 desc_attr = rx_ring->desc[frame_num].attributes;
+ enum frame_status res = GOOD_AS_FIRST_FRAME;
+ u32 len, frame_count, frame_size;
+ struct tbt_frame_header *hdr;
+
+ if (!(desc_attr & cpu_to_le32(DESC_ATTR_DESC_DONE)))
+ return FRAME_NOT_READY;
+
+ rmb(); /* read other fields from desc after checking DD */
+
+ if (unlikely(desc_attr & cpu_to_le32(DESC_ATTR_RX_CRC_ERR))) {
+ ++port->stats.rx_crc_errors;
+ goto err;
+ } else if (unlikely(desc_attr &
+ cpu_to_le32(DESC_ATTR_RX_BUF_OVRN_ERR))) {
+ ++port->stats.rx_over_errors;
+ goto err;
+ }
+
+ /* a length field of 0 stands for a frame of the maximum size */
+ len = (le32_to_cpu(desc_attr) & DESC_ATTR_LEN_MASK)
+ >> DESC_ATTR_LEN_SHIFT;
+ if (len == 0)
+ len = TBT_RING_MAX_FRAME_SIZE;
+ /* should be greater than just header i.e. contains data */
+ if (unlikely(len <= sizeof(struct tbt_frame_header))) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+
+ prefetchw(rx_ring->buffers[frame_num].page);
+ hdr = page_address(rx_ring->buffers[frame_num].page) +
+ rx_ring->buffers[frame_num].page_offset;
+ /* prefetch first cache line of first page */
+ prefetch(hdr);
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(&port->nhi_ctxt->pdev->dev,
+ rx_ring->buffers[frame_num].dma,
+ rx_ring->buffers[frame_num].page_offset,
+ TBT_RING_MAX_FRAME_SIZE,
+ DMA_FROM_DEVICE);
+
+ frame_count = le32_to_cpu(hdr->frame_count);
+ frame_size = le32_to_cpu(hdr->frame_size);
+
+ /* the data must fit in what the descriptor reported as received */
+ if (unlikely((frame_size > len - sizeof(struct tbt_frame_header)) ||
+ (frame_size == 0))) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+ /*
+ * In case we're in the middle of packet, validate the frame header
+ * based on first fragment of the packet
+ */
+ if (*count) {
+ /* check the frame count fits the count field */
+ if (frame_count != *count) {
+ ++port->stats.rx_length_errors;
+ goto check_as_first;
+ }
+
+ /*
+ * check the frame identifiers are incremented correctly,
+ * and id is matching
+ */
+ if ((le16_to_cpu(hdr->frame_index) != index) ||
+ (le16_to_cpu(hdr->frame_id) != *id)) {
+ ++port->stats.rx_missed_errors;
+ goto check_as_first;
+ }
+
+ *size += frame_size;
+ if (*size > TBT_NET_MTU) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+ res = GOOD_FRAME;
+ } else { /* start of packet, validate the frame header */
+ const u8 *addr;
+
+ /*
+ * also reached from the branch above when a frame does not continue
+ * the current packet: the packet is restarted at this frame
+ */
+check_as_first:
+ rx_ring->next_to_clean = frame_num;
+
+ /* validate the first packet has a valid frame count */
+ if (unlikely(frame_count == 0 ||
+ frame_count > (TBT_NET_NUM_RX_BUFS / 4))) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+
+ /* validate the first packet has a valid frame index */
+ if (hdr->frame_index != 0) {
+ ++port->stats.rx_missed_errors;
+ goto err;
+ }
+
+ /*
+ * the first frame of a multi-frame packet must hold at least
+ * TBT_NET_RX_HDR_SIZE bytes of data
+ */
+ BUILD_BUG_ON(TBT_NET_RX_HDR_SIZE > TBT_RING_MAX_FRM_DATA_SZ);
+ if ((frame_count > 1) && (frame_size < TBT_NET_RX_HDR_SIZE)) {
+ ++port->stats.rx_length_errors;
+ goto err;
+ }
+
+ /* the frame data, which starts with the destination MAC address */
+ addr = (u8 *)(hdr + 1);
+
+ /* check the packet can go through the filter */
+ if (is_multicast_ether_addr(addr)) {
+ if (!is_broadcast_ether_addr(addr)) {
+ if ((port->packet_filters &
+ (BIT(PACKET_TYPE_PROMISCUOUS) |
+ BIT(PACKET_TYPE_ALL_MULTICAST))) ||
+ tbt_net_multicast_mac_set(
+ port->multicast_hash_table, addr))
+ res = GOOD_AS_FIRST_MULTICAST_FRAME;
+ else
+ goto err;
+ }
+ } else if (!(port->packet_filters &
+ (BIT(PACKET_TYPE_PROMISCUOUS) |
+ BIT(PACKET_TYPE_UNICAST_PROMISCUOUS))) &&
+ !ether_addr_equal(port->net_dev->dev_addr, addr)) {
+ goto err;
+ }
+
+ *size = frame_size;
+ *count = frame_count;
+ *id = le16_to_cpu(hdr->frame_id);
+ }
+
+#if (PREFETCH_STRIDE < 128)
+ prefetch((u8 *)hdr + PREFETCH_STRIDE);
+#endif
+
+ return res;
+
+err:
+ /* skip the rejected frame */
+ rx_ring->next_to_clean = (frame_num + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+ return FRAME_ERROR;
+}
+
+/*
+ * Size accounted for the data of a received frame (used as the truesize of
+ * the skb fragment).
+ *
+ * When several frames share a page, the frame (header plus @frame_size
+ * bytes of data) is rounded up to a cache line and the header size is
+ * subtracted again; otherwise the maximum frame data size is returned.
+ */
+static inline unsigned int tbt_net_max_frm_data_size(
+						__maybe_unused u32 frame_size)
+{
+#if (TBT_NUM_FRAMES_PER_PAGE > 1)
+	const size_t hdr_len = sizeof(struct tbt_frame_header);
+
+	return ALIGN(frame_size + hdr_len, L1_CACHE_BYTES) - hdr_len;
+#else
+	return TBT_RING_MAX_FRM_DATA_SZ;
+#endif
+}
+
+/*
+ * tbt_net_poll - NAPI poll callback of a port
+ * @napi: NAPI context embedded in struct tbt_port
+ * @budget: maximum number of packets to deliver in this call
+ *
+ * Walks the RX ring from rx_ring->next_to_clean, validates the frames of
+ * each packet with tbt_net_check_frame(), builds an skb (small packets are
+ * copied, larger ones are attached as page fragments) and passes it to the
+ * stack. Consumed descriptors are handed back to the hardware in batches.
+ *
+ * Returns @budget when the budget was used up (polling continues),
+ * otherwise completes NAPI, enables the RX interrupt again and returns 0.
+ */
+static int tbt_net_poll(struct napi_struct *napi, int budget)
+{
+	struct tbt_port *port = container_of(napi, struct tbt_port, napi);
+	void __iomem *reg = TBT_RING_CONS_PROD_REG(port->nhi_ctxt->iobase,
+						  REG_RX_RING_BASE,
+						  port->local_path);
+	struct tbt_desc_ring *rx_ring = &port->rx_ring;
+	u16 cleaned_count = TBT_NUM_BUFS_BETWEEN(rx_ring->last_allocated,
+						 rx_ring->next_to_clean,
+						 TBT_NET_NUM_RX_BUFS);
+	unsigned long flags;
+	int rx_packets = 0;
+
+loop:
+	while (likely(rx_packets < budget)) {
+		struct sk_buff *skb;
+		enum frame_status status;
+		bool multicast = false;
+		u32 frame_count = 0, size;
+		u16 j, frame_id;
+		int i;
+
+		/*
+		 * return some buffers to hardware, one at a time is too slow
+		 * so allocate TBT_NET_RX_BUFFER_WRITE buffers at the same time
+		 */
+		if (cleaned_count >= TBT_NET_RX_BUFFER_WRITE) {
+			tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+						 rx_ring, cleaned_count, reg,
+						 GFP_ATOMIC);
+			cleaned_count = 0;
+		}
+
+		/* frame_count is 0 here, so this is checked as a first frame */
+		status = tbt_net_check_frame(port, rx_ring->next_to_clean,
+					     &frame_count, 0, &frame_id,
+					     &size);
+		if (status == FRAME_NOT_READY)
+			break;
+
+		if (status == FRAME_ERROR) {
+			++cleaned_count;
+			continue;
+		}
+
+		multicast = (status == GOOD_AS_FIRST_MULTICAST_FRAME);
+
+		/*
+		 * i is incremented up to the frame_count frames received,
+		 * j cyclically goes over the location from the next frame
+		 * to clean in the ring
+		 */
+		j = (rx_ring->next_to_clean + 1);
+		j &= (TBT_NET_NUM_RX_BUFS - 1);
+		for (i = 1; i < frame_count; ++i) {
+			status = tbt_net_check_frame(port, j, &frame_count, i,
+						     &frame_id, &size);
+			/* the rest of the packet has not arrived yet */
+			if (status == FRAME_NOT_READY)
+				goto out;
+
+			j = (j + 1) & (TBT_NET_NUM_RX_BUFS - 1);
+
+			/* if a new frame is found, start over */
+			if (status == GOOD_AS_FIRST_FRAME ||
+			    status == GOOD_AS_FIRST_MULTICAST_FRAME) {
+				multicast = (status ==
+					GOOD_AS_FIRST_MULTICAST_FRAME);
+				/* the i frames seen so far are dropped */
+				cleaned_count += i;
+				i = 0;
+				continue;
+			}
+
+			if (status == FRAME_ERROR) {
+				cleaned_count += (i + 1);
+				goto loop;
+			}
+		}
+
+		/* allocate a skb to store the frags */
+		skb = netdev_alloc_skb_ip_align(port->net_dev,
+						TBT_NET_RX_HDR_SIZE);
+		if (unlikely(!skb))
+			break;
+
+		/*
+		 * we will be copying header into skb->data in
+		 * tbt_net_pull_tail so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+
+		/*
+		 * if overall size of packet smaller than TBT_NET_RX_HDR_SIZE
+		 * which is a small buffer size we decided to allocate
+		 * as the base to RX
+		 */
+		if (size <= TBT_NET_RX_HDR_SIZE) {
+			struct tbt_buffer *buf =
+				&(rx_ring->buffers[rx_ring->next_to_clean]);
+			u8 *va = page_address(buf->page) + buf->page_offset +
+				 sizeof(struct tbt_frame_header);
+
+			memcpy(__skb_put(skb, size), va,
+			       ALIGN(size, sizeof(long)));
+
+			/*
+			 * Reuse buffer as-is,
+			 * just make sure it is local
+			 * Access to local memory is faster than non-local
+			 * memory so let's reuse.
+			 * If not local, let's free it and reallocate later.
+			 */
+			if (likely(page_to_nid(buf->page) == numa_node_id())) {
+				/* sync the buffer for use by the device */
+				dma_sync_single_range_for_device(
+						&port->nhi_ctxt->pdev->dev,
+						buf->dma, buf->page_offset,
+						TBT_RING_MAX_FRAME_SIZE,
+						DMA_FROM_DEVICE);
+			} else {
+				/*
+				 * this page cannot be reused so discard it.
+				 * The mapping is torn down before the page
+				 * reference is dropped: unmapping may still
+				 * touch the page, so it must not be freed
+				 * before that.
+				 */
+				dma_unmap_page(&port->nhi_ctxt->pdev->dev,
+					       buf->dma, PAGE_SIZE,
+					       DMA_FROM_DEVICE);
+				put_page(buf->page);
+				buf->page = NULL;
+			}
+			rx_ring->next_to_clean = (rx_ring->next_to_clean + 1) &
+						 (TBT_NET_NUM_RX_BUFS - 1);
+		} else {
+			for (i = 0; i < frame_count; ++i) {
+				struct tbt_buffer *buf = &(rx_ring->buffers[
+						rx_ring->next_to_clean]);
+				struct tbt_frame_header *hdr =
+						page_address(buf->page) +
+						buf->page_offset;
+				u32 frm_size = le32_to_cpu(hdr->frame_size);
+
+				unsigned int truesize =
+					tbt_net_max_frm_data_size(frm_size);
+
+				/* add frame to skb struct */
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						buf->page,
+						sizeof(struct tbt_frame_header)
+							+ buf->page_offset,
+						frm_size, truesize);
+
+#if (TBT_NUM_FRAMES_PER_PAGE > 1)
+				/* move offset up to the next cache line */
+				buf->page_offset += (truesize +
+					sizeof(struct tbt_frame_header));
+
+				/*
+				 * we can reuse buffer if there is space
+				 * available and it is local
+				 */
+				if (page_to_nid(buf->page) == numa_node_id()
+				    && buf->page_offset <=
+					PAGE_SIZE - TBT_RING_MAX_FRAME_SIZE) {
+					/*
+					 * bump ref count on page before
+					 * it is given to the stack
+					 */
+					get_page(buf->page);
+					/*
+					 * sync the buffer for use by the
+					 * device
+					 */
+					dma_sync_single_range_for_device(
+						&port->nhi_ctxt->pdev->dev,
+						buf->dma, buf->page_offset,
+						TBT_RING_MAX_FRAME_SIZE,
+						DMA_FROM_DEVICE);
+				} else
+#endif
+				{
+					/*
+					 * the page reference now belongs to
+					 * the skb, only the mapping is
+					 * released here
+					 */
+					buf->page = NULL;
+					dma_unmap_page(
+						&port->nhi_ctxt->pdev->dev,
+						buf->dma, PAGE_SIZE,
+						DMA_FROM_DEVICE);
+				}
+
+				rx_ring->next_to_clean =
+						(rx_ring->next_to_clean + 1) &
+						(TBT_NET_NUM_RX_BUFS - 1);
+			}
+			/*
+			 * place header from the first
+			 * fragment in linear portion of buffer
+			 */
+			tbt_net_pull_tail(skb);
+		}
+
+		/*
+		 * The Thunderbolt medium doesn't have any restriction on
+		 * minimum frame size, thus doesn't need any padding in
+		 * transmit.
+		 * The network stack accepts runt Ethernet frames,
+		 * therefore there is no padding in receive either.
+		 */
+
+		skb->protocol = eth_type_trans(skb, port->net_dev);
+		napi_gro_receive(&port->napi, skb);
+
+		++rx_packets;
+		port->stats.rx_bytes += size;
+		if (multicast)
+			++port->stats.multicast;
+		cleaned_count += frame_count;
+	}
+
+out:
+	port->stats.rx_packets += rx_packets;
+
+	if (cleaned_count)
+		tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+					 rx_ring, cleaned_count, reg,
+					 GFP_ATOMIC);
+
+	/* If all work not completed, return budget and keep polling */
+	if (rx_packets >= budget)
+		return budget;
+
+	/* Work is done so exit the polling mode and re-enable the interrupt */
+	napi_complete(napi);
+
+	spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+	/* enable RX interrupt */
+	RING_INT_ENABLE_RX(port->nhi_ctxt->iobase, port->local_path,
+			   port->nhi_ctxt->num_paths);
+
+	spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+	return 0;
+}
+
+/*
+ * tbt_net_open - bring the interface up
+ * @net_dev: network device of the port
+ *
+ * Requests the per-port MSI-X interrupts (when MSI-X is in use), allocates
+ * the TX/RX buffer arrays, the descriptor rings and the TX buffers, and
+ * queues the login work if the medium is ready for connection.
+ * The carrier stays off until the path establishment finishes.
+ *
+ * Returns 0 on success, the error returned by devm_request_irq() if an
+ * interrupt cannot be requested, or -ENOMEM if an allocation fails. On
+ * failure everything acquired here is released again.
+ */
+static int tbt_net_open(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	int res = 0;
+	int i, j;
+
+	/* change link state to off until path establishment finishes */
+	netif_carrier_off(net_dev);
+
+	/*
+	 * if we previously succeeded to allocate msix entries,
+	 * now request IRQ for them:
+	 *  2=tx data port 0,
+	 *  3=rx data port 0,
+	 *  4=tx data port 1,
+	 *  5=rx data port 1,
+	 *  ...
+	 * if not, if msi is used, nhi_msi will handle icm & data paths
+	 */
+	if (port->nhi_ctxt->msix_entries) {
+		/* only used to identify the vector in the error messages */
+		char name[] = "tbt-net-xx-xx";
+
+		/*
+		 * The IRQ core keeps the name pointer for as long as the
+		 * interrupt is requested, so it must not point at the stack
+		 * buffer above. The net device name outlives the interrupt
+		 * and is used instead.
+		 */
+		scnprintf(name, sizeof(name), "tbt-net-rx-%02u", port->num);
+		res = devm_request_irq(&port->nhi_ctxt->pdev->dev,
+			port->nhi_ctxt->msix_entries[3+(port->num*2)].vector,
+			tbt_net_rx_msix, 0, net_dev->name, port);
+		if (res) {
+			netif_err(port, ifup, net_dev, "request_irq %s failed %d\n",
+				  name, res);
+			goto out;
+		}
+		name[8] = 't';
+		res = devm_request_irq(&port->nhi_ctxt->pdev->dev,
+			port->nhi_ctxt->msix_entries[2+(port->num*2)].vector,
+			tbt_net_tx_msix, 0, net_dev->name, port);
+		if (res) {
+			netif_err(port, ifup, net_dev, "request_irq %s failed %d\n",
+				  name, res);
+			goto request_irq_failure;
+		}
+	}
+	/*
+	 * Verifying that all buffer sizes are well defined.
+	 * Starting with frame(s) will not tip over the
+	 * page boundary
+	 */
+	BUILD_BUG_ON(TBT_NUM_FRAMES_PER_PAGE < 1);
+	/*
+	 * Just to make sure we have enough place for containing
+	 * 3 max MTU packets for TX
+	 */
+	BUILD_BUG_ON((TBT_NET_NUM_TX_BUFS * TBT_RING_MAX_FRAME_SIZE) <
+		     (TBT_NET_MTU * 3));
+	/* make sure the number of TX Buffers is power of 2 */
+	BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_TX_BUFS);
+	/*
+	 * Just to make sure we have enough place for containing
+	 * 3 max MTU packets for RX
+	 */
+	BUILD_BUG_ON((TBT_NET_NUM_RX_BUFS * TBT_RING_MAX_FRAME_SIZE) <
+		     (TBT_NET_MTU * 3));
+	/* make sure the number of RX Buffers is power of 2 */
+	BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_RX_BUFS);
+
+	port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1;
+
+	port->tx_ring.buffers = vzalloc(TBT_NET_NUM_TX_BUFS *
+					sizeof(struct tbt_buffer));
+	if (!port->tx_ring.buffers)
+		goto ring_alloc_failure;
+	port->rx_ring.buffers = vzalloc(TBT_NET_NUM_RX_BUFS *
+					sizeof(struct tbt_buffer));
+	if (!port->rx_ring.buffers)
+		goto ring_alloc_failure;
+
+	/*
+	 * Allocate TX and RX descriptors
+	 * if the total size is less than a page, do a central allocation
+	 * Otherwise, split TX and RX
+	 */
+	if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+		port->tx_ring.desc = dmam_alloc_coherent(
+				&port->nhi_ctxt->pdev->dev,
+				TBT_NET_SIZE_TOTAL_DESCS,
+				&port->tx_ring.dma,
+				GFP_KERNEL | __GFP_ZERO);
+		if (!port->tx_ring.desc)
+			goto ring_alloc_failure;
+		/* RX starts where TX finishes */
+		port->rx_ring.desc = &port->tx_ring.desc[TBT_NET_NUM_TX_BUFS];
+		port->rx_ring.dma = port->tx_ring.dma +
+			(TBT_NET_NUM_TX_BUFS * sizeof(struct tbt_buf_desc));
+	} else {
+		port->tx_ring.desc = dmam_alloc_coherent(
+				&port->nhi_ctxt->pdev->dev,
+				TBT_NET_NUM_TX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				&port->tx_ring.dma,
+				GFP_KERNEL | __GFP_ZERO);
+		if (!port->tx_ring.desc)
+			goto ring_alloc_failure;
+		port->rx_ring.desc = dmam_alloc_coherent(
+				&port->nhi_ctxt->pdev->dev,
+				TBT_NET_NUM_RX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				&port->rx_ring.dma,
+				GFP_KERNEL | __GFP_ZERO);
+		if (!port->rx_ring.desc)
+			goto rx_desc_alloc_failure;
+	}
+
+	/* allocate TX buffers and configure the descriptors */
+	for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) {
+		port->tx_ring.buffers[i].hdr = dma_alloc_coherent(
+			&port->nhi_ctxt->pdev->dev,
+			TBT_NUM_FRAMES_PER_PAGE * TBT_RING_MAX_FRAME_SIZE,
+			&port->tx_ring.buffers[i].dma,
+			GFP_KERNEL);
+		if (!port->tx_ring.buffers[i].hdr)
+			goto buffers_alloc_failure;
+
+		port->tx_ring.desc[i].phys =
+				cpu_to_le64(port->tx_ring.buffers[i].dma);
+		port->tx_ring.desc[i].attributes =
+				cpu_to_le32(DESC_ATTR_REQ_STS |
+					    TBT_NET_DESC_ATTR_SOF_EOF);
+
+		/*
+		 * In case the page is bigger than the frame size,
+		 * make the next buffer descriptor points
+		 * on the next frame memory address within the page
+		 */
+		for (i++, j = 1; (i < TBT_NET_NUM_TX_BUFS) &&
+				(j < TBT_NUM_FRAMES_PER_PAGE); i++, j++) {
+			port->tx_ring.buffers[i].dma =
+				port->tx_ring.buffers[i - 1].dma +
+				TBT_RING_MAX_FRAME_SIZE;
+			port->tx_ring.buffers[i].hdr =
+				(void *)(port->tx_ring.buffers[i - 1].hdr) +
+				TBT_RING_MAX_FRAME_SIZE;
+			/* move the next offset i.e. TBT_RING_MAX_FRAME_SIZE */
+			port->tx_ring.buffers[i].page_offset =
+				port->tx_ring.buffers[i - 1].page_offset +
+				TBT_RING_MAX_FRAME_SIZE;
+			port->tx_ring.desc[i].phys =
+				cpu_to_le64(port->tx_ring.buffers[i].dma);
+			port->tx_ring.desc[i].attributes =
+				cpu_to_le32(DESC_ATTR_REQ_STS |
+					    TBT_NET_DESC_ATTR_SOF_EOF);
+		}
+		/* compensate for the i++ of the outer loop */
+		i--;
+	}
+
+	port->negotiation_status =
+			BIT(port->nhi_ctxt->net_devices[port->num].medium_sts);
+	if (port->negotiation_status == BIT(MEDIUM_READY_FOR_CONNECTION)) {
+		port->login_retry_count = 0;
+		queue_delayed_work(port->nhi_ctxt->net_workqueue,
+				   &port->login_retry_work, 0);
+	}
+
+	netif_info(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - ready for ThunderboltIP negotiation\n",
+		   port->num);
+	return 0;
+
+buffers_alloc_failure:
+	/*
+	 * Rollback the Tx buffers that were already allocated
+	 * until the failure
+	 */
+	for (i--; i >= 0; i--) {
+		/* free only for first buffer allocation */
+		if (port->tx_ring.buffers[i].page_offset == 0)
+			dma_free_coherent(&port->nhi_ctxt->pdev->dev,
+					  TBT_NUM_FRAMES_PER_PAGE *
+						TBT_RING_MAX_FRAME_SIZE,
+					  port->tx_ring.buffers[i].hdr,
+					  port->tx_ring.buffers[i].dma);
+		port->tx_ring.buffers[i].hdr = NULL;
+	}
+	/*
+	 * For central allocation, free all
+	 * otherwise free RX and then TX separately
+	 */
+	if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_SIZE_TOTAL_DESCS,
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+		port->rx_ring.desc = NULL;
+	} else {
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_NUM_RX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->rx_ring.desc,
+				   port->rx_ring.dma);
+		port->rx_ring.desc = NULL;
+rx_desc_alloc_failure:
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_NUM_TX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+	}
+	port->tx_ring.desc = NULL;
+ring_alloc_failure:
+	vfree(port->tx_ring.buffers);
+	port->tx_ring.buffers = NULL;
+	vfree(port->rx_ring.buffers);
+	port->rx_ring.buffers = NULL;
+	res = -ENOMEM;
+	netif_err(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - unable to allocate memory\n",
+		  port->num);
+
+	/* without MSI-X no interrupt was requested above */
+	if (!port->nhi_ctxt->msix_entries)
+		goto out;
+
+	devm_free_irq(&port->nhi_ctxt->pdev->dev,
+		      port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector,
+		      port);
+request_irq_failure:
+	devm_free_irq(&port->nhi_ctxt->pdev->dev,
+		      port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector,
+		      port);
+out:
+	return res;
+}
+
+/*
+ * tbt_net_close - bring the interface down
+ * @net_dev: network device of the port
+ *
+ * Tears down the connection, cancels the pending work items and releases
+ * what tbt_net_open() acquired: TX buffers, RX pages and their mappings,
+ * descriptor rings, buffer arrays and, when MSI-X is in use, the per-port
+ * interrupts.
+ *
+ * Always returns 0.
+ */
+static int tbt_net_close(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	int i;
+
+	/*
+	 * Close connection, disable rings, flow controls
+	 * and interrupts
+	 */
+	tbt_net_tear_down(net_dev, !(port->negotiation_status &
+				     BIT(RECEIVE_LOGOUT)));
+
+	cancel_work_sync(&port->login_response_work);
+	cancel_work_sync(&port->logout_work);
+	cancel_work_sync(&port->status_reply_work);
+	cancel_work_sync(&port->approve_inter_domain_work);
+
+	/* Rollback the Tx buffers that were allocated */
+	for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) {
+		/* only the buffer at offset 0 owns the coherent allocation */
+		if (port->tx_ring.buffers[i].page_offset == 0)
+			dma_free_coherent(&port->nhi_ctxt->pdev->dev,
+					  TBT_NUM_FRAMES_PER_PAGE *
+						TBT_RING_MAX_FRAME_SIZE,
+					  port->tx_ring.buffers[i].hdr,
+					  port->tx_ring.buffers[i].dma);
+		port->tx_ring.buffers[i].hdr = NULL;
+	}
+	/* Unmap the Rx buffers that were allocated */
+	for (i = 0; i < TBT_NET_NUM_RX_BUFS; i++) {
+		struct tbt_buffer *buf = &port->rx_ring.buffers[i];
+
+		if (!buf->page)
+			continue;
+
+		/*
+		 * Unmap before dropping the page reference: unmapping may
+		 * still touch the page, so it must not be freed before that.
+		 */
+		dma_unmap_page(&port->nhi_ctxt->pdev->dev, buf->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+		put_page(buf->page);
+		buf->page = NULL;
+	}
+
+	/*
+	 * For central allocation, free all
+	 * otherwise free RX and then TX separately
+	 */
+	if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) {
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_SIZE_TOTAL_DESCS,
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+		port->rx_ring.desc = NULL;
+	} else {
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_NUM_RX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->rx_ring.desc,
+				   port->rx_ring.dma);
+		port->rx_ring.desc = NULL;
+		dmam_free_coherent(&port->nhi_ctxt->pdev->dev,
+				   TBT_NET_NUM_TX_BUFS *
+						sizeof(struct tbt_buf_desc),
+				   port->tx_ring.desc,
+				   port->tx_ring.dma);
+	}
+	port->tx_ring.desc = NULL;
+
+	vfree(port->tx_ring.buffers);
+	port->tx_ring.buffers = NULL;
+	vfree(port->rx_ring.buffers);
+	port->rx_ring.buffers = NULL;
+
+	/*
+	 * The per-port interrupts are only requested by tbt_net_open() when
+	 * MSI-X entries exist; with MSI there is no table to index and
+	 * nothing to free.
+	 */
+	if (port->nhi_ctxt->msix_entries) {
+		devm_free_irq(&port->nhi_ctxt->pdev->dev,
+			port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector,
+			port);
+		devm_free_irq(&port->nhi_ctxt->pdev->dev,
+			port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector,
+			port);
+	}
+
+	netif_info(port, ifdown, net_dev, "Thunderbolt(TM) Networking port %u - is down\n",
+		   port->num);
+
+	return 0;
+}
+
+/*
+ * tbt_net_xmit_csum - finish the frames of a packet before transmission
+ * @skb: packet being sent, used to locate the protocol headers
+ * @tx_ring: TX ring whose buffers hold the frames of the packet
+ * @first: index of the first TX buffer of the packet
+ * @last: index of the TX buffer following the last one of the packet
+ * (the walk stops when it is reached)
+ * @frame_count: number of frames of the packet, written to every frame
+ * header
+ *
+ * If the skb asks for a checksum (CHECKSUM_PARTIAL), the checksums are
+ * computed in software on the data located in the TX buffers: the IPv4
+ * header checksum where applicable and the TCP/UDP checksum over all the
+ * frames of the packet.
+ *
+ * Returns true on success, false when the protocol is not supported; the
+ * frame headers are not updated in that case.
+ */
+static bool tbt_net_xmit_csum(struct sk_buff *skb,
+ struct tbt_desc_ring *tx_ring, u32 first,
+ u32 last, u32 frame_count)
+{
+
+ struct tbt_frame_header *hdr = tx_ring->buffers[first].hdr;
+ /*
+ * the pseudo header checksums below are computed with a length of 0,
+ * the transport length is accounted for by this initial value
+ */
+ __wsum wsum = (__force __wsum)htonl(skb->len -
+ skb_transport_offset(skb));
+ int offset = skb_transport_offset(skb);
+ __sum16 *tucso; /* TCP UDP Checksum Segment Offset */
+ __be16 protocol = skb->protocol;
+ /* start of the packet data inside the first TX buffer */
+ u8 *dest = (u8 *)(hdr + 1);
+ int len;
+
+ /* no checksum requested: only the frame count has to be filled in */
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ for (; first != last;
+ first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1)) {
+ hdr = tx_ring->buffers[first].hdr;
+ hdr->frame_count = cpu_to_le32(frame_count);
+ }
+ return true;
+ }
+
+ /* look at the protocol encapsulated by the VLAN tag */
+ if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vhdr, vh;
+
+ vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh);
+ if (!vhdr)
+ return false;
+
+ protocol = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ /*
+ * Data points on the beginning of packet.
+ * Check is the checksum absolute place in the
+ * packet.
+ * ipcso will update IP checksum.
+ * tucso will update TCP/UDP checksum.
+ */
+ if (protocol == htons(ETH_P_IP)) {
+ __sum16 *ipcso = (__sum16 *)(dest +
+ ((u8 *)&(ip_hdr(skb)->check) - skb->data));
+
+ *ipcso = 0;
+ *ipcso = ip_fast_csum(dest + skb_network_offset(skb),
+ ip_hdr(skb)->ihl);
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(tcp_hdr(skb)->check) - skb->data));
+ else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(udp_hdr(skb)->check) - skb->data));
+ else
+ return false;
+
+ *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, 0,
+ ip_hdr(skb)->protocol, 0);
+ } else if (skb_is_gso(skb)) {
+ if (skb_is_gso_v6(skb)) {
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(tcp_hdr(skb)->check) - skb->data));
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_TCP, 0);
+ } else if ((protocol == htons(ETH_P_IPV6)) &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) {
+ tucso = (__sum16 *)(dest +
+ ((u8 *)&(udp_hdr(skb)->check) - skb->data));
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, IPPROTO_UDP, 0);
+ } else {
+ return false;
+ }
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ tucso = (__sum16 *)(dest + skb_checksum_start_offset(skb) +
+ skb->csum_offset);
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ 0, ipv6_hdr(skb)->nexthdr, 0);
+ } else {
+ return false;
+ }
+
+ /*
+ * Sum the transport data of all the frames: in the first frame the
+ * data before the transport header is skipped, the following frames
+ * are summed entirely (offset is reset to 0 after the first pass).
+ */
+ /* First frame was headers, rest of the frames is data */
+ for (; first != last; first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1),
+ offset = 0) {
+ hdr = tx_ring->buffers[first].hdr;
+ dest = (u8 *)(hdr + 1) + offset;
+ len = le32_to_cpu(hdr->frame_size) - offset;
+ wsum = csum_partial(dest, len, wsum);
+ hdr->frame_count = cpu_to_le32(frame_count);
+ }
+ *tucso = csum_fold(wsum);
+
+ return true;
+}
+
+/*
+ * tbt_net_xmit_frame - ndo_start_xmit handler.
+ *
+ * Copies the linear part and the page fragments of @skb into consecutive
+ * TX ring buffers, splitting the packet into frames of at most
+ * TBT_RING_MAX_FRM_DATA_SZ bytes, lets tbt_net_xmit_csum() finish the frame
+ * headers and then advances the producer index register.
+ *
+ * Return: NETDEV_TX_BUSY (after stopping the queue and enabling the TX
+ * interrupt) when the free TX buffers cannot hold the packet; NETDEV_TX_OK
+ * otherwise, including the error paths where the skb is dropped and
+ * tx_errors is incremented.
+ */
+static netdev_tx_t tbt_net_xmit_frame(struct sk_buff *skb,
+ struct net_device *net_dev)
+{
+ struct tbt_port *port = netdev_priv(net_dev);
+ void __iomem *iobase = port->nhi_ctxt->iobase;
+ void __iomem *reg = TBT_RING_CONS_PROD_REG(iobase,
+ REG_TX_RING_BASE,
+ port->local_path);
+ struct tbt_desc_ring *tx_ring = &port->tx_ring;
+ struct tbt_frame_header *hdr;
+ u32 prod_cons, prod, cons, first;
+ /* len equivalent to the fragment length */
+ unsigned int len = skb_headlen(skb);
+ /* data_len is overall packet length */
+ unsigned int data_len = skb->len;
+ u32 frm_idx, frag_num = 0;
+ const u8 *src = skb->data;
+ bool unmap = false;
+ __le32 *attr;
+ u8 *dest;
+
+ if (unlikely(data_len == 0 || data_len > TBT_NET_MTU))
+ goto invalid_packet;
+
+ /* Indices read back from the hardware are range checked before use */
+ prod_cons = ioread32(reg);
+ prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+ cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+ if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS)
+ goto tx_error;
+
+ /* The whole packet must fit into the currently free TX buffers */
+ if (data_len > (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) *
+ TBT_RING_MAX_FRM_DATA_SZ)) {
+ unsigned long flags;
+
+ netif_stop_queue(net_dev);
+
+ spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+ /*
+ * Enable TX interrupt to be notified about available buffers
+ * and restart transmission upon this.
+ */
+ RING_INT_ENABLE_TX(iobase, port->local_path);
+ spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+ return NETDEV_TX_BUSY;
+ }
+
+ first = prod;
+ attr = &tx_ring->desc[prod].attributes;
+ hdr = tx_ring->buffers[prod].hdr;
+ dest = (u8 *)(hdr + 1);
+ /* if overall packet is bigger than the frame data size */
+ for (frm_idx = 0; data_len > TBT_RING_MAX_FRM_DATA_SZ; ++frm_idx) {
+ u32 size_left = TBT_RING_MAX_FRM_DATA_SZ;
+
+ *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK |
+ DESC_ATTR_INT_EN |
+ DESC_ATTR_DESC_DONE));
+ hdr->frame_size = cpu_to_le32(TBT_RING_MAX_FRM_DATA_SZ);
+ hdr->frame_index = cpu_to_le16(frm_idx);
+ hdr->frame_id = cpu_to_le16(port->frame_id);
+
+ do {
+ if (len > size_left) {
+ /*
+ * Copy data onto tx buffer data with full
+ * frame size then break
+ * and go to next frame
+ */
+ memcpy(dest, src, size_left);
+ len -= size_left;
+ dest += size_left;
+ src += size_left;
+ break;
+ }
+
+ /* The rest of the current source chunk fits in this frame */
+ memcpy(dest, src, len);
+ size_left -= len;
+ dest += len;
+
+ if (unmap) {
+ kunmap_atomic((void *)src);
+ unmap = false;
+ }
+ /*
+ * Ensure all fragments have been processed
+ */
+ if (frag_num < skb_shinfo(skb)->nr_frags) {
+ const skb_frag_t *frag =
+ &(skb_shinfo(skb)->frags[frag_num]);
+ len = skb_frag_size(frag);
+ /* map and then unmap quickly */
+ src = kmap_atomic(skb_frag_page(frag)) +
+ frag->page_offset;
+ unmap = true;
+ ++frag_num;
+ } else if (unlikely(size_left > 0)) {
+ goto invalid_packet;
+ }
+ } while (size_left > 0);
+
+ data_len -= TBT_RING_MAX_FRM_DATA_SZ;
+ prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1);
+ attr = &tx_ring->desc[prod].attributes;
+ hdr = tx_ring->buffers[prod].hdr;
+ dest = (u8 *)(hdr + 1);
+ }
+
+ /* Last (or only) frame: it carries the remaining data_len bytes */
+ *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK | DESC_ATTR_DESC_DONE));
+ /* Enable the interrupts, for resuming from stop queue later (if so) */
+ *attr |= cpu_to_le32(DESC_ATTR_INT_EN |
+ (((sizeof(struct tbt_frame_header) + data_len) <<
+ DESC_ATTR_LEN_SHIFT) & DESC_ATTR_LEN_MASK));
+ hdr->frame_size = cpu_to_le32(data_len);
+ hdr->frame_index = cpu_to_le16(frm_idx);
+ hdr->frame_id = cpu_to_le16(port->frame_id);
+
+ /* In case the remaining data_len is smaller than a frame */
+ while (len < data_len) {
+ memcpy(dest, src, len);
+ data_len -= len;
+ dest += len;
+
+ if (unmap) {
+ kunmap_atomic((void *)src);
+ unmap = false;
+ }
+
+ if (frag_num < skb_shinfo(skb)->nr_frags) {
+ const skb_frag_t *frag =
+ &(skb_shinfo(skb)->frags[frag_num]);
+ len = skb_frag_size(frag);
+ src = kmap_atomic(skb_frag_page(frag)) +
+ frag->page_offset;
+ unmap = true;
+ ++frag_num;
+ } else if (unlikely(data_len > 0)) {
+ goto invalid_packet;
+ }
+ }
+ memcpy(dest, src, data_len);
+ if (unmap) {
+ kunmap_atomic((void *)src);
+ unmap = false;
+ }
+
+ /* frm_idx becomes the total number of frames, prod the next free slot */
+ ++frm_idx;
+ prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1);
+
+ if (!tbt_net_xmit_csum(skb, tx_ring, first, prod, frm_idx))
+ goto invalid_packet;
+
+ if (port->match_frame_id)
+ ++port->frame_id;
+
+ prod_cons &= ~REG_RING_PROD_MASK;
+ prod_cons |= (prod << REG_RING_PROD_SHIFT) & REG_RING_PROD_MASK;
+ wmb(); /* make sure producer update is done after buffers are ready */
+ iowrite32(prod_cons, reg);
+
+ ++port->stats.tx_packets;
+ port->stats.tx_bytes += skb->len;
+
+ dev_consume_skb_any(skb);
+ return NETDEV_TX_OK;
+
+ /*
+ * Error paths: the skb is freed and NETDEV_TX_OK is returned, so the
+ * packet is dropped rather than handed back to the stack.
+ */
+invalid_packet:
+ netif_err(port, tx_err, net_dev, "port %u invalid transmit packet\n",
+ port->num);
+tx_error:
+ ++port->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
}
+/*
+ * Refresh the port's packet filter bits and multicast hash table from the
+ * current net device state (interface flags, unicast and multicast lists).
+ */
+static void tbt_net_set_rx_mode(struct net_device *net_dev)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	const int uc_addrs = netdev_uc_count(net_dev);
+	struct netdev_hw_addr *entry;
+
+	/* mirror IFF_PROMISC and IFF_ALLMULTI into the filter bits */
+	if (net_dev->flags & IFF_PROMISC) {
+		port->packet_filters |= BIT(PACKET_TYPE_PROMISCUOUS);
+	} else {
+		port->packet_filters &= ~BIT(PACKET_TYPE_PROMISCUOUS);
+	}
+
+	if (net_dev->flags & IFF_ALLMULTI) {
+		port->packet_filters |= BIT(PACKET_TYPE_ALL_MULTICAST);
+	} else {
+		port->packet_filters &= ~BIT(PACKET_TYPE_ALL_MULTICAST);
+	}
+
+	if (uc_addrs > 1) {
+		/* several unicast addresses: accept every unicast frame */
+		port->packet_filters |= BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+	} else if (uc_addrs < 1) {
+		/* no extra unicast address configured */
+		port->packet_filters &= ~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+	} else {
+		/*
+		 * exactly one address: unicast promiscuous is needed only
+		 * when it differs from the device's own MAC address
+		 */
+		netdev_for_each_uc_addr(entry, net_dev) {
+			if (ether_addr_equal(entry->addr, net_dev->dev_addr)) {
+				port->packet_filters &=
+					~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+			} else {
+				port->packet_filters |=
+					BIT(PACKET_TYPE_UNICAST_PROMISCUOUS);
+			}
+		}
+	}
+
+	/* rebuild the multicast hash table from scratch */
+	memset(port->multicast_hash_table, 0,
+	       sizeof(port->multicast_hash_table));
+	netdev_for_each_mc_addr(entry, net_dev) {
+		u16 hash = TBT_NET_ETHER_ADDR_HASH(entry->addr);
+
+		port->multicast_hash_table[hash / BITS_PER_U32] |=
+			BIT(hash % BITS_PER_U32);
+	}
+}
+
+/*
+ * ndo_get_stats64 handler: report the counters kept in port->stats.
+ * Every field that is not maintained by the driver is reported as zero.
+ */
+static struct rtnl_link_stats64 *tbt_net_get_stats64(
+					struct net_device *net_dev,
+					struct rtnl_link_stats64 *stats)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* receive counters */
+	stats->rx_packets = port->stats.rx_packets;
+	stats->rx_bytes = port->stats.rx_bytes;
+	stats->multicast = port->stats.multicast;
+
+	/* individual receive error counters and their total */
+	stats->rx_length_errors = port->stats.rx_length_errors;
+	stats->rx_over_errors = port->stats.rx_over_errors;
+	stats->rx_crc_errors = port->stats.rx_crc_errors;
+	stats->rx_missed_errors = port->stats.rx_missed_errors;
+	stats->rx_errors = stats->rx_length_errors + stats->rx_over_errors +
+			   stats->rx_crc_errors + stats->rx_missed_errors;
+
+	/* transmit counters */
+	stats->tx_packets = port->stats.tx_packets;
+	stats->tx_bytes = port->stats.tx_bytes;
+	stats->tx_errors = port->stats.tx_errors;
+
+	return stats;
+}
+
+/*
+ * ndo_set_mac_address handler.
+ *
+ * @addr points to a struct sockaddr holding the new address. Returns 0
+ * after copying it into net_dev->dev_addr, or -EADDRNOTAVAIL when it is
+ * not a valid Ethernet address.
+ */
+static int tbt_net_set_mac_address(struct net_device *net_dev, void *addr)
+{
+	struct sockaddr *new_addr = addr;
+
+	if (is_valid_ether_addr(new_addr->sa_data)) {
+		memcpy(net_dev->dev_addr, new_addr->sa_data,
+		       net_dev->addr_len);
+		return 0;
+	}
+
+	return -EADDRNOTAVAIL;
+}
+
+/*
+ * ndo_change_mtu handler.
+ *
+ * Accepts an MTU in the range [68, TBT_NET_MTU - ETH_HLEN], logs the change
+ * and stores it in net_dev->mtu. Returns 0 on success, -EINVAL otherwise.
+ */
+static int tbt_net_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+
+	/* MTU < 68 is an error and causes problems on some kernels */
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	/* the Ethernet header must still fit in the largest packet */
+	if (new_mtu > (TBT_NET_MTU - ETH_HLEN))
+		return -EINVAL;
+
+	netif_info(port, probe, net_dev, "Thunderbolt(TM) Networking port %u - changing MTU from %u to %d\n",
+		   port->num, net_dev->mtu, new_mtu);
+
+	net_dev->mtu = new_mtu;
+	return 0;
+}
+
+/*
+ * Net device callbacks of a Thunderbolt networking port. All handlers are
+ * the tbt_net_* functions of this file, except address validation which
+ * uses the generic eth_validate_addr().
+ */
+static const struct net_device_ops tbt_netdev_ops = {
+ /* called when the network interface is brought up */
+ .ndo_open = tbt_net_open,
+ /* called when the network interface is brought down */
+ .ndo_stop = tbt_net_close,
+ .ndo_start_xmit = tbt_net_xmit_frame,
+ .ndo_set_rx_mode = tbt_net_set_rx_mode,
+ .ndo_get_stats64 = tbt_net_get_stats64,
+ .ndo_set_mac_address = tbt_net_set_mac_address,
+ .ndo_change_mtu = tbt_net_change_mtu,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
+/*
+ * ethtool get_settings handler: report a fixed 20Gb/s full duplex fibre
+ * link with an internal transceiver and auto-negotiation disabled.
+ * Always returns 0.
+ */
+static int tbt_net_get_settings(__maybe_unused struct net_device *net_dev,
+				struct ethtool_cmd *ecmd)
+{
+	/* link modes and port type, supported and advertised alike */
+	ecmd->supported |= SUPPORTED_20000baseKR2_Full | SUPPORTED_FIBRE;
+	ecmd->advertising |= ADVERTISED_20000baseKR2_Full | ADVERTISED_FIBRE;
+
+	ecmd->port = PORT_FIBRE;
+	ecmd->transceiver = XCVR_INTERNAL;
+	ecmd->autoneg = AUTONEG_DISABLE;
+	ecmd->duplex = DUPLEX_FULL;
+	ethtool_cmd_speed_set(ecmd, SPEED_20000);
+
+	return 0;
+}
+
+
+/* ethtool get_msglevel handler: return the port's message enable mask. */
+static u32 tbt_net_get_msglevel(struct net_device *net_dev)
+{
+	const struct tbt_port *priv = netdev_priv(net_dev);
+
+	return priv->msg_enable;
+}
+
+/* ethtool set_msglevel handler: store @data as the message enable mask. */
+static void tbt_net_set_msglevel(struct net_device *net_dev, u32 data)
+{
+	struct tbt_port *priv = netdev_priv(net_dev);
+
+	priv->msg_enable = data;
+}
+
+/*
+ * ethtool get_strings handler: copy the statistics names into @data.
+ * String sets other than ETH_SS_STATS leave @data untouched.
+ */
+static void tbt_net_get_strings(__maybe_unused struct net_device *net_dev,
+				u32 stringset, u8 *data)
+{
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	memcpy(data, tbt_net_gstrings_stats, sizeof(tbt_net_gstrings_stats));
+}
+
+/*
+ * ethtool get_ethtool_stats handler: copy the whole port->stats structure
+ * into @data as raw bytes.
+ * NOTE(review): this presumably relies on port->stats being laid out as
+ * consecutive u64 counters in the same order as tbt_net_gstrings_stats -
+ * confirm against the structure definition.
+ */
+static void tbt_net_get_ethtool_stats(struct net_device *net_dev,
+ __maybe_unused struct ethtool_stats *sts,
+ u64 *data)
+{
+ struct tbt_port *port = netdev_priv(net_dev);
+
+ memcpy(data, &port->stats, sizeof(port->stats));
+}
+
+/*
+ * ethtool get_sset_count handler.
+ *
+ * Returns the number of ETH_GSTRING_LEN sized entries in
+ * tbt_net_gstrings_stats for ETH_SS_STATS, -EOPNOTSUPP for any other set.
+ */
+static int tbt_net_get_sset_count(__maybe_unused struct net_device *net_dev,
+				  int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return sizeof(tbt_net_gstrings_stats) / ETH_GSTRING_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * ethtool get_drvinfo handler: fill in driver name, driver version, the PCI
+ * name of the controller and the number of statistics entries.
+ */
+static void tbt_net_get_drvinfo(struct net_device *net_dev,
+				struct ethtool_drvinfo *drvinfo)
+{
+	struct tbt_port *priv = netdev_priv(net_dev);
+	struct pci_dev *pdev = priv->nhi_ctxt->pdev;
+
+	strlcpy(drvinfo->driver, "Thunderbolt(TM) Networking",
+		sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
+	strlcpy(drvinfo->bus_info, pci_name(pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_stats = tbt_net_get_sset_count(net_dev, ETH_SS_STATS);
+}
+
+/*
+ * ethtool callbacks of a Thunderbolt networking port. Link state uses the
+ * generic ethtool_op_get_link(); the rest are handlers from this file.
+ */
+static const struct ethtool_ops tbt_net_ethtool_ops = {
+ .get_settings = tbt_net_get_settings,
+ .get_drvinfo = tbt_net_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_msglevel = tbt_net_get_msglevel,
+ .set_msglevel = tbt_net_set_msglevel,
+ .get_strings = tbt_net_get_strings,
+ .get_ethtool_stats = tbt_net_get_ethtool_stats,
+ .get_sset_count = tbt_net_get_sset_count,
+};
+
static inline int send_message(struct tbt_port *port, const char *func,
enum pdf_value pdf, u32 msg_len,
const void *msg)
@@ -496,6 +1920,10 @@ void negotiation_events(struct net_device *net_dev,
/* configure TX ring */
reg = iobase + REG_TX_RING_BASE +
(port->local_path * REG_RING_STEP);
+ iowrite32(lower_32_bits(port->tx_ring.dma),
+ reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(upper_32_bits(port->tx_ring.dma),
+ reg + REG_RING_PHYS_HI_OFFSET);
tx_ring_conf = (TBT_NET_NUM_TX_BUFS << REG_RING_SIZE_SHIFT) &
REG_RING_SIZE_MASK;
@@ -538,6 +1966,10 @@ void negotiation_events(struct net_device *net_dev,
*/
reg = iobase + REG_RX_RING_BASE +
(port->local_path * REG_RING_STEP);
+ iowrite32(lower_32_bits(port->rx_ring.dma),
+ reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(upper_32_bits(port->rx_ring.dma),
+ reg + REG_RING_PHYS_HI_OFFSET);
rx_ring_conf = (TBT_NET_NUM_RX_BUFS << REG_RING_SIZE_SHIFT) &
REG_RING_SIZE_MASK;
@@ -547,6 +1979,17 @@ void negotiation_events(struct net_device *net_dev,
REG_RING_BUF_SIZE_MASK;
iowrite32(rx_ring_conf, reg + REG_RING_SIZE_OFFSET);
+ /* allocate RX buffers and configure the descriptors */
+ if (!tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev,
+ &port->rx_ring,
+ TBT_NET_NUM_RX_BUFS,
+ reg + REG_RING_CONS_PROD_OFFSET,
+ GFP_KERNEL)) {
+ netif_err(port, link, net_dev, "Thunderbolt(TM) Networking port %u - no memory for receive buffers\n",
+ port->num);
+ tbt_net_tear_down(net_dev, true);
+ break;
+ }
spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
/* enable RX interrupt */
@@ -559,6 +2002,7 @@ void negotiation_events(struct net_device *net_dev,
netif_info(port, link, net_dev, "Thunderbolt(TM) Networking port %u - ready\n",
port->num);
+ napi_enable(&port->napi);
netif_carrier_on(net_dev);
netif_start_queue(net_dev);
break;
@@ -769,15 +2213,42 @@ struct net_device *nhi_alloc_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
scnprintf(net_dev->name, sizeof(net_dev->name), "tbtnet%%dp%hhu",
port_num);
+ net_dev->netdev_ops = &tbt_netdev_ops;
+
+ netif_napi_add(net_dev, &port->napi, tbt_net_poll, NAPI_POLL_WEIGHT);
+
+ net_dev->hw_features = NETIF_F_SG |
+ NETIF_F_ALL_TSO |
+ NETIF_F_UFO |
+ NETIF_F_GRO |
+ NETIF_F_IP_CSUM |
+ NETIF_F_IPV6_CSUM;
+ net_dev->features = net_dev->hw_features;
+ if (nhi_ctxt->pci_using_dac)
+ net_dev->features |= NETIF_F_HIGHDMA;
+
INIT_DELAYED_WORK(&port->login_retry_work, login_retry);
INIT_WORK(&port->login_response_work, login_response);
INIT_WORK(&port->logout_work, logout);
INIT_WORK(&port->status_reply_work, status_reply);
INIT_WORK(&port->approve_inter_domain_work, approve_inter_domain);
+ net_dev->ethtool_ops = &tbt_net_ethtool_ops;
+
+ tbt_net_change_mtu(net_dev, TBT_NET_MTU - ETH_HLEN);
+
+ if (register_netdev(net_dev))
+ goto err_register;
+
+ netif_carrier_off(net_dev);
+
netif_info(port, probe, net_dev,
"Thunderbolt(TM) Networking port %u - MAC Address: %pM\n",
port_num, net_dev->dev_addr);
return net_dev;
+
+err_register:
+ free_netdev(net_dev);
+ return NULL;
}
--
2.7.4
^ permalink raw reply related
* [PATCH v9 3/8] thunderbolt: Communication with the ICM (firmware)
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
This patch provides the communication protocol between the driver and the
Intel Connection Manager (ICM) firmware that is operational in the
Thunderbolt controller in non-Apple hardware.
The ICM firmware-based controller is used for establishing and maintaining
the Thunderbolt Networking connection - we need to be able to communicate
with it.
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
---
drivers/thunderbolt/icm/Makefile | 2 +
drivers/thunderbolt/icm/icm_nhi.c | 1257 +++++++++++++++++++++++++++++++++++++
drivers/thunderbolt/icm/icm_nhi.h | 85 +++
drivers/thunderbolt/icm/net.h | 217 +++++++
4 files changed, 1561 insertions(+)
create mode 100644 drivers/thunderbolt/icm/Makefile
create mode 100644 drivers/thunderbolt/icm/icm_nhi.c
create mode 100644 drivers/thunderbolt/icm/icm_nhi.h
create mode 100644 drivers/thunderbolt/icm/net.h
diff --git a/drivers/thunderbolt/icm/Makefile b/drivers/thunderbolt/icm/Makefile
new file mode 100644
index 0000000..f0d0fbb
--- /dev/null
+++ b/drivers/thunderbolt/icm/Makefile
@@ -0,0 +1,2 @@
+obj-${CONFIG_THUNDERBOLT_ICM} += thunderbolt-icm.o
+thunderbolt-icm-objs := icm_nhi.o
diff --git a/drivers/thunderbolt/icm/icm_nhi.c b/drivers/thunderbolt/icm/icm_nhi.c
new file mode 100644
index 0000000..c843ce8
--- /dev/null
+++ b/drivers/thunderbolt/icm/icm_nhi.c
@@ -0,0 +1,1257 @@
+/*******************************************************************************
+ *
+ * Intel Thunderbolt(TM) driver
+ * Copyright(c) 2014 - 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ ******************************************************************************/
+
+#include <linux/printk.h>
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/dmi.h>
+#include "icm_nhi.h"
+#include "net.h"
+
+/* Version and name under which the generic netlink family is registered */
+#define NHI_GENL_VERSION 1
+#define NHI_GENL_NAME "thunderbolt"
+
+/*
+ * DEVICE_DATA() packs per-device properties into a single integer:
+ * bits 0-3 num_ports
+ * bits 4-9 dma_port
+ * bits 10-21 nvm_ver_offset
+ * bit 22 nvm_auth_on_boot
+ * bit 23 support_full_e2e
+ * The DEVICE_DATA_*() macros below extract the individual fields again.
+ */
+#define DEVICE_DATA(num_ports, dma_port, nvm_ver_offset, nvm_auth_on_boot,\
+ support_full_e2e) \
+ ((num_ports) | ((dma_port) << 4) | ((nvm_ver_offset) << 10) | \
+ ((nvm_auth_on_boot) << 22) | ((support_full_e2e) << 23))
+#define DEVICE_DATA_NUM_PORTS(device_data) ((device_data) & 0xf)
+#define DEVICE_DATA_DMA_PORT(device_data) (((device_data) >> 4) & 0x3f)
+#define DEVICE_DATA_NVM_VER_OFFSET(device_data) (((device_data) >> 10) & 0xfff)
+#define DEVICE_DATA_NVM_AUTH_ON_BOOT(device_data) (((device_data) >> 22) & 0x1)
+#define DEVICE_DATA_SUPPORT_FULL_E2E(device_data) (((device_data) >> 23) & 0x1)
+
+/* Convert microseconds to units of 256 nanoseconds, rounding up */
+#define USEC_TO_256_NSECS(usec) DIV_ROUND_UP((usec) * NSEC_PER_USEC, 256)
+
+/*
+ * NHI genetlink commands.
+ * __NHI_CMD_MAX is only a terminator; NHI_CMD_MAX below is the highest
+ * real command value.
+ */
+enum {
+ NHI_CMD_UNSPEC,
+ NHI_CMD_SUBSCRIBE,
+ NHI_CMD_UNSUBSCRIBE,
+ NHI_CMD_QUERY_INFORMATION,
+ NHI_CMD_MSG_TO_ICM,
+ NHI_CMD_MSG_FROM_ICM,
+ NHI_CMD_MAILBOX,
+ NHI_CMD_APPROVE_TBT_NETWORKING,
+ NHI_CMD_ICM_IN_SAFE_MODE,
+ __NHI_CMD_MAX,
+};
+#define NHI_CMD_MAX (__NHI_CMD_MAX - 1)
+
+/*
+ * NHI genetlink policy: expected type of every attribute. The two binary
+ * message attributes are limited to TBT_ICM_RING_MAX_FRAME_SIZE bytes.
+ */
+static const struct nla_policy nhi_genl_policy[NHI_ATTR_MAX + 1] = {
+ [NHI_ATTR_DRV_VERSION] = { .type = NLA_NUL_STRING, },
+ [NHI_ATTR_NVM_VER_OFFSET] = { .type = NLA_U16, },
+ [NHI_ATTR_NUM_PORTS] = { .type = NLA_U8, },
+ [NHI_ATTR_DMA_PORT] = { .type = NLA_U8, },
+ [NHI_ATTR_SUPPORT_FULL_E2E] = { .type = NLA_FLAG, },
+ [NHI_ATTR_MAILBOX_CMD] = { .type = NLA_U32, },
+ [NHI_ATTR_PDF] = { .type = NLA_U32, },
+ [NHI_ATTR_MSG_TO_ICM] = { .type = NLA_BINARY,
+ .len = TBT_ICM_RING_MAX_FRAME_SIZE },
+ [NHI_ATTR_MSG_FROM_ICM] = { .type = NLA_BINARY,
+ .len = TBT_ICM_RING_MAX_FRAME_SIZE },
+};
+
+/*
+ * NHI genetlink family.
+ * The family specific user header has the size of the controller id field;
+ * the handlers in this file read and write the controller id through it.
+ */
+static struct genl_family nhi_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = FIELD_SIZEOF(struct tbt_nhi_ctxt, id),
+ .name = NHI_GENL_NAME,
+ .version = NHI_GENL_VERSION,
+ .maxattr = NHI_ATTR_MAX,
+};
+
+/* List of controller contexts; nhi_search_ctxt() must be called under the mutex */
+static LIST_HEAD(controllers_list);
+static DEFINE_MUTEX(controllers_list_mutex);
+/* Number of subscribed netlink users */
+static atomic_t subscribers = ATOMIC_INIT(0);
+/*
+ * Some of the received generic netlink messages are replied to in a
+ * different context. The reply has to include the netlink portid of the
+ * sender, therefore it is saved in a global variable (the current
+ * assumption is a single sender).
+ */
+static u32 portid;
+
+/*
+ * nhi_nvm_authenticated - check that the controller firmware is fully
+ * operational.
+ *
+ * Return: true when the controller does not authenticate its NVM on boot or
+ * when the operation mode read from REG_OUTMAIL_CMD is FULL_FUNCTIONALITY.
+ * Otherwise a warning is logged, an NHI_CMD_ICM_IN_SAFE_MODE message is sent
+ * to the saved netlink portid (best effort) and false is returned.
+ */
+static bool nhi_nvm_authenticated(struct tbt_nhi_ctxt *nhi_ctxt)
+{
+ enum icm_operation_mode op_mode;
+ u32 *msg_head, port_id, reg;
+ struct sk_buff *skb;
+ int i;
+
+ if (!nhi_ctxt->nvm_auth_on_boot)
+ return true;
+
+ /*
+ * The check for NVM authentication can take time for iCM,
+ * especially in low power configuration.
+ */
+ for (i = 0; i < 5; i++) {
+ u32 status = ioread32(nhi_ctxt->iobase + REG_FW_STS);
+
+ if (status & REG_FW_STS_NVM_AUTH_DONE)
+ break;
+
+ msleep(30);
+ }
+ /*
+ * The check for authentication is done after checking if iCM
+ * is present so it shouldn't reach the max tries (=5).
+ * Anyway, the check for full functionality below covers the error case.
+ */
+ reg = ioread32(nhi_ctxt->iobase + REG_OUTMAIL_CMD);
+ op_mode = (reg & REG_OUTMAIL_CMD_OP_MODE_MASK) >>
+ REG_OUTMAIL_CMD_OP_MODE_SHIFT;
+ if (op_mode == FULL_FUNCTIONALITY)
+ return true;
+
+ dev_warn(&nhi_ctxt->pdev->dev, "controller id %#x is in operation mode %#x status %#lx, NVM image update might be required\n",
+ nhi_ctxt->id, op_mode,
+ (reg & REG_OUTMAIL_CMD_STS_MASK)>>REG_OUTMAIL_CMD_STS_SHIFT);
+
+ /* The message carries only the family header (the controller id) */
+ skb = genlmsg_new(NLMSG_ALIGN(nhi_genl_family.hdrsize), GFP_KERNEL);
+ if (!skb) {
+ dev_err(&nhi_ctxt->pdev->dev, "genlmsg_new failed: not enough memory to send controller operational mode\n");
+ return false;
+ }
+
+ /* keeping port_id into a local variable for next use */
+ port_id = portid;
+ msg_head = genlmsg_put(skb, port_id, 0, &nhi_genl_family, 0,
+ NHI_CMD_ICM_IN_SAFE_MODE);
+ if (!msg_head) {
+ nlmsg_free(skb);
+ dev_err(&nhi_ctxt->pdev->dev, "genlmsg_put failed: not enough memory to send controller operational mode\n");
+ return false;
+ }
+
+ *msg_head = nhi_ctxt->id;
+
+ genlmsg_end(skb, msg_head);
+
+ /* The result of the unicast is not checked */
+ genlmsg_unicast(&init_net, skb, port_id);
+
+ return false;
+}
+
+/*
+ * nhi_send_message - place a message for the ICM on the ICM TX ring.
+ *
+ * @nhi_ctxt: controller context
+ * @pdf: value stored in the EOF field of the descriptor attributes
+ * @msg_len: length of @msg in bytes
+ * @msg: message payload, copied into the TX buffer at the producer index
+ * @ignore_icm_resp: stored in nhi_ctxt->ignore_icm_resp
+ *
+ * Sets nhi_ctxt->wait_for_icm_resp and then advances the producer index in
+ * the ring register.
+ * NOTE(review): msg_len is not validated here against the TX buffer size;
+ * callers are presumably expected to bound it - confirm.
+ *
+ * Return: 0 on success, -ENODEV when the controller is leaving D0 or the
+ * producer index read back is out of range, -ENOSPC when the ring is full.
+ */
+int nhi_send_message(struct tbt_nhi_ctxt *nhi_ctxt, enum pdf_value pdf,
+ u32 msg_len, const void *msg, bool ignore_icm_resp)
+{
+ u32 prod_cons, prod, cons, attr;
+ struct tbt_icm_ring_shared_memory *shared_mem;
+ void __iomem *reg = TBT_RING_CONS_PROD_REG(nhi_ctxt->iobase,
+ REG_TX_RING_BASE,
+ TBT_ICM_RING_NUM);
+
+ if (nhi_ctxt->d0_exit)
+ return -ENODEV;
+
+ prod_cons = ioread32(reg);
+ prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+ cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+ if (prod >= TBT_ICM_RING_NUM_TX_BUFS) {
+ dev_warn(&nhi_ctxt->pdev->dev,
+ "controller id %#x is not functional, producer %u out of range\n",
+ nhi_ctxt->id, prod);
+ return -ENODEV;
+ }
+ if (TBT_TX_RING_FULL(prod, cons, TBT_ICM_RING_NUM_TX_BUFS)) {
+ dev_err(&nhi_ctxt->pdev->dev,
+ "controller id %#x is not functional, TX ring full\n",
+ nhi_ctxt->id);
+ return -ENOSPC;
+ }
+
+ /* Descriptor attributes: message length and the PDF in the EOF field */
+ attr = (msg_len << DESC_ATTR_LEN_SHIFT) & DESC_ATTR_LEN_MASK;
+ attr |= (pdf << DESC_ATTR_EOF_SHIFT) & DESC_ATTR_EOF_MASK;
+
+ shared_mem = nhi_ctxt->icm_ring_shared_mem;
+ shared_mem->tx_buf_desc[prod].attributes = cpu_to_le32(attr);
+
+ memcpy(shared_mem->tx_buf[prod], msg, msg_len);
+
+ /* Advance the producer index, wrapping at the ring size */
+ prod_cons &= ~REG_RING_PROD_MASK;
+ prod_cons |= (((prod + 1) % TBT_ICM_RING_NUM_TX_BUFS) <<
+ REG_RING_PROD_SHIFT) & REG_RING_PROD_MASK;
+
+ nhi_ctxt->wait_for_icm_resp = true;
+ nhi_ctxt->ignore_icm_resp = ignore_icm_resp;
+
+ iowrite32(prod_cons, reg);
+
+ return 0;
+}
+
+/*
+ * Build the "driver ready" command (request code followed by a CRC32C of
+ * the request code) and send it to the ICM.
+ *
+ * Return: the result of nhi_send_message().
+ */
+static int nhi_send_driver_ready_command(struct tbt_nhi_ctxt *nhi_ctxt)
+{
+	struct driver_ready_command {
+		__be32 req_code;
+		__be32 crc;
+	} cmd = {
+		.req_code = cpu_to_be32(CC_DRV_READY),
+	};
+	u32 csum;
+
+	/* the CRC covers everything that precedes the crc field itself */
+	csum = __crc32c_le(~0, (unsigned char const *)&cmd,
+			   offsetof(struct driver_ready_command, crc));
+	cmd.crc = cpu_to_be32(~csum);
+
+	return nhi_send_message(nhi_ctxt, PDF_SW_TO_FW_COMMAND, sizeof(cmd),
+				&cmd, false);
+}
+
+/**
+ * nhi_search_ctxt - look up a controller in controllers_list by its id.
+ * The caller must hold controllers_list_mutex.
+ *
+ * @id: id of the controller to look for
+ *
+ * Return: the matching driver context, or NULL when no controller has
+ * this id.
+ */
+static struct tbt_nhi_ctxt *nhi_search_ctxt(u32 id)
+{
+	struct tbt_nhi_ctxt *found = NULL;
+	struct tbt_nhi_ctxt *cur;
+
+	list_for_each_entry(cur, &controllers_list, node) {
+		if (cur->id == id) {
+			found = cur;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * nhi_genl_subscribe - genetlink handler for NHI_CMD_SUBSCRIBE.
+ *
+ * Increments the subscribers count, remembers the sender's netlink portid
+ * and sends the driver ready command to every controller in
+ * controllers_list that is not leaving D0 and passes
+ * nhi_nvm_authenticated(). Controllers that cannot be served (semaphore
+ * timeout, d0_exit_send_mutex busy) are skipped.
+ *
+ * For a controller whose command was sent successfully, send_sem is left
+ * held on return; it is released again here only when sending failed.
+ *
+ * Return: 0, or -ERESTART when waiting for controllers_list_mutex was
+ * interrupted (the subscribers count is decremented again in that case).
+ */
+static int nhi_genl_subscribe(__always_unused struct sk_buff *u_skb,
+ struct genl_info *info)
+ __acquires(&nhi_ctxt->send_sem)
+{
+ struct tbt_nhi_ctxt *nhi_ctxt;
+
+ /*
+ * To send driver ready command to iCM, need at least one subscriber
+ * that will handle the response.
+ * Currently the assumption is one user mode daemon as subscriber
+ * so one portid global variable (without locking).
+ */
+ if (atomic_inc_return(&subscribers) >= 1) {
+ portid = info->snd_portid;
+ if (mutex_lock_interruptible(&controllers_list_mutex)) {
+ atomic_dec_if_positive(&subscribers);
+ return -ERESTART;
+ }
+ list_for_each_entry(nhi_ctxt, &controllers_list, node) {
+ int res;
+
+ if (nhi_ctxt->d0_exit ||
+ !nhi_nvm_authenticated(nhi_ctxt))
+ continue;
+
+ /* Wait up to 10 seconds for the send semaphore */
+ res = down_timeout(&nhi_ctxt->send_sem,
+ msecs_to_jiffies(10*MSEC_PER_SEC));
+ if (res) {
+ dev_err(&nhi_ctxt->pdev->dev,
+ "%s: controller id %#x is not functional, timeout on waiting for FW response to previous message\n",
+ __func__, nhi_ctxt->id);
+ continue;
+ }
+
+ /* Skip the controller when d0_exit_send_mutex is already taken */
+ if (!mutex_trylock(&nhi_ctxt->d0_exit_send_mutex)) {
+ up(&nhi_ctxt->send_sem);
+ continue;
+ }
+
+ res = nhi_send_driver_ready_command(nhi_ctxt);
+
+ mutex_unlock(&nhi_ctxt->d0_exit_send_mutex);
+ if (res)
+ up(&nhi_ctxt->send_sem);
+ }
+ mutex_unlock(&controllers_list_mutex);
+ }
+
+ return 0;
+}
+
+/*
+ * genetlink handler for NHI_CMD_UNSUBSCRIBE: drop one subscriber, never
+ * letting the count go below zero. Always returns 0.
+ */
+static int nhi_genl_unsubscribe(__always_unused struct sk_buff *u_skb,
+				__always_unused struct genl_info *info)
+{
+	(void)atomic_dec_if_positive(&subscribers);
+	return 0;
+}
+
+/*
+ * nhi_genl_query_information - genetlink handler for
+ * NHI_CMD_QUERY_INFORMATION.
+ *
+ * Looks up the controller whose id is given in the user header and replies
+ * with the driver version, NVM version offset, number of ports, DMA port
+ * and, when supported, the full E2E flag.
+ *
+ * Return: the result of genlmsg_reply() on success; -EINVAL on a missing
+ * user header, -ENOMEM when the reply cannot be allocated or started,
+ * -ERESTART when waiting for controllers_list_mutex was interrupted,
+ * -EMSGSIZE when an attribute does not fit, -ENODEV when the controller is
+ * not found or is leaving D0.
+ */
+static int nhi_genl_query_information(__always_unused struct sk_buff *u_skb,
+ struct genl_info *info)
+{
+ struct tbt_nhi_ctxt *nhi_ctxt;
+ struct sk_buff *skb;
+ bool msg_too_long;
+ int res = -ENODEV;
+ u32 *msg_head;
+
+ if (!info || !info->userhdr)
+ return -EINVAL;
+
+ /* Size the reply for the family header plus every attribute below */
+ skb = genlmsg_new(NLMSG_ALIGN(nhi_genl_family.hdrsize) +
+ nla_total_size(sizeof(DRV_VERSION)) +
+ nla_total_size(sizeof(nhi_ctxt->nvm_ver_offset)) +
+ nla_total_size(sizeof(nhi_ctxt->num_ports)) +
+ nla_total_size(sizeof(nhi_ctxt->dma_port)) +
+ nla_total_size(0), /* nhi_ctxt->support_full_e2e */
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ msg_head = genlmsg_put_reply(skb, info, &nhi_genl_family, 0,
+ NHI_CMD_QUERY_INFORMATION);
+ if (!msg_head) {
+ res = -ENOMEM;
+ goto genl_put_reply_failure;
+ }
+
+ if (mutex_lock_interruptible(&controllers_list_mutex)) {
+ res = -ERESTART;
+ goto genl_put_reply_failure;
+ }
+
+ nhi_ctxt = nhi_search_ctxt(*(u32 *)info->userhdr);
+ if (nhi_ctxt && !nhi_ctxt->d0_exit) {
+ *msg_head = nhi_ctxt->id;
+
+ /* Once one nla_put fails the following ones are skipped */
+ msg_too_long = !!nla_put_string(skb, NHI_ATTR_DRV_VERSION,
+ DRV_VERSION);
+
+ msg_too_long = msg_too_long ||
+ nla_put_u16(skb, NHI_ATTR_NVM_VER_OFFSET,
+ nhi_ctxt->nvm_ver_offset);
+
+ msg_too_long = msg_too_long ||
+ nla_put_u8(skb, NHI_ATTR_NUM_PORTS,
+ nhi_ctxt->num_ports);
+
+ msg_too_long = msg_too_long ||
+ nla_put_u8(skb, NHI_ATTR_DMA_PORT,
+ nhi_ctxt->dma_port);
+
+ if (msg_too_long) {
+ res = -EMSGSIZE;
+ goto release_ctl_list_lock;
+ }
+
+ if (nhi_ctxt->support_full_e2e &&
+ nla_put_flag(skb, NHI_ATTR_SUPPORT_FULL_E2E)) {
+ res = -EMSGSIZE;
+ goto release_ctl_list_lock;
+ }
+ mutex_unlock(&controllers_list_mutex);
+
+ genlmsg_end(skb, msg_head);
+
+ return genlmsg_reply(skb, info);
+ }
+
+ /* Reached with res == -ENODEV when no usable controller was found */
+release_ctl_list_lock:
+ mutex_unlock(&controllers_list_mutex);
+ genlmsg_cancel(skb, msg_head);
+
+genl_put_reply_failure:
+ nlmsg_free(skb);
+
+ return res;
+}
+
+/**
+ * nhi_genl_msg_to_icm - genetlink handler for NHI_CMD_MSG_TO_ICM.
+ *
+ * @u_skb: unused
+ * @info: genetlink request; the user header holds the controller id, and
+ *	  the NHI_ATTR_PDF and NHI_ATTR_MSG_TO_ICM attributes hold the PDF
+ *	  value and the message payload
+ *
+ * Forwards the user supplied message to the ICM of the addressed
+ * controller. The controller's send_sem is taken before sending and stays
+ * held when the message was sent successfully; it is released here only
+ * when the message could not be sent.
+ *
+ * Return: 0 when the message was queued; -EINVAL on missing header or
+ * attributes; -ENOBUFS when the message exceeds
+ * TBT_ICM_RING_MAX_FRAME_SIZE; -ERESTART when waiting for
+ * controllers_list_mutex was interrupted; -ENODEV when the controller is
+ * not found, is leaving D0 or d0_exit_send_mutex is busy; the error of
+ * down_timeout() or nhi_send_message() otherwise.
+ */
+static int nhi_genl_msg_to_icm(__always_unused struct sk_buff *u_skb,
+			       struct genl_info *info)
+	__acquires(&nhi_ctxt->send_sem)
+{
+	struct tbt_nhi_ctxt *nhi_ctxt;
+	int res = -ENODEV;
+	int msg_len;
+	void *msg;
+
+	if (!info || !info->userhdr || !info->attrs ||
+	    !info->attrs[NHI_ATTR_PDF] || !info->attrs[NHI_ATTR_MSG_TO_ICM])
+		return -EINVAL;
+
+	msg_len = nla_len(info->attrs[NHI_ATTR_MSG_TO_ICM]);
+	if (msg_len > TBT_ICM_RING_MAX_FRAME_SIZE)
+		return -ENOBUFS;
+
+	msg = nla_data(info->attrs[NHI_ATTR_MSG_TO_ICM]);
+
+	if (mutex_lock_interruptible(&controllers_list_mutex))
+		return -ERESTART;
+
+	nhi_ctxt = nhi_search_ctxt(*(u32 *)info->userhdr);
+	if (nhi_ctxt && !nhi_ctxt->d0_exit) {
+		/*
+		 * waiting 10 seconds to receive a FW response
+		 * if not, just give up and pop up an error
+		 */
+		res = down_timeout(&nhi_ctxt->send_sem,
+				   msecs_to_jiffies(10 * MSEC_PER_SEC));
+		if (res) {
+			void __iomem *rx_prod_cons = TBT_RING_CONS_PROD_REG(
+							nhi_ctxt->iobase,
+							REG_RX_RING_BASE,
+							TBT_ICM_RING_NUM);
+			void __iomem *tx_prod_cons = TBT_RING_CONS_PROD_REG(
+							nhi_ctxt->iobase,
+							REG_TX_RING_BASE,
+							TBT_ICM_RING_NUM);
+			dev_err(&nhi_ctxt->pdev->dev,
+				"%s: controller id %#x is not functional, timeout on waiting for FW response to previous message, tx prod&cons=%#x, rx prod&cons=%#x\n",
+				__func__, nhi_ctxt->id, ioread32(tx_prod_cons),
+				ioread32(rx_prod_cons));
+			goto release_ctl_list_lock;
+		}
+
+		if (!mutex_trylock(&nhi_ctxt->d0_exit_send_mutex)) {
+			/*
+			 * res is 0 here from the successful down_timeout();
+			 * the message is not sent, so report an error
+			 * instead of success.
+			 */
+			res = -ENODEV;
+			up(&nhi_ctxt->send_sem);
+			goto release_ctl_list_lock;
+		}
+
+		/* the controller is pinned by d0_exit_send_mutex from here */
+		mutex_unlock(&controllers_list_mutex);
+
+		res = nhi_send_message(nhi_ctxt,
+				       nla_get_u32(info->attrs[NHI_ATTR_PDF]),
+				       msg_len, msg, false);
+
+		mutex_unlock(&nhi_ctxt->d0_exit_send_mutex);
+		if (res)
+			up(&nhi_ctxt->send_sem);
+
+		return res;
+	}
+
+release_ctl_list_lock:
+	mutex_unlock(&controllers_list_mutex);
+	return res;
+}
+
+/*
+ * nhi_mailbox - issue an inmail command and poll for its completion.
+ *
+ * @nhi_ctxt: controller context
+ * @cmd: value written to REG_INMAIL_CMD
+ * @data: value written to REG_INMAIL_DATA
+ * @deinit: when true, poll every 20ms (instead of 100ms), keep polling even
+ * if d0_exit is set and do not log the timeout
+ *
+ * Return: 0 when the request bit was seen cleared, -EIO when the error bit
+ * was seen set, -ETIMEDOUT after NHI_INMAIL_CMD_RETRIES polls.
+ * NOTE(review): when the loop ends early because d0_exit became set, 0 is
+ * returned as well - confirm this is intended.
+ */
+int nhi_mailbox(struct tbt_nhi_ctxt *nhi_ctxt, u32 cmd, u32 data, bool deinit)
+{
+ u32 delay = deinit ? U32_C(20) : U32_C(100);
+ int i;
+
+ /* The data register is written before the command register */
+ iowrite32(data, nhi_ctxt->iobase + REG_INMAIL_DATA);
+ iowrite32(cmd, nhi_ctxt->iobase + REG_INMAIL_CMD);
+
+#define NHI_INMAIL_CMD_RETRIES 50
+ /*
+ * READ_ONCE fetches the value of nhi_ctxt->d0_exit every time
+ * and avoid optimization.
+ * deinit = true to continue the loop even if D3 process has been
+ * carried out.
+ */
+ for (i = 0; (i < NHI_INMAIL_CMD_RETRIES) &&
+ (deinit || !READ_ONCE(nhi_ctxt->d0_exit)); i++) {
+ cmd = ioread32(nhi_ctxt->iobase + REG_INMAIL_CMD);
+
+ if (cmd & REG_INMAIL_CMD_ERROR)
+ return -EIO;
+
+ if (!(cmd & REG_INMAIL_CMD_REQUEST))
+ break;
+
+ msleep(delay);
+ }
+
+ if (i == NHI_INMAIL_CMD_RETRIES) {
+ if (!deinit)
+ dev_err(&nhi_ctxt->pdev->dev,
+ "controller id %#x is not functional, inmail timeout\n",
+ nhi_ctxt->id);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+/*
+ * nhi_mailbox_generic - run a mailbox command on behalf of a netlink
+ * request.
+ *
+ * Must be entered with controllers_list_mutex held; the mutex is released
+ * on every path before returning.
+ *
+ * Return: the result of nhi_mailbox(); -ERESTART when waiting for
+ * mailbox_mutex was interrupted; -ENODEV when d0_exit_mailbox_mutex could
+ * not be taken.
+ */
+static int nhi_mailbox_generic(struct tbt_nhi_ctxt *nhi_ctxt, u32 mb_cmd)
+ __releases(&controllers_list_mutex)
+{
+ int res = -ENODEV;
+
+ if (mutex_lock_interruptible(&nhi_ctxt->mailbox_mutex)) {
+ res = -ERESTART;
+ goto release_ctl_list_lock;
+ }
+
+ if (!mutex_trylock(&nhi_ctxt->d0_exit_mailbox_mutex)) {
+ mutex_unlock(&nhi_ctxt->mailbox_mutex);
+ goto release_ctl_list_lock;
+ }
+
+ /* The list lock is dropped before the mailbox polling, which may sleep */
+ mutex_unlock(&controllers_list_mutex);
+
+ res = nhi_mailbox(nhi_ctxt, mb_cmd, 0, false);
+ mutex_unlock(&nhi_ctxt->d0_exit_mailbox_mutex);
+ mutex_unlock(&nhi_ctxt->mailbox_mutex);
+
+ return res;
+
+release_ctl_list_lock:
+ mutex_unlock(&controllers_list_mutex);
+ return res;
+}
+
+/*
+ * nhi_genl_mailbox - genetlink handler for NHI_CMD_MAILBOX.
+ *
+ * Builds the inmail command word from NHI_ATTR_MAILBOX_CMD (with the
+ * request bit set) and runs it on the controller named in the user header.
+ *
+ * Return: the result of nhi_mailbox_generic(); -EINVAL on missing header or
+ * attribute; -ERESTART when waiting for controllers_list_mutex was
+ * interrupted; -ENODEV when the controller is not found or is leaving D0.
+ */
+static int nhi_genl_mailbox(__always_unused struct sk_buff *u_skb,
+ struct genl_info *info)
+{
+ struct tbt_nhi_ctxt *nhi_ctxt;
+ u32 cmd, mb_cmd;
+
+ if (!info || !info->userhdr || !info->attrs ||
+ !info->attrs[NHI_ATTR_MAILBOX_CMD])
+ return -EINVAL;
+
+ cmd = nla_get_u32(info->attrs[NHI_ATTR_MAILBOX_CMD]);
+ mb_cmd = ((cmd << REG_INMAIL_CMD_CMD_SHIFT) &
+ REG_INMAIL_CMD_CMD_MASK) | REG_INMAIL_CMD_REQUEST;
+
+ if (mutex_lock_interruptible(&controllers_list_mutex))
+ return -ERESTART;
+
+ nhi_ctxt = nhi_search_ctxt(*(u32 *)info->userhdr);
+ /* nhi_mailbox_generic() releases controllers_list_mutex itself */
+ if (nhi_ctxt && !nhi_ctxt->d0_exit)
+ return nhi_mailbox_generic(nhi_ctxt, mb_cmd);
+
+ mutex_unlock(&controllers_list_mutex);
+ return -ENODEV;
+}
+
+
+/*
+ * nhi_genl_send_msg - forward a message received from the ICM to user
+ * space as an NHI_CMD_MSG_FROM_ICM genetlink message.
+ *
+ * @nhi_ctxt: controller the message came from; its id goes in the header
+ * @pdf: value sent in the NHI_ATTR_PDF attribute
+ * @msg: payload sent in the NHI_ATTR_MSG_FROM_ICM attribute
+ * @msg_len: length of @msg in bytes
+ *
+ * Return: the result of genlmsg_unicast(); -ENOTCONN when there is no
+ * subscriber; -ENOMEM when the message cannot be allocated or started;
+ * -EMSGSIZE when an attribute does not fit.
+ */
+static int nhi_genl_send_msg(struct tbt_nhi_ctxt *nhi_ctxt, enum pdf_value pdf,
+ const u8 *msg, u32 msg_len)
+{
+ u32 *msg_head, port_id;
+ struct sk_buff *skb;
+ int res;
+
+ if (atomic_read(&subscribers) < 1)
+ return -ENOTCONN;
+
+ skb = genlmsg_new(NLMSG_ALIGN(nhi_genl_family.hdrsize) +
+ nla_total_size(msg_len) +
+ nla_total_size(sizeof(pdf)),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ /* Use one snapshot of the global portid for both put and unicast */
+ port_id = portid;
+ msg_head = genlmsg_put(skb, port_id, 0, &nhi_genl_family, 0,
+ NHI_CMD_MSG_FROM_ICM);
+ if (!msg_head) {
+ res = -ENOMEM;
+ goto genl_put_reply_failure;
+ }
+
+ *msg_head = nhi_ctxt->id;
+
+ if (nla_put_u32(skb, NHI_ATTR_PDF, pdf) ||
+ nla_put(skb, NHI_ATTR_MSG_FROM_ICM, msg_len, msg)) {
+ res = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(skb, msg_head);
+
+ return genlmsg_unicast(&init_net, skb, port_id);
+
+nla_put_failure:
+ genlmsg_cancel(skb, msg_head);
+genl_put_reply_failure:
+ nlmsg_free(skb);
+
+ return res;
+}
+
+/*
+ * nhi_msg_from_icm_analysis - inspect a message received from the ICM.
+ * @nhi_ctxt: controller the message came from
+ * @pdf: PDF value of the message
+ * @msg: message payload (not inspected yet)
+ * @msg_len: payload length (not inspected yet)
+ *
+ * For configuration read/write responses and error notifications, a sender
+ * marked as waiting for the ICM response is released through send_sem.
+ *
+ * Returns whether the message should be forwarded to user space; currently
+ * always true.
+ */
+static bool nhi_msg_from_icm_analysis(struct tbt_nhi_ctxt *nhi_ctxt,
+				      enum pdf_value pdf,
+				      const u8 *msg, u32 msg_len)
+{
+	/* placeholder for messages that must not be forwarded */
+	bool forward = true;
+
+	switch (pdf) {
+	case PDF_READ_CONFIGURATION_REGISTERS:
+	case PDF_WRITE_CONFIGURATION_REGISTERS:
+	case PDF_ERROR_NOTIFICATION:
+		if (!nhi_ctxt->wait_for_icm_resp)
+			break;
+		nhi_ctxt->wait_for_icm_resp = false;
+		up(&nhi_ctxt->send_sem);
+		break;
+	default:
+		break;
+	}
+
+	return forward;
+}
+
+/*
+ * nhi_msgs_from_icm - work handler that drains the ICM RX ring.
+ * @work: the icm_msgs_work member of the controller context
+ *
+ * Reads the producer/consumer indices of the ICM RX ring, hands every
+ * pending message to nhi_msg_from_icm_analysis() and, when requested,
+ * forwards it to user space. Each consumed descriptor is re-armed, the new
+ * consumer index is written back, and the RX interrupt is re-enabled
+ * unless the controller is leaving D0.
+ *
+ * If either ring index read from the hardware is out of range, the
+ * function only logs a warning and returns without touching the ring or
+ * re-enabling the interrupt.
+ */
+static void nhi_msgs_from_icm(struct work_struct *work)
+	__releases(&nhi_ctxt->send_sem)
+{
+	struct tbt_nhi_ctxt *nhi_ctxt = container_of(work, typeof(*nhi_ctxt),
+						     icm_msgs_work);
+	void __iomem *reg = TBT_RING_CONS_PROD_REG(nhi_ctxt->iobase,
+						   REG_RX_RING_BASE,
+						   TBT_ICM_RING_NUM);
+	u32 prod_cons, prod, cons;
+
+	prod_cons = ioread32(reg);
+	prod = TBT_REG_RING_PROD_EXTRACT(prod_cons);
+	cons = TBT_REG_RING_CONS_EXTRACT(prod_cons);
+	if (prod >= TBT_ICM_RING_NUM_RX_BUFS) {
+		dev_warn(&nhi_ctxt->pdev->dev,
+			 "controller id %#x is not functional, producer %u out of range\n",
+			 nhi_ctxt->id, prod);
+		return;
+	}
+	if (cons >= TBT_ICM_RING_NUM_RX_BUFS) {
+		dev_warn(&nhi_ctxt->pdev->dev,
+			 "controller id %#x is not functional, consumer %u out of range\n",
+			 nhi_ctxt->id, cons);
+		return;
+	}
+
+	while (!TBT_RX_RING_EMPTY(prod, cons, TBT_ICM_RING_NUM_RX_BUFS) &&
+	       !nhi_ctxt->d0_exit) {
+		struct tbt_buf_desc *rx_desc;
+		u8 *msg;
+		u32 attr, msg_len;
+		enum pdf_value pdf;
+		bool send_event;
+
+		cons = (cons + 1) % TBT_ICM_RING_NUM_RX_BUFS;
+		rx_desc = &(nhi_ctxt->icm_ring_shared_mem->rx_buf_desc[cons]);
+		/* give the descriptor one short grace period to complete */
+		if (!(le32_to_cpu(rx_desc->attributes) & DESC_ATTR_DESC_DONE))
+			usleep_range(10, 20);
+
+		rmb(); /* read the descriptor and the buffer after DD check */
+		/* one snapshot, so PDF and length come from the same value */
+		attr = le32_to_cpu(rx_desc->attributes);
+		pdf = (attr & DESC_ATTR_EOF_MASK) >> DESC_ATTR_EOF_SHIFT;
+		msg = nhi_ctxt->icm_ring_shared_mem->rx_buf[cons];
+		msg_len = (attr & DESC_ATTR_LEN_MASK) >> DESC_ATTR_LEN_SHIFT;
+
+		/*
+		 * The length field is wider than an ICM RX buffer. Never
+		 * trust it beyond the buffer size, otherwise the copy into
+		 * the netlink message would read past rx_buf[cons].
+		 */
+		if (msg_len > TBT_ICM_RING_MAX_FRAME_SIZE) {
+			dev_warn(&nhi_ctxt->pdev->dev,
+				 "controller id %#x, ICM message length %u exceeds buffer size, truncating\n",
+				 nhi_ctxt->id, msg_len);
+			msg_len = TBT_ICM_RING_MAX_FRAME_SIZE;
+		}
+
+		send_event = nhi_msg_from_icm_analysis(nhi_ctxt, pdf, msg,
+						       msg_len);
+
+		if (send_event)
+			nhi_genl_send_msg(nhi_ctxt, pdf, msg, msg_len);
+
+		/* set the descriptor for another receive */
+		rx_desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS |
+						  DESC_ATTR_INT_EN);
+		rx_desc->time = 0;
+	}
+
+	/* free the descriptors for more receive */
+	prod_cons &= ~REG_RING_CONS_MASK;
+	prod_cons |= (cons << REG_RING_CONS_SHIFT) & REG_RING_CONS_MASK;
+	iowrite32(prod_cons, reg);
+
+	if (!nhi_ctxt->d0_exit) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&nhi_ctxt->lock, flags);
+		/* enable RX interrupt */
+		RING_INT_ENABLE_RX(nhi_ctxt->iobase, TBT_ICM_RING_NUM,
+				   nhi_ctxt->num_paths);
+
+		spin_unlock_irqrestore(&nhi_ctxt->lock, flags);
+	}
+}
+
+/*
+ * nhi_icm_ring_rx_msix - MSI-X handler for the ICM RX ring.
+ * @irq: unused
+ * @data: controller context registered with the interrupt
+ *
+ * Disables the ICM RX interrupt under the context spinlock and defers the
+ * ring processing to icm_msgs_work. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t nhi_icm_ring_rx_msix(int __always_unused irq, void *data)
+{
+	struct tbt_nhi_ctxt *ctxt = data;
+
+	/*
+	 * Keep the RX interrupt off until the work item has completed, so
+	 * further arrivals are mitigated instead of raising more interrupts.
+	 */
+	spin_lock(&ctxt->lock);
+	RING_INT_DISABLE_RX(ctxt->iobase, TBT_ICM_RING_NUM, ctxt->num_paths);
+	spin_unlock(&ctxt->lock);
+
+	schedule_work(&ctxt->icm_msgs_work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * nhi_msi - interrupt handler used when the driver runs with plain MSI.
+ * @irq: unused
+ * @data: controller context registered with the interrupt
+ *
+ * Reads both ring notification registers, masks every interrupt source
+ * that fired, and schedules icm_msgs_work when the ICM RX ring is among
+ * them. Returns IRQ_NONE when no notification bit was set.
+ */
+static irqreturn_t nhi_msi(int __always_unused irq, void *data)
+{
+	struct tbt_nhi_ctxt *ctxt = data;
+	void __iomem *notify = ctxt->iobase + REG_RING_NOTIFY_BASE;
+	void __iomem *mask = ctxt->iobase + REG_RING_INTERRUPT_BASE;
+	u32 pending0, pending1, enabled0, enabled1;
+
+	/* clear on read */
+	pending0 = ioread32(notify);
+	pending1 = ioread32(notify + REG_RING_NOTIFY_STEP);
+	if (unlikely(!(pending0 | pending1)))
+		return IRQ_NONE;
+
+	spin_lock(&ctxt->lock);
+
+	enabled0 = ioread32(mask);
+	enabled1 = ioread32(mask + REG_RING_INTERRUPT_STEP);
+	/* disable the arrived interrupts */
+	iowrite32(enabled0 & ~pending0, mask);
+	iowrite32(enabled1 & ~pending1, mask + REG_RING_INTERRUPT_STEP);
+
+	spin_unlock(&ctxt->lock);
+
+	if (pending0 & REG_RING_INT_RX_PROCESSED(TBT_ICM_RING_NUM,
+						 ctxt->num_paths))
+		schedule_work(&ctxt->icm_msgs_work);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * nhi_set_int_vec - route a ring interrupt to an MSI-X vector
+ * @nhi_ctxt: contains data on NHI controller
+ * @path: ring to be mapped
+ * @msix_msg_id: msix entry to be mapped
+ *
+ * Odd vector ids are placed num_paths slots after the slot of @path.
+ * The matching field of the interrupt vector allocation registers is
+ * rewritten with @msix_msg_id.
+ */
+static inline void nhi_set_int_vec(struct tbt_nhi_ctxt *nhi_ctxt, u32 path,
+				   u8 msix_msg_id)
+{
+	void __iomem *ivr_reg;
+	u32 slot = path;
+	u32 field_shift, val;
+
+	if (msix_msg_id & 1)
+		slot += nhi_ctxt->num_paths;
+
+	/* locate the register holding the slot and the field inside it */
+	ivr_reg = nhi_ctxt->iobase + REG_INT_VEC_ALLOC_BASE +
+		  ((slot / REG_INT_VEC_ALLOC_PER_REG) * REG_INT_VEC_ALLOC_STEP);
+	field_shift = (slot % REG_INT_VEC_ALLOC_PER_REG) *
+		      REG_INT_VEC_ALLOC_FIELD_BITS;
+
+	val = ioread32(ivr_reg);
+	val &= ~(REG_INT_VEC_ALLOC_FIELD_MASK << field_shift);
+	val |= msix_msg_id << field_shift;
+	iowrite32(val, ivr_reg);
+}
+
+/*
+ * NHI genetlink operations array.
+ * Maps every NHI_CMD_* generic netlink command to its handler. All
+ * commands share nhi_genl_policy; the two commands that make the driver
+ * talk to the ICM (message and mailbox) carry GENL_ADMIN_PERM.
+ */
+static const struct genl_ops nhi_ops[] = {
+ {
+ .cmd = NHI_CMD_SUBSCRIBE,
+ .policy = nhi_genl_policy,
+ .doit = nhi_genl_subscribe,
+ },
+ {
+ .cmd = NHI_CMD_UNSUBSCRIBE,
+ .policy = nhi_genl_policy,
+ .doit = nhi_genl_unsubscribe,
+ },
+ {
+ .cmd = NHI_CMD_QUERY_INFORMATION,
+ .policy = nhi_genl_policy,
+ .doit = nhi_genl_query_information,
+ },
+ {
+ /* privileged: passes a user supplied message to the ICM */
+ .cmd = NHI_CMD_MSG_TO_ICM,
+ .policy = nhi_genl_policy,
+ .doit = nhi_genl_msg_to_icm,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ /* privileged: issues a user supplied mailbox command */
+ .cmd = NHI_CMD_MAILBOX,
+ .policy = nhi_genl_policy,
+ .doit = nhi_genl_mailbox,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+/*
+ * nhi_suspend - quiesce the ICM ring before the controller leaves D0.
+ * @dev: device embedded in the controller's pci_dev
+ *
+ * Besides being the system sleep suspend callback, this is also called
+ * directly from icm_nhi_shutdown() and icm_nhi_remove().
+ *
+ * Sequence: mark the controller as leaving D0, turn off E2E flow control
+ * on the ICM ring, invalidate the RX ring, cancel pending ICM message
+ * work, release a sender still waiting for an ICM response, wait a few
+ * milliseconds for TX to drain, then clear the ring interrupt mask
+ * register and invalidate the TX ring.
+ *
+ * Always returns 0.
+ */
+static int nhi_suspend(struct device *dev) __releases(&nhi_ctxt->send_sem)
+{
+ struct tbt_nhi_ctxt *nhi_ctxt = pci_get_drvdata(to_pci_dev(dev));
+ void __iomem *rx_reg, *tx_reg;
+ u32 rx_reg_val, tx_reg_val;
+
+ /* must be after negotiation_events, since messages might be sent */
+ nhi_ctxt->d0_exit = true;
+
+ /* rx_reg_val/tx_reg_val hold the current options with E2E cleared */
+ rx_reg = nhi_ctxt->iobase + REG_RX_OPTIONS_BASE +
+ (TBT_ICM_RING_NUM * REG_OPTS_STEP);
+ rx_reg_val = ioread32(rx_reg) & ~REG_OPTS_E2E_EN;
+ tx_reg = nhi_ctxt->iobase + REG_TX_OPTIONS_BASE +
+ (TBT_ICM_RING_NUM * REG_OPTS_STEP);
+ tx_reg_val = ioread32(tx_reg) & ~REG_OPTS_E2E_EN;
+ /* disable RX flow control */
+ iowrite32(rx_reg_val, rx_reg);
+ /* disable TX flow control */
+ iowrite32(tx_reg_val, tx_reg);
+ /* disable RX ring */
+ iowrite32(rx_reg_val & ~REG_OPTS_VALID, rx_reg);
+
+ /*
+ * Hold both d0-exit mutexes while the pending-response state is torn
+ * down, so mailbox and message senders cannot run concurrently.
+ */
+ mutex_lock(&nhi_ctxt->d0_exit_mailbox_mutex);
+ mutex_lock(&nhi_ctxt->d0_exit_send_mutex);
+
+ /* the work item stops looping on its own once d0_exit is set */
+ cancel_work_sync(&nhi_ctxt->icm_msgs_work);
+
+ if (nhi_ctxt->wait_for_icm_resp) {
+ nhi_ctxt->wait_for_icm_resp = false;
+ nhi_ctxt->ignore_icm_resp = false;
+ /*
+ * if there is response, it is lost, so unlock the send
+ * for the next resume.
+ */
+ up(&nhi_ctxt->send_sem);
+ }
+
+ mutex_unlock(&nhi_ctxt->d0_exit_send_mutex);
+ mutex_unlock(&nhi_ctxt->d0_exit_mailbox_mutex);
+
+ /* wait for all TX to finish */
+ usleep_range(5 * USEC_PER_MSEC, 7 * USEC_PER_MSEC);
+
+ /* disable all interrupts */
+ iowrite32(0, nhi_ctxt->iobase + REG_RING_INTERRUPT_BASE);
+ /* disable TX ring */
+ iowrite32(tx_reg_val & ~REG_OPTS_VALID, tx_reg);
+
+ return 0;
+}
+
+/*
+ * nhi_resume - (re)program the ICM ring and bring the controller back up.
+ * @dev: device embedded in the controller's pci_dev
+ *
+ * Besides being the system sleep resume callback, this is also called from
+ * icm_nhi_probe() for the initial hardware setup.
+ *
+ * Programs the MSI-X vector routing (MSI-X mode only), fills the TX and RX
+ * descriptors of the ICM ring with the DMA addresses of their buffers,
+ * sets the interrupt throttling rate, programs and validates the TX and RX
+ * rings and enables the RX interrupt. When user space is subscribed and
+ * nhi_nvm_authenticated() reports true, send_sem is taken and the "driver
+ * ready" command is sent to the ICM.
+ *
+ * Always returns 0.
+ */
+static int nhi_resume(struct device *dev) __acquires(&nhi_ctxt->send_sem)
+{
+ dma_addr_t phys;
+ struct tbt_nhi_ctxt *nhi_ctxt = pci_get_drvdata(to_pci_dev(dev));
+ struct tbt_buf_desc *desc;
+ void __iomem *iobase = nhi_ctxt->iobase;
+ void __iomem *reg;
+ int i;
+
+ if (nhi_ctxt->msix_entries) {
+ iowrite32(ioread32(iobase + REG_DMA_MISC) |
+ REG_DMA_MISC_INT_AUTO_CLEAR,
+ iobase + REG_DMA_MISC);
+ /*
+ * Vector #0, which is TX complete to ICM,
+ * isn't being used currently.
+ */
+ nhi_set_int_vec(nhi_ctxt, 0, 1);
+
+ /* vectors 2 and up: one TX/RX pair per port, highest paths first */
+ for (i = 2; i < nhi_ctxt->num_vectors; i++)
+ nhi_set_int_vec(nhi_ctxt, nhi_ctxt->num_paths - (i/2),
+ i);
+ }
+
+ /* configure TX descriptors */
+ for (i = 0, phys = nhi_ctxt->icm_ring_shared_mem_dma_addr;
+ i < TBT_ICM_RING_NUM_TX_BUFS;
+ i++, phys += TBT_ICM_RING_MAX_FRAME_SIZE) {
+ desc = &nhi_ctxt->icm_ring_shared_mem->tx_buf_desc[i];
+ desc->phys = cpu_to_le64(phys);
+ desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS);
+ }
+ /*
+ * configure RX descriptors; phys deliberately continues from the TX
+ * loop because rx_buf directly follows tx_buf in the shared memory
+ */
+ for (i = 0;
+ i < TBT_ICM_RING_NUM_RX_BUFS;
+ i++, phys += TBT_ICM_RING_MAX_FRAME_SIZE) {
+ desc = &nhi_ctxt->icm_ring_shared_mem->rx_buf_desc[i];
+ desc->phys = cpu_to_le64(phys);
+ desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS |
+ DESC_ATTR_INT_EN);
+ }
+
+ /* configure throttling rate for interrupts */
+ for (i = 0, reg = iobase + REG_INT_THROTTLING_RATE;
+ i < NUM_INT_VECTORS;
+ i++, reg += REG_INT_THROTTLING_RATE_STEP) {
+ iowrite32(USEC_TO_256_NSECS(128), reg);
+ }
+
+ /* configure TX for ICM ring */
+ reg = iobase + REG_TX_RING_BASE + (TBT_ICM_RING_NUM * REG_RING_STEP);
+ phys = nhi_ctxt->icm_ring_shared_mem_dma_addr +
+ offsetof(struct tbt_icm_ring_shared_memory, tx_buf_desc);
+ iowrite32(lower_32_bits(phys), reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(upper_32_bits(phys), reg + REG_RING_PHYS_HI_OFFSET);
+ iowrite32((TBT_ICM_RING_NUM_TX_BUFS << REG_RING_SIZE_SHIFT) &
+ REG_RING_SIZE_MASK,
+ reg + REG_RING_SIZE_OFFSET);
+
+ reg = iobase + REG_TX_OPTIONS_BASE + (TBT_ICM_RING_NUM*REG_OPTS_STEP);
+ iowrite32(REG_OPTS_RAW | REG_OPTS_VALID, reg);
+
+ /* configure RX for ICM ring */
+ reg = iobase + REG_RX_RING_BASE + (TBT_ICM_RING_NUM * REG_RING_STEP);
+ phys = nhi_ctxt->icm_ring_shared_mem_dma_addr +
+ offsetof(struct tbt_icm_ring_shared_memory, rx_buf_desc);
+ iowrite32(lower_32_bits(phys), reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(upper_32_bits(phys), reg + REG_RING_PHYS_HI_OFFSET);
+ iowrite32(((TBT_ICM_RING_NUM_RX_BUFS << REG_RING_SIZE_SHIFT) &
+ REG_RING_SIZE_MASK) |
+ ((TBT_ICM_RING_MAX_FRAME_SIZE << REG_RING_BUF_SIZE_SHIFT) &
+ REG_RING_BUF_SIZE_MASK),
+ reg + REG_RING_SIZE_OFFSET);
+ /*
+ * consumer starts at the last slot; assuming the producer index starts
+ * at 0, this is the "ring empty" state of TBT_RX_RING_EMPTY()
+ */
+ iowrite32(((TBT_ICM_RING_NUM_RX_BUFS - 1) << REG_RING_CONS_SHIFT) &
+ REG_RING_CONS_MASK,
+ reg + REG_RING_CONS_PROD_OFFSET);
+
+ reg = iobase + REG_RX_OPTIONS_BASE + (TBT_ICM_RING_NUM*REG_OPTS_STEP);
+ iowrite32(REG_OPTS_RAW | REG_OPTS_VALID, reg);
+
+ /* enable RX interrupt */
+ RING_INT_ENABLE_RX(iobase, TBT_ICM_RING_NUM, nhi_ctxt->num_paths);
+
+ if (likely((atomic_read(&subscribers) > 0) &&
+ nhi_nvm_authenticated(nhi_ctxt))) {
+ /* NOTE(review): the result of the send below is not checked */
+ down(&nhi_ctxt->send_sem);
+ nhi_ctxt->d0_exit = false;
+ mutex_lock(&nhi_ctxt->d0_exit_send_mutex);
+ /*
+ * interrupts are enabled here before send due to
+ * implicit barrier in mutex
+ */
+ nhi_send_driver_ready_command(nhi_ctxt);
+ mutex_unlock(&nhi_ctxt->d0_exit_send_mutex);
+ } else {
+ nhi_ctxt->d0_exit = false;
+ }
+
+ return 0;
+}
+
+/*
+ * icm_nhi_shutdown - PCI shutdown callback.
+ * @pdev: controller being shut down
+ *
+ * Quiesces the controller through the same path as system suspend.
+ */
+static void icm_nhi_shutdown(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	nhi_suspend(dev);
+}
+
+/*
+ * icm_nhi_remove - PCI remove callback.
+ * @pdev: controller being removed
+ *
+ * Quiesces the controller, destroys the networking workqueue, releases the
+ * interrupt and disables MSI-X/MSI, unlinks the controller from the
+ * controllers list, tells the ICM that the driver unloads (disconnecting
+ * inter-domain paths), resets the host interface and destroys the mutexes.
+ * Memory and mappings are device-managed and are not freed here.
+ */
+static void icm_nhi_remove(struct pci_dev *pdev)
+{
+ struct tbt_nhi_ctxt *nhi_ctxt = pci_get_drvdata(pdev);
+ int i;
+
+ /* sets d0_exit, so netlink handlers start rejecting this controller */
+ nhi_suspend(&pdev->dev);
+
+ if (nhi_ctxt->net_workqueue)
+ destroy_workqueue(nhi_ctxt->net_workqueue);
+
+ /*
+ * disable irq for msix or msi
+ */
+ if (likely(nhi_ctxt->msix_entries)) {
+ /* Vector #0 isn't being used currently */
+ devm_free_irq(&pdev->dev, nhi_ctxt->msix_entries[1].vector,
+ nhi_ctxt);
+ pci_disable_msix(pdev);
+ } else {
+ devm_free_irq(&pdev->dev, pdev->irq, nhi_ctxt);
+ pci_disable_msi(pdev);
+ }
+
+ /*
+ * remove controller from the controllers list
+ */
+ mutex_lock(&controllers_list_mutex);
+ list_del(&nhi_ctxt->node);
+ mutex_unlock(&controllers_list_mutex);
+
+ /*
+ * deinit == true: a mailbox timeout is not logged as an error here;
+ * the return value is intentionally ignored
+ */
+ nhi_mailbox(
+ nhi_ctxt,
+ ((CC_DRV_UNLOADS_AND_DISCONNECT_INTER_DOMAIN_PATHS
+ << REG_INMAIL_CMD_CMD_SHIFT) &
+ REG_INMAIL_CMD_CMD_MASK) |
+ REG_INMAIL_CMD_REQUEST,
+ 0, true);
+
+ /* short pause after the mailbox command, then reset the host interface */
+ usleep_range(1 * USEC_PER_MSEC, 5 * USEC_PER_MSEC);
+ iowrite32(1, nhi_ctxt->iobase + REG_HOST_INTERFACE_RST);
+
+ mutex_destroy(&nhi_ctxt->d0_exit_send_mutex);
+ mutex_destroy(&nhi_ctxt->d0_exit_mailbox_mutex);
+ mutex_destroy(&nhi_ctxt->mailbox_mutex);
+ for (i = 0; i < nhi_ctxt->num_ports; i++)
+ mutex_destroy(&(nhi_ctxt->net_devices[i].state_mutex));
+}
+
+/**
+ * icm_nhi_probe - bind the driver to an NHI controller running ICM firmware
+ * @pdev: PCI device being probed
+ * @id: matching entry of nhi_pci_device_ids; its driver_data encodes the
+ *      per-controller properties
+ *
+ * Enables and maps the device, verifies that the ICM firmware is running,
+ * allocates the controller context, sets up MSI-X (falling back to MSI),
+ * configures DMA, allocates the shared ICM ring memory and the networking
+ * workqueue, programs the hardware through nhi_resume() and finally adds
+ * the controller to the controllers list.
+ *
+ * Most resources are device-managed and are released automatically when
+ * probing fails.
+ *
+ * Return: 0 on success, a negative errno otherwise.
+ */
+static int icm_nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct tbt_nhi_ctxt *nhi_ctxt;
+	void __iomem *iobase;
+	int i, res;
+	bool enable_msi = false;
+
+	res = pcim_enable_device(pdev);
+	if (res) {
+		dev_err(&pdev->dev, "cannot enable PCI device, aborting\n");
+		return res;
+	}
+
+	res = pcim_iomap_regions(pdev, 1 << NHI_MMIO_BAR, pci_name(pdev));
+	if (res) {
+		dev_err(&pdev->dev, "cannot obtain PCI resources, aborting\n");
+		return res;
+	}
+
+	/* cannot fail - table is allocated in pcim_iomap_regions */
+	iobase = pcim_iomap_table(pdev)[NHI_MMIO_BAR];
+
+	/* check if ICM is running */
+	if (!(ioread32(iobase + REG_FW_STS) & REG_FW_STS_ICM_EN)) {
+		dev_err(&pdev->dev, "ICM isn't present, aborting\n");
+		return -ENODEV;
+	}
+
+	nhi_ctxt = devm_kzalloc(&pdev->dev, sizeof(*nhi_ctxt), GFP_KERNEL);
+	if (!nhi_ctxt)
+		return -ENOMEM;
+
+	nhi_ctxt->pdev = pdev;
+	nhi_ctxt->iobase = iobase;
+	/* controller id: bus/devfn in the upper half, device id in the lower */
+	nhi_ctxt->id = (PCI_DEVID(pdev->bus->number, pdev->devfn) << 16) |
+		       id->device;
+	/*
+	 * Number of paths represents the number of rings available for
+	 * the controller.
+	 */
+	nhi_ctxt->num_paths = ioread32(iobase + REG_HOP_COUNT) &
+			      REG_HOP_COUNT_TOTAL_PATHS_MASK;
+
+	nhi_ctxt->nvm_auth_on_boot = DEVICE_DATA_NVM_AUTH_ON_BOOT(
+						id->driver_data);
+	nhi_ctxt->support_full_e2e = DEVICE_DATA_SUPPORT_FULL_E2E(
+						id->driver_data);
+
+	nhi_ctxt->dma_port = DEVICE_DATA_DMA_PORT(id->driver_data);
+	/*
+	 * Number of ports in the controller
+	 */
+	nhi_ctxt->num_ports = DEVICE_DATA_NUM_PORTS(id->driver_data);
+	nhi_ctxt->nvm_ver_offset = DEVICE_DATA_NVM_VER_OFFSET(id->driver_data);
+
+	mutex_init(&nhi_ctxt->d0_exit_send_mutex);
+	mutex_init(&nhi_ctxt->d0_exit_mailbox_mutex);
+	mutex_init(&nhi_ctxt->mailbox_mutex);
+
+	sema_init(&nhi_ctxt->send_sem, 1);
+
+	INIT_WORK(&nhi_ctxt->icm_msgs_work, nhi_msgs_from_icm);
+
+	spin_lock_init(&nhi_ctxt->lock);
+
+	nhi_ctxt->net_devices = devm_kcalloc(&pdev->dev,
+					     nhi_ctxt->num_ports,
+					     sizeof(struct port_net_dev),
+					     GFP_KERNEL);
+	if (!nhi_ctxt->net_devices)
+		return -ENOMEM;
+
+	for (i = 0; i < nhi_ctxt->num_ports; i++)
+		mutex_init(&(nhi_ctxt->net_devices[i].state_mutex));
+
+	/*
+	 * allocating RX and TX vectors for ICM and per port
+	 * for thunderbolt networking.
+	 * The mapping of the vector is carried out by
+	 * nhi_set_int_vec and looks like:
+	 * 0=tx icm, 1=rx icm, 2=tx data port 0,
+	 * 3=rx data port 0...
+	 */
+	nhi_ctxt->num_vectors = (1 + nhi_ctxt->num_ports) * 2;
+	nhi_ctxt->msix_entries = devm_kcalloc(&pdev->dev,
+					      nhi_ctxt->num_vectors,
+					      sizeof(struct msix_entry),
+					      GFP_KERNEL);
+	if (likely(nhi_ctxt->msix_entries)) {
+		for (i = 0; i < nhi_ctxt->num_vectors; i++)
+			nhi_ctxt->msix_entries[i].entry = i;
+		res = pci_enable_msix_exact(pdev,
+					    nhi_ctxt->msix_entries,
+					    nhi_ctxt->num_vectors);
+		if (!res) {
+			/*
+			 * Allocating ICM RX only.
+			 * vector #0, which is TX complete to ICM,
+			 * isn't being used currently
+			 */
+			res = devm_request_irq(&pdev->dev,
+					nhi_ctxt->msix_entries[1].vector,
+					nhi_icm_ring_rx_msix, 0,
+					pci_name(pdev), nhi_ctxt);
+			/*
+			 * MSI-X must be switched off again before falling
+			 * back, MSI cannot be enabled while MSI-X is active.
+			 */
+			if (res)
+				pci_disable_msix(pdev);
+		}
+
+		if (res) {
+			devm_kfree(&pdev->dev, nhi_ctxt->msix_entries);
+			nhi_ctxt->msix_entries = NULL;
+			enable_msi = true;
+		}
+	} else {
+		enable_msi = true;
+	}
+	/*
+	 * In case allocation didn't succeed, use msi instead of msix
+	 */
+	if (enable_msi) {
+		res = pci_enable_msi(pdev);
+		if (res) {
+			dev_err(&pdev->dev, "cannot enable MSI, aborting\n");
+			return res;
+		}
+		res = devm_request_irq(&pdev->dev, pdev->irq, nhi_msi, 0,
+				       pci_name(pdev), nhi_ctxt);
+		if (res) {
+			dev_err(&pdev->dev,
+				"request_irq failed %d, aborting\n", res);
+			return res;
+		}
+	}
+	/*
+	 * try to work with address space of 64 bits.
+	 * In case this doesn't work, work with 32 bits.
+	 */
+	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+		nhi_ctxt->pci_using_dac = true;
+	} else {
+		res = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (res) {
+			dev_err(&pdev->dev,
+				"No suitable DMA available, aborting\n");
+			return res;
+		}
+	}
+
+	/* the whole ICM ring (buffers and descriptors) must fit in one page */
+	BUILD_BUG_ON(sizeof(struct tbt_buf_desc) != 16);
+	BUILD_BUG_ON(sizeof(struct tbt_icm_ring_shared_memory) > PAGE_SIZE);
+	nhi_ctxt->icm_ring_shared_mem = dmam_alloc_coherent(
+			&pdev->dev, sizeof(*nhi_ctxt->icm_ring_shared_mem),
+			&nhi_ctxt->icm_ring_shared_mem_dma_addr,
+			GFP_KERNEL | __GFP_ZERO);
+	if (nhi_ctxt->icm_ring_shared_mem == NULL) {
+		dev_err(&pdev->dev, "dmam_alloc_coherent failed, aborting\n");
+		return -ENOMEM;
+	}
+
+	nhi_ctxt->net_workqueue = create_singlethread_workqueue("thunderbolt");
+	if (!nhi_ctxt->net_workqueue) {
+		dev_err(&pdev->dev, "create_singlethread_workqueue failed, aborting\n");
+		return -ENOMEM;
+	}
+
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, nhi_ctxt);
+
+	/* initial hardware programming shares the resume path */
+	nhi_resume(&pdev->dev);
+	/*
+	 * Add the new controller at the end of the list
+	 */
+	mutex_lock(&controllers_list_mutex);
+	list_add_tail(&nhi_ctxt->node, &controllers_list);
+	mutex_unlock(&controllers_list_mutex);
+
+	return 0;
+}
+
+/*
+ * System sleep callbacks of the controller.
+ *
+ * SET_SYSTEM_SLEEP_PM_OPS registers nhi_suspend/nhi_resume as the regular
+ * suspend/resume callbacks, not as the *_noirq variants.
+ * NOTE(review): the tunneled pci bridges are siblings of this device;
+ * confirm that resuming at this stage is early enough for them.
+ */
+static const struct dev_pm_ops icm_nhi_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(nhi_suspend, nhi_resume)
+};
+
+/*
+ * Supported NHI controllers.
+ * The DEVICE_DATA() value becomes driver_data and is decoded in
+ * icm_nhi_probe() with the DEVICE_DATA_*() accessors.
+ * NOTE(review): the argument order appears to be (number of ports, DMA
+ * port, NVM version offset, NVM auth on boot, full E2E support) - confirm
+ * against the DEVICE_DATA() definition.
+ */
+static const struct pci_device_id nhi_pci_device_ids[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_2C_NHI),
+ DEVICE_DATA(1, 5, 0xa, false, false) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_REDWOOD_RIDGE_4C_NHI),
+ DEVICE_DATA(2, 5, 0xa, false, false) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI),
+ DEVICE_DATA(1, 5, 0xa, false, false) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI),
+ DEVICE_DATA(2, 5, 0xa, false, false) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_NHI),
+ DEVICE_DATA(1, 3, 0xa, false, false) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_2C_NHI),
+ DEVICE_DATA(1, 5, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_4C_NHI),
+ DEVICE_DATA(2, 5, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_USBONLY_NHI),
+ DEVICE_DATA(1, 5, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_NHI),
+ DEVICE_DATA(1, 3, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_USBONLY_NHI),
+ DEVICE_DATA(1, 3, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_NHI),
+ DEVICE_DATA(1, 5, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_NHI),
+ DEVICE_DATA(2, 5, 0xa, true, true) },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_USBONLY_NHI),
+ DEVICE_DATA(1, 5, 0xa, true, true) },
+ /* terminating entry */
+ { 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, nhi_pci_device_ids);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+/*
+ * PCI driver glue: binds the probe/remove/shutdown callbacks and the
+ * system sleep operations to the controllers in nhi_pci_device_ids.
+ */
+static struct pci_driver icm_nhi_driver = {
+ .name = "thunderbolt",
+ .id_table = nhi_pci_device_ids,
+ .probe = icm_nhi_probe,
+ .remove = icm_nhi_remove,
+ .shutdown = icm_nhi_shutdown,
+ .driver.pm = &icm_nhi_pm_ops,
+};
+
+/*
+ * icm_nhi_init - module entry point.
+ *
+ * Refuses to load on boards whose DMI board vendor is "Apple Inc.".
+ * Registers the genetlink family first and the PCI driver second; the
+ * family is unregistered again when the PCI driver cannot be registered.
+ *
+ * Returns 0 on success, -ENODEV on Apple boards, otherwise the error of
+ * the failing registration.
+ */
+static int __init icm_nhi_init(void)
+{
+	int err;
+
+	if (dmi_match(DMI_BOARD_VENDOR, "Apple Inc."))
+		return -ENODEV;
+
+	err = genl_register_family_with_ops(&nhi_genl_family, nhi_ops);
+	if (!err) {
+		err = pci_register_driver(&icm_nhi_driver);
+		if (!err)
+			return 0;
+
+		/* roll back the only step that succeeded */
+		genl_unregister_family(&nhi_genl_family);
+	}
+
+	pr_debug("nhi: error %d occurred in %s\n", err, __func__);
+	return err;
+}
+
+/*
+ * icm_nhi_unload - module exit point.
+ *
+ * Tears down in the reverse order of icm_nhi_init(). The PCI driver goes
+ * first: unbinding the controllers waits for their ICM message work, which
+ * sends through the genetlink family, so the family has to stay registered
+ * until every controller is gone.
+ */
+static void __exit icm_nhi_unload(void)
+{
+	pci_unregister_driver(&icm_nhi_driver);
+	genl_unregister_family(&nhi_genl_family);
+}
+
+module_init(icm_nhi_init);
+module_exit(icm_nhi_unload);
diff --git a/drivers/thunderbolt/icm/icm_nhi.h b/drivers/thunderbolt/icm/icm_nhi.h
new file mode 100644
index 0000000..1db37e5
--- /dev/null
+++ b/drivers/thunderbolt/icm/icm_nhi.h
@@ -0,0 +1,85 @@
+/*******************************************************************************
+ *
+ * Intel Thunderbolt(TM) driver
+ * Copyright(c) 2014 - 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ ******************************************************************************/
+
+#ifndef ICM_NHI_H_
+#define ICM_NHI_H_
+
+#include <linux/pci.h>
+#include "../nhi_regs.h"
+
+#define DRV_VERSION "16.1.55.1"
+
+/*
+ * PCI device ids of the controllers handled by this driver, defined here
+ * next to the ids of their bridges.
+ */
+#define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_NHI 0x157d /*Tbt 2 Low Pwr*/
+#define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_BRIDGE 0x157e
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_NHI 0x15bf /*Tbt 3 Low Pwr*/
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_BRIDGE 0x15c0
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_NHI 0x15d2 /*Thunderbolt 3*/
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_4C_BRIDGE 0x15d3
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_NHI 0x15d9
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_2C_BRIDGE 0x15da
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_LP_USBONLY_NHI 0x15dc
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_USBONLY_NHI 0x15dd
+#define PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_USBONLY_NHI 0x15de
+
+/* size in bytes of one buffer of the ICM ring */
+#define TBT_ICM_RING_MAX_FRAME_SIZE 256
+/* ring number used for the communication with the ICM */
+#define TBT_ICM_RING_NUM 0
+/* room left for data in a maximum size frame once the header is taken off */
+#define TBT_RING_MAX_FRM_DATA_SZ (TBT_RING_MAX_FRAME_SIZE - \
+ sizeof(struct tbt_frame_header))
+
+/*
+ * Operation modes of the ICM firmware.
+ * NOTE(review): presumably the values reported in the OP_MODE field of
+ * REG_OUTMAIL_CMD - confirm against the code that reads that register.
+ */
+enum icm_operation_mode {
+ SAFE_MODE,
+ AUTHENTICATION_MODE_FUNCTIONALITY,
+ ENDPOINT_OPERATION_MODE,
+ FULL_FUNCTIONALITY,
+};
+
+/*
+ * Number of buffers of the ICM ring.
+ * TX uses the minimum; RX gets as many (buffer + descriptor) pairs as fit
+ * into what is left of a single page after the TX pairs are taken off.
+ */
+#define TBT_ICM_RING_NUM_TX_BUFS TBT_RING_MIN_NUM_BUFFERS
+#define TBT_ICM_RING_NUM_RX_BUFS ((PAGE_SIZE - (TBT_ICM_RING_NUM_TX_BUFS * \
+ (sizeof(struct tbt_buf_desc) + TBT_ICM_RING_MAX_FRAME_SIZE))) / \
+ (sizeof(struct tbt_buf_desc) + TBT_ICM_RING_MAX_FRAME_SIZE))
+
+/*
+ * struct tbt_icm_ring_shared_memory - memory area for DMA
+ *
+ * Layout of the coherent memory shared with the controller for the ICM
+ * ring. The member order matters: the descriptor setup in the driver walks
+ * the DMA address from the start of the area through all TX buffers and
+ * then through all RX buffers.
+ */
+struct tbt_icm_ring_shared_memory {
+ u8 tx_buf[TBT_ICM_RING_NUM_TX_BUFS][TBT_ICM_RING_MAX_FRAME_SIZE];
+ u8 rx_buf[TBT_ICM_RING_NUM_RX_BUFS][TBT_ICM_RING_MAX_FRAME_SIZE];
+ struct tbt_buf_desc tx_buf_desc[TBT_ICM_RING_NUM_TX_BUFS];
+ struct tbt_buf_desc rx_buf_desc[TBT_ICM_RING_NUM_RX_BUFS];
+} __aligned(TBT_ICM_RING_MAX_FRAME_SIZE);
+
+/* mailbox data from SW */
+#define REG_INMAIL_DATA 0x39900
+
+/*
+ * mailbox command from SW
+ * The command code goes into the CMD field together with the REQUEST bit;
+ * the driver then polls the register until REQUEST is no longer set and
+ * treats a set ERROR bit as a failure.
+ */
+#define REG_INMAIL_CMD 0x39904
+#define REG_INMAIL_CMD_CMD_SHIFT 0
+#define REG_INMAIL_CMD_CMD_MASK GENMASK(7, REG_INMAIL_CMD_CMD_SHIFT)
+#define REG_INMAIL_CMD_ERROR BIT(30)
+#define REG_INMAIL_CMD_REQUEST BIT(31)
+
+/* mailbox command from FW */
+#define REG_OUTMAIL_CMD 0x3990C
+#define REG_OUTMAIL_CMD_STS_SHIFT 0
+#define REG_OUTMAIL_CMD_STS_MASK GENMASK(7, REG_OUTMAIL_CMD_STS_SHIFT)
+#define REG_OUTMAIL_CMD_OP_MODE_SHIFT 8
+#define REG_OUTMAIL_CMD_OP_MODE_MASK \
+ GENMASK(11, REG_OUTMAIL_CMD_OP_MODE_SHIFT)
+#define REG_OUTMAIL_CMD_REQUEST BIT(31)
+
+/* firmware status; probing is aborted when no ICM_EN bit is set */
+#define REG_FW_STS 0x39944
+#define REG_FW_STS_ICM_EN GENMASK(1, 0)
+#define REG_FW_STS_NVM_AUTH_DONE BIT(31)
+
+#endif
diff --git a/drivers/thunderbolt/icm/net.h b/drivers/thunderbolt/icm/net.h
new file mode 100644
index 0000000..0281201
--- /dev/null
+++ b/drivers/thunderbolt/icm/net.h
@@ -0,0 +1,217 @@
+/*******************************************************************************
+ *
+ * Intel Thunderbolt(TM) driver
+ * Copyright(c) 2014 - 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ ******************************************************************************/
+
+#ifndef NET_H_
+#define NET_H_
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/mutex.h>
+#include <linux/semaphore.h>
+#include <net/genetlink.h>
+
+/*
+ * Each physical port contains 2 channels.
+ * Devices are exposed to user based on physical ports.
+ */
+#define CHANNELS_PER_PORT_NUM 2
+/*
+ * Calculate host physical port number (Zero-based numbering) from
+ * host channel/link which starts from 1.
+ */
+#define PORT_NUM_FROM_LINK(link) (((link) - 1) / CHANNELS_PER_PORT_NUM)
+
+/*
+ * Ring state helpers working on producer/consumer indices.
+ * TX: empty when the indices are equal, full when the producer is one
+ * slot behind the consumer.
+ * RX: mirrored - full when the indices are equal, empty when the consumer
+ * is one slot behind the producer.
+ */
+#define TBT_TX_RING_FULL(prod, cons, size) ((((prod) + 1) % (size)) == (cons))
+#define TBT_TX_RING_EMPTY(prod, cons) ((prod) == (cons))
+#define TBT_RX_RING_FULL(prod, cons) ((prod) == (cons))
+#define TBT_RX_RING_EMPTY(prod, cons, size) ((((cons) + 1) % (size)) == (prod))
+
+/* ports are mapped to paths from the top: port 0 gets the highest path */
+#define PATH_FROM_PORT(num_paths, port_num) (((num_paths) - 1) - (port_num))
+
+/*
+ * Protocol Defined Field values for SW<->FW communication in raw mode.
+ * On receive, the driver extracts the value from the EOF field of the
+ * descriptor attributes.
+ */
+enum pdf_value {
+ PDF_READ_CONFIGURATION_REGISTERS = 1,
+ PDF_WRITE_CONFIGURATION_REGISTERS,
+ PDF_ERROR_NOTIFICATION,
+ PDF_ERROR_ACKNOWLEDGMENT,
+ PDF_PLUG_EVENT_NOTIFICATION,
+ PDF_INTER_DOMAIN_REQUEST,
+ PDF_INTER_DOMAIN_RESPONSE,
+ PDF_CM_OVERRIDE,
+ PDF_RESET_CIO_SWITCH,
+ PDF_FW_TO_SW_NOTIFICATION,
+ PDF_SW_TO_FW_COMMAND,
+ PDF_FW_TO_SW_RESPONSE
+};
+
+/*
+ * SW->FW commands
+ * CC = Command Code
+ */
+enum {
+ CC_GET_THUNDERBOLT_TOPOLOGY = 1,
+ CC_GET_VIDEO_RESOURCES_DATA,
+ CC_DRV_READY,
+ CC_APPROVE_PCI_CONNECTION,
+ CC_CHALLENGE_PCI_CONNECTION,
+ CC_ADD_DEVICE_AND_KEY,
+ CC_APPROVE_INTER_DOMAIN_CONNECTION = 0x10
+};
+
+/*
+ * FW->SW responses
+ * RC = response code
+ * Every response that has a matching command above uses the same numeric
+ * value as that command; RC_INTER_DOMAIN_PKT_SENT has no such counterpart.
+ */
+enum {
+ RC_GET_TBT_TOPOLOGY = 1,
+ RC_GET_VIDEO_RESOURCES_DATA,
+ RC_DRV_READY,
+ RC_APPROVE_PCI_CONNECTION,
+ RC_CHALLENGE_PCI_CONNECTION,
+ RC_ADD_DEVICE_AND_KEY,
+ RC_INTER_DOMAIN_PKT_SENT = 8,
+ RC_APPROVE_INTER_DOMAIN_CONNECTION = 0x10
+};
+
+/*
+ * FW->SW notifications
+ * NC = notification code
+ * Numbering starts at 3.
+ */
+enum {
+ NC_DEVICE_CONNECTED = 3,
+ NC_DEVICE_DISCONNECTED,
+ NC_DP_DEVICE_CONNECTED_NOT_TUNNELED,
+ NC_INTER_DOMAIN_CONNECTED,
+ NC_INTER_DOMAIN_DISCONNECTED
+};
+
+/*
+ * SW -> FW mailbox commands
+ * CC = Command Code
+ *
+ * These codes are written into the CMD field of REG_INMAIL_CMD. They form
+ * a number space of their own (starting at 0) even though they share the
+ * CC_ prefix with the SW->FW message commands; a few entries carry no
+ * prefix at all.
+ */
+enum {
+ CC_STOP_CM_ACTIVITY,
+ CC_ENTER_PASS_THROUGH_MODE,
+ CC_ENTER_CM_OWNERSHIP_MODE,
+ CC_DRV_LOADED,
+ CC_DRV_UNLOADED,
+ CC_SAVE_CURRENT_CONNECTED_DEVICES,
+ CC_DISCONNECT_PCIE_PATHS,
+ CC_DRV_UNLOADS_AND_DISCONNECT_INTER_DOMAIN_PATHS,
+ DISCONNECT_PORT_A_INTER_DOMAIN_PATH = 0x10,
+ DISCONNECT_PORT_B_INTER_DOMAIN_PATH,
+ DP_TUNNEL_MODE_IN_ORDER_PER_CAPABILITIES = 0x1E,
+ DP_TUNNEL_MODE_MAXIMIZE_SNK_SRC_TUNNELS,
+ CC_SET_FW_MODE_FD1_D1_CERT = 0x20,
+ CC_SET_FW_MODE_FD1_D1_ALL,
+ CC_SET_FW_MODE_FD1_DA_CERT,
+ CC_SET_FW_MODE_FD1_DA_ALL,
+ CC_SET_FW_MODE_FDA_D1_CERT,
+ CC_SET_FW_MODE_FDA_D1_ALL,
+ CC_SET_FW_MODE_FDA_DA_CERT,
+ CC_SET_FW_MODE_FDA_DA_ALL
+};
+
+
+/*
+ * NHI genetlink attributes.
+ * __NHI_ATTR_MAX only terminates the list; NHI_ATTR_MAX is the highest
+ * valid attribute number.
+ */
+enum {
+ NHI_ATTR_UNSPEC,
+ NHI_ATTR_DRV_VERSION,
+ NHI_ATTR_NVM_VER_OFFSET,
+ NHI_ATTR_NUM_PORTS,
+ NHI_ATTR_DMA_PORT,
+ NHI_ATTR_SUPPORT_FULL_E2E,
+ NHI_ATTR_MAILBOX_CMD,
+ NHI_ATTR_PDF,
+ NHI_ATTR_MSG_TO_ICM,
+ NHI_ATTR_MSG_FROM_ICM,
+ __NHI_ATTR_MAX,
+};
+#define NHI_ATTR_MAX (__NHI_ATTR_MAX - 1)
+
+/**
+ * struct port_net_dev - per port networking state
+ * @net_dev: network device of the port
+ * @state_mutex: mutex of the port; initialized at probe and destroyed at
+ * remove of the controller
+ */
+struct port_net_dev {
+ struct net_device *net_dev;
+ struct mutex state_mutex;
+};
+
+/**
+ * struct tbt_nhi_ctxt - thunderbolt native host interface context
+ * @node: node in the controllers list.
+ * @pdev: pci device information.
+ * @iobase: address of I/O.
+ * @msix_entries: MSI-X vectors; NULL when the driver
+ * fell back to MSI.
+ * @icm_ring_shared_mem: virtual address of iCM ring.
+ * @icm_ring_shared_mem_dma_addr: DMA addr of iCM ring.
+ * @send_sem: semaphore for sending messages to iCM
+ * one at a time.
+ * @mailbox_mutex: mutex for sending mailbox commands to
+ * iCM one at a time.
+ * @d0_exit_send_mutex: synchronizing the d0 exit with messages.
+ * @d0_exit_mailbox_mutex: synchronizing the d0 exit with mailbox.
+ * @lock: synchronizing the interrupt registers
+ * access.
+ * @icm_msgs_work: work queue for handling messages
+ * from iCM.
+ * @net_devices: net devices per port.
+ * @net_workqueue: work queue to send net messages.
+ * @id: id of the controller.
+ * @num_paths: number of paths supported by controller.
+ * @nvm_ver_offset: offset of NVM version in NVM.
+ * @num_vectors: number of MSI-X vectors.
+ * @num_ports: number of ports in the controller.
+ * @dma_port: DMA port.
+ * @d0_exit: whether controller exit D0 state.
+ * @nvm_auth_on_boot: whether iCM authenticates the NVM
+ * during boot.
+ * @wait_for_icm_resp: whether to wait for iCM response.
+ * @ignore_icm_resp: whether to ignore iCM response.
+ * @pci_using_dac: whether using DAC.
+ * @support_full_e2e: whether controller support full E2E.
+ */
+struct tbt_nhi_ctxt {
+ struct list_head node;
+ struct pci_dev *pdev;
+ void __iomem *iobase;
+ struct msix_entry *msix_entries;
+ struct tbt_icm_ring_shared_memory *icm_ring_shared_mem;
+ dma_addr_t icm_ring_shared_mem_dma_addr;
+ struct semaphore send_sem;
+ struct mutex mailbox_mutex;
+ struct mutex d0_exit_send_mutex;
+ struct mutex d0_exit_mailbox_mutex;
+ spinlock_t lock;
+ struct work_struct icm_msgs_work;
+ struct port_net_dev *net_devices;
+ struct workqueue_struct *net_workqueue;
+ u32 id;
+ u32 num_paths;
+ u16 nvm_ver_offset;
+ u8 num_vectors;
+ u8 num_ports;
+ u8 dma_port;
+ bool d0_exit;
+ /*
+ * NOTE(review): the flags below are bitfields sharing storage, and
+ * wait_for_icm_resp is written from the iCM message work as well as
+ * from the suspend path. Confirm that all writers of these flags are
+ * serialized, since a bitfield update rewrites its neighbours.
+ */
+ bool nvm_auth_on_boot : 1;
+ bool wait_for_icm_resp : 1;
+ bool ignore_icm_resp : 1;
+ bool pci_using_dac : 1;
+ bool support_full_e2e : 1;
+};
+
+/*
+ * Send a message with the given PDF to the iCM.
+ * NOTE(review): semantics of ignore_icm_resp and of the return value are
+ * defined by the implementation - confirm there.
+ */
+int nhi_send_message(struct tbt_nhi_ctxt *nhi_ctxt, enum pdf_value pdf,
+ u32 msg_len, const void *msg, bool ignore_icm_resp);
+/*
+ * Issue a mailbox command to the iCM.
+ * Returns 0 on success, -EIO when the command register reports an error
+ * and -ETIMEDOUT when the request is not consumed within the retry limit;
+ * with deinit set, the timeout is not logged as an error.
+ */
+int nhi_mailbox(struct tbt_nhi_ctxt *nhi_ctxt, u32 cmd, u32 data, bool deinit);
+
+#endif
--
2.7.4
^ permalink raw reply related
* [PATCH v9 2/8] thunderbolt: Updating the register definitions
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
Adding more Thunderbolt(TM) register definitions
and some helper macros.
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
---
drivers/thunderbolt/nhi_regs.h | 109 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)
diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
index 75cf069..b8e961f 100644
--- a/drivers/thunderbolt/nhi_regs.h
+++ b/drivers/thunderbolt/nhi_regs.h
@@ -9,6 +9,11 @@
#include <linux/types.h>
+/* PCI BAR holding the NHI registers */
+#define NHI_MMIO_BAR 0
+
+/* smallest number of buffers in a ring and largest frame size in bytes */
+#define TBT_RING_MIN_NUM_BUFFERS 2
+#define TBT_RING_MAX_FRAME_SIZE (4 * 1024)
+
enum ring_flags {
RING_FLAG_ISOCH_ENABLE = 1 << 27, /* TX only? */
RING_FLAG_E2E_FLOW_CONTROL = 1 << 28,
@@ -39,6 +44,33 @@ struct ring_desc {
u32 time; /* write zero */
} __packed;
+/**
+ * struct tbt_buf_desc - TX/RX ring buffer descriptor.
+ * This is the same as struct ring_desc, but without the use of bitfields
+ * and with explicit endianness.
+ * @phys: DMA address of the buffer
+ * @attributes: length, EOF/SOF and flags, see the DESC_ATTR_* definitions
+ * @time: written as zero by the driver
+ */
+struct tbt_buf_desc {
+ __le64 phys;
+ __le32 attributes;
+ __le32 time;
+};
+
+/*
+ * Layout of tbt_buf_desc.attributes (CPU byte order after le32_to_cpu).
+ * Bits 20 and 22 have a different meaning depending on direction and on
+ * whether the descriptor was already used, as noted per definition.
+ */
+#define DESC_ATTR_LEN_SHIFT 0
+#define DESC_ATTR_LEN_MASK GENMASK(11, DESC_ATTR_LEN_SHIFT)
+#define DESC_ATTR_EOF_SHIFT 12
+#define DESC_ATTR_EOF_MASK GENMASK(15, DESC_ATTR_EOF_SHIFT)
+#define DESC_ATTR_SOF_SHIFT 16
+#define DESC_ATTR_SOF_MASK GENMASK(19, DESC_ATTR_SOF_SHIFT)
+#define DESC_ATTR_TX_ISOCH_DMA_EN BIT(20) /* TX */
+#define DESC_ATTR_RX_CRC_ERR BIT(20) /* RX after use */
+#define DESC_ATTR_DESC_DONE BIT(21)
+#define DESC_ATTR_REQ_STS BIT(22) /* TX and RX before use */
+#define DESC_ATTR_RX_BUF_OVRN_ERR BIT(22) /* RX after use */
+#define DESC_ATTR_INT_EN BIT(23)
+#define DESC_ATTR_OFFSET_SHIFT 24
+#define DESC_ATTR_OFFSET_MASK GENMASK(31, DESC_ATTR_OFFSET_SHIFT)
+
+
/* NHI registers in bar 0 */
/*
@@ -60,6 +92,30 @@ struct ring_desc {
*/
#define REG_RX_RING_BASE 0x08000
+#define REG_RING_STEP 16
+#define REG_RING_PHYS_LO_OFFSET 0
+#define REG_RING_PHYS_HI_OFFSET 4
+#define REG_RING_CONS_PROD_OFFSET 8 /* cons - RO, prod - RW */
+#define REG_RING_CONS_SHIFT 0
+#define REG_RING_CONS_MASK GENMASK(15, REG_RING_CONS_SHIFT)
+#define REG_RING_PROD_SHIFT 16
+#define REG_RING_PROD_MASK GENMASK(31, REG_RING_PROD_SHIFT)
+#define REG_RING_SIZE_OFFSET 12
+#define REG_RING_SIZE_SHIFT 0
+#define REG_RING_SIZE_MASK GENMASK(15, REG_RING_SIZE_SHIFT)
+#define REG_RING_BUF_SIZE_SHIFT 16
+#define REG_RING_BUF_SIZE_MASK GENMASK(27, REG_RING_BUF_SIZE_SHIFT)
+
+#define TBT_RING_CONS_PROD_REG(iobase, ringbase, ringnumber) \
+ ((iobase) + (ringbase) + \
+ ((ringnumber) * REG_RING_STEP) + \
+ REG_RING_CONS_PROD_OFFSET)
+
+#define TBT_REG_RING_PROD_EXTRACT(val) (((val) & REG_RING_PROD_MASK) >> \
+ REG_RING_PROD_SHIFT)
+
+#define TBT_REG_RING_CONS_EXTRACT(val) (((val) & REG_RING_CONS_MASK) >> \
+ REG_RING_CONS_SHIFT)
/*
* 32 bytes per entry, one entry for every hop (REG_HOP_COUNT)
* 00: enum_ring_flags
@@ -77,6 +133,19 @@ struct ring_desc {
* ..: unknown
*/
#define REG_RX_OPTIONS_BASE 0x29800
+#define REG_RX_OPTS_TX_E2E_HOP_ID_SHIFT 12
+#define REG_RX_OPTS_TX_E2E_HOP_ID_MASK \
+ GENMASK(22, REG_RX_OPTS_TX_E2E_HOP_ID_SHIFT)
+#define REG_RX_OPTS_MASK_OFFSET 4
+#define REG_RX_OPTS_MASK_EOF_SHIFT 0
+#define REG_RX_OPTS_MASK_EOF_MASK GENMASK(15, REG_RX_OPTS_MASK_EOF_SHIFT)
+#define REG_RX_OPTS_MASK_SOF_SHIFT 16
+#define REG_RX_OPTS_MASK_SOF_MASK GENMASK(31, REG_RX_OPTS_MASK_SOF_SHIFT)
+
+#define REG_OPTS_STEP 32
+#define REG_OPTS_E2E_EN BIT(28)
+#define REG_OPTS_RAW BIT(30)
+#define REG_OPTS_VALID BIT(31)
/*
* three bitfields: tx, rx, rx overflow
@@ -86,6 +155,7 @@ struct ring_desc {
*/
#define REG_RING_NOTIFY_BASE 0x37800
#define RING_NOTIFY_REG_COUNT(nhi) ((31 + 3 * nhi->hop_count) / 32)
+#define REG_RING_NOTIFY_STEP 4
/*
* two bitfields: rx, tx
@@ -94,8 +164,47 @@ struct ring_desc {
*/
#define REG_RING_INTERRUPT_BASE 0x38200
#define RING_INTERRUPT_REG_COUNT(nhi) ((31 + 2 * nhi->hop_count) / 32)
+#define REG_RING_INT_TX_PROCESSED(ring_num) BIT(ring_num)
+#define REG_RING_INT_RX_PROCESSED(ring_num, num_paths) BIT((ring_num) + \
+ (num_paths))
+#define RING_INT_DISABLE(base, val) iowrite32( \
+ ioread32((base) + REG_RING_INTERRUPT_BASE) & ~(val), \
+ (base) + REG_RING_INTERRUPT_BASE)
+#define RING_INT_ENABLE(base, val) iowrite32( \
+ ioread32((base) + REG_RING_INTERRUPT_BASE) | (val), \
+ (base) + REG_RING_INTERRUPT_BASE)
+#define RING_INT_DISABLE_TX(base, ring_num) \
+ RING_INT_DISABLE(base, REG_RING_INT_TX_PROCESSED(ring_num))
+#define RING_INT_DISABLE_RX(base, ring_num, num_paths) \
+ RING_INT_DISABLE(base, REG_RING_INT_RX_PROCESSED(ring_num, num_paths))
+#define RING_INT_ENABLE_TX(base, ring_num) \
+ RING_INT_ENABLE(base, REG_RING_INT_TX_PROCESSED(ring_num))
+#define RING_INT_ENABLE_RX(base, ring_num, num_paths) \
+ RING_INT_ENABLE(base, REG_RING_INT_RX_PROCESSED(ring_num, num_paths))
+#define RING_INT_DISABLE_TX_RX(base, ring_num, num_paths) \
+ RING_INT_DISABLE(base, REG_RING_INT_TX_PROCESSED(ring_num) | \
+ REG_RING_INT_RX_PROCESSED(ring_num, num_paths))
+
+#define REG_RING_INTERRUPT_STEP 4
+
+#define REG_INT_THROTTLING_RATE 0x38c00
+#define REG_INT_THROTTLING_RATE_STEP 4
+#define NUM_INT_VECTORS 16
+
+#define REG_INT_VEC_ALLOC_BASE 0x38c40
+#define REG_INT_VEC_ALLOC_STEP 4
+#define REG_INT_VEC_ALLOC_FIELD_BITS 4
+#define REG_INT_VEC_ALLOC_FIELD_MASK (BIT(REG_INT_VEC_ALLOC_FIELD_BITS) - 1)
+#define REG_INT_VEC_ALLOC_PER_REG ((BITS_PER_BYTE * sizeof(u32)) / \
+ REG_INT_VEC_ALLOC_FIELD_BITS)
/* The last 11 bits contain the number of hops supported by the NHI port. */
#define REG_HOP_COUNT 0x39640
+#define REG_HOP_COUNT_TOTAL_PATHS_MASK GENMASK(10, 0)
+
+#define REG_HOST_INTERFACE_RST 0x39858
+
+#define REG_DMA_MISC 0x39864
+#define REG_DMA_MISC_INT_AUTO_CLEAR BIT(2)
#endif
--
2.7.4
^ permalink raw reply related
* [PATCH v9 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
This driver enables Thunderbolt Networking on non-Apple platforms
running Linux.
Thunderbolt Networking provides peer-to-peer connections to transfer
files between computers, perform PC migrations, and/or set up small
workgroups with shared storage.
This is a virtual connection that emulates an Ethernet adapter that
enables Ethernet networking with the benefit of Thunderbolt superfast
medium capability.
Thunderbolt Networking enables two hosts and several devices that
have a Thunderbolt controller to be connected together in a linear
(Daisy chain) series from a single port.
Thunderbolt Networking for Linux is compatible with Thunderbolt
Networking on systems running macOS or Windows and also supports
Thunderbolt generation 2 and 3 controllers.
Note that all pre-existing Thunderbolt generation 3 features, such as
USB, Display and other Thunderbolt device connectivity will continue
to function exactly as they did prior to enabling Thunderbolt Networking.
Code and Software Specifications:
This kernel code creates a virtual ethernet device for computer to
computer communication over a Thunderbolt cable.
The new driver is a separate driver to the existing Thunderbolt driver.
It is designed to work on systems running Linux that
interface with Intel Connection Manager (ICM) firmware based
Thunderbolt controllers that support Thunderbolt Networking.
The kernel code operates in coordination with the Thunderbolt user-
space daemon to implement full Thunderbolt networking functionality.
Hardware Specifications:
Thunderbolt Hardware specs have not yet been published but are used
where necessary for register definitions.
Acked-by: Andreas Noever <andreas.noever@gmail.com>
Tested-by: Mario Limonciello <mario.limonciello@dell.com>
Changes since v8:
- Added support for more Thunderbolt device IDs
These patches were pushed to GitHub where they can be reviewed more
comfortably with green/red highlighting:
https://github.com/01org/thunderbolt-software-kernel-tree
Daemon code:
https://github.com/01org/thunderbolt-software-daemon
For reference, here's a link to version 8:
[v8]: https://lkml.org/lkml/2016/9/28/378
Amir Levy (8):
thunderbolt: Macro rename
thunderbolt: Updating the register definitions
thunderbolt: Communication with the ICM (firmware)
thunderbolt: Networking state machine
thunderbolt: Networking transmit and receive
thunderbolt: Kconfig for Thunderbolt Networking
thunderbolt: Networking doc
thunderbolt: Adding maintainer entry
Documentation/00-INDEX | 2 +
Documentation/thunderbolt/networking.txt | 132 ++
MAINTAINERS | 8 +-
drivers/thunderbolt/Kconfig | 27 +-
drivers/thunderbolt/Makefile | 3 +-
drivers/thunderbolt/icm/Makefile | 2 +
drivers/thunderbolt/icm/icm_nhi.c | 1520 ++++++++++++++++++++
drivers/thunderbolt/icm/icm_nhi.h | 85 ++
drivers/thunderbolt/icm/net.c | 2254 ++++++++++++++++++++++++++++++
drivers/thunderbolt/icm/net.h | 287 ++++
drivers/thunderbolt/nhi_regs.h | 115 +-
11 files changed, 4426 insertions(+), 9 deletions(-)
create mode 100644 Documentation/thunderbolt/networking.txt
create mode 100644 drivers/thunderbolt/icm/Makefile
create mode 100644 drivers/thunderbolt/icm/icm_nhi.c
create mode 100644 drivers/thunderbolt/icm/icm_nhi.h
create mode 100644 drivers/thunderbolt/icm/net.c
create mode 100644 drivers/thunderbolt/icm/net.h
--
2.7.4
^ permalink raw reply
* [PATCH net-next v5] cadence: Add LSO support.
From: Rafal Ozieblo @ 2016-11-09 13:41 UTC (permalink / raw)
To: nicolas.ferre, netdev, linux-kernel; +Cc: Rafal Ozieblo
In-Reply-To: <1478612463-15076-1-git-send-email-rafalo@cadence.com>
New Cadence GEM hardware supports Large Segment Offload (LSO):
TCP segmentation offload (TSO) as well as UDP fragmentation
offload (UFO). Support for those features was added to the driver.
Signed-off-by: Rafal Ozieblo <rafalo@cadence.com>
---
Changed in v2:
macb_lso_check_compatibility() changed to macb_features_check()
(with little modifications) and bind to .ndo_features_check.
(after Eric Dumazet suggestion)
---
Changed in v3:
Respin to net-next.
---
Changed in v4:
(struct iphdr*)skb_network_header(skb) changed to ip_hdr(skb)
---
Changed in v5:
Changes after Florian Fainelli comments
---
drivers/net/ethernet/cadence/macb.c | 142 +++++++++++++++++++++++++++++++++---
drivers/net/ethernet/cadence/macb.h | 14 ++++
2 files changed, 144 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index e1847ce..dd38ef7 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -32,7 +32,9 @@
#include <linux/of_gpio.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
-
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
#include "macb.h"
#define MACB_RX_BUFFER_SIZE 128
@@ -60,10 +62,13 @@
| MACB_BIT(TXERR))
#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP))
-#define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1))
-#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1))
+/* Max length of transmit frame must be a multiple of 8 bytes */
+#define MACB_TX_LEN_ALIGN 8
+#define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
+#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
#define GEM_MTU_MIN_SIZE ETH_MIN_MTU
+#define MACB_NETIF_LSO (NETIF_F_TSO | NETIF_F_UFO)
#define MACB_WOL_HAS_MAGIC_PACKET (0x1 << 0)
#define MACB_WOL_ENABLED (0x1 << 1)
@@ -1223,7 +1228,8 @@ static void macb_poll_controller(struct net_device *dev)
static unsigned int macb_tx_map(struct macb *bp,
struct macb_queue *queue,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ unsigned int hdrlen)
{
dma_addr_t mapping;
unsigned int len, entry, i, tx_head = queue->tx_head;
@@ -1231,14 +1237,27 @@ static unsigned int macb_tx_map(struct macb *bp,
struct macb_dma_desc *desc;
unsigned int offset, size, count = 0;
unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags;
- unsigned int eof = 1;
- u32 ctrl;
+ unsigned int eof = 1, mss_mfs = 0;
+ u32 ctrl, lso_ctrl = 0, seq_ctrl = 0;
+
+ /* LSO */
+ if (skb_shinfo(skb)->gso_size != 0) {
+ if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+ /* UDP - UFO */
+ lso_ctrl = MACB_LSO_UFO_ENABLE;
+ else
+ /* TCP - TSO */
+ lso_ctrl = MACB_LSO_TSO_ENABLE;
+ }
/* First, map non-paged data */
len = skb_headlen(skb);
+
+ /* first buffer length */
+ size = hdrlen;
+
offset = 0;
while (len) {
- size = min(len, bp->max_tx_length);
entry = macb_tx_ring_wrap(bp, tx_head);
tx_skb = &queue->tx_skb[entry];
@@ -1258,6 +1277,8 @@ static unsigned int macb_tx_map(struct macb *bp,
offset += size;
count++;
tx_head++;
+
+ size = min(len, bp->max_tx_length);
}
/* Then, map paged data from fragments */
@@ -1311,6 +1332,21 @@ static unsigned int macb_tx_map(struct macb *bp,
desc = &queue->tx_ring[entry];
desc->ctrl = ctrl;
+ if (lso_ctrl) {
+ if (lso_ctrl == MACB_LSO_UFO_ENABLE)
+ /* include header and FCS in value given to h/w */
+ mss_mfs = skb_shinfo(skb)->gso_size +
+ skb_transport_offset(skb) +
+ ETH_FCS_LEN;
+ else /* TSO */ {
+ mss_mfs = skb_shinfo(skb)->gso_size;
+ /* TCP Sequence Number Source Select
+ * can be set only for TSO
+ */
+ seq_ctrl = 0;
+ }
+ }
+
do {
i--;
entry = macb_tx_ring_wrap(bp, i);
@@ -1325,6 +1361,16 @@ static unsigned int macb_tx_map(struct macb *bp,
if (unlikely(entry == (bp->tx_ring_size - 1)))
ctrl |= MACB_BIT(TX_WRAP);
+ /* First descriptor is header descriptor */
+ if (i == queue->tx_head) {
+ ctrl |= MACB_BF(TX_LSO, lso_ctrl);
+ ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
+ } else
+ /* Only set MSS/MFS on payload descriptors
+ * (second or later descriptor)
+ */
+ ctrl |= MACB_BF(MSS_MFS, mss_mfs);
+
/* Set TX buffer descriptor */
macb_set_addr(desc, tx_skb->mapping);
/* desc->addr must be visible to hardware before clearing
@@ -1350,6 +1396,43 @@ static unsigned int macb_tx_map(struct macb *bp,
return 0;
}
+static netdev_features_t macb_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+ unsigned int nr_frags, f;
+ unsigned int hdrlen;
+
+ /* Validate LSO compatibility */
+
+ /* there is only one buffer */
+ if (!skb_is_nonlinear(skb))
+ return features;
+
+ /* length of header */
+ hdrlen = skb_transport_offset(skb);
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+ hdrlen += tcp_hdrlen(skb);
+
+ /* For LSO:
+ * When software supplies two or more payload buffers all payload buffers
+ * apart from the last must be a multiple of 8 bytes in size.
+ */
+ if (!IS_ALIGNED(skb_headlen(skb) - hdrlen, MACB_TX_LEN_ALIGN))
+ return features & ~MACB_NETIF_LSO;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ /* No need to check last fragment */
+ nr_frags--;
+ for (f = 0; f < nr_frags; f++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+ if (!IS_ALIGNED(skb_frag_size(frag), MACB_TX_LEN_ALIGN))
+ return features & ~MACB_NETIF_LSO;
+ }
+ return features;
+}
+
static inline int macb_clear_csum(struct sk_buff *skb)
{
/* no change for packets without checksum offloading */
@@ -1374,7 +1457,28 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct macb *bp = netdev_priv(dev);
struct macb_queue *queue = &bp->queues[queue_index];
unsigned long flags;
- unsigned int count, nr_frags, frag_size, f;
+ unsigned int desc_cnt, nr_frags, frag_size, f;
+ unsigned int hdrlen;
+ bool is_lso, is_udp = 0;
+
+ is_lso = (skb_shinfo(skb)->gso_size != 0);
+
+ if (is_lso) {
+ is_udp = !!(ip_hdr(skb)->protocol == IPPROTO_UDP);
+
+ /* length of headers */
+ if (is_udp)
+ /* only queue eth + ip headers separately for UDP */
+ hdrlen = skb_transport_offset(skb);
+ else
+ hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ if (skb_headlen(skb) < hdrlen) {
+ netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n");
+ /* if this is required, would need to copy to single buffer */
+ return NETDEV_TX_BUSY;
+ }
+ } else
+ hdrlen = min(skb_headlen(skb), bp->max_tx_length);
#if defined(DEBUG) && defined(VERBOSE_DEBUG)
netdev_vdbg(bp->dev,
@@ -1389,18 +1493,22 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
* socket buffer: skb fragments of jumbo frames may need to be
* split into many buffer descriptors.
*/
- count = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
+ if (is_lso && (skb_headlen(skb) > hdrlen))
+ /* extra header descriptor if also payload in first buffer */
+ desc_cnt = DIV_ROUND_UP((skb_headlen(skb) - hdrlen), bp->max_tx_length) + 1;
+ else
+ desc_cnt = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
nr_frags = skb_shinfo(skb)->nr_frags;
for (f = 0; f < nr_frags; f++) {
frag_size = skb_frag_size(&skb_shinfo(skb)->frags[f]);
- count += DIV_ROUND_UP(frag_size, bp->max_tx_length);
+ desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length);
}
spin_lock_irqsave(&bp->lock, flags);
/* This is a hard error, log it. */
if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
- bp->tx_ring_size) < count) {
+ bp->tx_ring_size) < desc_cnt) {
netif_stop_subqueue(dev, queue_index);
spin_unlock_irqrestore(&bp->lock, flags);
netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
@@ -1408,13 +1516,17 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_BUSY;
}
+ if (is_udp) /* is_udp is only set when (is_lso) is checked */
+ /* zero UDP checksum, not calculated by h/w for UFO */
+ udp_hdr(skb)->check = 0;
+
if (macb_clear_csum(skb)) {
dev_kfree_skb_any(skb);
goto unlock;
}
/* Map socket buffer for DMA transfer */
- if (!macb_tx_map(bp, queue, skb)) {
+ if (!macb_tx_map(bp, queue, skb, hdrlen)) {
dev_kfree_skb_any(skb);
goto unlock;
}
@@ -2354,6 +2466,7 @@ static const struct net_device_ops macb_netdev_ops = {
.ndo_poll_controller = macb_poll_controller,
#endif
.ndo_set_features = macb_set_features,
+ .ndo_features_check = macb_features_check,
};
/* Configure peripheral capabilities according to device tree
@@ -2560,6 +2673,11 @@ static int macb_init(struct platform_device *pdev)
/* Set features */
dev->hw_features = NETIF_F_SG;
+
+ /* Check LSO capability */
+ if (GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6)))
+ dev->hw_features |= MACB_NETIF_LSO;
+
/* Checksum offload is only available on gem with packet buffer */
if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 1216950..d67adad 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -382,6 +382,10 @@
#define GEM_TX_PKT_BUFF_OFFSET 21
#define GEM_TX_PKT_BUFF_SIZE 1
+/* Bitfields in DCFG6. */
+#define GEM_PBUF_LSO_OFFSET 27
+#define GEM_PBUF_LSO_SIZE 1
+
/* Constants for CLK */
#define MACB_CLK_DIV8 0
#define MACB_CLK_DIV16 1
@@ -414,6 +418,10 @@
#define MACB_CAPS_SG_DISABLED 0x40000000
#define MACB_CAPS_MACB_IS_GEM 0x80000000
+/* LSO settings */
+#define MACB_LSO_UFO_ENABLE 0x01
+#define MACB_LSO_TSO_ENABLE 0x02
+
/* Bit manipulation macros */
#define MACB_BIT(name) \
(1 << MACB_##name##_OFFSET)
@@ -545,6 +553,12 @@ struct macb_dma_desc {
#define MACB_TX_LAST_SIZE 1
#define MACB_TX_NOCRC_OFFSET 16
#define MACB_TX_NOCRC_SIZE 1
+#define MACB_MSS_MFS_OFFSET 16
+#define MACB_MSS_MFS_SIZE 14
+#define MACB_TX_LSO_OFFSET 17
+#define MACB_TX_LSO_SIZE 2
+#define MACB_TX_TCP_SEQ_SRC_OFFSET 19
+#define MACB_TX_TCP_SEQ_SRC_SIZE 1
#define MACB_TX_BUF_EXHAUSTED_OFFSET 27
#define MACB_TX_BUF_EXHAUSTED_SIZE 1
#define MACB_TX_UNDERRUN_OFFSET 28
--
2.4.5
^ permalink raw reply related
* Re: [PATCH 1/2] net: mvpp2: don't bring up on MAC address set
From: Thomas Petazzoni @ 2016-11-09 13:22 UTC (permalink / raw)
To: Baruch Siach; +Cc: Marcin Wojtas, netdev, Gregory Clement
In-Reply-To: <ff17831771f3575f351c134703d3f153485b01c0.1478696194.git.baruch@tkos.co.il>
Hello,
On Wed, 9 Nov 2016 14:56:33 +0200, Baruch Siach wrote:
> Current .ndo_set_mac_address implementation brings up the interface when the
> revert to the original address after a failure succeeds. Fix this.
>
> Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Indeed, this piece of code is not very smart.
> diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
> index 60227a3452a4..e427b4706726 100644
> --- a/drivers/net/ethernet/marvell/mvpp2.c
> +++ b/drivers/net/ethernet/marvell/mvpp2.c
> @@ -5686,9 +5686,8 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
> if (!err)
> return 0;
> /* Reconfigure parser to accept the original MAC address */
> - err = mvpp2_prs_update_mac_da(dev, dev->dev_addr);
> - if (err)
> - goto error;
> + mvpp2_prs_update_mac_da(dev, dev->dev_addr);
> + goto error;
Wouldn't it make more sense to call mvpp2_prs_update_mac_da() under
the error: goto label?
But if you think beyond that, it is a bit crazy that to handle the
error case of mvpp2_prs_update_mac_da(), we have to call
mvpp2_prs_update_mac_da(), which is exactly the same function...
Perhaps it would be interesting to investigate what are the various
conditions for which mvpp2_prs_update_mac_da() fails, and see if we can
avoid them.
Best regards,
Thomas
--
Thomas Petazzoni, CTO, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
^ permalink raw reply
* Re: [Intel-wired-lan] [PATCH] igb: use igb_adapter->io_addr instead of e1000_hw->hw_addr
From: Cao jin @ 2016-11-09 13:28 UTC (permalink / raw)
To: Alexander Duyck, Hisashi T Fujinaka, Netdev, intel-wired-lan,
linux-kernel@vger.kernel.org, Izumi, Taku/泉 拓,
vinschen
In-Reply-To: <CAKgT0UeaoGfOOC1=h4pGk+=FDd5EqvgRGAdk1StLuiW1-M8tVA@mail.gmail.com>
Thanks Corinna for your info.
I tested my patch, it works for me on kernel 4.9-rc4.
"surprise removal" may be another issue to solve. This one is enough to
solve my issue and others'; could it be accepted first?
Cao jin
On 11/09/2016 03:33 AM, Alexander Duyck wrote:
> On Tue, Nov 8, 2016 at 10:37 AM, Corinna Vinschen <vinschen@redhat.com> wrote:
>> On Nov 8 09:16, Hisashi T Fujinaka wrote:
>>> On Tue, 8 Nov 2016, Corinna Vinschen wrote:
>>>> On Nov 8 15:06, Cao jin wrote:
>>>>> When running as guest, under certain condition, it will oops as following.
>>>>> writel() in igb_configure_tx_ring() results in oops, because hw->hw_addr
>>>>> is NULL. While other register access won't oops kernel because they use
>>>>> wr32/rd32 which have a defense against NULL pointer.
>>>>> [...]
>>>>
>>>> Incidentally we're just looking for a solution to that problem too.
>>>> Do three patches to fix the same problem at roughly the same time already
>>>> qualify as freak accident?
>>>>
>>>> FTR, I attached my current patch, which I was planning to submit after
>>>> some external testing.
>>>>
>>>> However, all three patches have one thing in common: They workaround
>>>> a somewhat dubious resetting of the hardware address to NULL in case
>>>> reading from a register failed.
>>>>
>>>> That makes me wonder if setting the hardware address to NULL in
>>>> rd32/igb_rd32 is really such a good idea. It's performed in a function
>>>> which return value is *never* tested for validity in the calling
>>>> functions and leads to subsequent crashes since no tests for hw_addr ==
>>>> NULL are performed.
>>>>
>>>> Maybe commit 22a8b2915 should be reconsidered? Isn't there some more
>>>> graceful way to handle the "surprise removal"?
>>>
>>> Answering this from my home account because, well, work is Outlook.
>>>
>>> "Reconsidering" would be great. In fact, revert it if you'd like. I'm
>>> uncertain that the surprise removal code actually works the way I
>>> thought previously and I think I took a lot of it out of my local code.
>>>
>>> Unfortunately I don't have any equipment that I can use to reproduce
>>> surprise removal any longer so that means I wouldn't be able to test
>>> anything. I have to defer to you or Cao Jin.
>>
>> I'm not too keen to rip out a PCIe NIC under power from my local
>> desktop machine, but I think an actual surprise removal is not the
>> problem.
>>
>> As described in my git log entry, the error condition in igb_rd32 can be
>> triggered during a suspend. The HW has been put into a sleep state but
>> some register read requests are apparently not guarded against that
>> situation. Reading a register in this state returns -1, thus a suspend
>> is erroneously triggering the "surprise removal" sequence.
>
> The question I would have is what is reading the device when it is in
> this state. The watchdog and any other functions that would read the
> device should be disabled.
>
> One possibility could be a race between a call to igb_close and the
> igb_suspend function. We have seen some of those pop up recently on
> ixgbe and it looks like igb has the same bug. We should probably be
> using the rtnl_lock to guarantee that netif_device_detach and the call
> to __igb_close are completed before igb_close could possibly be called
> by the network stack.
>
>> Here's a raw idea:
>>
>> - Note that device is suspended in e1000_hw struct. Don't trigger
>> error sequence in igb_rd32 if so (...and return a 0 value???)
>
> The thing is that a suspended device should not be accessed at all.
> If we are accessing it while it is suspended then that is a bug. If
> you could throw a WARN_ON call in igb_rd32 to capture where this is
> being triggered that might be useful.
>
>> - Otherwise assume it's actually a surprise removal. In theory that
>> should somehow trigger a device removal sequence, kind of like
>> calling igb_remove, no?
>
> Well a read of the MMIO region while suspended is more of a surprise
> read since there shouldn't be anything going on. We need to isolate
> where that read is coming from and fix it.
>
> Thanks.
>
> - Alex
>
>
> .
>
^ permalink raw reply
* Is Documentation/networking/phy.txt still up-to-date?
From: Sebastian Frias @ 2016-11-09 13:24 UTC (permalink / raw)
To: afleming, jgarzik, Måns Rullgård, Florian Fainelli
Cc: netdev, LKML, David S. Miller
Hi,
Documentation/networking/phy.txt discusses phy_connect and states that:
"...
interface is a u32 which specifies the connection type used
between the controller and the PHY. Examples are GMII, MII,
RGMII, and SGMII. For a full list, see include/linux/phy.h
Now just make sure that phydev->supported and phydev->advertising have any
values pruned from them which don't make sense for your controller (a 10/100
controller may be connected to a gigabit capable PHY, so you would need to
mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
for these bitfields. Note that you should not SET any bits, or the PHY may
get put into an unsupported state.
..."
However, 'drivers/net/ethernet/aurora/nb8800.c' for example, does SET some
bits (in function 'nb8800_pause_adv').
I checked 'drivers/net/ethernet/broadcom/genet/bcmmii.c' and that one CLEARs
bits (as per the documentation).
Does anybody know what the correct/recommended approach is?
Best regards,
Sebastian
^ permalink raw reply
* Re: [PATCH net] r8152: Fix broken RX checksums.
From: Mark Lord @ 2016-11-09 13:19 UTC (permalink / raw)
To: Hayes Wang, David Miller
Cc: nic_swsd, netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <0835B3720019904CB8F7AA43166CEEB20104A0FD@RTITMBSV03.realtek.com.tw>
On 16-11-09 08:09 AM, Hayes Wang wrote:
> Mark Lord [mailto:mlord@pobox.com]
..
>> The MTU/MRU on this link is the standard 1500 bytes, so a pkt_len of 2045 isn't
>> valid here.
>> And the rx_desc values look an awful lot like the rx_data values that follow it.
>>
>> There's definitely more broken here than just TCP RX checksums.
>
> I don't think it is the issue of our hw. If it happens, windows or
> other OS may have problems, too. It is like the memory issue described
> in commit 990c9b347245("Merge branch 'r8152-fixes'"). It seems that
> the data in memory is not same with the one from the device.
I am still doing long-term testing of various tweaks to the driver,
and can now confirm that changing from kmalloc() to usb_alloc_coherent()
vastly improves reliability, and re-enabling RX checksums works fine
with that change.
However, even with coherent URB buffers, I still see the occasional bad rx_desc:
like, twice in 36 hours of continuous bashing at it.
So having code in the driver to sanitize the rx_desc is essential.
My current test code (shared with Hayes already) includes validation of various
key fields of the rx_desc, and detects when the chip/driver/whatever gets confused.
Hopefully r8152.c will get updated to take more care before trusting
what it sees in the rx_desc fields.
Cheers
--
Mark Lord
mlord@pobox.com
^ permalink raw reply
* Re: [PATCH net-next 2/3] ptp: igb: Use the high resolution frequency method.
From: Richard Cochran @ 2016-11-09 13:15 UTC (permalink / raw)
To: Keller, Jacob E
Cc: netdev@vger.kernel.org, tglx@linutronix.de,
Manfred.Rudigier@omicron.at, ulrik.debie-os@e2big.org,
stefan.sorensen@spectralink.com, davem@davemloft.net,
Kirsher, Jeffrey T, john.stultz@linaro.org,
intel-wired-lan@lists.osuosl.org
In-Reply-To: <1478642646.7545.39.camel@intel.com>
On Tue, Nov 08, 2016 at 10:04:23PM +0000, Keller, Jacob E wrote:
> Additionally, what about min/max frequency check? Wouldn't this need to
> be updated for the new adjfine operation?
In theory you might increase the max by some sub-ppb value, but we
cannot express that as the resolution of the user space interface is
in ppb, and that little extra is not important anyhow.
Thanks,
Richard
^ permalink raw reply
* Re: [PATCH net-next 2/3] ptp: igb: Use the high resolution frequency method.
From: Richard Cochran @ 2016-11-09 13:11 UTC (permalink / raw)
To: Keller, Jacob E
Cc: netdev@vger.kernel.org, tglx@linutronix.de,
Manfred.Rudigier@omicron.at, ulrik.debie-os@e2big.org,
stefan.sorensen@spectralink.com, davem@davemloft.net,
Kirsher, Jeffrey T, john.stultz@linaro.org,
intel-wired-lan@lists.osuosl.org
In-Reply-To: <1478642533.7545.38.camel@intel.com>
On Tue, Nov 08, 2016 at 10:02:22PM +0000, Keller, Jacob E wrote:
> On Tue, 2016-11-08 at 22:49 +0100, Richard Cochran wrote:
> > - rate = ppb;
> > - rate <<= 26;
> > - rate = div_u64(rate, 1953125);
> > + rate = scaled_ppm;
> > + rate <<= 13;
> > + rate = div_u64(rate, 15625);
>
> I'm curious how you generate the new math here, since this can be
> tricky, and I could use more examples in order to port to some of the
> other drivers' implementations. I'm not quite sure how to handle the
> value when the lower 16 bits are fractional.
TL;DR version:
In ptp_clock.c we convert scaled_ppm to ppb like this.
ppb = scaled_ppm * 10^3 * 2^-16
If you already have a working driver that does
regval = ppb * SOMEMATH;
then just substitute
regval = (scaled_ppm * 10^3 * 2^-16) * SOMEMATH;
= (scaled_ppm * 5^3 * 2^-13) * SOMEMATH;
and simplify by combining the 5^3 and 2^-13 constants into SOMEMATH.
Longer explanation:
You have to consider how the frequency adjustment HW works, case by
case. Both the i210 and the phyter have an adjustment register that
holds units of 2^-32 nanoseconds per 8 nanosecond clock period, and so
the rate from adjustment value 1 is (2^-32 / 8).
Then with the old interface, the conversion from "adjustment unit" to
ppb was (2^-32 / 8 * 10^9) or (2^-26 * 5^9). The conversion the other
way needs the inverse, and so the code did (ppb << 26) / 5^9.
With the new interface, the conversion from "adjustment unit" to
scaled_ppm is (2^-32 / 8 * 10^6 * 2^16) or (2^-13 * 5^6). The code
converts the other direction using the inverse, (s_ppm << 13) / 5^6.
HTH,
Richard
^ permalink raw reply
* RE: [PATCH net] r8152: Fix broken RX checksums.
From: Hayes Wang @ 2016-11-09 13:09 UTC (permalink / raw)
To: Mark Lord, David Miller
Cc: nic_swsd, netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <201611041425.uA4EPwCw018176@rtits1.realtek.com>
Mark Lord [mailto:mlord@pobox.com]
> Sent: Friday, November 04, 2016 9:50 PM
[...]
> Yeah, the device or driver is definitely getting confused with rx_desc structures.
> I added code to check for unlikely rx_desc values, and it found this for starters:
>
> rx_desc: 00480801 00480401 00480001 0048fc00 0048f800 0048f400
> pkt_len=2045
> rx_data: 00 f0 48 00 00 ec 48 00 00 e8 48 00 00 e4 48 00 00 e0 48 00 00 dc 48 00
> 00 d8 48 00 00 d4 48 00
> rx_data: 00 d0 48 00 00 cc 48 00 00 c8 48 00 00 c4 48 00 00 c0 48 00 00 bc 48 00
> 00 b8 48 00 00 b4 48 00
> rx_data: 00 b0 48 00 00 ac 48 00 00 01 00 00 81 ed 00 00 00 01 00 00 00 00 00 00
> 00 00 00 02 4d ac 00 00
> rx_data: 10 00 ff ff ff ff 00 00 01 28 83 d6 ff 6d 00 20 25 b1 58 1b 68 ff 00 05 20 01
> 56 41 17 35 00 00
> ...
>
> The MTU/MRU on this link is the standard 1500 bytes, so a pkt_len of 2045 isn't
> valid here.
> And the rx_desc values look an awful lot like the rx_data values that follow it.
>
> There's definitely more broken here than just TCP RX checksums.
I don't think it is the issue of our hw. If it happens, windows or
other OS may have problems, too. It is like the memory issue described
in commit 990c9b347245("Merge branch 'r8152-fixes'"). It seems that
the data in memory is not same with the one from the device.
Besides, I test the raspberry pi with RTL8152. However, I don't find
any checksum issue for TCP. I try to copy a large file and md5sum it
through NFS. It works fine.
Best Regards,
Hayes
^ permalink raw reply
* Re: [PATCH 1/2] net: ethernet: nb8800: Do not apply TX delay at MAC level
From: Sebastian Frias @ 2016-11-09 13:02 UTC (permalink / raw)
To: Måns Rullgård, Florian Fainelli
Cc: David S. Miller, netdev, LKML, Mason, Andrew Lunn
In-Reply-To: <yw1x1syrjil8.fsf@unicorn.mansr.com>
On 11/04/2016 05:49 PM, Måns Rullgård wrote:
>>> But when doing so, both the Atheros 8035 and the Aurora NB8800 drivers
>>> will apply the delay.
>>>
>>> I think a better way of dealing with this is that both, PHY and MAC
>>> drivers exchange information so that the delay is applied only once.
>>
>> Exchange what information? The PHY device interface (phydev->interface)
>> conveys the needed information for both entities.
>
> There doesn't seem to be any consensus among the drivers regarding where
> the delay should be applied. Since only a few drivers, MAC or PHY, act
> on this property, most combinations still work by chance. It is common
> for boards to set the delay at the PHY using external config pins so no
> software setup is required (although I have one Sigma based board that
> gets this wrong). I suspect if drivers/net/ethernet/broadcom/genet were
> used with one of the four PHY drivers that also set the delay based on
> this DT property, things would go wrong.
>
Exactly, what about a patch like (I can make a formal submission, even
merge it with the patch discussed in this thread, consider this a RFC):
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index fba2699..4217ff4 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1283,6 +1283,10 @@ static int nb8800_tangox_init(struct net_device *dev)
case PHY_INTERFACE_MODE_RGMII_RXID:
case PHY_INTERFACE_MODE_RGMII_TXID:
pad_mode = PAD_MODE_RGMII;
+
+ if ((dev->phydev->flags & PHY_SUPPORTS_TXID) == 0)
+ pad_mode |= PAD_MODE_GTX_CLK_DELAY;
+
break;
default:
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 2e0c759..5eddb04 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -426,7 +426,9 @@ static int at803x_aneg_done(struct phy_device *phydev)
.suspend = at803x_suspend,
.resume = at803x_resume,
.features = PHY_GBIT_FEATURES,
- .flags = PHY_HAS_INTERRUPT,
+ .flags = PHY_HAS_INTERRUPT |
+ PHY_SUPPORTS_RXID |
+ PHY_SUPPORTS_TXID,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
.ack_interrupt = at803x_ack_interrupt,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e7e1fd3..0f0b17e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -61,6 +61,8 @@
#define PHY_HAS_INTERRUPT 0x00000001
#define PHY_HAS_MAGICANEG 0x00000002
#define PHY_IS_INTERNAL 0x00000004
+#define PHY_SUPPORTS_RXID 0x00000008
+#define PHY_SUPPORTS_TXID 0x00000010
#define MDIO_DEVICE_IS_PHY 0x80000000
/* Interface Mode definitions */
^ permalink raw reply related
* [PATCH 2/2] net: mvpp2: simplify MAC address set code
From: Baruch Siach @ 2016-11-09 12:56 UTC (permalink / raw)
To: Marcin Wojtas; +Cc: netdev, Thomas Petazzoni, Gregory Clement, Baruch Siach
In-Reply-To: <ff17831771f3575f351c134703d3f153485b01c0.1478696194.git.baruch@tkos.co.il>
Remove duplicated code for handling the !netif_running() case.
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
Build tested only.
---
drivers/net/ethernet/marvell/mvpp2.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index e427b4706726..64b7f985d517 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5674,32 +5674,28 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
{
struct mvpp2_port *port = netdev_priv(dev);
const struct sockaddr *addr = p;
- int err;
+ int err, restart_dev = 0;
if (!is_valid_ether_addr(addr->sa_data)) {
err = -EADDRNOTAVAIL;
goto error;
}
- if (!netif_running(dev)) {
- err = mvpp2_prs_update_mac_da(dev, addr->sa_data);
- if (!err)
- return 0;
- /* Reconfigure parser to accept the original MAC address */
+ if (netif_running(dev)) {
+ mvpp2_stop_dev(port);
+ restart_dev = 1;
+ }
+
+ err = mvpp2_prs_update_mac_da(dev, addr->sa_data);
+ if (err) {
+ /* Reconfigure parser to accept the original MAC address */
mvpp2_prs_update_mac_da(dev, dev->dev_addr);
goto error;
}
- mvpp2_stop_dev(port);
-
- err = mvpp2_prs_update_mac_da(dev, addr->sa_data);
- if (!err)
- goto out_start;
+ if (!restart_dev)
+ return 0;
- /* Reconfigure parser accept the original MAC address */
- mvpp2_prs_update_mac_da(dev, dev->dev_addr);
- goto error;
-out_start:
mvpp2_start_dev(port);
mvpp2_egress_enable(port);
mvpp2_ingress_enable(port);
--
2.10.2
^ permalink raw reply related
* [PATCH 1/2] net: mvpp2: don't bring up on MAC address set
From: Baruch Siach @ 2016-11-09 12:56 UTC (permalink / raw)
To: Marcin Wojtas; +Cc: netdev, Thomas Petazzoni, Gregory Clement, Baruch Siach
The current .ndo_set_mac_address implementation brings up the interface when
reverting to the original address succeeds after a failure. Fix this.
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
Untested; I don't have the hardware.
---
drivers/net/ethernet/marvell/mvpp2.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 60227a3452a4..e427b4706726 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5686,9 +5686,8 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
if (!err)
return 0;
/* Reconfigure parser to accept the original MAC address */
- err = mvpp2_prs_update_mac_da(dev, dev->dev_addr);
- if (err)
- goto error;
+ mvpp2_prs_update_mac_da(dev, dev->dev_addr);
+ goto error;
}
mvpp2_stop_dev(port);
@@ -5698,9 +5697,8 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
goto out_start;
/* Reconfigure parser accept the original MAC address */
- err = mvpp2_prs_update_mac_da(dev, dev->dev_addr);
- if (err)
- goto error;
+ mvpp2_prs_update_mac_da(dev, dev->dev_addr);
+ goto error;
out_start:
mvpp2_start_dev(port);
mvpp2_egress_enable(port);
--
2.10.2
^ permalink raw reply related
* Re: [PATCH] usbnet: prevent device rpm suspend in usbnet_probe function
From: Bjørn Mork @ 2016-11-09 12:32 UTC (permalink / raw)
To: Oliver Neukum; +Cc: Alan Stern, Kai-Heng Feng, linux-kernel, linux-usb, netdev
In-Reply-To: <1478692735.2428.10.camel@suse.com>
Oliver Neukum <oneukum@suse.com> writes:
> On Tue, 2016-11-08 at 13:44 -0500, Alan Stern wrote:
>
>> These problems could very well be caused by running at SuperSpeed
>> (USB-3) instead of high speed (USB-2).
>>
>> Is there any way to test what happens when the device is attached to
>> the computer by a USB-2 cable? That would prevent it from operating at
>> SuperSpeed.
>>
>> The main point, however, is that the proposed patch doesn't seem to
>> address the true problem, which is that the device gets suspended
>> between probes. The patch only tries to prevent it from being
>> suspended during a probe -- which is already prevented by the USB core.
>
> But why doesn't it fail during normal operation?
>
> I suspect that its firmware requires the altsetting
>
> /* should we change control altsetting on a NCM/MBIM function? */
> if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) {
> data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM;
> ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM);
>
> to be set before it accepts a suspension.
Could be, but I don't think so. The above code is effectively a noop
unless the function is a combined NCM/MBIM function. Something I've
never seen on a Sierra Wireless device (ignoring the infamous EM7345,
which really is an Intel device).
This is a typical example of a Sierra Wireless modem configured for
MBIM:
P: Vendor=1199 ProdID=9079 Rev= 0.06
S: Manufacturer=Sierra Wireless, Incorporated
S: Product=Sierra Wireless EM7455 Qualcomm Snapdragon X7 LTE-A
S: SerialNumber=LF615126xxxxxxx
C:* #Ifs= 2 Cfg#= 1 Atr=a0 MxPwr=500mA
A: FirstIf#=12 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00
I:* If#=12 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=(none)
E: Ad=82(I) Atr=03(Int.) MxPS= 64 Ivl=32ms
I:* If#=13 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=(none)
I: If#=13 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=(none)
E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
The control interface of plain MBIM functions will always have a single
altsetting, like the example above. So cdc_ncm_select_altsetting(intf)
returns "0", while CDC_NCM_COMM_ALTSETTING_MBIM is "1".
Just for reference, using the Intel^H^H^H^H^HEM7345 as example, this is
what a combined NCM/MBIM function looks like:
P: Vendor=1199 ProdID=a001 Rev=17.29
S: Manufacturer=Sierra Wireless Inc.
S: Product=Sierra Wireless EM7345 4G LTE
S: SerialNumber=013937000xxxxxx
C:* #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=100mA
A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0d Prot=00
A: FirstIf#= 2 IfCount= 2 Cls=02(comm.) Sub=02 Prot=01
I: If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0d Prot=00 Driver=cdc_mbim
E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=1ms
I:* If#= 0 Alt= 1 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim
E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=1ms
I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_mbim
I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_mbim
E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 1 Alt= 2 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim
E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=(none)
E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=1ms
I:* If#= 3 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=(none)
E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
And this is what the code you quote is trying to deal with. Note the
different subclass of altsetting 0 and 1.... This is incredibly ugly.
FWIW, the modem in question cannot be an EM7345. That modem does not
have the static interface numbering oddity. Another sign that it isn't
a true Sierra device.
Bjørn
^ permalink raw reply
* [PATCH v9 8/8] thunderbolt: Adding maintainer entry
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
Add Amir Levy as maintainer for Thunderbolt(TM) ICM driver
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
---
MAINTAINERS | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 411e3b8..87763c44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10652,7 +10652,13 @@ F: include/uapi/linux/stm.h
THUNDERBOLT DRIVER
M: Andreas Noever <andreas.noever@gmail.com>
S: Maintained
-F: drivers/thunderbolt/
+F: drivers/thunderbolt/*
+
+THUNDERBOLT ICM DRIVER
+M: Amir Levy <amir.jer.levy@intel.com>
+S: Maintained
+F: drivers/thunderbolt/icm/
+F: Documentation/thunderbolt/networking.txt
TI BQ27XXX POWER SUPPLY DRIVER
R: Andrew F. Davis <afd@ti.com>
--
2.7.4
^ permalink raw reply related
* [PATCH v9 7/8] thunderbolt: Networking doc
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
Adding Thunderbolt(TM) networking documentation.
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
---
Documentation/00-INDEX | 2 +
Documentation/thunderbolt/networking.txt | 132 +++++++++++++++++++++++++++++++
2 files changed, 134 insertions(+)
create mode 100644 Documentation/thunderbolt/networking.txt
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 3acc4f1..0239e68 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -440,6 +440,8 @@ this_cpu_ops.txt
- List rationale behind and the way to use this_cpu operations.
thermal/
- directory with information on managing thermal issues (CPU/temp)
+thunderbolt/
+ - directory with info regarding Thunderbolt.
trace/
- directory with info on tracing technologies within linux
unaligned-memory-access.txt
diff --git a/Documentation/thunderbolt/networking.txt b/Documentation/thunderbolt/networking.txt
new file mode 100644
index 0000000..88d1c12
--- /dev/null
+++ b/Documentation/thunderbolt/networking.txt
@@ -0,0 +1,132 @@
+Intel Thunderbolt(TM) Networking driver
+=======================================
+
+Copyright(c) 2013 - 2016 Intel Corporation.
+
+Contact Information:
+Intel Thunderbolt mailing list <thunderbolt-software@lists.01.org>
+Edited by Amir Levy <amir.jer.levy@intel.com>
+
+Overview
+========
+
+* The Thunderbolt Networking driver enables peer to peer networking on non-Apple
+ platforms running Linux.
+
+* The driver creates a virtual Ethernet device that enables computer to computer
+ communication over the Thunderbolt cable.
+
+* Using Thunderbolt Networking you can perform high speed file transfers between
+ computers, perform PC migrations and/or set up small workgroups with shared
+ storage without compromising any other Thunderbolt functionality.
+
+* The driver is located in drivers/thunderbolt/icm.
+
+* This driver will function only on non-Apple platforms with firmware based
+ Thunderbolt controllers that support Thunderbolt Networking.
+
+ +----------------+ +----------------+
+ |Host 1 | |Host 2 |
+ | | | |
+ | +-------+ | | +-------+ |
+ | |Network| | | |Network| |
+ | |Stack | | | |Stack | |
+ | +-------+ | | +-------+ |
+ | ^ | | ^ |
+ | | | | | |
+ | v | | v |
+ | +-----------+ | | +-----------+ |
+ | |Thunderbolt| | | |Thunderbolt| |
+ | |Networking | | | |Networking | |
+ | |Driver | | | |Driver | |
+ | +-----------+ | | +-----------+ |
+ | ^ | | ^ |
+ | | | | | |
+ | v | | v |
+ | +-----------+ | | +-----------+ |
+ | |Thunderbolt| | | |Thunderbolt| |
+ | |Controller |<-+------------+->|Controller | |
+ | |with ICM | | | |with ICM | |
+ | |enabled | | | |enabled | |
+ | +-----------+ | | +-----------+ |
+ +----------------+ +----------------+
+
+Files
+=====
+
+The following files are located in the drivers/thunderbolt/icm directory:
+
+- icm_nhi.c/h: These files allow communication with the firmware (Intel
+ Connection Manager) based controller. They also create an interface for
+ netlink communication with a user space daemon.
+
+- net.c/net.h: These files implement the 'eth' interface for the
+ Thunderbolt(TM) Networking.
+
+Interface to User Space
+=======================
+
+The interface to the user space module is implemented through Generic Netlink.
+This is the communications protocol between the Thunderbolt driver and the user
+space application.
+
+Note that this interface mediates user space communication with ICM.
+(Existing Linux tools can be used to configure the network interface.)
+
+The Thunderbolt Daemon utilizes this interface to communicate with the driver.
+To be accessed by the user space module, both kernel and user space modules
+have to register with the same GENL_NAME.
+For the purpose of the Thunderbolt Network driver, "thunderbolt" is used.
+The registration is done at driver initialization time for all instances
+of the Thunderbolt controllers. The communication is carried through pre-defined
+Thunderbolt messages. Each specific message has a callback function that is
+called when the related message is received.
+
+Message Definitions:
+* NHI_CMD_UNSPEC: Not used.
+* NHI_CMD_SUBSCRIBE: Subscription request from daemon to driver to open the
+ communication channel.
+* NHI_CMD_UNSUBSCRIBE: Request from daemon to driver to unsubscribe and
+ to close communication channel.
+* NHI_CMD_QUERY_INFORMATION: Request information from the driver such as
+ driver version, FW version offset, number of ports in the controller
+ and DMA port.
+* NHI_CMD_MSG_TO_ICM: Message from user space module to FW.
+* NHI_CMD_MSG_FROM_ICM: Response from FW to user space module.
+* NHI_CMD_MAILBOX: Message that uses mailbox mechanism such as FW policy
+ changes or disconnect path.
+* NHI_CMD_APPROVE_TBT_NETWORKING: Request from user space module to FW to
+ establish path.
+* NHI_CMD_ICM_IN_SAFE_MODE: Indication that the FW has entered safe mode.
+
+Communication with Intel Connection Manager(ICM) Firmware
+=========================================================
+
+There are several circular buffers in Thunderbolt each using Direct Memory
+Access (DMA).
+
+Communication with ICM utilizes circular buffer ring #0. (The other rings are
+used for peer to peer communication, packet transmission and receiving).
+
+The driver allocates shared memory that is physically mapped onto the DMA
+physical space at ring #0.
+For the software to communicate with the firmware, the driver sends a command
+in ring #0. The command contains a pre-defined field (PDF) value notifying the
+firmware that the driver is ready. To proceed, the driver must receive the
+appropriate PDF value in response from the firmware.
+
+Once the exchange is completed, messages can be sent to the firmware through
+the driver. Similarly, the firmware can now send notifications about hardware
+and firmware events.
+
+Information
+===========
+
+Mailing list:
+ thunderbolt-software@lists.01.org
+ Register at: https://lists.01.org/mailman/listinfo/thunderbolt-software
+ Archives at: https://lists.01.org/pipermail/thunderbolt-software/
+
+For additional information about Thunderbolt technology visit:
+ https://01.org/thunderbolt-sw
+ https://thunderbolttechnology.net/
--
2.7.4
^ permalink raw reply related
* [PATCH v9 1/8] thunderbolt: Macro rename
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
This first patch updates the NHI Thunderbolt controller registers file to
reflect that it is not only for Cactus Ridge.
No functional change intended.
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
Signed-off-by: Andreas Noever <andreas.noever@gmail.com>
---
drivers/thunderbolt/nhi_regs.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
index 86b996c..75cf069 100644
--- a/drivers/thunderbolt/nhi_regs.h
+++ b/drivers/thunderbolt/nhi_regs.h
@@ -1,11 +1,11 @@
/*
- * Thunderbolt Cactus Ridge driver - NHI registers
+ * Thunderbolt driver - NHI registers
*
* Copyright (c) 2014 Andreas Noever <andreas.noever@gmail.com>
*/
-#ifndef DSL3510_REGS_H_
-#define DSL3510_REGS_H_
+#ifndef NHI_REGS_H_
+#define NHI_REGS_H_
#include <linux/types.h>
--
2.7.4
^ permalink raw reply related
* [PATCH v9 4/8] thunderbolt: Networking state machine
From: Amir Levy @ 2016-11-09 14:20 UTC (permalink / raw)
To: gregkh
Cc: andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci, netdev,
linux-doc, mario_limonciello, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, Amir Levy
In-Reply-To: <1478701208-4585-1-git-send-email-amir.jer.levy@intel.com>
This patch builds the peer to peer communication path.
Communication is established by a negotiation process whereby messages are
sent back and forth between the peers until a connection is established.
This includes the Thunderbolt Network driver communication with the second
peer via Intel Connection Manager(ICM) firmware.
+--------------------+ +--------------------+
|Host 1 | |Host 2 |
| | | |
| +-----------+ | | +-----------+ |
| |Thunderbolt| | | |Thunderbolt| |
| |Networking | | | |Networking | |
| |Driver | | | |Driver | |
| +-----------+ | | +-----------+ |
| ^ | | ^ |
| | | | | |
| +------------+---+ | | +------------+---+ |
| |Thunderbolt | | | | |Thunderbolt | | |
| |Controller v | | | |Controller v | |
| | +---+ | | | | +---+ | |
| | |ICM|<-+-+------------+-+-------->|ICM| | |
| | +---+ | | | | +---+ | |
| +----------------+ | | +----------------+ |
+--------------------+ +--------------------+
Note that this patch only establishes the link between the two hosts and
not Network Packet handling - this is dealt with in the next patch.
Signed-off-by: Amir Levy <amir.jer.levy@intel.com>
---
drivers/thunderbolt/icm/Makefile | 2 +-
drivers/thunderbolt/icm/icm_nhi.c | 262 ++++++++++++-
drivers/thunderbolt/icm/net.c | 783 ++++++++++++++++++++++++++++++++++++++
drivers/thunderbolt/icm/net.h | 70 ++++
4 files changed, 1109 insertions(+), 8 deletions(-)
create mode 100644 drivers/thunderbolt/icm/net.c
diff --git a/drivers/thunderbolt/icm/Makefile b/drivers/thunderbolt/icm/Makefile
index f0d0fbb..94a2797 100644
--- a/drivers/thunderbolt/icm/Makefile
+++ b/drivers/thunderbolt/icm/Makefile
@@ -1,2 +1,2 @@
obj-${CONFIG_THUNDERBOLT_ICM} += thunderbolt-icm.o
-thunderbolt-icm-objs := icm_nhi.o
+thunderbolt-icm-objs := icm_nhi.o net.o
diff --git a/drivers/thunderbolt/icm/icm_nhi.c b/drivers/thunderbolt/icm/icm_nhi.c
index c843ce8..edc910b 100644
--- a/drivers/thunderbolt/icm/icm_nhi.c
+++ b/drivers/thunderbolt/icm/icm_nhi.c
@@ -64,6 +64,13 @@ static const struct nla_policy nhi_genl_policy[NHI_ATTR_MAX + 1] = {
.len = TBT_ICM_RING_MAX_FRAME_SIZE },
[NHI_ATTR_MSG_FROM_ICM] = { .type = NLA_BINARY,
.len = TBT_ICM_RING_MAX_FRAME_SIZE },
+ [NHI_ATTR_LOCAL_ROUTE_STRING] = {
+ .len = sizeof(struct route_string) },
+ [NHI_ATTR_LOCAL_UUID] = { .len = sizeof(uuid_be) },
+ [NHI_ATTR_REMOTE_UUID] = { .len = sizeof(uuid_be) },
+ [NHI_ATTR_LOCAL_DEPTH] = { .type = NLA_U8, },
+ [NHI_ATTR_ENABLE_FULL_E2E] = { .type = NLA_FLAG, },
+ [NHI_ATTR_MATCH_FRAME_ID] = { .type = NLA_FLAG, },
};
/* NHI genetlink family */
@@ -480,6 +487,29 @@ int nhi_mailbox(struct tbt_nhi_ctxt *nhi_ctxt, u32 cmd, u32 data, bool deinit)
return 0;
}
+static inline bool nhi_is_path_disconnected(u32 cmd, u8 num_ports)
+{
+ return (cmd >= DISCONNECT_PORT_A_INTER_DOMAIN_PATH &&
+ cmd < (DISCONNECT_PORT_A_INTER_DOMAIN_PATH + num_ports));
+}
+
+static int nhi_mailbox_disconn_path(struct tbt_nhi_ctxt *nhi_ctxt, u32 cmd)
+ __releases(&controllers_list_mutex)
+{
+ struct port_net_dev *port;
+ u32 port_num = cmd - DISCONNECT_PORT_A_INTER_DOMAIN_PATH;
+
+ port = &(nhi_ctxt->net_devices[port_num]);
+ mutex_lock(&port->state_mutex);
+
+ mutex_unlock(&controllers_list_mutex);
+ port->medium_sts = MEDIUM_READY_FOR_APPROVAL;
+ if (port->net_dev)
+ negotiation_events(port->net_dev, MEDIUM_DISCONNECTED);
+ mutex_unlock(&port->state_mutex);
+ return 0;
+}
+
static int nhi_mailbox_generic(struct tbt_nhi_ctxt *nhi_ctxt, u32 mb_cmd)
__releases(&controllers_list_mutex)
{
@@ -526,13 +556,90 @@ static int nhi_genl_mailbox(__always_unused struct sk_buff *u_skb,
return -ERESTART;
nhi_ctxt = nhi_search_ctxt(*(u32 *)info->userhdr);
- if (nhi_ctxt && !nhi_ctxt->d0_exit)
- return nhi_mailbox_generic(nhi_ctxt, mb_cmd);
+ if (nhi_ctxt && !nhi_ctxt->d0_exit) {
+
+ /* controllers_list_mutex is released later by the functions below */
+ if (nhi_is_path_disconnected(cmd, nhi_ctxt->num_ports))
+ return nhi_mailbox_disconn_path(nhi_ctxt, cmd);
+ else
+ return nhi_mailbox_generic(nhi_ctxt, mb_cmd);
+
+ }
mutex_unlock(&controllers_list_mutex);
return -ENODEV;
}
+static int nhi_genl_approve_networking(__always_unused struct sk_buff *u_skb,
+ struct genl_info *info)
+{
+ struct tbt_nhi_ctxt *nhi_ctxt;
+ struct route_string *route_str;
+ int res = -ENODEV;
+ u8 port_num;
+
+ if (!info || !info->userhdr || !info->attrs ||
+ !info->attrs[NHI_ATTR_LOCAL_ROUTE_STRING] ||
+ !info->attrs[NHI_ATTR_LOCAL_UUID] ||
+ !info->attrs[NHI_ATTR_REMOTE_UUID] ||
+ !info->attrs[NHI_ATTR_LOCAL_DEPTH])
+ return -EINVAL;
+
+ /*
+ * route_str is a unique topological address
+ * used for approving remote controller
+ */
+ route_str = nla_data(info->attrs[NHI_ATTR_LOCAL_ROUTE_STRING]);
+ /* extracts the port we're connected to */
+ port_num = PORT_NUM_FROM_LINK(L0_PORT_NUM(route_str->lo));
+
+ if (mutex_lock_interruptible(&controllers_list_mutex))
+ return -ERESTART;
+
+ nhi_ctxt = nhi_search_ctxt(*(u32 *)info->userhdr);
+ if (nhi_ctxt && !nhi_ctxt->d0_exit) {
+ struct port_net_dev *port;
+
+ if (port_num >= nhi_ctxt->num_ports) {
+ res = -EINVAL;
+ goto free_ctl_list;
+ }
+
+ port = &(nhi_ctxt->net_devices[port_num]);
+
+ mutex_lock(&port->state_mutex);
+ mutex_unlock(&controllers_list_mutex);
+
+ if (port->medium_sts != MEDIUM_READY_FOR_APPROVAL)
+ goto unlock;
+
+ port->medium_sts = MEDIUM_READY_FOR_CONNECTION;
+
+ if (!port->net_dev) {
+ port->net_dev = nhi_alloc_etherdev(nhi_ctxt, port_num,
+ info);
+ if (!port->net_dev) {
+ mutex_unlock(&port->state_mutex);
+ return -ENOMEM;
+ }
+ } else {
+ nhi_update_etherdev(nhi_ctxt, port->net_dev, info);
+
+ negotiation_events(port->net_dev,
+ MEDIUM_READY_FOR_CONNECTION);
+ }
+
+unlock:
+ mutex_unlock(&port->state_mutex);
+
+ return 0;
+ }
+
+free_ctl_list:
+ mutex_unlock(&controllers_list_mutex);
+
+ return res;
+}
static int nhi_genl_send_msg(struct tbt_nhi_ctxt *nhi_ctxt, enum pdf_value pdf,
const u8 *msg, u32 msg_len)
@@ -579,17 +686,127 @@ static int nhi_genl_send_msg(struct tbt_nhi_ctxt *nhi_ctxt, enum pdf_value pdf,
return res;
}
+static bool nhi_handle_inter_domain_msg(struct tbt_nhi_ctxt *nhi_ctxt,
+ struct thunderbolt_ip_header *hdr)
+{
+ struct port_net_dev *port;
+ u8 port_num;
+
+ const uuid_be proto_uuid = APPLE_THUNDERBOLT_IP_PROTOCOL_UUID;
+
+ if (uuid_be_cmp(proto_uuid, hdr->apple_tbt_ip_proto_uuid) != 0)
+ return true;
+
+ port_num = PORT_NUM_FROM_LINK(
+ L0_PORT_NUM(be32_to_cpu(hdr->route_str.lo)));
+
+ if (unlikely(port_num >= nhi_ctxt->num_ports))
+ return false;
+
+ port = &(nhi_ctxt->net_devices[port_num]);
+ mutex_lock(&port->state_mutex);
+ if (port->net_dev != NULL)
+ negotiation_messages(port->net_dev, hdr);
+ mutex_unlock(&port->state_mutex);
+
+ return false;
+}
+
+static void nhi_handle_notification_msg(struct tbt_nhi_ctxt *nhi_ctxt,
+ const u8 *msg)
+{
+ struct port_net_dev *port;
+ u8 port_num;
+
+#define INTER_DOMAIN_LINK_SHIFT 0
+#define INTER_DOMAIN_LINK_MASK GENMASK(2, INTER_DOMAIN_LINK_SHIFT)
+ switch (msg[3]) {
+
+ case NC_INTER_DOMAIN_CONNECTED:
+ port_num = PORT_NUM_FROM_MSG(msg[5]);
+#define INTER_DOMAIN_APPROVED BIT(3)
+ if (port_num < nhi_ctxt->num_ports &&
+ !(msg[5] & INTER_DOMAIN_APPROVED))
+ nhi_ctxt->net_devices[port_num].medium_sts =
+ MEDIUM_READY_FOR_APPROVAL;
+ break;
+
+ case NC_INTER_DOMAIN_DISCONNECTED:
+ port_num = PORT_NUM_FROM_MSG(msg[5]);
+
+ if (unlikely(port_num >= nhi_ctxt->num_ports))
+ break;
+
+ port = &(nhi_ctxt->net_devices[port_num]);
+ mutex_lock(&port->state_mutex);
+ port->medium_sts = MEDIUM_DISCONNECTED;
+
+ if (port->net_dev != NULL)
+ negotiation_events(port->net_dev,
+ MEDIUM_DISCONNECTED);
+ mutex_unlock(&port->state_mutex);
+ break;
+ }
+}
+
+static bool nhi_handle_icm_response_msg(struct tbt_nhi_ctxt *nhi_ctxt,
+ const u8 *msg)
+{
+ struct port_net_dev *port;
+ bool send_event = true;
+ u8 port_num;
+
+ if (nhi_ctxt->ignore_icm_resp &&
+ msg[3] == RC_INTER_DOMAIN_PKT_SENT) {
+ nhi_ctxt->ignore_icm_resp = false;
+ send_event = false;
+ }
+ if (nhi_ctxt->wait_for_icm_resp) {
+ nhi_ctxt->wait_for_icm_resp = false;
+ up(&nhi_ctxt->send_sem);
+ }
+
+ if (msg[3] == RC_APPROVE_INTER_DOMAIN_CONNECTION) {
+#define APPROVE_INTER_DOMAIN_ERROR BIT(0)
+ if (unlikely(msg[2] & APPROVE_INTER_DOMAIN_ERROR))
+ return send_event;
+
+ port_num = PORT_NUM_FROM_LINK((msg[5]&INTER_DOMAIN_LINK_MASK)>>
+ INTER_DOMAIN_LINK_SHIFT);
+
+ if (unlikely(port_num >= nhi_ctxt->num_ports))
+ return send_event;
+
+ port = &(nhi_ctxt->net_devices[port_num]);
+ mutex_lock(&port->state_mutex);
+ port->medium_sts = MEDIUM_CONNECTED;
+
+ if (port->net_dev != NULL)
+ negotiation_events(port->net_dev, MEDIUM_CONNECTED);
+ mutex_unlock(&port->state_mutex);
+ }
+
+ return send_event;
+}
+
static bool nhi_msg_from_icm_analysis(struct tbt_nhi_ctxt *nhi_ctxt,
enum pdf_value pdf,
const u8 *msg, u32 msg_len)
{
- /*
- * preparation for messages that won't be sent,
- * currently unused in this patch.
- */
bool send_event = true;
switch (pdf) {
+ case PDF_INTER_DOMAIN_REQUEST:
+ case PDF_INTER_DOMAIN_RESPONSE:
+ send_event = nhi_handle_inter_domain_msg(
+ nhi_ctxt,
+ (struct thunderbolt_ip_header *)msg);
+ break;
+
+ case PDF_FW_TO_SW_NOTIFICATION:
+ nhi_handle_notification_msg(nhi_ctxt, msg);
+ break;
+
case PDF_ERROR_NOTIFICATION:
/* fallthrough */
case PDF_WRITE_CONFIGURATION_REGISTERS:
@@ -599,7 +816,12 @@ static bool nhi_msg_from_icm_analysis(struct tbt_nhi_ctxt *nhi_ctxt,
nhi_ctxt->wait_for_icm_resp = false;
up(&nhi_ctxt->send_sem);
}
- /* fallthrough */
+ break;
+
+ case PDF_FW_TO_SW_RESPONSE:
+ send_event = nhi_handle_icm_response_msg(nhi_ctxt, msg);
+ break;
+
default:
break;
}
@@ -788,6 +1010,12 @@ static const struct genl_ops nhi_ops[] = {
.doit = nhi_genl_mailbox,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = NHI_CMD_APPROVE_TBT_NETWORKING,
+ .policy = nhi_genl_policy,
+ .doit = nhi_genl_approve_networking,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static int nhi_suspend(struct device *dev) __releases(&nhi_ctxt->send_sem)
@@ -795,6 +1023,17 @@ static int nhi_suspend(struct device *dev) __releases(&nhi_ctxt->send_sem)
struct tbt_nhi_ctxt *nhi_ctxt = pci_get_drvdata(to_pci_dev(dev));
void __iomem *rx_reg, *tx_reg;
u32 rx_reg_val, tx_reg_val;
+ int i;
+
+ for (i = 0; i < nhi_ctxt->num_ports; i++) {
+ struct port_net_dev *port = &nhi_ctxt->net_devices[i];
+
+ mutex_lock(&port->state_mutex);
+ port->medium_sts = MEDIUM_DISCONNECTED;
+ if (port->net_dev)
+ negotiation_events(port->net_dev, MEDIUM_DISCONNECTED);
+ mutex_unlock(&port->state_mutex);
+ }
/* must be after negotiation_events, since messages might be sent */
nhi_ctxt->d0_exit = true;
@@ -954,6 +1193,15 @@ static void icm_nhi_remove(struct pci_dev *pdev)
nhi_suspend(&pdev->dev);
+ for (i = 0; i < nhi_ctxt->num_ports; i++) {
+ mutex_lock(&nhi_ctxt->net_devices[i].state_mutex);
+ if (nhi_ctxt->net_devices[i].net_dev) {
+ nhi_dealloc_etherdev(nhi_ctxt->net_devices[i].net_dev);
+ nhi_ctxt->net_devices[i].net_dev = NULL;
+ }
+ mutex_unlock(&nhi_ctxt->net_devices[i].state_mutex);
+ }
+
if (nhi_ctxt->net_workqueue)
destroy_workqueue(nhi_ctxt->net_workqueue);
diff --git a/drivers/thunderbolt/icm/net.c b/drivers/thunderbolt/icm/net.c
new file mode 100644
index 0000000..beeafb3
--- /dev/null
+++ b/drivers/thunderbolt/icm/net.c
@@ -0,0 +1,783 @@
+/*******************************************************************************
+ *
+ * Intel Thunderbolt(TM) driver
+ * Copyright(c) 2014 - 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ ******************************************************************************/
+
+#include <linux/etherdevice.h>
+#include <linux/crc32.h>
+#include <linux/prefetch.h>
+#include <linux/highmem.h>
+#include <linux/if_vlan.h>
+#include <linux/jhash.h>
+#include <linux/vmalloc.h>
+#include <net/ip6_checksum.h>
+#include "icm_nhi.h"
+#include "net.h"
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP)
+static int debug = -1;
+module_param(debug, int, 0000);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+#define TBT_NET_RX_HDR_SIZE 256
+
+#define NUM_TX_LOGIN_RETRIES 60
+
+#define APPLE_THUNDERBOLT_IP_PROTOCOL_REVISION 1
+
+#define LOGIN_TX_PATH 0xf
+
+#define TBT_NET_MTU (64 * 1024)
+
+/* Number of Rx buffers we bundle into one write to the hardware */
+#define TBT_NET_RX_BUFFER_WRITE 16
+
+#define TBT_NET_MULTICAST_HASH_TABLE_SIZE 1024
+/*
+ * Combine bytes 4 and 5 of an Ethernet address and reduce the result modulo
+ * TBT_NET_MULTICAST_HASH_TABLE_SIZE. The argument is parenthesized so that
+ * expression arguments (e.g. "ptr + off") are indexed correctly.
+ */
+#define TBT_NET_ETHER_ADDR_HASH(addr) \
+	((((addr)[4] >> 4) | ((addr)[5] << 4)) % \
+	 TBT_NET_MULTICAST_HASH_TABLE_SIZE)
+
+#define BITS_PER_U32 (sizeof(u32) * BITS_PER_BYTE)
+
+#define TBT_NET_NUM_TX_BUFS 256
+#define TBT_NET_NUM_RX_BUFS 256
+#define TBT_NET_SIZE_TOTAL_DESCS ((TBT_NET_NUM_TX_BUFS + TBT_NET_NUM_RX_BUFS) \
+ * sizeof(struct tbt_buf_desc))
+
+
+#define TBT_NUM_FRAMES_PER_PAGE (PAGE_SIZE / TBT_RING_MAX_FRAME_SIZE)
+
+#define TBT_NUM_BUFS_BETWEEN(idx1, idx2, num_bufs) \
+ (((num_bufs) - 1) - \
+ ((((idx1) - (idx2)) + (num_bufs)) & ((num_bufs) - 1)))
+
+#define TX_WAKE_THRESHOLD (2 * DIV_ROUND_UP(TBT_NET_MTU, \
+ TBT_RING_MAX_FRM_DATA_SZ))
+
+#define TBT_NET_DESC_ATTR_SOF_EOF (((PDF_TBT_NET_START_OF_FRAME << \
+ DESC_ATTR_SOF_SHIFT) & \
+ DESC_ATTR_SOF_MASK) | \
+ ((PDF_TBT_NET_END_OF_FRAME << \
+ DESC_ATTR_EOF_SHIFT) & \
+ DESC_ATTR_EOF_MASK))
+
+/* E2E workaround */
+#define TBT_EXIST_BUT_UNUSED_HOPID 2
+
+/*
+ * PDF values marking the position of a ring frame inside a network packet.
+ * The start/end values are used for the descriptor SOF/EOF attributes
+ * (TBT_NET_DESC_ATTR_SOF_EOF) and for the RX ring SOF/EOF masks.
+ */
+enum tbt_net_frame_pdf {
+ PDF_TBT_NET_MIDDLE_FRAME,
+ PDF_TBT_NET_START_OF_FRAME,
+ PDF_TBT_NET_END_OF_FRAME,
+};
+
+/*
+ * ThunderboltIP login message (built in login_retry()).
+ * The trailing crc dword is filled in by send_message().
+ */
+struct thunderbolt_ip_login {
+ struct thunderbolt_ip_header header;
+ __be32 protocol_revision;
+ __be32 transmit_path;
+ __be32 reserved[4];
+ __be32 crc;
+};
+
+/*
+ * ThunderboltIP login response message (built in login_response()).
+ * A status of 0 is treated as success by negotiation_messages().
+ * The trailing crc dword is filled in by send_message().
+ */
+struct thunderbolt_ip_login_response {
+ struct thunderbolt_ip_header header;
+ __be32 status;
+ __be32 receiver_mac_address[2];
+ __be32 receiver_mac_address_length;
+ __be32 reserved[4];
+ __be32 crc;
+};
+
+/*
+ * ThunderboltIP logout message (built in logout()): header plus CRC only.
+ * The crc dword is filled in by send_message().
+ */
+struct thunderbolt_ip_logout {
+ struct thunderbolt_ip_header header;
+ __be32 crc;
+};
+
+/*
+ * ThunderboltIP status message (built in status_reply(), which is queued
+ * when a logout message is received).
+ * The trailing crc dword is filled in by send_message().
+ */
+struct thunderbolt_ip_status {
+ struct thunderbolt_ip_header header;
+ __be32 status;
+ __be32 crc;
+};
+
+/*
+ * Command sent to the firmware (PDF_SW_TO_FW_COMMAND) by
+ * approve_inter_domain() to approve an inter-domain connection.
+ * The AIDC_ATTR_* macros describe the link and depth fields of @attributes.
+ * The trailing crc dword is filled in by send_message().
+ */
+struct approve_inter_domain_connection_cmd {
+ __be32 req_code;
+ __be32 attributes;
+#define AIDC_ATTR_LINK_SHIFT 16
+#define AIDC_ATTR_LINK_MASK GENMASK(18, AIDC_ATTR_LINK_SHIFT)
+#define AIDC_ATTR_DEPTH_SHIFT 20
+#define AIDC_ATTR_DEPTH_MASK GENMASK(23, AIDC_ATTR_DEPTH_SHIFT)
+ uuid_be remote_uuid;
+ __be16 transmit_ring_number;
+ __be16 transmit_path;
+ __be16 receive_ring_number;
+ __be16 receive_path;
+ __be32 crc;
+
+};
+
+/*
+ * Negotiation events recorded as bits in tbt_port.negotiation_status.
+ * Numbering starts at NUM_MEDIUM_STATUSES so that these bits do not collide
+ * with the enum medium_status bits kept in the same field.
+ */
+enum neg_event {
+ RECEIVE_LOGOUT = NUM_MEDIUM_STATUSES,
+ RECEIVE_LOGIN_RESPONSE,
+ RECEIVE_LOGIN,
+ NUM_NEG_EVENTS
+};
+
+/*
+ * Stage flags passed as the mailbox data of the disconnect-path command.
+ * They are bit flags and may be OR-ed together (STAGE_1 | STAGE_2).
+ */
+enum disconnect_path_stage {
+ STAGE_1 = BIT(0),
+ STAGE_2 = BIT(1)
+};
+
+/**
+ * struct tbt_port - the basic tbt_port structure
+ * @nhi_ctxt: context of the nhi controller.
+ * @net_dev: networking device object.
+ * @login_retry_work: delayed work for sending login requests.
+ * @login_response_work: work item for sending login responses.
+ * @logout_work: work item for sending logout requests.
+ * @status_reply_work: work item for sending logout replies.
+ * @approve_inter_domain_work: work item for sending interdomain to icm.
+ * @route_str: allows to route the messages to destination.
+ * @interdomain_local_uuid: allows to route the messages from local source.
+ * @interdomain_remote_uuid: allows to route the messages to destination.
+ * @command_id: a number that identifies the command.
+ * @negotiation_status: holds the network negotiation state.
+ * @msg_enable: used for debugging filters.
+ * @seq_num: a number that identifies the session.
+ * @login_retry_count: counts number of login retries sent.
+ * @local_depth: depth of the remote peer in the chain.
+ * @transmit_path: routing parameter for the icm.
+ * @frame_id: counting ID of frames.
+ * @num: port number.
+ * @local_path: routing parameter for the icm.
+ * @enable_full_e2e: whether to enable full E2E.
+ * @match_frame_id: whether to match frame id on incoming packets.
+ */
+struct tbt_port {
+ struct tbt_nhi_ctxt *nhi_ctxt;
+ struct net_device *net_dev;
+ struct delayed_work login_retry_work;
+ struct work_struct login_response_work;
+ struct work_struct logout_work;
+ struct work_struct status_reply_work;
+ struct work_struct approve_inter_domain_work;
+ struct route_string route_str;
+ uuid_be interdomain_local_uuid;
+ uuid_be interdomain_remote_uuid;
+ u32 command_id;
+ u16 negotiation_status;
+ u16 msg_enable;
+ u8 seq_num;
+ u8 login_retry_count;
+ u8 local_depth;
+ u8 transmit_path;
+ u16 frame_id;
+ u8 num;
+ u8 local_path;
+ bool enable_full_e2e : 1;
+ bool match_frame_id : 1;
+};
+
+/**
+ * disconnect_path - ask the controller to disconnect the inter-domain path
+ * @port: port whose path is disconnected.
+ * @stage: disconnect stage flag(s), passed as the mailbox command data.
+ *
+ * Builds the DISCONNECT_PORT_A_INTER_DOMAIN_PATH + port->num mailbox request
+ * and, while holding mailbox_mutex, sends it and marks the port's medium as
+ * MEDIUM_READY_FOR_CONNECTION. If d0_exit_mailbox_mutex cannot be taken,
+ * nothing is sent and only a notice is logged.
+ */
+static void disconnect_path(struct tbt_port *port,
+			    enum disconnect_path_stage stage)
+{
+	u32 cmd = (DISCONNECT_PORT_A_INTER_DOMAIN_PATH + port->num);
+
+	cmd <<= REG_INMAIL_CMD_CMD_SHIFT;
+	cmd &= REG_INMAIL_CMD_CMD_MASK;
+	cmd |= REG_INMAIL_CMD_REQUEST;
+
+	mutex_lock(&port->nhi_ctxt->mailbox_mutex);
+	if (!mutex_trylock(&port->nhi_ctxt->d0_exit_mailbox_mutex)) {
+		netif_notice(port, link, port->net_dev, "controller id %#x is exiting D0\n",
+			     port->nhi_ctxt->id);
+	} else {
+		/* NOTE(review): the nhi_mailbox() result is not checked */
+		nhi_mailbox(port->nhi_ctxt, cmd, stage, false);
+
+		port->nhi_ctxt->net_devices[port->num].medium_sts =
+			MEDIUM_READY_FOR_CONNECTION;
+
+		mutex_unlock(&port->nhi_ctxt->d0_exit_mailbox_mutex);
+	}
+	mutex_unlock(&port->nhi_ctxt->mailbox_mutex);
+}
+
+/**
+ * tbt_net_tear_down - stop traffic on a port and tear down its path
+ * @net_dev: network device of the port.
+ * @send_logout: when true, queue logout_work to notify the peer.
+ *
+ * Turns the carrier off and stops the TX queue, then cancels pending login
+ * retries. The ring/path teardown (both blocks below) is only done when
+ * negotiation_status has the MEDIUM_CONNECTED bit set. Sleeps
+ * (usleep_range, cancel_delayed_work_sync), so it must not be called from
+ * atomic context.
+ */
+static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout)
+{
+ struct tbt_port *port = netdev_priv(net_dev);
+ void __iomem *iobase = port->nhi_ctxt->iobase;
+ void __iomem *tx_reg = NULL;
+ u32 tx_reg_val = 0;
+
+ netif_carrier_off(net_dev);
+ netif_stop_queue(net_dev);
+
+ if (port->negotiation_status & BIT(MEDIUM_CONNECTED)) {
+ void __iomem *rx_reg = iobase + REG_RX_OPTIONS_BASE +
+ (port->local_path * REG_OPTS_STEP);
+ u32 rx_reg_val = ioread32(rx_reg) & ~REG_OPTS_E2E_EN;
+
+ /* tx_reg/tx_reg_val are reused by the second CONNECTED block below */
+ tx_reg = iobase + REG_TX_OPTIONS_BASE +
+ (port->local_path * REG_OPTS_STEP);
+ tx_reg_val = ioread32(tx_reg) & ~REG_OPTS_E2E_EN;
+
+ disconnect_path(port, STAGE_1);
+
+ /* disable RX flow control */
+ iowrite32(rx_reg_val, rx_reg);
+ /* disable TX flow control */
+ iowrite32(tx_reg_val, tx_reg);
+ /* disable RX ring */
+ iowrite32(rx_reg_val & ~REG_OPTS_VALID, rx_reg);
+
+ /* clear the RX ring physical address registers */
+ rx_reg = iobase + REG_RX_RING_BASE +
+ (port->local_path * REG_RING_STEP);
+ iowrite32(0, rx_reg + REG_RING_PHYS_LO_OFFSET);
+ iowrite32(0, rx_reg + REG_RING_PHYS_HI_OFFSET);
+ }
+
+ /* Stop login messages */
+ cancel_delayed_work_sync(&port->login_retry_work);
+
+ if (send_logout)
+ queue_work(port->nhi_ctxt->net_workqueue, &port->logout_work);
+
+ if (port->negotiation_status & BIT(MEDIUM_CONNECTED)) {
+ unsigned long flags;
+
+ /* wait for TX to finish */
+ usleep_range(5 * USEC_PER_MSEC, 7 * USEC_PER_MSEC);
+ /* disable TX ring */
+ iowrite32(tx_reg_val & ~REG_OPTS_VALID, tx_reg);
+
+ disconnect_path(port, STAGE_2);
+
+ spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+ /* disable RX and TX interrupts */
+ RING_INT_DISABLE_TX_RX(iobase, port->local_path,
+ port->nhi_ctxt->num_paths);
+ spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+ }
+}
+
+/**
+ * send_message - append the CRC to a message and hand it to the controller
+ * @port: port the message is sent for.
+ * @func: name of the calling function, used in log messages.
+ * @pdf: PDF value of the message.
+ * @msg_len: length of @msg in bytes, including the trailing CRC dword.
+ * @msg: message buffer. Its last dword is overwritten with the CRC, so the
+ *	buffer must be writable even though the parameter is const-qualified.
+ *
+ * Return: 0 on success, the down_timeout() error if the send semaphore
+ * could not be taken within 3 seconds, -ENODEV if d0_exit_send_mutex could
+ * not be taken, or the nhi_send_message() error.
+ */
+static inline int send_message(struct tbt_port *port, const char *func,
+			       enum pdf_value pdf, u32 msg_len,
+			       const void *msg)
+{
+	u32 crc_offset = msg_len - sizeof(__be32);
+	__be32 *crc = (__be32 *)((u8 *)msg + crc_offset);
+	bool is_intdom = (pdf == PDF_INTER_DOMAIN_RESPONSE);
+	int res;
+
+	/* CRC covers everything before the trailing CRC dword */
+	*crc = cpu_to_be32(~__crc32c_le(~0, msg, crc_offset));
+	res = down_timeout(&port->nhi_ctxt->send_sem,
+			   msecs_to_jiffies(3 * MSEC_PER_SEC));
+	if (res) {
+		netif_err(port, link, port->net_dev, "%s: controller id %#x timeout on send semaphore\n",
+			  func, port->nhi_ctxt->id);
+		return res;
+	}
+
+	if (!mutex_trylock(&port->nhi_ctxt->d0_exit_send_mutex)) {
+		up(&port->nhi_ctxt->send_sem);
+		netif_notice(port, link, port->net_dev, "%s: controller id %#x is exiting D0\n",
+			     func, port->nhi_ctxt->id);
+		return -ENODEV;
+	}
+
+	res = nhi_send_message(port->nhi_ctxt, pdf, msg_len, msg, is_intdom);
+
+	mutex_unlock(&port->nhi_ctxt->d0_exit_send_mutex);
+	/*
+	 * The semaphore is released here only on failure.
+	 * NOTE(review): on success it is presumably released once the send
+	 * completes elsewhere in the driver - confirm.
+	 */
+	if (res)
+		up(&port->nhi_ctxt->send_sem);
+
+	return res;
+}
+
+/*
+ * Work handler: build the "approve inter-domain connection" command for the
+ * port that owns @work and send it to the firmware.
+ */
+static void approve_inter_domain(struct work_struct *work)
+{
+	struct tbt_port *port = container_of(work, typeof(*port),
+					     approve_inter_domain_work);
+	const u32 link_bits = (L0_PORT_NUM(port->route_str.lo) <<
+			       AIDC_ATTR_LINK_SHIFT) & AIDC_ATTR_LINK_MASK;
+	const u32 depth_bits = (port->local_depth << AIDC_ATTR_DEPTH_SHIFT) &
+			       AIDC_ATTR_DEPTH_MASK;
+	/* fields not listed (crc) start out zeroed; send_message() sets crc */
+	struct approve_inter_domain_connection_cmd cmd = {
+		.req_code = cpu_to_be32(CC_APPROVE_INTER_DOMAIN_CONNECTION),
+		.attributes = cpu_to_be32(link_bits | depth_bits),
+		.remote_uuid = port->interdomain_remote_uuid,
+		.transmit_ring_number = cpu_to_be16(port->local_path),
+		.transmit_path = cpu_to_be16(LOGIN_TX_PATH),
+		.receive_ring_number = cpu_to_be16(port->local_path),
+		.receive_path = cpu_to_be16(port->transmit_path),
+	};
+
+	send_message(port, __func__, PDF_SW_TO_FW_COMMAND, sizeof(cmd), &cmd);
+}
+
+/*
+ * Fill every field of a ThunderboltIP header for @port and consume one
+ * command id (port->command_id is incremented after being stored).
+ * @len_dwords goes into the length field of the header attributes.
+ */
+static inline void prepare_header(struct thunderbolt_ip_header *header,
+				  struct tbt_port *port,
+				  enum thunderbolt_ip_packet_type packet_type,
+				  u8 len_dwords)
+{
+	const uuid_be proto_uuid = APPLE_THUNDERBOLT_IP_PROTOCOL_UUID;
+	u32 attr;
+
+	attr = (port->seq_num << HDR_ATTR_SEQ_NUM_SHIFT) &
+	       HDR_ATTR_SEQ_NUM_MASK;
+	attr |= (len_dwords << HDR_ATTR_LEN_SHIFT) & HDR_ATTR_LEN_MASK;
+
+	header->route_str.hi = cpu_to_be32(port->route_str.hi);
+	header->route_str.lo = cpu_to_be32(port->route_str.lo);
+	header->attributes = cpu_to_be32(attr);
+	header->apple_tbt_ip_proto_uuid = proto_uuid;
+	header->initiator_uuid = port->interdomain_local_uuid;
+	header->target_uuid = port->interdomain_remote_uuid;
+	header->packet_type = cpu_to_be32(packet_type);
+	header->command_id = cpu_to_be32(port->command_id++);
+}
+
+/*
+ * Work handler: send a ThunderboltIP status message (status 0) to the peer
+ * of the port that owns @work.
+ */
+static void status_reply(struct work_struct *work)
+{
+	struct tbt_port *port = container_of(work, typeof(*port),
+					     status_reply_work);
+	/* dwords from the protocol UUID up to, not including, the CRC */
+	const u8 len_dwords =
+		(offsetof(struct thunderbolt_ip_status, crc) -
+		 offsetof(struct thunderbolt_ip_status,
+			  header.apple_tbt_ip_proto_uuid)) / sizeof(u32);
+	struct thunderbolt_ip_status reply = {
+		.status = 0,
+	};
+
+	prepare_header(&reply.header, port, THUNDERBOLT_IP_STATUS_TYPE,
+		       len_dwords);
+
+	send_message(port, __func__, PDF_INTER_DOMAIN_RESPONSE,
+		     sizeof(reply), &reply);
+}
+
+/*
+ * Work handler: send a ThunderboltIP logout message to the peer of the port
+ * that owns @work.
+ */
+static void logout(struct work_struct *work)
+{
+	struct tbt_port *port = container_of(work, typeof(*port),
+					     logout_work);
+	/*
+	 * Zero-initialized like the other message builders, so that no stack
+	 * contents can reach the wire if a header field is ever left unset.
+	 */
+	struct thunderbolt_ip_logout logout_msg = { };
+
+	prepare_header(&logout_msg.header, port,
+		       THUNDERBOLT_IP_LOGOUT_TYPE,
+		       (offsetof(struct thunderbolt_ip_logout, crc) -
+			offsetof(struct thunderbolt_ip_logout,
+				 header.apple_tbt_ip_proto_uuid)) / sizeof(u32));
+
+	send_message(port, __func__, PDF_INTER_DOMAIN_RESPONSE,
+		     sizeof(logout_msg), &logout_msg);
+
+}
+
+/*
+ * Work handler: answer a peer login with a ThunderboltIP login response
+ * carrying this interface's MAC address.
+ */
+static void login_response(struct work_struct *work)
+{
+	struct tbt_port *port = container_of(work, typeof(*port),
+					     login_response_work);
+	/* dwords from the protocol UUID up to, not including, the CRC */
+	const u8 len_dwords =
+		(offsetof(struct thunderbolt_ip_login_response, crc) -
+		 offsetof(struct thunderbolt_ip_login_response,
+			  header.apple_tbt_ip_proto_uuid)) / sizeof(u32);
+	struct thunderbolt_ip_login_response resp = {
+		.receiver_mac_address_length = cpu_to_be32(ETH_ALEN),
+	};
+	u8 *mac = (u8 *)resp.receiver_mac_address;
+
+	prepare_header(&resp.header, port,
+		       THUNDERBOLT_IP_LOGIN_RESPONSE_TYPE, len_dwords);
+
+	ether_addr_copy(mac, port->net_dev->dev_addr);
+
+	send_message(port, __func__, PDF_INTER_DOMAIN_RESPONSE,
+		     sizeof(resp), &resp);
+}
+
+/*
+ * Delayed-work handler: send one ThunderboltIP login message and re-arm
+ * itself every 5 seconds until NUM_TX_LOGIN_RETRIES attempts were made.
+ * Does nothing once the controller context is flagged d0_exit, and stops
+ * retrying when send_message() reports -ENODEV.
+ */
+static void login_retry(struct work_struct *work)
+{
+	struct tbt_port *port = container_of(work, typeof(*port),
+					     login_retry_work.work);
+	/* dwords from the protocol UUID up to, not including, the CRC */
+	const u8 len_dwords =
+		(offsetof(struct thunderbolt_ip_login, crc) -
+		 offsetof(struct thunderbolt_ip_login,
+			  header.apple_tbt_ip_proto_uuid)) / sizeof(u32);
+	struct thunderbolt_ip_login login_msg = {
+		.protocol_revision = cpu_to_be32(
+				APPLE_THUNDERBOLT_IP_PROTOCOL_REVISION),
+		.transmit_path = cpu_to_be32(LOGIN_TX_PATH),
+	};
+	int res;
+
+	if (port->nhi_ctxt->d0_exit)
+		return;
+
+	port->login_retry_count++;
+
+	prepare_header(&login_msg.header, port, THUNDERBOLT_IP_LOGIN_TYPE,
+		       len_dwords);
+
+	res = send_message(port, __func__, PDF_INTER_DOMAIN_RESPONSE,
+			   sizeof(login_msg), &login_msg);
+	if (res == -ENODEV)
+		return;
+
+	if (unlikely(port->login_retry_count >= NUM_TX_LOGIN_RETRIES)) {
+		netif_notice(port, link, port->net_dev, "port %u (%#x) login timeout after %u retries\n",
+			     port->num, port->negotiation_status,
+			     port->login_retry_count);
+		return;
+	}
+
+	queue_delayed_work(port->nhi_ctxt->net_workqueue,
+			   &port->login_retry_work,
+			   msecs_to_jiffies(5 * MSEC_PER_SEC));
+}
+
+/**
+ * negotiation_events - handle a medium status change for a port
+ * @net_dev: network device of the port.
+ * @medium_sts: new medium status.
+ *
+ * Ignored (apart from a debug message) when the interface is not running.
+ *
+ * MEDIUM_DISCONNECTED: tears the port down; a logout is sent only if the
+ * port was connected or ready for connection and no logout was received
+ * from the peer.
+ * MEDIUM_CONNECTED: programs the TX/RX ring registers, enables the RX
+ * interrupt and turns the carrier on; if a logout was received meanwhile,
+ * the path is disconnected instead.
+ * MEDIUM_READY_FOR_CONNECTION: starts the login retries, unless the port is
+ * already connected or a login response was already received.
+ * Any other status is ignored.
+ */
+void negotiation_events(struct net_device *net_dev,
+ enum medium_status medium_sts)
+{
+ struct tbt_port *port = netdev_priv(net_dev);
+ void __iomem *iobase = port->nhi_ctxt->iobase;
+ u32 sof_eof_en, tx_ring_conf, rx_ring_conf, e2e_en;
+ void __iomem *reg;
+ unsigned long flags;
+ u16 hop_id;
+ bool send_logout;
+
+ if (!netif_running(net_dev)) {
+ netif_dbg(port, link, net_dev, "port %u (%#x) is down\n",
+ port->num, port->negotiation_status);
+ return;
+ }
+
+ netif_dbg(port, link, net_dev, "port %u (%#x) receive event %u\n",
+ port->num, port->negotiation_status, medium_sts);
+
+ switch (medium_sts) {
+ case MEDIUM_DISCONNECTED:
+ send_logout = (port->negotiation_status
+ & (BIT(MEDIUM_CONNECTED)
+ | BIT(MEDIUM_READY_FOR_CONNECTION)));
+ send_logout = send_logout && !(port->negotiation_status &
+ BIT(RECEIVE_LOGOUT));
+
+ tbt_net_tear_down(net_dev, send_logout);
+ port->negotiation_status = BIT(MEDIUM_DISCONNECTED);
+ break;
+
+ case MEDIUM_CONNECTED:
+ /*
+ * check if meanwhile other side sent logout
+ * if yes, just don't allow connection to take place
+ * and disconnect path
+ */
+ if (port->negotiation_status & BIT(RECEIVE_LOGOUT)) {
+ disconnect_path(port, STAGE_1 | STAGE_2);
+ break;
+ }
+
+ port->negotiation_status = BIT(MEDIUM_CONNECTED);
+
+ /* configure TX ring */
+ reg = iobase + REG_TX_RING_BASE +
+ (port->local_path * REG_RING_STEP);
+
+ tx_ring_conf = (TBT_NET_NUM_TX_BUFS << REG_RING_SIZE_SHIFT) &
+ REG_RING_SIZE_MASK;
+
+ iowrite32(tx_ring_conf, reg + REG_RING_SIZE_OFFSET);
+
+ /* enable the rings */
+ reg = iobase + REG_TX_OPTIONS_BASE +
+ (port->local_path * REG_OPTS_STEP);
+ if (port->enable_full_e2e) {
+ iowrite32(REG_OPTS_VALID | REG_OPTS_E2E_EN, reg);
+ hop_id = port->local_path;
+ } else {
+ /* E2E workaround: use the existing but unused hop id */
+ iowrite32(REG_OPTS_VALID, reg);
+ hop_id = TBT_EXIST_BUT_UNUSED_HOPID;
+ }
+
+ reg = iobase + REG_RX_OPTIONS_BASE +
+ (port->local_path * REG_OPTS_STEP);
+
+ sof_eof_en = (BIT(PDF_TBT_NET_START_OF_FRAME) <<
+ REG_RX_OPTS_MASK_SOF_SHIFT) &
+ REG_RX_OPTS_MASK_SOF_MASK;
+
+ sof_eof_en |= (BIT(PDF_TBT_NET_END_OF_FRAME) <<
+ REG_RX_OPTS_MASK_EOF_SHIFT) &
+ REG_RX_OPTS_MASK_EOF_MASK;
+
+ iowrite32(sof_eof_en, reg + REG_RX_OPTS_MASK_OFFSET);
+
+ /* RX side always sets E2E_EN; only the hop id differs */
+ e2e_en = REG_OPTS_VALID | REG_OPTS_E2E_EN;
+ e2e_en |= (hop_id << REG_RX_OPTS_TX_E2E_HOP_ID_SHIFT) &
+ REG_RX_OPTS_TX_E2E_HOP_ID_MASK;
+
+ iowrite32(e2e_en, reg);
+
+ /*
+ * Configure RX ring
+ * must be after enable ring for E2E to work
+ */
+ reg = iobase + REG_RX_RING_BASE +
+ (port->local_path * REG_RING_STEP);
+
+ rx_ring_conf = (TBT_NET_NUM_RX_BUFS << REG_RING_SIZE_SHIFT) &
+ REG_RING_SIZE_MASK;
+
+ rx_ring_conf |= (TBT_RING_MAX_FRAME_SIZE <<
+ REG_RING_BUF_SIZE_SHIFT) &
+ REG_RING_BUF_SIZE_MASK;
+
+ iowrite32(rx_ring_conf, reg + REG_RING_SIZE_OFFSET);
+
+ spin_lock_irqsave(&port->nhi_ctxt->lock, flags);
+ /* enable RX interrupt */
+ iowrite32(ioread32(iobase + REG_RING_INTERRUPT_BASE) |
+ REG_RING_INT_RX_PROCESSED(port->local_path,
+ port->nhi_ctxt->num_paths),
+ iobase + REG_RING_INTERRUPT_BASE);
+ spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags);
+
+ netif_info(port, link, net_dev, "Thunderbolt(TM) Networking port %u - ready\n",
+ port->num);
+
+ netif_carrier_on(net_dev);
+ netif_start_queue(net_dev);
+ break;
+
+ case MEDIUM_READY_FOR_CONNECTION:
+ /*
+ * If medium is connected, no reason to go back,
+ * keep it 'connected'.
+ * If received login response, don't need to trigger login
+ * retries again.
+ */
+ if (unlikely(port->negotiation_status &
+ (BIT(MEDIUM_CONNECTED) |
+ BIT(RECEIVE_LOGIN_RESPONSE))))
+ break;
+
+ port->negotiation_status = BIT(MEDIUM_READY_FOR_CONNECTION);
+ port->login_retry_count = 0;
+ queue_delayed_work(port->nhi_ctxt->net_workqueue,
+ &port->login_retry_work, 0);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * negotiation_messages - handle a ThunderboltIP message received from a peer
+ * @net_dev: network device of the port the message belongs to.
+ * @hdr: header of the received message; it is cast to the full message
+ *	structure according to hdr->packet_type.
+ *
+ * Ignored (apart from a debug message) when the interface is not running.
+ * Updates port->negotiation_status according to the login, login response,
+ * logout and status messages, and queues the matching reply/approval work.
+ * Unknown packet types are silently ignored.
+ */
+void negotiation_messages(struct net_device *net_dev,
+ struct thunderbolt_ip_header *hdr)
+{
+ struct tbt_port *port = netdev_priv(net_dev);
+ __be32 status;
+
+ if (!netif_running(net_dev)) {
+ netif_dbg(port, link, net_dev, "port %u (%#x) is down\n",
+ port->num, port->negotiation_status);
+ return;
+ }
+
+ switch (hdr->packet_type) {
+ case cpu_to_be32(THUNDERBOLT_IP_LOGIN_TYPE):
+ /* the 32-bit wire value is stored into the u8 transmit_path */
+ port->transmit_path = be32_to_cpu(
+ ((struct thunderbolt_ip_login *)hdr)->transmit_path);
+ netif_dbg(port, link, net_dev, "port %u (%#x) receive ThunderboltIP login message with transmit path %u\n",
+ port->num, port->negotiation_status,
+ port->transmit_path);
+
+ if (unlikely(port->negotiation_status &
+ BIT(MEDIUM_DISCONNECTED)))
+ break;
+
+ queue_work(port->nhi_ctxt->net_workqueue,
+ &port->login_response_work);
+
+ if (unlikely(port->negotiation_status & BIT(MEDIUM_CONNECTED)))
+ break;
+
+ /*
+ * In case a login response received from other peer
+ * on my login and acked their login for the first time,
+ * so just approve the inter-domain now
+ */
+ if (port->negotiation_status & BIT(RECEIVE_LOGIN_RESPONSE)) {
+ if (!(port->negotiation_status & BIT(RECEIVE_LOGIN)))
+ queue_work(port->nhi_ctxt->net_workqueue,
+ &port->approve_inter_domain_work);
+ /*
+ * if we reached the number of max retries or previous
+ * logout, schedule another round of login retries
+ */
+ } else if ((port->login_retry_count >= NUM_TX_LOGIN_RETRIES) ||
+ (port->negotiation_status & BIT(RECEIVE_LOGOUT))) {
+ port->negotiation_status &= ~(BIT(RECEIVE_LOGOUT));
+ port->login_retry_count = 0;
+ queue_delayed_work(port->nhi_ctxt->net_workqueue,
+ &port->login_retry_work, 0);
+ }
+
+ port->negotiation_status |= BIT(RECEIVE_LOGIN);
+
+ break;
+
+ case cpu_to_be32(THUNDERBOLT_IP_LOGIN_RESPONSE_TYPE):
+ status = ((struct thunderbolt_ip_login_response *)hdr)->status;
+ if (likely(status == 0)) {
+ netif_dbg(port, link, net_dev, "port %u (%#x) receive ThunderboltIP login response message\n",
+ port->num,
+ port->negotiation_status);
+
+ /* ignore responses that are stale or duplicated */
+ if (unlikely(port->negotiation_status &
+ (BIT(MEDIUM_DISCONNECTED) |
+ BIT(MEDIUM_CONNECTED) |
+ BIT(RECEIVE_LOGIN_RESPONSE))))
+ break;
+
+ port->negotiation_status |=
+ BIT(RECEIVE_LOGIN_RESPONSE);
+ cancel_delayed_work_sync(&port->login_retry_work);
+ /*
+ * login was received from other peer and now response
+ * on our login so approve the inter-domain
+ */
+ if (port->negotiation_status & BIT(RECEIVE_LOGIN))
+ queue_work(port->nhi_ctxt->net_workqueue,
+ &port->approve_inter_domain_work);
+ else
+ port->negotiation_status &=
+ ~BIT(RECEIVE_LOGOUT);
+ } else {
+ netif_notice(port, link, net_dev, "port %u (%#x) receive ThunderboltIP login response message with status %u\n",
+ port->num,
+ port->negotiation_status,
+ be32_to_cpu(status));
+ }
+ break;
+
+ case cpu_to_be32(THUNDERBOLT_IP_LOGOUT_TYPE):
+ netif_dbg(port, link, net_dev, "port %u (%#x) receive ThunderboltIP logout message\n",
+ port->num, port->negotiation_status);
+
+ queue_work(port->nhi_ctxt->net_workqueue,
+ &port->status_reply_work);
+ port->negotiation_status &= ~(BIT(RECEIVE_LOGIN) |
+ BIT(RECEIVE_LOGIN_RESPONSE));
+ port->negotiation_status |= BIT(RECEIVE_LOGOUT);
+
+ /* not connected: stop traffic and login retries, no logout */
+ if (!(port->negotiation_status & BIT(MEDIUM_CONNECTED))) {
+ tbt_net_tear_down(net_dev, false);
+ break;
+ }
+
+ tbt_net_tear_down(net_dev, true);
+
+ port->negotiation_status |= BIT(MEDIUM_READY_FOR_CONNECTION);
+ port->negotiation_status &= ~(BIT(MEDIUM_CONNECTED));
+ break;
+
+ case cpu_to_be32(THUNDERBOLT_IP_STATUS_TYPE):
+ netif_dbg(port, link, net_dev, "port %u (%#x) receive ThunderboltIP status message with status %u\n",
+ port->num, port->negotiation_status,
+ be32_to_cpu(
+ ((struct thunderbolt_ip_status *)hdr)->status));
+ break;
+ }
+}
+
+/*
+ * Unregister the network device and free it. @net_dev must not be used
+ * after this call.
+ */
+void nhi_dealloc_etherdev(struct net_device *net_dev)
+{
+ unregister_netdev(net_dev);
+ free_netdev(net_dev);
+}
+
+/*
+ * Refresh the per-connection parameters of a port from the netlink
+ * attributes in @info: route string, remote UUID, local depth and the
+ * E2E / frame-id matching flags. The frame id counter is reset to 0.
+ * Full E2E is only enabled when the controller supports it.
+ */
+void nhi_update_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
+			 struct net_device *net_dev, struct genl_info *info)
+{
+	struct tbt_port *port = netdev_priv(net_dev);
+	struct nlattr **attrs = info->attrs;
+
+	nla_memcpy(&port->route_str, attrs[NHI_ATTR_LOCAL_ROUTE_STRING],
+		   sizeof(port->route_str));
+	nla_memcpy(&port->interdomain_remote_uuid,
+		   attrs[NHI_ATTR_REMOTE_UUID],
+		   sizeof(port->interdomain_remote_uuid));
+	port->local_depth = nla_get_u8(attrs[NHI_ATTR_LOCAL_DEPTH]);
+
+	if (nhi_ctxt->support_full_e2e)
+		port->enable_full_e2e =
+			nla_get_flag(attrs[NHI_ATTR_ENABLE_FULL_E2E]);
+	else
+		port->enable_full_e2e = false;
+
+	port->match_frame_id = nla_get_flag(attrs[NHI_ATTR_MATCH_FRAME_ID]);
+	port->frame_id = 0;
+}
+
+/**
+ * nhi_alloc_etherdev - allocate and initialize the net device of a port
+ * @nhi_ctxt: context of the nhi controller.
+ * @port_num: port number the device is created for.
+ * @info: netlink request carrying the local UUID and the attributes read
+ *	by nhi_update_etherdev().
+ *
+ * The MAC address is derived from @port_num and a hash of the local UUID.
+ * The device is allocated and set up here but not registered.
+ *
+ * Return: the new net device, or NULL if the allocation failed.
+ */
+struct net_device *nhi_alloc_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
+ u8 port_num, struct genl_info *info)
+{
+ struct tbt_port *port;
+ struct net_device *net_dev = alloc_etherdev(sizeof(struct tbt_port));
+ u32 hash;
+
+ if (!net_dev)
+ return NULL;
+
+ SET_NETDEV_DEV(net_dev, &nhi_ctxt->pdev->dev);
+
+ port = netdev_priv(net_dev);
+ port->nhi_ctxt = nhi_ctxt;
+ port->net_dev = net_dev;
+ nla_memcpy(&port->interdomain_local_uuid,
+ info->attrs[NHI_ATTR_LOCAL_UUID],
+ sizeof(port->interdomain_local_uuid));
+ nhi_update_etherdev(nhi_ctxt, net_dev, info);
+ port->num = port_num;
+ port->local_path = PATH_FROM_PORT(nhi_ctxt->num_paths, port_num);
+
+ port->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+ net_dev->addr_assign_type = NET_ADDR_PERM;
+ /* unicast and locally administered MAC */
+ net_dev->dev_addr[0] = (port_num << 4) | 0x02;
+ hash = jhash2((u32 *)&port->interdomain_local_uuid,
+ sizeof(port->interdomain_local_uuid)/sizeof(u32), 0);
+
+ /* bytes 1-4 come from the first hash, byte 5 from a second round */
+ memcpy(net_dev->dev_addr + 1, &hash, sizeof(hash));
+ hash = jhash2((u32 *)&port->interdomain_local_uuid,
+ sizeof(port->interdomain_local_uuid)/sizeof(u32), hash);
+
+ net_dev->dev_addr[5] = hash & 0xff;
+
+ /* "%%d" leaves a literal "%d" in the name template */
+ scnprintf(net_dev->name, sizeof(net_dev->name), "tbtnet%%dp%hhu",
+ port_num);
+
+ INIT_DELAYED_WORK(&port->login_retry_work, login_retry);
+ INIT_WORK(&port->login_response_work, login_response);
+ INIT_WORK(&port->logout_work, logout);
+ INIT_WORK(&port->status_reply_work, status_reply);
+ INIT_WORK(&port->approve_inter_domain_work, approve_inter_domain);
+
+ netif_info(port, probe, net_dev,
+ "Thunderbolt(TM) Networking port %u - MAC Address: %pM\n",
+ port_num, net_dev->dev_addr);
+
+ return net_dev;
+}
diff --git a/drivers/thunderbolt/icm/net.h b/drivers/thunderbolt/icm/net.h
index 0281201..1cb6701 100644
--- a/drivers/thunderbolt/icm/net.h
+++ b/drivers/thunderbolt/icm/net.h
@@ -23,6 +23,10 @@
#include <linux/semaphore.h>
#include <net/genetlink.h>
+#define APPLE_THUNDERBOLT_IP_PROTOCOL_UUID \
+ UUID_BE(0x9E588F79, 0x478A, 0x1636, \
+ 0x64, 0x56, 0xC6, 0x97, 0xDD, 0xC8, 0x20, 0xA9)
+
/*
* Each physical port contains 2 channels.
* Devices are exposed to user based on physical ports.
@@ -33,6 +37,9 @@
* host channel/link which starts from 1.
*/
#define PORT_NUM_FROM_LINK(link) (((link) - 1) / CHANNELS_PER_PORT_NUM)
+#define PORT_NUM_FROM_MSG(msg) PORT_NUM_FROM_LINK(((msg) & \
+ INTER_DOMAIN_LINK_MASK) >> \
+ INTER_DOMAIN_LINK_SHIFT)
#define TBT_TX_RING_FULL(prod, cons, size) ((((prod) + 1) % (size)) == (cons))
#define TBT_TX_RING_EMPTY(prod, cons) ((prod) == (cons))
@@ -125,6 +132,17 @@ enum {
CC_SET_FW_MODE_FDA_DA_ALL
};
+struct route_string {
+ u32 hi;
+ u32 lo;
+};
+
+struct route_string_be {
+ __be32 hi;
+ __be32 lo;
+};
+
+#define L0_PORT_NUM(cpu_route_str_lo) ((cpu_route_str_lo) & GENMASK(5, 0))
/* NHI genetlink attributes */
enum {
@@ -138,12 +156,53 @@ enum {
NHI_ATTR_PDF,
NHI_ATTR_MSG_TO_ICM,
NHI_ATTR_MSG_FROM_ICM,
+ NHI_ATTR_LOCAL_ROUTE_STRING,
+ NHI_ATTR_LOCAL_UUID,
+ NHI_ATTR_REMOTE_UUID,
+ NHI_ATTR_LOCAL_DEPTH,
+ NHI_ATTR_ENABLE_FULL_E2E,
+ NHI_ATTR_MATCH_FRAME_ID,
__NHI_ATTR_MAX,
};
#define NHI_ATTR_MAX (__NHI_ATTR_MAX - 1)
+/* ThunderboltIP Packet Types */
+enum thunderbolt_ip_packet_type {
+ THUNDERBOLT_IP_LOGIN_TYPE,
+ THUNDERBOLT_IP_LOGIN_RESPONSE_TYPE,
+ THUNDERBOLT_IP_LOGOUT_TYPE,
+ THUNDERBOLT_IP_STATUS_TYPE
+};
+
+struct thunderbolt_ip_header {
+ struct route_string_be route_str;
+ __be32 attributes;
+#define HDR_ATTR_LEN_SHIFT 0
+#define HDR_ATTR_LEN_MASK GENMASK(5, HDR_ATTR_LEN_SHIFT)
+#define HDR_ATTR_SEQ_NUM_SHIFT 27
+#define HDR_ATTR_SEQ_NUM_MASK GENMASK(28, HDR_ATTR_SEQ_NUM_SHIFT)
+ uuid_be apple_tbt_ip_proto_uuid;
+ uuid_be initiator_uuid;
+ uuid_be target_uuid;
+ __be32 packet_type;
+ __be32 command_id;
+};
+
+enum medium_status {
+ /* Handle cable disconnection or peer down */
+ MEDIUM_DISCONNECTED,
+ /* Connection is fully established */
+ MEDIUM_CONNECTED,
+ /* Awaiting approval by the user-space module */
+ MEDIUM_READY_FOR_APPROVAL,
+ /* Approved by user-space, waiting for the establishment flow to finish */
+ MEDIUM_READY_FOR_CONNECTION,
+ NUM_MEDIUM_STATUSES
+};
+
struct port_net_dev {
struct net_device *net_dev;
+ enum medium_status medium_sts;
struct mutex state_mutex;
};
@@ -213,5 +272,16 @@ struct tbt_nhi_ctxt {
int nhi_send_message(struct tbt_nhi_ctxt *nhi_ctxt, enum pdf_value pdf,
u32 msg_len, const void *msg, bool ignore_icm_resp);
int nhi_mailbox(struct tbt_nhi_ctxt *nhi_ctxt, u32 cmd, u32 data, bool deinit);
+struct net_device *nhi_alloc_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
+ u8 port_num, struct genl_info *info);
+void nhi_update_etherdev(struct tbt_nhi_ctxt *nhi_ctxt,
+ struct net_device *net_dev, struct genl_info *info);
+void nhi_dealloc_etherdev(struct net_device *net_dev);
+void negotiation_events(struct net_device *net_dev,
+ enum medium_status medium_sts);
+void negotiation_messages(struct net_device *net_dev,
+ struct thunderbolt_ip_header *hdr);
+void tbt_net_rx_msi(struct net_device *net_dev);
+void tbt_net_tx_msi(struct net_device *net_dev);
#endif
--
2.7.4
^ permalink raw reply related
* Re: [PATCH] usbnet: prevent device rpm suspend in usbnet_probe function
From: Oliver Neukum @ 2016-11-09 11:58 UTC (permalink / raw)
To: Alan Stern; +Cc: bjorn, Kai-Heng Feng, linux-kernel, linux-usb, netdev
In-Reply-To: <Pine.LNX.4.44L0.1611081339060.1499-100000@iolanthe.rowland.org>
On Tue, 2016-11-08 at 13:44 -0500, Alan Stern wrote:
> These problems could very well be caused by running at SuperSpeed
> (USB-3) instead of high speed (USB-2).
>
> Is there any way to test what happens when the device is attached to
> the computer by a USB-2 cable? That would prevent it from operating at
> SuperSpeed.
>
> The main point, however, is that the proposed patch doesn't seem to
> address the true problem, which is that the device gets suspended
> between probes. The patch only tries to prevent it from being
> suspended during a probe -- which is already prevented by the USB core.
But why doesn't it fail during normal operation?
I suspect that its firmware requires the altsetting
/* should we change control altsetting on a NCM/MBIM function? */
if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) {
data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM;
ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM);
to be set before it accepts a suspension.
Regards
Oliver
^ permalink raw reply
* [iproute PATCH v2 1/2] include: Add linux/sctp.h
From: Phil Sutter @ 2016-11-09 11:12 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20161109111224.3946-1-phil@nwl.cc>
Add sanitized UAPI linux/sctp.h header file.
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
Changes since v1:
- File properly extracted from upstream kernel headers.
- Description updated.
---
include/linux/sctp.h | 1005 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1005 insertions(+)
create mode 100644 include/linux/sctp.h
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
new file mode 100644
index 0000000000000..eee08c066679e
--- /dev/null
+++ b/include/linux/sctp.h
@@ -0,0 +1,1005 @@
+/* SCTP kernel implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2002 Intel Corp.
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * This header represents the structures and constants needed to support
+ * the SCTP Extension to the Sockets API.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Or submit a bug report through the following website:
+ * http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ * La Monte H.P. Yarroll <piggy@acm.org>
+ * R. Stewart <randall@sctp.chicago.il.us>
+ * K. Morneau <kmorneau@cisco.com>
+ * Q. Xie <qxie1@email.mot.com>
+ * Karl Knutson <karl@athena.chicago.il.us>
+ * Jon Grimm <jgrimm@us.ibm.com>
+ * Daisy Chang <daisyc@us.ibm.com>
+ * Ryan Layer <rmlayer@us.ibm.com>
+ * Ardelle Fan <ardelle.fan@intel.com>
+ * Sridhar Samudrala <sri@us.ibm.com>
+ * Inaky Perez-Gonzalez <inaky.gonzalez@intel.com>
+ * Vlad Yasevich <vladislav.yasevich@hp.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#ifndef _SCTP_H
+#define _SCTP_H
+
+#include <linux/types.h>
+#include <linux/socket.h>
+
+typedef __s32 sctp_assoc_t;
+
+/* The following symbols come from the Sockets API Extensions for
+ * SCTP <draft-ietf-tsvwg-sctpsocket-07.txt>.
+ */
+#define SCTP_RTOINFO 0
+#define SCTP_ASSOCINFO 1
+#define SCTP_INITMSG 2
+#define SCTP_NODELAY 3 /* Get/set nodelay option. */
+#define SCTP_AUTOCLOSE 4
+#define SCTP_SET_PEER_PRIMARY_ADDR 5
+#define SCTP_PRIMARY_ADDR 6
+#define SCTP_ADAPTATION_LAYER 7
+#define SCTP_DISABLE_FRAGMENTS 8
+#define SCTP_PEER_ADDR_PARAMS 9
+#define SCTP_DEFAULT_SEND_PARAM 10
+#define SCTP_EVENTS 11
+#define SCTP_I_WANT_MAPPED_V4_ADDR 12 /* Turn on/off mapped v4 addresses */
+#define SCTP_MAXSEG 13 /* Get/set maximum fragment. */
+#define SCTP_STATUS 14
+#define SCTP_GET_PEER_ADDR_INFO 15
+#define SCTP_DELAYED_ACK_TIME 16
+#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME
+#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME
+#define SCTP_CONTEXT 17
+#define SCTP_FRAGMENT_INTERLEAVE 18
+#define SCTP_PARTIAL_DELIVERY_POINT 19 /* Set/Get partial delivery point */
+#define SCTP_MAX_BURST 20 /* Set/Get max burst */
+#define SCTP_AUTH_CHUNK 21 /* Set only: add a chunk type to authenticate */
+#define SCTP_HMAC_IDENT 22
+#define SCTP_AUTH_KEY 23
+#define SCTP_AUTH_ACTIVE_KEY 24
+#define SCTP_AUTH_DELETE_KEY 25
+#define SCTP_PEER_AUTH_CHUNKS 26 /* Read only */
+#define SCTP_LOCAL_AUTH_CHUNKS 27 /* Read only */
+#define SCTP_GET_ASSOC_NUMBER 28 /* Read only */
+#define SCTP_GET_ASSOC_ID_LIST 29 /* Read only */
+#define SCTP_AUTO_ASCONF 30
+#define SCTP_PEER_ADDR_THLDS 31
+#define SCTP_RECVRCVINFO 32
+#define SCTP_RECVNXTINFO 33
+#define SCTP_DEFAULT_SNDINFO 34
+
+/* Internal Socket Options. Some of the sctp library functions are
+ * implemented using these socket options.
+ */
+#define SCTP_SOCKOPT_BINDX_ADD 100 /* BINDX requests for adding addrs */
+#define SCTP_SOCKOPT_BINDX_REM 101 /* BINDX requests for removing addrs. */
+#define SCTP_SOCKOPT_PEELOFF 102 /* peel off association. */
+/* Options 104-106 are deprecated and removed. Do not use this space */
+#define SCTP_SOCKOPT_CONNECTX_OLD 107 /* CONNECTX old requests. */
+#define SCTP_GET_PEER_ADDRS 108 /* Get all peer address. */
+#define SCTP_GET_LOCAL_ADDRS 109 /* Get all local address. */
+#define SCTP_SOCKOPT_CONNECTX 110 /* CONNECTX requests. */
+#define SCTP_SOCKOPT_CONNECTX3 111 /* CONNECTX requests (updated) */
+#define SCTP_GET_ASSOC_STATS 112 /* Read only */
+#define SCTP_PR_SUPPORTED 113
+#define SCTP_DEFAULT_PRINFO 114
+#define SCTP_PR_ASSOC_STATUS 115
+
+/* PR-SCTP policies */
+#define SCTP_PR_SCTP_NONE 0x0000
+#define SCTP_PR_SCTP_TTL 0x0010
+#define SCTP_PR_SCTP_RTX 0x0020
+#define SCTP_PR_SCTP_PRIO 0x0030
+#define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_PRIO
+#define SCTP_PR_SCTP_MASK 0x0030
+
+#define __SCTP_PR_INDEX(x) ((x >> 4) - 1)
+#define SCTP_PR_INDEX(x) __SCTP_PR_INDEX(SCTP_PR_SCTP_ ## x)
+
+#define SCTP_PR_POLICY(x) ((x) & SCTP_PR_SCTP_MASK)
+#define SCTP_PR_SET_POLICY(flags, x) \
+ do { \
+ flags &= ~SCTP_PR_SCTP_MASK; \
+ flags |= x; \
+ } while (0)
+
+#define SCTP_PR_TTL_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_TTL)
+#define SCTP_PR_RTX_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_RTX)
+#define SCTP_PR_PRIO_ENABLED(x) (SCTP_PR_POLICY(x) == SCTP_PR_SCTP_PRIO)
+
+/* These are bit fields for msghdr->msg_flags. See section 5.1. */
+/* On user space Linux, these live in <bits/socket.h> as an enum. */
+enum sctp_msg_flags {
+ MSG_NOTIFICATION = 0x8000,
+#define MSG_NOTIFICATION MSG_NOTIFICATION
+};
+
+/* 5.3.1 SCTP Initiation Structure (SCTP_INIT)
+ *
+ * This cmsghdr structure provides information for initializing new
+ * SCTP associations with sendmsg(). The SCTP_INITMSG socket option
+ * uses this same data structure. This structure is not used for
+ * recvmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ----------------------
+ * IPPROTO_SCTP SCTP_INIT struct sctp_initmsg
+ */
+struct sctp_initmsg {
+ __u16 sinit_num_ostreams;
+ __u16 sinit_max_instreams;
+ __u16 sinit_max_attempts;
+ __u16 sinit_max_init_timeo;
+};
+
+/* 5.3.2 SCTP Header Information Structure (SCTP_SNDRCV)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg() and
+ * describes SCTP header information about a received message through
+ * recvmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ ----------------------
+ * IPPROTO_SCTP SCTP_SNDRCV struct sctp_sndrcvinfo
+ */
+struct sctp_sndrcvinfo {
+ __u16 sinfo_stream;
+ __u16 sinfo_ssn;
+ __u16 sinfo_flags;
+ __u32 sinfo_ppid;
+ __u32 sinfo_context;
+ __u32 sinfo_timetolive;
+ __u32 sinfo_tsn;
+ __u32 sinfo_cumtsn;
+ sctp_assoc_t sinfo_assoc_id;
+};
+
+/* 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO)
+ *
+ * This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_SNDINFO struct sctp_sndinfo
+ */
+struct sctp_sndinfo {
+ __u16 snd_sid;
+ __u16 snd_flags;
+ __u32 snd_ppid;
+ __u32 snd_context;
+ sctp_assoc_t snd_assoc_id;
+};
+
+/* 5.3.5 SCTP Receive Information Structure (SCTP_RCVINFO)
+ *
+ * This cmsghdr structure describes SCTP receive information
+ * about a received message through recvmsg().
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_RCVINFO struct sctp_rcvinfo
+ */
+struct sctp_rcvinfo {
+ __u16 rcv_sid;
+ __u16 rcv_ssn;
+ __u16 rcv_flags;
+ __u32 rcv_ppid;
+ __u32 rcv_tsn;
+ __u32 rcv_cumtsn;
+ __u32 rcv_context;
+ sctp_assoc_t rcv_assoc_id;
+};
+
+/* 5.3.6 SCTP Next Receive Information Structure (SCTP_NXTINFO)
+ *
+ * This cmsghdr structure describes SCTP receive information
+ * of the next message that will be delivered through recvmsg()
+ * if this information is already available when delivering
+ * the current message.
+ *
+ * cmsg_level cmsg_type cmsg_data[]
+ * ------------ ------------ -------------------
+ * IPPROTO_SCTP SCTP_NXTINFO struct sctp_nxtinfo
+ */
+struct sctp_nxtinfo {
+ __u16 nxt_sid;
+ __u16 nxt_flags;
+ __u32 nxt_ppid;
+ __u32 nxt_length;
+ sctp_assoc_t nxt_assoc_id;
+};
+
+/*
+ * sinfo_flags: 16 bits (unsigned integer)
+ *
+ * This field may contain any of the following flags and is composed of
+ * a bitwise OR of these values.
+ */
+enum sctp_sinfo_flags {
+ SCTP_UNORDERED = (1 << 0), /* Send/receive message unordered. */
+ SCTP_ADDR_OVER = (1 << 1), /* Override the primary destination. */
+ SCTP_ABORT = (1 << 2), /* Send an ABORT message to the peer. */
+ SCTP_SACK_IMMEDIATELY = (1 << 3), /* SACK should be sent without delay. */
+ SCTP_NOTIFICATION = MSG_NOTIFICATION, /* Next message is not user msg but notification. */
+ SCTP_EOF = MSG_FIN, /* Initiate graceful shutdown process. */
+};
+
+typedef union {
+ __u8 raw;
+ struct sctp_initmsg init;
+ struct sctp_sndrcvinfo sndrcv;
+} sctp_cmsg_data_t;
+
+/* These are cmsg_types. */
+typedef enum sctp_cmsg_type {
+ SCTP_INIT, /* 5.2.1 SCTP Initiation Structure */
+#define SCTP_INIT SCTP_INIT
+ SCTP_SNDRCV, /* 5.2.2 SCTP Header Information Structure */
+#define SCTP_SNDRCV SCTP_SNDRCV
+ SCTP_SNDINFO, /* 5.3.4 SCTP Send Information Structure */
+#define SCTP_SNDINFO SCTP_SNDINFO
+ SCTP_RCVINFO, /* 5.3.5 SCTP Receive Information Structure */
+#define SCTP_RCVINFO SCTP_RCVINFO
+ SCTP_NXTINFO, /* 5.3.6 SCTP Next Receive Information Structure */
+#define SCTP_NXTINFO SCTP_NXTINFO
+} sctp_cmsg_t;
+
+/*
+ * 5.3.1.1 SCTP_ASSOC_CHANGE
+ *
+ * Communication notifications inform the ULP that an SCTP association
+ * has either begun or ended. The identifier for a new association is
+ * provided by this notification. The notification information has the
+ * following format:
+ *
+ */
+struct sctp_assoc_change {
+ __u16 sac_type;
+ __u16 sac_flags;
+ __u32 sac_length;
+ __u16 sac_state;
+ __u16 sac_error;
+ __u16 sac_outbound_streams;
+ __u16 sac_inbound_streams;
+ sctp_assoc_t sac_assoc_id;
+ __u8 sac_info[0];
+};
+
+/*
+ * sac_state: 16 bits (unsigned integer)
+ *
+ * This field holds one of a number of values that communicate the
+ * event that happened to the association. They include:
+ *
+ * Note: The following state names deviate from the API draft as
+ * the names clash too easily with other kernel symbols.
+ */
+enum sctp_sac_state {
+ SCTP_COMM_UP,
+ SCTP_COMM_LOST,
+ SCTP_RESTART,
+ SCTP_SHUTDOWN_COMP,
+ SCTP_CANT_STR_ASSOC,
+};
+
+/*
+ * 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+ *
+ * When a destination address on a multi-homed peer encounters a change
+ * an interface details event is sent. The information has the
+ * following structure:
+ */
+struct sctp_paddr_change {
+ __u16 spc_type;
+ __u16 spc_flags;
+ __u32 spc_length;
+ struct sockaddr_storage spc_aaddr;
+ int spc_state;
+ int spc_error;
+ sctp_assoc_t spc_assoc_id;
+} __attribute__((packed, aligned(4)));
+
+/*
+ * spc_state: 32 bits (signed integer)
+ *
+ * This field holds one of a number of values that communicate the
+ * event that happened to the address. They include:
+ */
+enum sctp_spc_state {
+ SCTP_ADDR_AVAILABLE,
+ SCTP_ADDR_UNREACHABLE,
+ SCTP_ADDR_REMOVED,
+ SCTP_ADDR_ADDED,
+ SCTP_ADDR_MADE_PRIM,
+ SCTP_ADDR_CONFIRMED,
+};
+
+
+/*
+ * 5.3.1.3 SCTP_REMOTE_ERROR
+ *
+ * A remote peer may send an Operational Error message to its peer.
+ * This message indicates a variety of error conditions on an
+ * association. The entire error TLV as it appears on the wire is
+ * included in a SCTP_REMOTE_ERROR event. Please refer to the SCTP
+ * specification [SCTP] and any extensions for a list of possible
+ * error formats. SCTP error TLVs have the format:
+ */
+struct sctp_remote_error {
+ __u16 sre_type;
+ __u16 sre_flags;
+ __u32 sre_length;
+ __u16 sre_error;
+ sctp_assoc_t sre_assoc_id;
+ __u8 sre_data[0];
+};
+
+
+/*
+ * 5.3.1.4 SCTP_SEND_FAILED
+ *
+ * If SCTP cannot deliver a message it may return the message as a
+ * notification.
+ */
+struct sctp_send_failed {
+ __u16 ssf_type;
+ __u16 ssf_flags;
+ __u32 ssf_length;
+ __u32 ssf_error;
+ struct sctp_sndrcvinfo ssf_info;
+ sctp_assoc_t ssf_assoc_id;
+ __u8 ssf_data[0];
+};
+
+/*
+ * ssf_flags: 16 bits (unsigned integer)
+ *
+ * The flag value will take one of the following values
+ *
+ * SCTP_DATA_UNSENT - Indicates that the data was never put on
+ * the wire.
+ *
+ * SCTP_DATA_SENT - Indicates that the data was put on the wire.
+ * Note that this does not necessarily mean that the
+ * data was (or was not) successfully delivered.
+ */
+enum sctp_ssf_flags {
+ SCTP_DATA_UNSENT,
+ SCTP_DATA_SENT,
+};
+
+/*
+ * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+ *
+ * When a peer sends a SHUTDOWN, SCTP delivers this notification to
+ * inform the application that it should cease sending data.
+ */
+struct sctp_shutdown_event {
+ __u16 sse_type;
+ __u16 sse_flags;
+ __u32 sse_length;
+ sctp_assoc_t sse_assoc_id;
+};
+
+/*
+ * 5.3.1.6 SCTP_ADAPTATION_INDICATION
+ *
+ * When a peer sends an Adaptation Layer Indication parameter, SCTP
+ * delivers this notification to inform the application
+ * of the peer's requested adaptation layer.
+ */
+struct sctp_adaptation_event {
+ __u16 sai_type;
+ __u16 sai_flags;
+ __u32 sai_length;
+ __u32 sai_adaptation_ind;
+ sctp_assoc_t sai_assoc_id;
+};
+
+/*
+ * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT
+ *
+ * When a receiver is engaged in a partial delivery of a
+ * message this notification will be used to indicate
+ * various events.
+ */
+struct sctp_pdapi_event {
+ __u16 pdapi_type;
+ __u16 pdapi_flags;
+ __u32 pdapi_length;
+ __u32 pdapi_indication;
+ sctp_assoc_t pdapi_assoc_id;
+};
+
+enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, };
+
+/*
+ * 5.3.1.8. SCTP_AUTHENTICATION_EVENT
+ *
+ * When a receiver is using authentication this message will provide
+ * notifications regarding new keys being made active as well as errors.
+ */
+struct sctp_authkey_event {
+ __u16 auth_type;
+ __u16 auth_flags;
+ __u32 auth_length;
+ __u16 auth_keynumber;
+ __u16 auth_altkeynumber;
+ __u32 auth_indication;
+ sctp_assoc_t auth_assoc_id;
+};
+
+enum { SCTP_AUTH_NEWKEY = 0, };
+
+/*
+ * 6.1.9. SCTP_SENDER_DRY_EVENT
+ *
+ * When the SCTP stack has no more user data to send or retransmit, this
+ * notification is given to the user. Also, at the time when a user app
+ * subscribes to this event, if there is no data to be sent or
+ * retransmit, the stack will immediately send up this notification.
+ */
+struct sctp_sender_dry_event {
+ __u16 sender_dry_type;
+ __u16 sender_dry_flags;
+ __u32 sender_dry_length;
+ sctp_assoc_t sender_dry_assoc_id;
+};
+
+/*
+ * Described in Section 7.3
+ * Ancillary Data and Notification Interest Options
+ */
+struct sctp_event_subscribe {
+ __u8 sctp_data_io_event;
+ __u8 sctp_association_event;
+ __u8 sctp_address_event;
+ __u8 sctp_send_failure_event;
+ __u8 sctp_peer_error_event;
+ __u8 sctp_shutdown_event;
+ __u8 sctp_partial_delivery_event;
+ __u8 sctp_adaptation_layer_event;
+ __u8 sctp_authentication_event;
+ __u8 sctp_sender_dry_event;
+};
+
+/*
+ * 5.3.1 SCTP Notification Structure
+ *
+ * The notification structure is defined as the union of all
+ * notification types.
+ *
+ */
+union sctp_notification {
+ struct {
+ __u16 sn_type; /* Notification type. */
+ __u16 sn_flags;
+ __u32 sn_length;
+ } sn_header;
+ struct sctp_assoc_change sn_assoc_change;
+ struct sctp_paddr_change sn_paddr_change;
+ struct sctp_remote_error sn_remote_error;
+ struct sctp_send_failed sn_send_failed;
+ struct sctp_shutdown_event sn_shutdown_event;
+ struct sctp_adaptation_event sn_adaptation_event;
+ struct sctp_pdapi_event sn_pdapi_event;
+ struct sctp_authkey_event sn_authkey_event;
+ struct sctp_sender_dry_event sn_sender_dry_event;
+};
+
+/* Section 5.3.1
+ * All standard values for sn_type flags are greater than 2^15.
+ * Values from 2^15 and down are reserved.
+ */
+
+enum sctp_sn_type {
+ SCTP_SN_TYPE_BASE = (1<<15),
+ SCTP_ASSOC_CHANGE,
+#define SCTP_ASSOC_CHANGE SCTP_ASSOC_CHANGE
+ SCTP_PEER_ADDR_CHANGE,
+#define SCTP_PEER_ADDR_CHANGE SCTP_PEER_ADDR_CHANGE
+ SCTP_SEND_FAILED,
+#define SCTP_SEND_FAILED SCTP_SEND_FAILED
+ SCTP_REMOTE_ERROR,
+#define SCTP_REMOTE_ERROR SCTP_REMOTE_ERROR
+ SCTP_SHUTDOWN_EVENT,
+#define SCTP_SHUTDOWN_EVENT SCTP_SHUTDOWN_EVENT
+ SCTP_PARTIAL_DELIVERY_EVENT,
+#define SCTP_PARTIAL_DELIVERY_EVENT SCTP_PARTIAL_DELIVERY_EVENT
+ SCTP_ADAPTATION_INDICATION,
+#define SCTP_ADAPTATION_INDICATION SCTP_ADAPTATION_INDICATION
+ SCTP_AUTHENTICATION_EVENT,
+#define SCTP_AUTHENTICATION_INDICATION SCTP_AUTHENTICATION_EVENT
+ SCTP_SENDER_DRY_EVENT,
+#define SCTP_SENDER_DRY_EVENT SCTP_SENDER_DRY_EVENT
+};
+
+/* Notification error codes used to fill up the error fields in some
+ * notifications.
+ * SCTP_PEER_ADDR_CHANGE : spc_error
+ * SCTP_ASSOC_CHANGE : sac_error
+ * These names should be potentially included in the draft 04 of the SCTP
+ * sockets API specification.
+ */
+typedef enum sctp_sn_error {
+ SCTP_FAILED_THRESHOLD,
+ SCTP_RECEIVED_SACK,
+ SCTP_HEARTBEAT_SUCCESS,
+ SCTP_RESPONSE_TO_USER_REQ,
+ SCTP_INTERNAL_ERROR,
+ SCTP_SHUTDOWN_GUARD_EXPIRES,
+ SCTP_PEER_FAULTY,
+} sctp_sn_error_t;
+
+/*
+ * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO)
+ *
+ * The protocol parameters used to initialize and bound retransmission
+ * timeout (RTO) are tunable. See [SCTP] for more information on how
+ * these parameters are used in RTO calculation.
+ */
+struct sctp_rtoinfo {
+ sctp_assoc_t srto_assoc_id;
+ __u32 srto_initial;
+ __u32 srto_max;
+ __u32 srto_min;
+};
+
+/*
+ * 7.1.2 Association Parameters (SCTP_ASSOCINFO)
+ *
+ * This option is used to both examine and set various association and
+ * endpoint parameters.
+ */
+struct sctp_assocparams {
+ sctp_assoc_t sasoc_assoc_id;
+ __u16 sasoc_asocmaxrxt;
+ __u16 sasoc_number_peer_destinations;
+ __u32 sasoc_peer_rwnd;
+ __u32 sasoc_local_rwnd;
+ __u32 sasoc_cookie_life;
+};
+
+/*
+ * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR)
+ *
+ * Requests that the peer mark the enclosed address as the association
+ * primary. The enclosed address must be one of the association's
+ * locally bound addresses. The following structure is used to make a
+ * set primary request:
+ */
+struct sctp_setpeerprim {
+ sctp_assoc_t sspp_assoc_id;
+ struct sockaddr_storage sspp_addr;
+} __attribute__((packed, aligned(4)));
+
+/*
+ * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
+ *
+ * Requests that the local SCTP stack use the enclosed peer address as
+ * the association primary. The enclosed address must be one of the
+ * association peer's addresses. The following structure is used to
+ * make a set peer primary request:
+ */
+struct sctp_prim {
+ sctp_assoc_t ssp_assoc_id;
+ struct sockaddr_storage ssp_addr;
+} __attribute__((packed, aligned(4)));
+
+/* For backward compatibility use, define the old name too */
+#define sctp_setprim sctp_prim
+
+/*
+ * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER)
+ *
+ * Requests that the local endpoint set the specified Adaptation Layer
+ * Indication parameter for all future INIT and INIT-ACK exchanges.
+ */
+struct sctp_setadaptation {
+ __u32 ssb_adaptation_ind;
+};
+
+/*
+ * 7.1.13 Peer Address Parameters (SCTP_PEER_ADDR_PARAMS)
+ *
+ * Applications can enable or disable heartbeats for any peer address
+ * of an association, modify an address's heartbeat interval, force a
+ * heartbeat to be sent immediately, and adjust the address's maximum
+ * number of retransmissions sent before an address is considered
+ * unreachable. The following structure is used to access and modify an
+ * address's parameters:
+ */
+enum sctp_spp_flags {
+ SPP_HB_ENABLE = 1<<0, /*Enable heartbeats*/
+ SPP_HB_DISABLE = 1<<1, /*Disable heartbeats*/
+ SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE,
+ SPP_HB_DEMAND = 1<<2, /*Send heartbeat immediately*/
+ SPP_PMTUD_ENABLE = 1<<3, /*Enable PMTU discovery*/
+ SPP_PMTUD_DISABLE = 1<<4, /*Disable PMTU discovery*/
+ SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE,
+ SPP_SACKDELAY_ENABLE = 1<<5, /*Enable SACK*/
+ SPP_SACKDELAY_DISABLE = 1<<6, /*Disable SACK*/
+ SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
+ SPP_HB_TIME_IS_ZERO = 1<<7, /* Set HB delay to 0 */
+};
+
+struct sctp_paddrparams {
+ sctp_assoc_t spp_assoc_id;
+ struct sockaddr_storage spp_address;
+ __u32 spp_hbinterval;
+ __u16 spp_pathmaxrxt;
+ __u32 spp_pathmtu;
+ __u32 spp_sackdelay;
+ __u32 spp_flags;
+} __attribute__((packed, aligned(4)));
+
+/*
+ * 7.1.18. Add a chunk that must be authenticated (SCTP_AUTH_CHUNK)
+ *
+ * This set option adds a chunk type that the user is requesting to be
+ * received only in an authenticated way. Changes to the list of chunks
+ * will only affect future associations on the socket.
+ */
+struct sctp_authchunk {
+ __u8 sauth_chunk;
+};
+
+/*
+ * 7.1.19. Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT)
+ *
+ * This option gets or sets the list of HMAC algorithms that the local
+ * endpoint requires the peer to use.
+ */
+/* This here is only used by user space as is. It might not be a good idea
+ * to export/reveal the whole structure with reserved fields etc.
+ */
+enum {
+ SCTP_AUTH_HMAC_ID_SHA1 = 1,
+ SCTP_AUTH_HMAC_ID_SHA256 = 3,
+};
+
+struct sctp_hmacalgo {
+ __u32 shmac_num_idents;
+ __u16 shmac_idents[];
+};
+
+/* Sadly, user and kernel space have different names for
+ * this structure member, so this is to not break anything.
+ */
+#define shmac_number_of_idents shmac_num_idents
+
+/*
+ * 7.1.20. Set a shared key (SCTP_AUTH_KEY)
+ *
+ * This option will set a shared secret key which is used to build an
+ * association shared key.
+ */
+struct sctp_authkey {
+ sctp_assoc_t sca_assoc_id;
+ __u16 sca_keynumber;
+ __u16 sca_keylength;
+ __u8 sca_key[];
+};
+
+/*
+ * 7.1.21. Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY)
+ *
+ * This option will get or set the active shared key to be used to build
+ * the association shared key.
+ */
+
+struct sctp_authkeyid {
+ sctp_assoc_t scact_assoc_id;
+ __u16 scact_keynumber;
+};
+
+
+/*
+ * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK)
+ *
+ * This option will affect the way delayed acks are performed. This
+ * option allows you to get or set the delayed ack time, in
+ * milliseconds. It also allows changing the delayed ack frequency.
+ * Changing the frequency to 1 disables the delayed sack algorithm. If
+ * the assoc_id is 0, then this sets or gets the endpoint's default
+ * values. If the assoc_id field is non-zero, then the set or get
+ * affects the specified association for the one to many model (the
+ * assoc_id field is ignored by the one to one model). Note that if
+ * sack_delay or sack_freq are 0 when setting this option, then the
+ * current values will remain unchanged.
+ */
+struct sctp_sack_info {
+ sctp_assoc_t sack_assoc_id;
+ uint32_t sack_delay;
+ uint32_t sack_freq;
+};
+
+struct sctp_assoc_value {
+ sctp_assoc_t assoc_id;
+ uint32_t assoc_value;
+};
+
+/*
+ * 7.2.2 Peer Address Information
+ *
+ * Applications can retrieve information about a specific peer address
+ * of an association, including its reachability state, congestion
+ * window, and retransmission timer values. This information is
+ * read-only. The following structure is used to access this
+ * information:
+ */
+struct sctp_paddrinfo {
+ sctp_assoc_t spinfo_assoc_id;
+ struct sockaddr_storage spinfo_address;
+ __s32 spinfo_state;
+ __u32 spinfo_cwnd;
+ __u32 spinfo_srtt;
+ __u32 spinfo_rto;
+ __u32 spinfo_mtu;
+} __attribute__((packed, aligned(4)));
+
+/* Peer address's state. */
+/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x]
+ * calls.
+ * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters.
+ * Not yet confirmed by a heartbeat and not available for data
+ * transfers.
+ * ACTIVE : Peer address confirmed, active and available for data transfers.
+ * INACTIVE: Peer address inactive and not available for data transfers.
+ */
+enum sctp_spinfo_state {
+ SCTP_INACTIVE,
+ SCTP_PF,
+ SCTP_ACTIVE,
+ SCTP_UNCONFIRMED,
+ SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */
+};
+
+/*
+ * 7.2.1 Association Status (SCTP_STATUS)
+ *
+ * Applications can retrieve current status information about an
+ * association, including association state, peer receiver window size,
+ * number of unacked data chunks, and number of data chunks pending
+ * receipt. This information is read-only. The following structure is
+ * used to access this information:
+ */
+struct sctp_status {
+ sctp_assoc_t sstat_assoc_id;
+ __s32 sstat_state;
+ __u32 sstat_rwnd;
+ __u16 sstat_unackdata;
+ __u16 sstat_penddata;
+ __u16 sstat_instrms;
+ __u16 sstat_outstrms;
+ __u32 sstat_fragmentation_point;
+ struct sctp_paddrinfo sstat_primary;
+};
+
+/*
+ * 7.2.3. Get the list of chunks the peer requires to be authenticated
+ * (SCTP_PEER_AUTH_CHUNKS)
+ *
+ * This option gets a list of chunks for a specified association that
+ * the peer requires to be received authenticated only.
+ */
+struct sctp_authchunks {
+ sctp_assoc_t gauth_assoc_id;
+ __u32 gauth_number_of_chunks;
+ uint8_t gauth_chunks[];
+};
+
+/* The broken spelling has been released already in lksctp-tools header,
+ * so don't break anyone, now that it's fixed.
+ */
+#define guth_number_of_chunks gauth_number_of_chunks
+
+/* Association states. */
+enum sctp_sstat_state {
+ SCTP_EMPTY = 0,
+ SCTP_CLOSED = 1,
+ SCTP_COOKIE_WAIT = 2,
+ SCTP_COOKIE_ECHOED = 3,
+ SCTP_ESTABLISHED = 4,
+ SCTP_SHUTDOWN_PENDING = 5,
+ SCTP_SHUTDOWN_SENT = 6,
+ SCTP_SHUTDOWN_RECEIVED = 7,
+ SCTP_SHUTDOWN_ACK_SENT = 8,
+};
+
+/*
+ * 8.2.6. Get the Current Identifiers of Associations
+ * (SCTP_GET_ASSOC_ID_LIST)
+ *
+ * This option gets the current list of SCTP association identifiers of
+ * the SCTP associations handled by a one-to-many style socket.
+ */
+struct sctp_assoc_ids {
+ __u32 gaids_number_of_ids;
+ sctp_assoc_t gaids_assoc_id[];
+};
+
+/*
+ * 8.3, 8.5 get all peer/local addresses in an association.
+ * This parameter struct is used by SCTP_GET_PEER_ADDRS and
+ * SCTP_GET_LOCAL_ADDRS socket options used internally to implement
+ * sctp_getpaddrs() and sctp_getladdrs() API.
+ */
+struct sctp_getaddrs_old {
+ sctp_assoc_t assoc_id;
+ int addr_num;
+ struct sockaddr *addrs;
+};
+
+struct sctp_getaddrs {
+ sctp_assoc_t assoc_id; /*input*/
+ __u32 addr_num; /*output*/
+ __u8 addrs[0]; /*output, variable size*/
+};
+
+/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves
+ * association stats. All stats are counts except sas_maxrto and
+ * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since
+ * the last call. Will return 0 when RTO was not updated since last call
+ */
+struct sctp_assoc_stats {
+ sctp_assoc_t sas_assoc_id; /* Input */
+ /* Transport of observed max RTO */
+ struct sockaddr_storage sas_obs_rto_ipaddr;
+ __u64 sas_maxrto; /* Maximum Observed RTO for period */
+ __u64 sas_isacks; /* SACKs received */
+ __u64 sas_osacks; /* SACKs sent */
+ __u64 sas_opackets; /* Packets sent */
+ __u64 sas_ipackets; /* Packets received */
+ __u64 sas_rtxchunks; /* Retransmitted Chunks */
+ __u64 sas_outofseqtsns;/* TSN received > next expected */
+ __u64 sas_idupchunks; /* Dups received (ordered+unordered) */
+ __u64 sas_gapcnt; /* Gap Acknowledgements Received */
+ __u64 sas_ouodchunks; /* Unordered data chunks sent */
+ __u64 sas_iuodchunks; /* Unordered data chunks received */
+ __u64 sas_oodchunks; /* Ordered data chunks sent */
+ __u64 sas_iodchunks; /* Ordered data chunks received */
+ __u64 sas_octrlchunks; /* Control chunks sent */
+ __u64 sas_ictrlchunks; /* Control chunks received */
+};
+
+/*
+ * 8.1 sctp_bindx()
+ *
+ * The flags parameter is formed from the bitwise OR of zero or more of the
+ * following currently defined flags:
+ */
+#define SCTP_BINDX_ADD_ADDR 0x01
+#define SCTP_BINDX_REM_ADDR 0x02
+
+/* This is the structure that is passed as an argument(optval) to
+ * getsockopt(SCTP_SOCKOPT_PEELOFF).
+ */
+typedef struct {
+ sctp_assoc_t associd;
+ int sd;
+} sctp_peeloff_arg_t;
+
+/*
+ * Peer Address Thresholds socket option
+ */
+struct sctp_paddrthlds {
+ sctp_assoc_t spt_assoc_id;
+ struct sockaddr_storage spt_address;
+ __u16 spt_pathmaxrxt;
+ __u16 spt_pathpfthld;
+};
+
+/*
+ * Socket Option for Getting the Association/Stream-Specific PR-SCTP Status
+ */
+struct sctp_prstatus {
+ sctp_assoc_t sprstat_assoc_id;
+ __u16 sprstat_sid;
+ __u16 sprstat_policy;
+ __u64 sprstat_abandoned_unsent;
+ __u64 sprstat_abandoned_sent;
+};
+
+struct sctp_default_prinfo {
+ sctp_assoc_t pr_assoc_id;
+ __u32 pr_value;
+ __u16 pr_policy;
+};
+
+struct sctp_info {
+ __u32 sctpi_tag;
+ __u32 sctpi_state;
+ __u32 sctpi_rwnd;
+ __u16 sctpi_unackdata;
+ __u16 sctpi_penddata;
+ __u16 sctpi_instrms;
+ __u16 sctpi_outstrms;
+ __u32 sctpi_fragmentation_point;
+ __u32 sctpi_inqueue;
+ __u32 sctpi_outqueue;
+ __u32 sctpi_overall_error;
+ __u32 sctpi_max_burst;
+ __u32 sctpi_maxseg;
+ __u32 sctpi_peer_rwnd;
+ __u32 sctpi_peer_tag;
+ __u8 sctpi_peer_capable;
+ __u8 sctpi_peer_sack;
+ __u16 __reserved1;
+
+ /* assoc status info */
+ __u64 sctpi_isacks;
+ __u64 sctpi_osacks;
+ __u64 sctpi_opackets;
+ __u64 sctpi_ipackets;
+ __u64 sctpi_rtxchunks;
+ __u64 sctpi_outofseqtsns;
+ __u64 sctpi_idupchunks;
+ __u64 sctpi_gapcnt;
+ __u64 sctpi_ouodchunks;
+ __u64 sctpi_iuodchunks;
+ __u64 sctpi_oodchunks;
+ __u64 sctpi_iodchunks;
+ __u64 sctpi_octrlchunks;
+ __u64 sctpi_ictrlchunks;
+
+ /* primary transport info */
+ struct sockaddr_storage sctpi_p_address;
+ __s32 sctpi_p_state;
+ __u32 sctpi_p_cwnd;
+ __u32 sctpi_p_srtt;
+ __u32 sctpi_p_rto;
+ __u32 sctpi_p_hbinterval;
+ __u32 sctpi_p_pathmaxrxt;
+ __u32 sctpi_p_sackdelay;
+ __u32 sctpi_p_sackfreq;
+ __u32 sctpi_p_ssthresh;
+ __u32 sctpi_p_partial_bytes_acked;
+ __u32 sctpi_p_flight_size;
+ __u16 sctpi_p_error;
+ __u16 __reserved2;
+
+ /* sctp sock info */
+ __u32 sctpi_s_autoclose;
+ __u32 sctpi_s_adaptation_ind;
+ __u32 sctpi_s_pd_point;
+ __u8 sctpi_s_nodelay;
+ __u8 sctpi_s_disable_fragments;
+ __u8 sctpi_s_v4mapped;
+ __u8 sctpi_s_frag_interleave;
+ __u32 sctpi_s_type;
+ __u32 __reserved3;
+};
+
+#endif /* _SCTP_H */
--
2.10.0
^ permalink raw reply related
* [iproute PATCH v2 2/2] ss: Add support for SCTP protocol
From: Phil Sutter @ 2016-11-09 11:12 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20161109111224.3946-1-phil@nwl.cc>
This makes use of the sctp_diag interface recently added to the kernel.
Joint work with Xin Long who provided the PoC implementation which I
merely polished up a bit.
Signed-off-by: Phil Sutter <phil@nwl.cc>
---
Changes since v1:
- Added missing bits to ss man page.
---
man/man8/ss.8 | 3 +
misc/ss.c | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 207 insertions(+), 8 deletions(-)
diff --git a/man/man8/ss.8 b/man/man8/ss.8
index 8911976faa35b..4ef11523b4268 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -122,6 +122,9 @@ Display RAW sockets.
.B \-x, \-\-unix
Display Unix domain sockets (alias for -f unix).
.TP
+.B \-S, \-\-sctp
+Display SCTP sockets.
+.TP
.B \-f FAMILY, \-\-family=FAMILY
Display sockets of type FAMILY.
Currently the following families are supported: unix, inet, inet6, link, netlink.
diff --git a/misc/ss.c b/misc/ss.c
index dd77b8153b6da..bb72fafe6f3cd 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -43,6 +43,7 @@
#include <linux/filter.h>
#include <linux/packet_diag.h>
#include <linux/netlink_diag.h>
+#include <linux/sctp.h>
#define MAGIC_SEQ 123456
@@ -102,6 +103,7 @@ int show_header = 1;
/* If show_users & show_proc_ctx only do user_ent_hash_build() once */
int user_ent_hash_build_init;
int follow_events;
+int sctp_ino;
int netid_width;
int state_width;
@@ -111,6 +113,7 @@ int serv_width;
int screen_width;
static const char *TCP_PROTO = "tcp";
+static const char *SCTP_PROTO = "sctp";
static const char *UDP_PROTO = "udp";
static const char *RAW_PROTO = "raw";
static const char *dg_proto;
@@ -126,13 +129,14 @@ enum {
PACKET_DG_DB,
PACKET_R_DB,
NETLINK_DB,
+ SCTP_DB,
MAX_DB
};
#define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB))
#define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB)|(1<<UNIX_SQ_DB))
#define ALL_DB ((1<<MAX_DB)-1)
-#define INET_DBM ((1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<RAW_DB))
+#define INET_DBM ((1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<RAW_DB)|(1<<SCTP_DB))
enum {
SS_UNKNOWN,
@@ -150,6 +154,17 @@ enum {
SS_MAX
};
+enum {
+ SCTP_STATE_CLOSED = 0,
+ SCTP_STATE_COOKIE_WAIT = 1,
+ SCTP_STATE_COOKIE_ECHOED = 2,
+ SCTP_STATE_ESTABLISHED = 3,
+ SCTP_STATE_SHUTDOWN_PENDING = 4,
+ SCTP_STATE_SHUTDOWN_SENT = 5,
+ SCTP_STATE_SHUTDOWN_RECEIVED = 6,
+ SCTP_STATE_SHUTDOWN_ACK_SENT = 7,
+};
+
#define SS_ALL ((1 << SS_MAX) - 1)
#define SS_CONN (SS_ALL & ~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV)))
@@ -204,6 +219,10 @@ static const struct filter default_dbs[MAX_DB] = {
.states = (1 << SS_CLOSE),
.families = (1 << AF_NETLINK),
},
+ [SCTP_DB] = {
+ .states = SS_CONN,
+ .families = (1 << AF_INET) | (1 << AF_INET6),
+ },
};
static const struct filter default_afs[AF_MAX] = {
@@ -264,6 +283,7 @@ static void filter_default_dbs(struct filter *f)
filter_db_set(f, PACKET_R_DB);
filter_db_set(f, PACKET_DG_DB);
filter_db_set(f, NETLINK_DB);
+ filter_db_set(f, SCTP_DB);
}
static void filter_states_set(struct filter *f, int states)
@@ -705,6 +725,17 @@ static const char *sstate_name[] = {
[SS_CLOSING] = "CLOSING",
};
+static const char *sctp_sstate_name[] = {
+ [SCTP_STATE_CLOSED] = "CLOSED",
+ [SCTP_STATE_COOKIE_WAIT] = "COOKIE_WAIT",
+ [SCTP_STATE_COOKIE_ECHOED] = "COOKIE_ECHOED",
+ [SCTP_STATE_ESTABLISHED] = "ESTAB",
+ [SCTP_STATE_SHUTDOWN_PENDING] = "SHUTDOWN_PENDING",
+ [SCTP_STATE_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
+ [SCTP_STATE_SHUTDOWN_RECEIVED] = "SHUTDOWN_RECEIVED",
+ [SCTP_STATE_SHUTDOWN_ACK_SENT] = "ACK_SENT",
+};
+
static const char *sstate_namel[] = {
"UNKNOWN",
[SS_ESTABLISHED] = "established",
@@ -793,12 +824,30 @@ struct tcpstat {
struct tcp_bbr_info *bbr_info;
};
+/* SCTP assocs share the same inode number with their parent endpoint. So if we
+ * have seen the inode number before, it must be an assoc instead of the next
+ * endpoint. */
+static bool is_sctp_assoc(struct sockstat *s, const char *sock_name)
+{
+ if (strcmp(sock_name, "sctp"))
+ return false;
+ if (!sctp_ino || sctp_ino != s->ino)
+ return false;
+ return true;
+}
+
static void sock_state_print(struct sockstat *s, const char *sock_name)
{
if (netid_width)
- printf("%-*s ", netid_width, sock_name);
- if (state_width)
- printf("%-*s ", state_width, sstate_name[s->state]);
+ printf("%-*s ", netid_width,
+ is_sctp_assoc(s, sock_name) ? "" : sock_name);
+ if (state_width) {
+ if (is_sctp_assoc(s, sock_name))
+ printf("`- %-*s ", state_width - 3,
+ sctp_sstate_name[s->state]);
+ else
+ printf("%-*s ", state_width, sstate_name[s->state]);
+ }
printf("%-6d %-6d ", s->rq, s->wq);
}
@@ -908,6 +957,8 @@ static void init_service_resolver(void)
c->proto = TCP_PROTO;
else if (strcmp(proto, UDP_PROTO) == 0)
c->proto = UDP_PROTO;
+ else if (strcmp(proto, SCTP_PROTO) == 0)
+ c->proto = SCTP_PROTO;
else
c->proto = NULL;
c->next = rlist;
@@ -1679,6 +1730,8 @@ static char *proto_name(int protocol)
return "udp";
case IPPROTO_TCP:
return "tcp";
+ case IPPROTO_SCTP:
+ return "sctp";
case IPPROTO_DCCP:
return "dccp";
}
@@ -1771,6 +1824,56 @@ static char *sprint_bw(char *buf, double bw)
return buf;
}
+static void sctp_stats_print(struct sctp_info *s)
+{
+ if (s->sctpi_tag)
+ printf(" tag:%x", s->sctpi_tag);
+ if (s->sctpi_state)
+ printf(" state:%s", sctp_sstate_name[s->sctpi_state]);
+ if (s->sctpi_rwnd)
+ printf(" rwnd:%d", s->sctpi_rwnd);
+ if (s->sctpi_unackdata)
+ printf(" unackdata:%d", s->sctpi_unackdata);
+ if (s->sctpi_penddata)
+ printf(" penddata:%d", s->sctpi_penddata);
+ if (s->sctpi_instrms)
+ printf(" instrms:%d", s->sctpi_instrms);
+ if (s->sctpi_outstrms)
+ printf(" outstrms:%d", s->sctpi_outstrms);
+ if (s->sctpi_inqueue)
+ printf(" inqueue:%d", s->sctpi_inqueue);
+ if (s->sctpi_outqueue)
+ printf(" outqueue:%d", s->sctpi_outqueue);
+ if (s->sctpi_overall_error)
+ printf(" overerr:%d", s->sctpi_overall_error);
+ if (s->sctpi_max_burst)
+ printf(" maxburst:%d", s->sctpi_max_burst);
+ if (s->sctpi_maxseg)
+ printf(" maxseg:%d", s->sctpi_maxseg);
+ if (s->sctpi_peer_rwnd)
+ printf(" prwnd:%d", s->sctpi_peer_rwnd);
+ if (s->sctpi_peer_tag)
+ printf(" ptag:%x", s->sctpi_peer_tag);
+ if (s->sctpi_peer_capable)
+ printf(" pcapable:%d", s->sctpi_peer_capable);
+ if (s->sctpi_peer_sack)
+ printf(" psack:%d", s->sctpi_peer_sack);
+ if (s->sctpi_s_autoclose)
+ printf(" autoclose:%d", s->sctpi_s_autoclose);
+ if (s->sctpi_s_adaptation_ind)
+ printf(" adapind:%d", s->sctpi_s_adaptation_ind);
+ if (s->sctpi_s_pd_point)
+ printf(" pdpoint:%d", s->sctpi_s_pd_point);
+ if (s->sctpi_s_nodelay)
+ printf(" nodealy:%d", s->sctpi_s_nodelay);
+ if (s->sctpi_s_disable_fragments)
+ printf(" nofrag:%d", s->sctpi_s_disable_fragments);
+ if (s->sctpi_s_v4mapped)
+ printf(" v4mapped:%d", s->sctpi_s_v4mapped);
+ if (s->sctpi_s_frag_interleave)
+ printf(" fraginl:%d", s->sctpi_s_frag_interleave);
+}
+
static void tcp_stats_print(struct tcpstat *s)
{
char b1[64];
@@ -1902,6 +2005,13 @@ static void tcp_timer_print(struct tcpstat *s)
}
}
+static void sctp_timer_print(struct tcpstat *s)
+{
+ if (s->timer)
+ printf(" timer:(T3_RTX,%s,%d)",
+ print_ms_timer(s->timeout), s->retrans);
+}
+
static int tcp_show_line(char *line, const struct filter *f, int family)
{
int rto = 0, ato = 0;
@@ -2168,6 +2278,64 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
}
}
+static const char *format_host_sa(struct sockaddr_storage *sa)
+{
+ union {
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+ } *saddr = (void *)sa;
+
+ switch (sa->ss_family) {
+ case AF_INET:
+ return format_host(AF_INET, 4, &saddr->sin.sin_addr);
+ case AF_INET6:
+ return format_host(AF_INET6, 16, &saddr->sin6.sin6_addr);
+ default:
+ return "";
+ }
+}
+
+static void sctp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r,
+ struct rtattr *tb[])
+{
+ struct sockaddr_storage *sa;
+ int len;
+
+ print_skmeminfo(tb, INET_DIAG_SKMEMINFO);
+
+ if (tb[INET_DIAG_LOCALS]) {
+ len = RTA_PAYLOAD(tb[INET_DIAG_LOCALS]);
+ sa = RTA_DATA(tb[INET_DIAG_LOCALS]);
+
+ printf("locals:%s", format_host_sa(sa));
+ for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa))
+ printf(",%s", format_host_sa(sa));
+
+ }
+ if (tb[INET_DIAG_PEERS]) {
+ len = RTA_PAYLOAD(tb[INET_DIAG_PEERS]);
+ sa = RTA_DATA(tb[INET_DIAG_PEERS]);
+
+ printf(" peers:%s", format_host_sa(sa));
+ for (sa++, len -= sizeof(*sa); len > 0; sa++, len -= sizeof(*sa))
+ printf(",%s", format_host_sa(sa));
+ }
+ if (tb[INET_DIAG_INFO]) {
+ struct sctp_info *info;
+ len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
+
+ /* workaround for older kernels with less fields */
+ if (len < sizeof(*info)) {
+ info = alloca(sizeof(*info));
+ memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
+ memset((char *)info + len, 0, sizeof(*info) - len);
+ } else
+ info = RTA_DATA(tb[INET_DIAG_INFO]);
+
+ sctp_stats_print(info);
+ }
+}
+
static void parse_diag_msg(struct nlmsghdr *nlh, struct sockstat *s)
{
struct rtattr *tb[INET_DIAG_MAX+1];
@@ -2221,7 +2389,10 @@ static int inet_show_sock(struct nlmsghdr *nlh,
t.timer = r->idiag_timer;
t.timeout = r->idiag_expires;
t.retrans = r->idiag_retrans;
- tcp_timer_print(&t);
+ if (protocol == IPPROTO_SCTP)
+ sctp_timer_print(&t);
+ else
+ tcp_timer_print(&t);
}
if (show_details) {
@@ -2242,8 +2413,12 @@ static int inet_show_sock(struct nlmsghdr *nlh,
if (show_mem || show_tcpinfo) {
printf("\n\t");
- tcp_show_info(nlh, r, tb);
+ if (protocol == IPPROTO_SCTP)
+ sctp_show_info(nlh, r, tb);
+ else
+ tcp_show_info(nlh, r, tb);
}
+ sctp_ino = s->ino;
printf("\n");
return 0;
@@ -2627,6 +2802,17 @@ outerr:
} while (0);
}
+static int sctp_show(struct filter *f)
+{
+ if (!filter_af_get(f, AF_INET) && !filter_af_get(f, AF_INET6))
+ return 0;
+
+ if (!getenv("PROC_NET_SCTP") && !getenv("PROC_ROOT")
+ && inet_show_netlink(f, NULL, IPPROTO_SCTP) == 0)
+ return 0;
+
+ return 0;
+}
static int dgram_show_line(char *line, const struct filter *f, int family)
{
@@ -3738,6 +3924,7 @@ static void _usage(FILE *dest)
" -6, --ipv6 display only IP version 6 sockets\n"
" -0, --packet display PACKET sockets\n"
" -t, --tcp display only TCP sockets\n"
+" -S, --sctp display only SCTP sockets\n"
" -u, --udp display only UDP sockets\n"
" -d, --dccp display only DCCP sockets\n"
" -w, --raw display only RAW sockets\n"
@@ -3820,6 +4007,7 @@ static const struct option long_opts[] = {
{ "events", 0, 0, 'E' },
{ "dccp", 0, 0, 'd' },
{ "tcp", 0, 0, 't' },
+ { "sctp", 0, 0, 'S' },
{ "udp", 0, 0, 'u' },
{ "raw", 0, 0, 'w' },
{ "unix", 0, 0, 'x' },
@@ -3855,7 +4043,7 @@ int main(int argc, char *argv[])
int ch;
int state_filter = 0;
- while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbEf:miA:D:F:vVzZN:KH",
+ while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spbEf:miA:D:F:vVzZN:KHS",
long_opts, NULL)) != EOF) {
switch (ch) {
case 'n':
@@ -3894,6 +4082,9 @@ int main(int argc, char *argv[])
case 't':
filter_db_set(¤t_filter, TCP_DB);
break;
+ case 'S':
+ filter_db_set(¤t_filter, SCTP_DB);
+ break;
case 'u':
filter_db_set(¤t_filter, UDP_DB);
break;
@@ -3958,6 +4149,7 @@ int main(int argc, char *argv[])
filter_db_set(¤t_filter, UDP_DB);
filter_db_set(¤t_filter, DCCP_DB);
filter_db_set(¤t_filter, TCP_DB);
+ filter_db_set(¤t_filter, SCTP_DB);
filter_db_set(¤t_filter, RAW_DB);
} else if (strcmp(p, "udp") == 0) {
filter_db_set(¤t_filter, UDP_DB);
@@ -3965,6 +4157,8 @@ int main(int argc, char *argv[])
filter_db_set(¤t_filter, DCCP_DB);
} else if (strcmp(p, "tcp") == 0) {
filter_db_set(¤t_filter, TCP_DB);
+ } else if (strcmp(p, "sctp") == 0) {
+ filter_db_set(¤t_filter, SCTP_DB);
} else if (strcmp(p, "raw") == 0) {
filter_db_set(¤t_filter, RAW_DB);
} else if (strcmp(p, "unix") == 0) {
@@ -4089,7 +4283,7 @@ int main(int argc, char *argv[])
filter_merge_defaults(¤t_filter);
if (resolve_services && resolve_hosts &&
- (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB))))
+ (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<SCTP_DB))))
init_service_resolver();
@@ -4205,6 +4399,8 @@ int main(int argc, char *argv[])
tcp_show(¤t_filter, IPPROTO_TCP);
if (current_filter.dbs & (1<<DCCP_DB))
tcp_show(¤t_filter, IPPROTO_DCCP);
+ if (current_filter.dbs & (1<<SCTP_DB))
+ sctp_show(¤t_filter);
if (show_users || show_proc_ctx || show_sock_ctx)
user_ent_destroy();
--
2.10.0
^ permalink raw reply related
* [iproute PATCH v2 0/2] ss: Implement sctp_diag support
From: Phil Sutter @ 2016-11-09 11:12 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev
This is version two of the patch set adding SCTP support to ss. With only
minor changes from v1, it is mostly a resubmit to draw people's attention.
The changes from v1 are:
- Patch 1 adds linux/sctp.h after proper extraction from kernel UAPI
headers (although it didn't change due to that), patch description
updated accordingly.
- Patch 2 also adds the new flag to ss man page.
Phil Sutter (2):
include: Add linux/sctp.h
ss: Add support for SCTP protocol
include/linux/sctp.h | 1005 ++++++++++++++++++++++++++++++++++++++++++++++++++
man/man8/ss.8 | 3 +
misc/ss.c | 212 ++++++++++-
3 files changed, 1212 insertions(+), 8 deletions(-)
create mode 100644 include/linux/sctp.h
--
2.10.0
^ permalink raw reply
* Re: [PATCH net-next 1/2] bpf, mlx4: fix prog refcount in mlx4_en_try_alloc_resources error path
From: Daniel Borkmann @ 2016-11-09 11:04 UTC (permalink / raw)
To: kbuild test robot
Cc: kbuild-all, Zhiyi Sun, bblanco, Tariq Toukan, Yishai Hadas,
netdev, linux-rdma, linux-kernel, alexei.starovoitov
In-Reply-To: <201611091853.HAp072gP%fengguang.wu@intel.com>
On 11/09/2016 11:58 AM, kbuild test robot wrote:
[...]
> All errors (new ones prefixed by >>):
>
> drivers/net/ethernet/mellanox/mlx4/en_netdev.c: In function 'mlx4_xdp_set':
>>> drivers/net/ethernet/mellanox/mlx4/en_netdev.c:2752:4: error: implicit declaration of function 'bpf_prog_add_undo' [-Werror=implicit-function-declaration]
> bpf_prog_add_undo(prog, priv->rx_ring_num - 1);
> ^~~~~~~~~~~~~~~~~
> cc1: some warnings being treated as errors
Ahh right, needs an empty variant for !CONFIG_BPF_SYSCALL. I'll fix that up
before sending an official patch.
Thanks,
Daniel
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox