Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next V3 3/7] liquidio CN23XX: VF mac address
From: Raghu Vatsavayi @ 2016-12-07 16:54 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481129677-10586-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for configuring MTU, multicast and MAC address.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 195 +++++++++++++++++++++
 .../net/ethernet/cavium/liquidio/liquidio_common.h |   1 +
 .../net/ethernet/cavium/liquidio/octeon_network.h  |   1 +
 3 files changed, 197 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 07e4864..6f23944 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -864,6 +864,194 @@ static int liquidio_stop(struct net_device *netdev)
 	return 0;
 }
 
+/**
+ * \brief Converts a mask based on net device flags
+ * @param netdev network device
+ *
+ * This routine generates an octnet_ifflags mask from the net device flags
+ * received from the OS.
+ */
+static enum octnet_ifflags get_new_flags(struct net_device *netdev)
+{
+	enum octnet_ifflags f = OCTNET_IFFLAG_UNICAST;
+
+	if (netdev->flags & IFF_PROMISC)
+		f |= OCTNET_IFFLAG_PROMISC;
+
+	if (netdev->flags & IFF_ALLMULTI)
+		f |= OCTNET_IFFLAG_ALLMULTI;
+
+	if (netdev->flags & IFF_MULTICAST) {
+		f |= OCTNET_IFFLAG_MULTICAST;
+
+		/* Accept all multicast addresses if there are more than we
+		 * can handle
+		 */
+		if (netdev_mc_count(netdev) > MAX_OCTEON_MULTICAST_ADDR)
+			f |= OCTNET_IFFLAG_ALLMULTI;
+	}
+
+	if (netdev->flags & IFF_BROADCAST)
+		f |= OCTNET_IFFLAG_BROADCAST;
+
+	return f;
+}
+
+static void liquidio_set_uc_list(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct netdev_hw_addr *ha;
+	u64 *mac;
+
+	if (lio->netdev_uc_count == netdev_uc_count(netdev))
+		return;
+
+	if (netdev_uc_count(netdev) > MAX_NCTRL_UDD) {
+		dev_err(&oct->pci_dev->dev, "too many MAC addresses in netdev uc list\n");
+		return;
+	}
+
+	lio->netdev_uc_count = netdev_uc_count(netdev);
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_UC_LIST;
+	nctrl.ncmd.s.more = lio->netdev_uc_count;
+	nctrl.ncmd.s.param1 = oct->vf_num;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	/* copy all the addresses into the udd */
+	mac = &nctrl.udd[0];
+	netdev_for_each_uc_addr(ha, netdev) {
+		ether_addr_copy(((u8 *)mac) + 2, ha->addr);
+		mac++;
+	}
+
+	octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+}
+
+/**
+ * \brief Net device set_rx_mode: send RX flags and mcast list to the core
+ * @param netdev network device
+ */
+static void liquidio_set_mcast_list(struct net_device *netdev)
+{
+	int mc_count = min(netdev_mc_count(netdev), MAX_OCTEON_MULTICAST_ADDR);
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct netdev_hw_addr *ha;
+	u64 *mc;
+	int ret;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	/* Create a ctrl pkt command to be sent to core app. */
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_MULTI_LIST;
+	nctrl.ncmd.s.param1 = get_new_flags(netdev);
+	nctrl.ncmd.s.param2 = mc_count;
+	nctrl.ncmd.s.more = mc_count;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	/* copy at most mc_count addresses into the udd */
+	mc = &nctrl.udd[0];
+	netdev_for_each_mc_addr(ha, netdev) {
+		*mc = 0;
+		ether_addr_copy(((u8 *)mc) + 2, ha->addr);
+		/* no need to swap bytes */
+		if (++mc >= &nctrl.udd[mc_count])
+			break;
+	}
+
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	/* Apparently, any activity in this call from the kernel has to
+	 * be atomic. So we won't wait for response.
+	 */
+	nctrl.wait_time = 0;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
+			ret);
+	}
+
+	liquidio_set_uc_list(netdev);
+}
+
+/**
+ * \brief Net device set_mac_address
+ * @param netdev network device
+ */
+static int liquidio_set_mac(struct net_device *netdev, void *p)
+{
+	struct sockaddr *addr = (struct sockaddr *)p;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
+		return 0;
+
+	if (lio->linfo.macaddr_is_admin_asgnd)
+		return -EPERM;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
+	nctrl.ncmd.s.param1 = 0;
+	nctrl.ncmd.s.more = 1;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+	nctrl.wait_time = 100;
+
+	nctrl.udd[0] = 0;
+	/* The MAC Address is presented in network byte order. */
+	ether_addr_copy((u8 *)&nctrl.udd[0] + 2, addr->sa_data);
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
+		return -ENOMEM;
+	}
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, addr->sa_data);
+
+	return 0;
+}
+
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ */
+static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	lio->mtu = new_mtu;
+
+	netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
+		 netdev->name, netdev->mtu, new_mtu);
+
+	netdev->mtu = new_mtu;
+
+	return 0;
+}
+
 /** Sending command to enable/disable RX checksum offload
  * @param netdev                pointer to network device
  * @param command               OCTNET_CMD_TNL_RX_CSUM_CTL
@@ -966,6 +1154,9 @@ static int liquidio_set_features(struct net_device *netdev,
 static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
+	.ndo_set_mac_address	= liquidio_set_mac,
+	.ndo_set_rx_mode	= liquidio_set_mcast_list,
+	.ndo_change_mtu		= liquidio_change_mtu,
 	.ndo_fix_features	= liquidio_fix_features,
 	.ndo_set_features	= liquidio_set_features,
 	.ndo_select_queue	= select_q,
@@ -1165,6 +1356,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		netdev->hw_features = lio->dev_capability;
 
+		/* MTU range: 68 - 16000 */
+		netdev->min_mtu = LIO_MIN_MTU_SIZE;
+		netdev->max_mtu = LIO_MAX_MTU_SIZE;
+
 		/* Point to the  properties for octeon device to which this
 		 * interface belongs.
 		 */
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index f308ee4..ba329f6 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -212,6 +212,7 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 
 #define   OCTNET_CMD_ID_ACTIVE         0x1a
 
+#define   OCTNET_CMD_SET_UC_LIST       0x1b
 #define   OCTNET_CMD_SET_VF_LINKSTATE  0x1c
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index e94edc8..6bb8941 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -123,6 +123,7 @@ struct lio {
 	/* work queue for  link status */
 	struct cavium_wq	link_status_wq;
 
+	int netdev_uc_count;
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V3 4/7] liquidio CN23XX: VF scatter gather lists
From: Raghu Vatsavayi @ 2016-12-07 16:54 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481129677-10586-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for VF scatter gather lists.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 134 +++++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 6f23944..f861a9b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -55,10 +55,28 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
+
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
 #define OCTNIC_GSO_MAX_SIZE \
 		(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
+struct octnic_gather {
+	/* List manipulation. Next and prev pointers. */
+	struct list_head list;
+
+	/* Size of the gather component at sg in bytes. */
+	int sg_size;
+
+	/* Number of bytes that sg was adjusted to make it 8B-aligned. */
+	int adjust;
+
+	/* Gather component that can accommodate max sized fragment list
+	 * received from the IP layer.
+	 */
+	struct octeon_sg_entry *sg;
+};
+
 struct octeon_device_priv {
 	/* Tasklet structures for this device. */
 	struct tasklet_struct droq_tasklet;
@@ -237,6 +255,114 @@ static void start_txq(struct net_device *netdev)
 }
 
 /**
+ * Remove the node at the head of the list. The list would be empty at
+ * the end of this call if there are no more nodes in the list.
+ */
+static struct list_head *list_delete_head(struct list_head *root)
+{
+	struct list_head *node;
+
+	if ((root->prev == root) && (root->next == root))
+		node = NULL;
+	else
+		node = root->next;
+
+	if (node)
+		list_del(node);
+
+	return node;
+}
+
+/**
+ * \brief Delete gather lists
+ * @param lio per-network private data
+ */
+static void delete_glists(struct lio *lio)
+{
+	struct octnic_gather *g;
+	int i;
+
+	if (!lio->glist)
+		return;
+
+	for (i = 0; i < lio->linfo.num_txpciq; i++) {
+		do {
+			g = (struct octnic_gather *)
+			    list_delete_head(&lio->glist[i]);
+			if (g) {
+				if (g->sg)
+					kfree((void *)((unsigned long)g->sg -
+							g->adjust));
+				kfree(g);
+			}
+		} while (g);
+	}
+
+	kfree(lio->glist);
+	kfree(lio->glist_lock);
+}
+
+/**
+ * \brief Setup gather lists
+ * @param lio per-network private data
+ */
+static int setup_glists(struct lio *lio, int num_iqs)
+{
+	struct octnic_gather *g;
+	int i, j;
+
+	lio->glist_lock =
+	    kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
+	if (!lio->glist_lock)
+		return 1;
+
+	lio->glist =
+	    kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
+	if (!lio->glist) {
+		kfree(lio->glist_lock);
+		return 1;
+	}
+
+	for (i = 0; i < num_iqs; i++) {
+		spin_lock_init(&lio->glist_lock[i]);
+
+		INIT_LIST_HEAD(&lio->glist[i]);
+
+		for (j = 0; j < lio->tx_qsize; j++) {
+			g = kzalloc(sizeof(*g), GFP_KERNEL);
+			if (!g)
+				break;
+
+			g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
+				      OCT_SG_ENTRY_SIZE);
+
+			g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
+			if (!g->sg) {
+				kfree(g);
+				break;
+			}
+
+			/* The gather component should be aligned on 64-bit
+			 * boundary
+			 */
+			if (((unsigned long)g->sg) & 7) {
+				g->adjust = 8 - (((unsigned long)g->sg) & 7);
+				g->sg = (struct octeon_sg_entry *)
+					((unsigned long)g->sg + g->adjust);
+			}
+			list_add_tail(&g->list, &lio->glist[i]);
+		}
+
+		if (j != lio->tx_qsize) {
+			delete_glists(lio);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
  * \brief Print link information
  * @param netdev network device
  */
@@ -681,6 +807,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 
 	cleanup_link_status_change_wq(netdev);
 
+	delete_glists(lio);
+
 	free_netdev(netdev);
 
 	oct->props[ifidx].gmxport = -1;
@@ -1379,6 +1507,12 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		/* Copy MAC Address to OS network device structure */
 		ether_addr_copy(netdev->dev_addr, mac);
 
+		if (setup_glists(lio, num_iqueues)) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"Gather list allocation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
 		if (netdev->features & NETIF_F_LRO)
 			liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
 					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V3 5/7] liquidio CN23XX: VF xmit
From: Raghu Vatsavayi @ 2016-12-07 16:54 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481129677-10586-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for transmit functionality in VF.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 .../ethernet/cavium/liquidio/cn23xx_vf_device.c    |  21 ++
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 340 +++++++++++++++++++++
 .../net/ethernet/cavium/liquidio/request_manager.c |   6 +-
 3 files changed, 364 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
index 108e487..b6117b6 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
@@ -529,6 +529,26 @@ static u64 cn23xx_vf_msix_interrupt_handler(void *dev)
 	return ret;
 }
 
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+	u32 pkt_in_done = readl(iq->inst_cnt_reg);
+	u32 last_done;
+	u32 new_idx;
+
+	last_done = pkt_in_done - iq->pkt_in_done;
+	iq->pkt_in_done = pkt_in_done;
+
+	/* Modulo of the new index with the IQ size will give us
+	 * the new index.  The iq->reset_instr_cnt is always zero for
+	 * cn23xx, so no extra adjustments are needed.
+	 */
+	new_idx = (iq->octeon_read_index +
+		   (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+		  iq->max_count;
+
+	return new_idx;
+}
+
 static void cn23xx_enable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
 {
 	struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
@@ -660,6 +680,7 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
 	oct->fn_list.msix_interrupt_handler = cn23xx_vf_msix_interrupt_handler;
 
 	oct->fn_list.setup_device_regs = cn23xx_setup_vf_device_regs;
+	oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
 
 	oct->fn_list.enable_interrupt = cn23xx_enable_vf_interrupt;
 	oct->fn_list.disable_interrupt = cn23xx_disable_vf_interrupt;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index f861a9b..bcc8888 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -55,6 +55,21 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+union tx_info {
+	u64 u64;
+	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+		u16 gso_size;
+		u16 gso_segs;
+		u32 reserved;
+#else
+		u32 reserved;
+		u16 gso_segs;
+		u16 gso_size;
+#endif
+	} s;
+};
+
 #define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
 
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
@@ -255,6 +270,19 @@ static void start_txq(struct net_device *netdev)
 }
 
 /**
+ * \brief Stop a queue
+ * @param netdev network device
+ * @param q which queue to stop
+ */
+static void stop_q(struct net_device *netdev, int q)
+{
+	if (netif_is_multiqueue(netdev))
+		netif_stop_subqueue(netdev, q);
+	else
+		netif_stop_queue(netdev);
+}
+
+/**
  * Remove the node at the head of the list. The list would be empty at
  * the end of this call if there are no more nodes in the list.
  */
@@ -945,6 +973,45 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
 }
 
 /**
+ * \brief Setup input and output queues
+ * @param octeon_dev octeon device
+ * @param ifidx Interface index
+ *
+ * Note: Queues are with respect to the octeon device. Thus
+ * an input queue is for egress packets, and output queues
+ * are for ingress packets.
+ */
+static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
+{
+	struct net_device *netdev;
+	int num_tx_descs;
+	struct lio *lio;
+	int retval = 0;
+	int q;
+
+	netdev = octeon_dev->props[ifidx].netdev;
+
+	lio = GET_LIO(netdev);
+
+	/* set up IQs. */
+	for (q = 0; q < lio->linfo.num_txpciq; q++) {
+		num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
+		    octeon_get_conf(octeon_dev), lio->ifidx);
+		retval = octeon_setup_iq(octeon_dev, ifidx, q,
+					 lio->linfo.txpciq[q], num_tx_descs,
+					 netdev_get_tx_queue(netdev, q));
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				" %s : Runtime IQ(TxQ) creation failed.\n",
+				__func__);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
  * \brief Net device open for LiquidIO
  * @param netdev network device
  */
@@ -1180,6 +1247,259 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
 	return 0;
 }
 
+/** \brief Transmit network packets to the Octeon interface
+ * @param skb      skbuff struct holding the packet to be transmitted
+ * @param netdev   pointer to network device
+ * @returns whether the packet was transmitted to the device okay or not
+ *             (NETDEV_TX_OK or NETDEV_TX_BUSY)
+ */
+static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct octnet_buf_free_info *finfo;
+	union octnic_cmd_setup cmdsetup;
+	struct octnic_data_pkt ndata;
+	struct octeon_instr_irh *irh;
+	struct oct_iq_stats *stats;
+	struct octeon_device *oct;
+	int q_idx = 0, iq_no = 0;
+	union tx_info *tx_info;
+	struct lio *lio;
+	int status = 0;
+	u64 dptr = 0;
+	u32 tag = 0;
+	int j;
+
+	lio = GET_LIO(netdev);
+	oct = lio->oct_dev;
+
+	if (netif_is_multiqueue(netdev)) {
+		q_idx = skb->queue_mapping;
+		q_idx = (q_idx % (lio->linfo.num_txpciq));
+		tag = q_idx;
+		iq_no = lio->linfo.txpciq[q_idx].s.q_no;
+	} else {
+		iq_no = lio->txq;
+	}
+
+	stats = &oct->instr_queue[iq_no]->stats;
+
+	/* Check for all conditions in which the current packet cannot be
+	 * transmitted.
+	 */
+	if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) ||
+	    (!lio->linfo.link.s.link_up) || (skb->len <= 0)) {
+		netif_info(lio, tx_err, lio->netdev, "Transmit failed link_status : %d\n",
+			   lio->linfo.link.s.link_up);
+		goto lio_xmit_failed;
+	}
+
+	/* Use space in skb->cb to store info used to unmap and
+	 * free the buffers.
+	 */
+	finfo = (struct octnet_buf_free_info *)skb->cb;
+	finfo->lio = lio;
+	finfo->skb = skb;
+	finfo->sc = NULL;
+
+	/* Prepare the attributes for the data to be passed to OSI. */
+	memset(&ndata, 0, sizeof(struct octnic_data_pkt));
+
+	ndata.buf = finfo;
+
+	ndata.q_no = iq_no;
+
+	if (netif_is_multiqueue(netdev)) {
+		if (octnet_iq_is_full(oct, ndata.q_no)) {
+			/* defer sending if queue is full */
+			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+				   ndata.q_no);
+			stats->tx_iq_busy++;
+			return NETDEV_TX_BUSY;
+		}
+	} else {
+		if (octnet_iq_is_full(oct, lio->txq)) {
+			/* defer sending if queue is full */
+			stats->tx_iq_busy++;
+			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+				   ndata.q_no);
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	ndata.datasize = skb->len;
+
+	cmdsetup.u64 = 0;
+	cmdsetup.s.iq_no = iq_no;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		cmdsetup.s.transport_csum = 1;
+
+	if (!skb_shinfo(skb)->nr_frags) {
+		cmdsetup.s.u.datasize = skb->len;
+		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+		/* Offload checksum calculation for TCP/UDP packets */
+		dptr = dma_map_single(&oct->pci_dev->dev,
+				      skb->data,
+				      skb->len,
+				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
+				__func__);
+			return NETDEV_TX_BUSY;
+		}
+
+		ndata.cmd.cmd3.dptr = dptr;
+		finfo->dptr = dptr;
+		ndata.reqtype = REQTYPE_NORESP_NET;
+
+	} else {
+		struct skb_frag_struct *frag;
+		struct octnic_gather *g;
+		int i, frags;
+
+		spin_lock(&lio->glist_lock[q_idx]);
+		g = (struct octnic_gather *)list_delete_head(
+		    &lio->glist[q_idx]);
+		spin_unlock(&lio->glist_lock[q_idx]);
+
+		if (!g) {
+			netif_info(lio, tx_err, lio->netdev,
+				   "Transmit scatter gather: glist null!\n");
+			goto lio_xmit_failed;
+		}
+
+		cmdsetup.s.gather = 1;
+		cmdsetup.s.u.gatherptrs = (skb_shinfo(skb)->nr_frags + 1);
+		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+
+		memset(g->sg, 0, g->sg_size);
+
+		g->sg[0].ptr[0] = dma_map_single(&oct->pci_dev->dev,
+						 skb->data,
+						 (skb->len - skb->data_len),
+						 DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n",
+				__func__);
+			return NETDEV_TX_BUSY;
+		}
+		add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0);
+
+		frags = skb_shinfo(skb)->nr_frags;
+		i = 1;
+		while (frags--) {
+			frag = &skb_shinfo(skb)->frags[i - 1];
+
+			g->sg[(i >> 2)].ptr[(i & 3)] =
+				dma_map_page(&oct->pci_dev->dev,
+					     frag->page.p,
+					     frag->page_offset,
+					     frag->size,
+					     DMA_TO_DEVICE);
+			if (dma_mapping_error(&oct->pci_dev->dev,
+					      g->sg[i >> 2].ptr[i & 3])) {
+				dma_unmap_single(&oct->pci_dev->dev,
+						 g->sg[0].ptr[0],
+						 skb->len - skb->data_len,
+						 DMA_TO_DEVICE);
+				for (j = 1; j < i; j++) {
+					frag = &skb_shinfo(skb)->frags[j - 1];
+					dma_unmap_page(&oct->pci_dev->dev,
+						       g->sg[j >> 2].ptr[j & 3],
+						       frag->size,
+						       DMA_TO_DEVICE);
+				}
+				dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
+					__func__);
+				return NETDEV_TX_BUSY;
+			}
+
+			add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
+			i++;
+		}
+
+		dptr = dma_map_single(&oct->pci_dev->dev,
+				      g->sg, g->sg_size,
+				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
+				__func__);
+			dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
+					 skb->len - skb->data_len,
+					 DMA_TO_DEVICE);
+			for (j = 1; j < i; j++) {
+				frag = &skb_shinfo(skb)->frags[j - 1];
+				dma_unmap_page(&oct->pci_dev->dev,
+					       g->sg[j >> 2].ptr[j & 3],
+					       frag->size, DMA_TO_DEVICE);
+			}
+			return NETDEV_TX_BUSY;
+		}
+
+		ndata.cmd.cmd3.dptr = dptr;
+		finfo->dptr = dptr;
+		finfo->g = g;
+
+		ndata.reqtype = REQTYPE_NORESP_NET_SG;
+	}
+
+	irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+	tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+
+	if (skb_shinfo(skb)->gso_size) {
+		tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
+		tx_info->s.gso_segs = skb_shinfo(skb)->gso_segs;
+	}
+
+	status = octnet_send_nic_data_pkt(oct, &ndata);
+	if (status == IQ_SEND_FAILED)
+		goto lio_xmit_failed;
+
+	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
+
+	if (status == IQ_SEND_STOP) {
+		dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
+			iq_no);
+		stop_q(lio->netdev, q_idx);
+	}
+
+	netif_trans_update(netdev);
+
+	if (skb_shinfo(skb)->gso_size)
+		stats->tx_done += skb_shinfo(skb)->gso_segs;
+	else
+		stats->tx_done++;
+	stats->tx_tot_bytes += skb->len;
+
+	return NETDEV_TX_OK;
+
+lio_xmit_failed:
+	stats->tx_dropped++;
+	netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
+		   iq_no, stats->tx_dropped);
+	if (dptr)
+		dma_unmap_single(&oct->pci_dev->dev, dptr,
+				 ndata.datasize, DMA_TO_DEVICE);
+	tx_buffer_free(skb);
+	return NETDEV_TX_OK;
+}
+
+/** \brief Network device Tx timeout
+ * @param netdev    pointer to network device
+ */
+static void liquidio_tx_timeout(struct net_device *netdev)
+{
+	struct lio *lio;
+
+	lio = GET_LIO(netdev);
+
+	netif_info(lio, tx_err, lio->netdev,
+		   "Transmit timeout tx_dropped:%lu, waking up queues now!!\n",
+		   netdev->stats.tx_dropped);
+	netif_trans_update(netdev);
+	txqs_wake(netdev);
+}
+
 /** Sending command to enable/disable RX checksum offload
  * @param netdev                pointer to network device
  * @param command               OCTNET_CMD_TNL_RX_CSUM_CTL
@@ -1282,8 +1602,10 @@ static int liquidio_set_features(struct net_device *netdev,
 static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
+	.ndo_start_xmit		= liquidio_xmit,
 	.ndo_set_mac_address	= liquidio_set_mac,
 	.ndo_set_rx_mode	= liquidio_set_mcast_list,
+	.ndo_tx_timeout		= liquidio_tx_timeout,
 	.ndo_change_mtu		= liquidio_change_mtu,
 	.ndo_fix_features	= liquidio_fix_features,
 	.ndo_set_features	= liquidio_set_features,
@@ -1507,6 +1829,24 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		/* Copy MAC Address to OS network device structure */
 		ether_addr_copy(netdev->dev_addr, mac);
 
+		if (setup_io_queues(octeon_dev, i)) {
+			dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		/* For VFs, enable Octeon device interrupts here,
+		 * as this is contingent upon IO queue setup
+		 */
+		octeon_dev->fn_list.enable_interrupt(octeon_dev,
+						     OCTEON_ALL_INTR);
+
+		/* By default all interfaces on a single Octeon uses the same
+		 * tx and rx queues
+		 */
+		lio->txq = lio->linfo.txpciq[0].s.q_no;
+
+		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
+
 		if (setup_glists(lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
 				"Gather list allocation failed\n");
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index ea2b7e4..3ce6675 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -394,7 +394,7 @@ static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
 		case REQTYPE_SOFT_COMMAND:
 			sc = buf;
 
-			if (OCTEON_CN23XX_PF(oct))
+			if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
 				irh = (struct octeon_instr_irh *)
 					&sc->cmd.cmd3.irh;
 			else
@@ -607,7 +607,7 @@ static void check_db_timeout(struct work_struct *work)
 
 	oct_cfg = octeon_get_conf(oct);
 
-	if (OCTEON_CN23XX_PF(oct)) {
+	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 
 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
@@ -700,7 +700,7 @@ int octeon_send_soft_command(struct octeon_device *oct,
 	struct octeon_instr_irh *irh;
 	u32 len;
 
-	if (OCTEON_CN23XX_PF(oct)) {
+	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 		if (ih3->dlengsz) {
 			WARN_ON(!sc->dmadptr);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V3 6/7] liquidio CN23XX: VF TX buffers
From: Raghu Vatsavayi @ 2016-12-07 16:54 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481129677-10586-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for freeing VF xmit buffers.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 182 +++++++++++++++++++++
 1 file changed, 182 insertions(+)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index bcc8888..e4297f8 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -270,6 +270,19 @@ static void start_txq(struct net_device *netdev)
 }
 
 /**
+ * \brief Wake a queue
+ * @param netdev network device
+ * @param q which queue to wake
+ */
+static void wake_q(struct net_device *netdev, int q)
+{
+	if (netif_is_multiqueue(netdev))
+		netif_wake_subqueue(netdev, q);
+	else
+		netif_wake_queue(netdev);
+}
+
+/**
  * \brief Stop a queue
  * @param netdev network device
  * @param q which queue to stop
@@ -920,6 +933,163 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 	return 0;
 }
 
+static int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+	int q = 0;
+
+	if (netif_is_multiqueue(lio->netdev))
+		q = skb->queue_mapping % lio->linfo.num_txpciq;
+
+	return q;
+}
+
+/**
+ * \brief Check Tx queue state for a given network buffer
+ * @param lio per-network private data
+ * @param skb network buffer
+ */
+static int check_txq_state(struct lio *lio, struct sk_buff *skb)
+{
+	int q = 0, iq = 0;
+
+	if (netif_is_multiqueue(lio->netdev)) {
+		q = skb->queue_mapping;
+		iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
+	} else {
+		iq = lio->txq;
+		q = iq;
+	}
+
+	if (octnet_iq_is_full(lio->oct_dev, iq))
+		return 0;
+
+	if (__netif_subqueue_stopped(lio->netdev, q)) {
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
+		wake_q(lio->netdev, q);
+	}
+
+	return 1;
+}
+
+/**
+ * \brief Unmap and free network buffer
+ * @param buf buffer
+ */
+static void free_netbuf(void *buf)
+{
+	struct octnet_buf_free_info *finfo;
+	struct sk_buff *skb;
+	struct lio *lio;
+
+	finfo = (struct octnet_buf_free_info *)buf;
+	skb = finfo->skb;
+	lio = finfo->lio;
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
+			 DMA_TO_DEVICE);
+
+	check_txq_state(lio, skb);
+
+	tx_buffer_free(skb);
+}
+
+/**
+ * \brief Unmap and free gather buffer
+ * @param buf buffer
+ */
+static void free_netsgbuf(void *buf)
+{
+	struct octnet_buf_free_info *finfo;
+	struct octnic_gather *g;
+	struct sk_buff *skb;
+	int i, frags, iq;
+	struct lio *lio;
+
+	finfo = (struct octnet_buf_free_info *)buf;
+	skb = finfo->skb;
+	lio = finfo->lio;
+	g = finfo->g;
+	frags = skb_shinfo(skb)->nr_frags;
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 g->sg[0].ptr[0], (skb->len - skb->data_len),
+			 DMA_TO_DEVICE);
+
+	i = 1;
+	while (frags--) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+		pci_unmap_page((lio->oct_dev)->pci_dev,
+			       g->sg[(i >> 2)].ptr[(i & 3)],
+			       frag->size, DMA_TO_DEVICE);
+		i++;
+	}
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 finfo->dptr, g->sg_size,
+			 DMA_TO_DEVICE);
+
+	iq = skb_iq(lio, skb);
+
+	spin_lock(&lio->glist_lock[iq]);
+	list_add_tail(&g->list, &lio->glist[iq]);
+	spin_unlock(&lio->glist_lock[iq]);
+
+	check_txq_state(lio, skb); /* mq support: sub-queue state check */
+
+	tx_buffer_free(skb);
+}
+
+/**
+ * \brief Unmap and free gather buffer with response
+ * @param buf buffer
+ */
+static void free_netsgbuf_with_resp(void *buf)
+{
+	struct octnet_buf_free_info *finfo;
+	struct octeon_soft_command *sc;
+	struct octnic_gather *g;
+	struct sk_buff *skb;
+	int i, frags, iq;
+	struct lio *lio;
+
+	sc = (struct octeon_soft_command *)buf;
+	skb = (struct sk_buff *)sc->callback_arg;
+	finfo = (struct octnet_buf_free_info *)&skb->cb;
+
+	lio = finfo->lio;
+	g = finfo->g;
+	frags = skb_shinfo(skb)->nr_frags;
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 g->sg[0].ptr[0], (skb->len - skb->data_len),
+			 DMA_TO_DEVICE);
+
+	i = 1;
+	while (frags--) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+		pci_unmap_page((lio->oct_dev)->pci_dev,
+			       g->sg[(i >> 2)].ptr[(i & 3)],
+			       frag->size, DMA_TO_DEVICE);
+		i++;
+	}
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 finfo->dptr, g->sg_size,
+			 DMA_TO_DEVICE);
+
+	iq = skb_iq(lio, skb);
+
+	spin_lock(&lio->glist_lock[iq]);
+	list_add_tail(&g->list, &lio->glist[iq]);
+	spin_unlock(&lio->glist_lock[iq]);
+
+	/* Don't free the skb yet */
+
+	check_txq_state(lio, skb);
+}
+
 /**
  * \brief Callback for getting interface configuration
  * @param status status of request
@@ -1675,6 +1845,18 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 	octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, OPCODE_NIC_INFO,
 				    lio_nic_info, octeon_dev);
 
+	/* REQTYPE_RESP_NET and REQTYPE_SOFT_COMMAND do not have free functions.
+	 * They are handled directly.
+	 */
+	octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_NORESP_NET,
+					free_netbuf);
+
+	octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_NORESP_NET_SG,
+					free_netsgbuf);
+
+	octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_RESP_NET_SG,
+					free_netsgbuf_with_resp);
+
 	for (i = 0; i < octeon_dev->ifcount; i++) {
 		resp_size = sizeof(struct liquidio_if_cfg_resp);
 		ctx_size = sizeof(struct liquidio_if_cfg_context);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next V3 7/7] liquidio VF rx data and ctl path
From: Raghu Vatsavayi @ 2016-12-07 16:54 UTC (permalink / raw)
  To: davem
  Cc: netdev, Raghu Vatsavayi, Raghu Vatsavayi, Derek Chickles,
	Satanand Burla, Felix Manlunas
In-Reply-To: <1481129677-10586-1-git-send-email-rvatsavayi@caviumnetworks.com>

Adds support for VF receive data control path.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@caviumnetworks.com>
Signed-off-by: Derek Chickles <derek.chickles@caviumnetworks.com>
Signed-off-by: Satanand Burla <satananda.burla@caviumnetworks.com>
Signed-off-by: Felix Manlunas <felix.manlunas@caviumnetworks.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 412 ++++++++++++++++++++-
 .../net/ethernet/cavium/liquidio/octeon_device.c   |   2 +-
 drivers/net/ethernet/cavium/liquidio/octeon_droq.c |  10 +
 .../ethernet/cavium/liquidio/response_manager.c    |   3 +-
 4 files changed, 423 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index e4297f8..9989ac3 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -38,6 +38,8 @@
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
+/* Bit mask values for lio->ifstate */
+#define   LIO_IFSTATE_DROQ_OPS             0x01
 #define   LIO_IFSTATE_REGISTERED           0x02
 #define   LIO_IFSTATE_RUNNING              0x04
 
@@ -55,6 +57,14 @@ struct liquidio_if_cfg_resp {
 	u64 status;
 };
 
+/* Context stored in a soft command's ctxptr for an RX control request;
+ * filled in by send_rx_ctrl_cmd() and consumed by rx_ctl_callback().
+ */
+struct liquidio_rx_ctl_context {
+	/* id of the issuing device, turned back into a device pointer with
+	 * lio_get_device() in the callback
+	 */
+	int octeon_id;
+
+	/* wait queue the sender sleeps on until the callback wakes it */
+	wait_queue_head_t wc;
+
+	/* cleared before the command is sent, set to 1 by the callback */
+	int cond;
+};
+
 union tx_info {
 	u64 u64;
 	struct {
@@ -177,6 +187,16 @@ static int wait_for_pending_requests(struct octeon_device *oct)
 };
 
 /**
+ * \brief Test interface state bits
+ * @param lio per-network private data
+ * @param state_flag mask of LIO_IFSTATE_* bits to test
+ * @returns non-zero when any bit of state_flag is set in lio->ifstate
+ */
+static int ifstate_check(struct lio *lio, int state_flag)
+{
+	int cur_state = atomic_read(&lio->ifstate);
+
+	return cur_state & state_flag;
+}
+
+/**
  * \brief set interface state
  * @param lio per-network private data
  * @param state_flag flag state to set
@@ -510,6 +530,31 @@ static void update_link_status(struct net_device *netdev,
 	}
 }
 
+/**
+ * \brief Restart a stopped tx queue once its instruction queue has room
+ * @param oct octeon device
+ * @param iq_num index of the instruction queue that was just flushed
+ *
+ * NOTE(review): nothing is done for a netdev that is not multiqueue, and the
+ * else branch below pairs with the inner condition rather than with
+ * netif_is_multiqueue() - confirm this nesting is intended.
+ */
+static void update_txq_status(struct octeon_device *oct, int iq_num)
+{
+	struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
+	struct net_device *netdev;
+	struct lio *lio;
+
+	/* Find the interface that owns this instruction queue */
+	netdev = oct->props[iq->ifidx].netdev;
+	lio = GET_LIO(netdev);
+	if (netif_is_multiqueue(netdev)) {
+		/* Wake the sub-queue only if it is stopped, the link is up
+		 * and the instruction queue has free entries again.
+		 */
+		if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+		    lio->linfo.link.s.link_up &&
+		    (!octnet_iq_is_full(oct, iq_num))) {
+			netif_wake_subqueue(netdev, iq->q_index);
+			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
+						  tx_restart, 1);
+		} else {
+			/* Otherwise fall back to the interface's first tx
+			 * queue (lio->txq) if that one has room.
+			 */
+			if (!octnet_iq_is_full(oct, lio->txq)) {
+				INCR_INSTRQUEUE_PKT_COUNT(
+				    lio->oct_dev, lio->txq, tx_restart, 1);
+				wake_q(netdev, lio->txq);
+			}
+		}
+	}
+}
+
 static
 int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
 {
@@ -818,6 +863,91 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 }
 
 /**
+ * \brief Callback for rx ctrl
+ * @param oct octeon device (overwritten below with the device looked up
+ *            from the context's octeon_id)
+ * @param status status of request, non-zero is logged as a failure
+ * @param buf pointer to the octeon_soft_command that completed
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+			    u32 status, void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct liquidio_rx_ctl_context *ctx;
+
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	/* NOTE(review): the result of lio_get_device() is used without a
+	 * NULL check - confirm it cannot fail for a live request.
+	 */
+	oct = lio_get_device(ctx->octeon_id);
+	if (status)
+		dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+			CVM_CAST64(status));
+	WRITE_ONCE(ctx->cond, 1);
+
+	/* This barrier is required to be sure that the cond flag has been
+	 * written before waking up the sleeping sender
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Send Rx control command
+ * @param lio per-network private data
+ * @param start_stop whether to start or stop
+ *
+ * Builds an OCTNET_CMD_RX_CTL soft command, sends it on the interface's
+ * first tx queue and sleeps until rx_ctl_callback() signals completion.
+ * The cached rx_on state is updated only after the wait returns without
+ * being interrupted. Failures are logged and otherwise ignored.
+ */
+static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+{
+	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+	int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+	struct liquidio_rx_ctl_context *ctx;
+	struct octeon_soft_command *sc;
+	union octnet_cmd *ncmd;
+	int retval;
+
+	/* Nothing to do if the cached rx state already matches the request */
+	if (oct->props[lio->ifidx].rx_on == start_stop)
+		return;
+
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+					  16, ctx_size);
+	/* Allocation can fail; bail out instead of dereferencing NULL */
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate soft command for RX Control message\n");
+		return;
+	}
+
+	ncmd = (union octnet_cmd *)sc->virtdptr;
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	/* Arm the completion context before the command can complete */
+	WRITE_ONCE(ctx->cond, 0);
+	ctx->octeon_id = lio_get_device_id(oct);
+	init_waitqueue_head(&ctx->wc);
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+	ncmd->s.param1 = start_stop;
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_CMD, 0, 0, 0);
+
+	sc->callback = rx_ctl_callback;
+	sc->callback_arg = sc;
+	sc->wait_time = 5000;
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+	} else {
+		/* Sleep on a wait queue till the cond flag indicates that the
+		 * response arrived or timed-out.
+		 *
+		 * NOTE(review): on -EINTR the soft command is not freed here.
+		 * It has already been queued and rx_ctl_callback() may still
+		 * touch ctx, so freeing it looks unsafe - confirm who owns sc
+		 * on this path.
+		 */
+		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+			return;
+		oct->props[lio->ifidx].rx_on = start_stop;
+	}
+
+	octeon_free_soft_command(oct, sc);
+}
+
+/**
  * \brief Destroy NIC device interface
  * @param oct octeon device
  * @param ifidx which interface to destroy
@@ -828,6 +958,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 {
 	struct net_device *netdev = oct->props[ifidx].netdev;
+	struct napi_struct *napi, *n;
 	struct lio *lio;
 
 	if (!netdev) {
@@ -843,6 +974,15 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
 		liquidio_stop(netdev);
 
+	if (oct->props[lio->ifidx].napi_enabled == 1) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_disable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 0;
+
+		oct->droq[0]->ops.poll_mode = 0;
+	}
+
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
@@ -863,7 +1003,8 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
  */
 static int liquidio_stop_nic_module(struct octeon_device *oct)
 {
-	int i;
+	struct lio *lio;
+	int i, j;
 
 	dev_dbg(&oct->pci_dev->dev, "Stopping network interfaces\n");
 	if (!oct->ifcount) {
@@ -871,6 +1012,17 @@ static int liquidio_stop_nic_module(struct octeon_device *oct)
 		return 1;
 	}
 
+	spin_lock_bh(&oct->cmd_resp_wqlock);
+	oct->cmd_resp_state = OCT_DRV_OFFLINE;
+	spin_unlock_bh(&oct->cmd_resp_wqlock);
+
+	for (i = 0; i < oct->ifcount; i++) {
+		lio = GET_LIO(oct->props[i].netdev);
+		for (j = 0; j < lio->linfo.num_rxpciq; j++)
+			octeon_unregister_droq_ops(oct,
+						   lio->linfo.rxpciq[j].s.q_no);
+	}
+
 	for (i = 0; i < oct->ifcount; i++)
 		liquidio_destroy_nic_device(oct, i);
 
@@ -1091,6 +1243,41 @@ static void free_netsgbuf_with_resp(void *buf)
 }
 
 /**
+ * \brief Create an output queue (droq) and make it ready to receive
+ * @param oct octeon device
+ * @param q_no which queue
+ * @param num_descs how many descriptors
+ * @param desc_size size of each descriptor
+ * @param app_ctx application context
+ * @returns the negative value from octeon_create_droq() on failure, 0 when
+ *          octeon_create_droq() returned 1 (default droq in use), otherwise
+ *          the value octeon_create_droq() returned.
+ */
+static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
+			     int desc_size, void *app_ctx)
+{
+	struct octeon_droq *droq;
+	int rc;
+
+	dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
+
+	/* droq creation and local register settings. */
+	rc = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
+	if (rc < 0)
+		return rc;
+
+	/* A result of 1 means no further enabling is done for this queue */
+	if (rc == 1) {
+		dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
+		return 0;
+	}
+
+	/* Enable the droq queues */
+	octeon_set_droq_pkt_op(oct, q_no, 1);
+
+	/* Credits for the Octeon output queue are always written after the
+	 * queue has been enabled.
+	 */
+	droq = oct->droq[q_no];
+	writel(droq->max_count, droq->pkts_credit_reg);
+
+	return rc;
+}
+
+/**
  * \brief Callback for getting interface configuration
  * @param status status of request
  * @param buf pointer to resp structure
@@ -1142,6 +1329,155 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
 	return (u16)(qindex % (lio->linfo.num_txpciq));
 }
 
+/** Routine to push packets arriving on Octeon interface up to network layer.
+ * @param octeon_id - octeon device id (unused).
+ * @param skbuff    - skbuff struct to be passed to network layer.
+ * @param len       - size of total data received.
+ * @param rh        - Control header associated with the packet
+ * @param param     - napi_struct embedded in the receiving octeon_droq
+ * @param arg       - farg registered in droq_ops (the net_device, may be NULL)
+ *
+ * The skb is always consumed: it is either handed to GRO or released with
+ * recv_buffer_free().
+ */
+static void
+liquidio_push_packet(u32 octeon_id __attribute__((unused)),
+		     void *skbuff,
+		     u32 len,
+		     union octeon_rh *rh,
+		     void *param,
+		     void *arg)
+{
+	struct napi_struct *napi = param;
+	struct octeon_droq *droq =
+		container_of(param, struct octeon_droq, napi);
+	struct net_device *netdev = (struct net_device *)arg;
+	struct sk_buff *skb = (struct sk_buff *)skbuff;
+
+	if (netdev) {
+		struct lio *lio = GET_LIO(netdev);
+		int packet_was_received;
+
+		/* Do not proceed if the interface is not in RUNNING state. */
+		if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
+			recv_buffer_free(skb);
+			droq->stats.rx_dropped++;
+			return;
+		}
+
+		skb->dev = netdev;
+
+		skb_record_rx_queue(skb, droq->q_no);
+		if (likely(len > MIN_SKB_SIZE)) {
+			struct octeon_skb_page_info *pg_info;
+			unsigned char *va;
+
+			/* NOTE(review): if pg_info->page is NULL nothing is
+			 * copied here - presumably the data is already in the
+			 * skb's linear area; confirm.
+			 */
+			pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+			if (pg_info->page) {
+				/* For Paged allocation use the frags: copy the
+				 * first MIN_SKB_SIZE bytes into the linear
+				 * area and attach the rest of the page as a
+				 * fragment.
+				 */
+				va = page_address(pg_info->page) +
+					pg_info->page_offset;
+				memcpy(skb->data, va, MIN_SKB_SIZE);
+				skb_put(skb, MIN_SKB_SIZE);
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						pg_info->page,
+						pg_info->page_offset +
+						MIN_SKB_SIZE,
+						len - MIN_SKB_SIZE,
+						LIO_RXBUFFER_SZ);
+			}
+		} else {
+			/* Small packet: copy all of it into the linear area
+			 * and drop the reference on the receive page.
+			 */
+			struct octeon_skb_page_info *pg_info =
+				((struct octeon_skb_page_info *)(skb->cb));
+			skb_copy_to_linear_data(skb,
+						page_address(pg_info->page) +
+						pg_info->page_offset, len);
+			skb_put(skb, len);
+			put_page(pg_info->page);
+		}
+
+		/* Strip rh->r_dh.len * 8 leading bytes before the Ethernet
+		 * header is parsed (presumably a device header counted in
+		 * 8-byte words - TODO confirm).
+		 */
+		skb_pull(skb, rh->r_dh.len * 8);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+
+		if ((netdev->features & NETIF_F_RXCSUM) &&
+		    (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))
+			/* checksum has already been verified */
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		else
+			skb->ip_summed = CHECKSUM_NONE;
+
+		packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
+
+		/* Per-droq statistics reflect whether GRO accepted the skb */
+		if (packet_was_received) {
+			droq->stats.rx_bytes_received += len;
+			droq->stats.rx_pkts_received++;
+			netdev->last_rx = jiffies;
+		} else {
+			droq->stats.rx_dropped++;
+			netif_info(lio, rx_err, lio->netdev,
+				   "droq:%d  error rx_dropped:%llu\n",
+				   droq->q_no, droq->stats.rx_dropped);
+		}
+
+	} else {
+		/* No net_device registered for this droq: drop the buffer */
+		recv_buffer_free(skb);
+	}
+}
+
+/**
+ * \brief Receive-interrupt hook used while a droq is in NAPI mode
+ * @param arg the octeon output queue (struct octeon_droq *) to poll
+ */
+static void liquidio_vf_napi_drv_callback(void *arg)
+{
+	struct octeon_droq *oq = (struct octeon_droq *)arg;
+	struct napi_struct *napi = &oq->napi;
+
+	napi_schedule_irqoff(napi);
+}
+
+/**
+ * \brief Entry point for NAPI polling
+ * @param napi NAPI structure (embedded in the droq being polled)
+ * @param budget maximum number of items to process
+ * @returns 0 after completing NAPI and re-enabling the interrupt when rx
+ *          work stayed below budget and the tx flush reported done;
+ *          otherwise budget if the tx flush is not done, else the rx
+ *          work count.
+ */
+static int liquidio_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct octeon_instr_queue *iq;
+	struct octeon_device *oct;
+	struct octeon_droq *droq;
+	int tx_done = 0, iq_no;
+	int work_done;
+
+	droq = container_of(napi, struct octeon_droq, napi);
+	oct = droq->oct_dev;
+	/* The instruction queue with the same index as the droq is flushed */
+	iq_no = droq->q_no;
+
+	/* Handle Droq descriptors */
+	work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
+						 POLL_EVENT_PROCESS_PKTS,
+						 budget);
+
+	/* Flush the instruction queue */
+	iq = oct->instr_queue[iq_no];
+	if (iq) {
+		/* Process iq buffers within the budget limits */
+		tx_done = octeon_flush_iq(oct, iq, 1, budget);
+		/* Wake the tx queue now if the flush made room, rather than
+		 * waiting for the next interrupt.
+		 */
+		update_txq_status(oct, iq_no);
+	} else {
+		/* tx_done stays 0 here, so the full budget is returned below */
+		dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
+			__func__, iq_no);
+	}
+
+	/* Leave polling mode only when both rx and tx are caught up */
+	if ((work_done < budget) && (tx_done)) {
+		napi_complete(napi);
+		octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
+					     POLL_EVENT_ENABLE_INTR, 0);
+		return 0;
+	}
+
+	return (!tx_done) ? (budget) : (work_done);
+}
+
 /**
  * \brief Setup input and output queues
  * @param octeon_dev octeon device
@@ -1153,16 +1489,68 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb,
  */
 static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
 {
+	struct octeon_droq_ops droq_ops;
 	struct net_device *netdev;
+	static int cpu_id_modulus;
+	struct octeon_droq *droq;
+	struct napi_struct *napi;
+	static int cpu_id;
 	int num_tx_descs;
 	struct lio *lio;
 	int retval = 0;
-	int q;
+	int q, q_no;
 
 	netdev = octeon_dev->props[ifidx].netdev;
 
 	lio = GET_LIO(netdev);
 
+	memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+	droq_ops.fptr = liquidio_push_packet;
+	droq_ops.farg = netdev;
+
+	droq_ops.poll_mode = 1;
+	droq_ops.napi_fn = liquidio_vf_napi_drv_callback;
+	cpu_id = 0;
+	cpu_id_modulus = num_present_cpus();
+
+	/* set up DROQs. */
+	for (q = 0; q < lio->linfo.num_rxpciq; q++) {
+		q_no = lio->linfo.rxpciq[q].s.q_no;
+
+		retval = octeon_setup_droq(
+		    octeon_dev, q_no,
+		    CFG_GET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(octeon_dev),
+						lio->ifidx),
+		    CFG_GET_NUM_RX_BUF_SIZE_NIC_IF(octeon_get_conf(octeon_dev),
+						   lio->ifidx),
+		    NULL);
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"%s : Runtime DROQ(RxQ) creation failed.\n",
+				__func__);
+			return 1;
+		}
+
+		droq = octeon_dev->droq[q_no];
+		napi = &droq->napi;
+		netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
+
+		/* designate a CPU for this droq */
+		droq->cpu_id = cpu_id;
+		cpu_id++;
+		if (cpu_id >= cpu_id_modulus)
+			cpu_id = 0;
+
+		octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
+	}
+
+	/* 23XX VF can send/recv control messages (via the first VF-owned
+	 * droq) from the firmware even if the ethX interface is down,
+	 * so that's why poll_mode must be off for the first droq.
+	 */
+	octeon_dev->droq[0]->ops.poll_mode = 0;
+
 	/* set up IQs. */
 	for (q = 0; q < lio->linfo.num_txpciq; q++) {
 		num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
@@ -1189,6 +1577,16 @@ static int liquidio_open(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
+	struct napi_struct *napi, *n;
+
+	if (!oct->props[lio->ifidx].napi_enabled) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_enable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 1;
+
+		oct->droq[0]->ops.poll_mode = 1;
+	}
 
 	ifstate_set(lio, LIO_IFSTATE_RUNNING);
 
@@ -1198,6 +1596,9 @@ static int liquidio_open(struct net_device *netdev)
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 	start_txq(netdev);
 
+	/* tell Octeon to start forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 1);
+
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
 
 	return 0;
@@ -1220,6 +1621,9 @@ static int liquidio_stop(struct net_device *netdev)
 	netif_carrier_off(netdev);
 	lio->link_changes++;
 
+	/* tell Octeon to stop forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 0);
+
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
 	txqs_stop(netdev);
@@ -2016,6 +2420,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			goto setup_nic_dev_fail;
 		}
 
+		ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
+
 		/* For VFs, enable Octeon device interrupts here,
 		 * as this is contingent upon IO queue setup
 		 */
@@ -2026,8 +2432,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		 * tx and rx queues
 		 */
 		lio->txq = lio->linfo.txpciq[0].s.q_no;
+		lio->rxq = lio->linfo.rxpciq[0].s.q_no;
 
 		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
+		lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
 
 		if (setup_glists(lio, num_iqueues)) {
 			dev_err(&octeon_dev->pci_dev->dev,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 583818e..a8df493 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1374,7 +1374,7 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
 	/*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough
 	 *to trigger tx interrupts as well, if they are pending.
 	 */
-	if (oct && OCTEON_CN23XX_PF(oct)) {
+	if (oct && (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))) {
 		if (droq)
 			writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
 		/*we race with firmrware here. read and write the IN_DONE_CNTS*/
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 8bf1ac76..0be87d1 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -28,6 +28,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
+#include "cn23xx_vf_device.h"
 
 struct niclist {
 	struct list_head list;
@@ -261,6 +262,11 @@ int octeon_init_droq(struct octeon_device *oct,
 
 		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
 		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
+	} else if (OCTEON_CN23XX_VF(oct)) {
+		struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_vf);
+
+		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
 	} else {
 		return 1;
 	}
@@ -889,6 +895,10 @@ static inline void octeon_droq_drop_packets(struct octeon_device *oct,
 			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
 		}
 		break;
+
+		case OCTEON_CN23XX_VF_VID:
+			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+		break;
 		}
 		return 0;
 	}
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index fdaf742..2fbaae9 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -84,7 +84,8 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
 
 		sc = (struct octeon_soft_command *)ordered_sc_list->
 		    head.next;
-		if (OCTEON_CN23XX_PF(octeon_dev)) {
+		if (OCTEON_CN23XX_PF(octeon_dev) ||
+		    OCTEON_CN23XX_VF(octeon_dev)) {
 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
 			rptr = sc->cmd.cmd3.rptr;
 		} else {
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH v3 net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
From: David Miller @ 2016-12-07 17:04 UTC (permalink / raw)
  To: alexei.starovoitov
  Cc: kubakici, daniel, kafai, netdev, ast, bblanco, brouer,
	john.fastabend, saeedm, tariqt, kernel-team
In-Reply-To: <20161207163756.GA33446@ast-mbp.thefacebook.com>

From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Date: Wed, 7 Dec 2016 08:37:58 -0800

> On Wed, Dec 07, 2016 at 11:41:12AM +0000, Jakub Kicinski wrote:
>> > I see nothing wrong if this is exposed/made visible in the usual way through
>> > ethtool -k as well. I guess at least that would be the expected way to query
>> > for such driver capabilities.
>> 
>> +1 on exposing this to user space.  Whether via ethtool -k or a
>> separate XDP-specific netlink message is mostly a question of whether
>> we expect the need to expose more complex capabilities than bits.
> 
> I'm very much against using NETIF_F_ flags and exposing this to user space.
> I see this xdp feature flag as temporary workaround until all drivers
> support adjust_head() helper. It is very much a fundamental feature for xdp.
> Without being able to add/remove headers the usability of xdp becomes very limited.
> 
> If you guys dont like extra ndo_xdp command, I'd suggest to do
> "if (prog->xdp_adjust_head)" check in the driver and if driver doesn't
> support it yet, just fail XDP_SETUP_PROG command.
> imo that will be more flexible interface, since in the future drivers
> can fail on different combination of features and simple boolean flag
> unlikely to serve us for long time.

Indeed, if the eventual plan is to have all drivers be required to
support a fundamental set of XDP features then exporting this in any
way to userspace is not a good idea.

^ permalink raw reply

* Re: [net-next][PATCH v2 18/18] RDS: IB: add missing connection cache usage info
From: David Miller @ 2016-12-07 17:05 UTC (permalink / raw)
  To: santosh.shilimkar; +Cc: netdev, linux-kernel
In-Reply-To: <eac62d66-7a41-965d-8ecb-c2e4b651c39d@oracle.com>

From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Wed, 7 Dec 2016 08:44:04 -0800

> On 12/7/2016 7:55 AM, David Miller wrote:
>> From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
>> Date: Tue,  6 Dec 2016 20:01:56 -0800
>>
>> What level of compatibility exists here?  If we run an old tool on a
>> new
>> kernel, or a new tool on an old kernel, does it work properly?
>>
> Tools repo carries a copy of the header and thats how the old tool and
> new tools have been running with older/newer kernels. There are few
> more
> bits left before I can start using directly kernel header for newer
> tools.
> 
> Moreover this particular parameter is only used for verbose mode which
> isn't used in default options.

That doesn't really answer my question, I think.

Are you saying that one is required to run old tools on old kernels,
and new tools on new kernels, and that's how you have this setup in
your repo?

If so, that really isn't acceptable.  Both old and new tools must work
with all kernel versions.

^ permalink raw reply

* Re: pull-request: can 2016-12-07
From: David Miller @ 2016-12-07 17:07 UTC (permalink / raw)
  To: mkl; +Cc: netdev, linux-can, kernel
In-Reply-To: <20161207095040.5003-1-mkl@pengutronix.de>

From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed,  7 Dec 2016 10:50:39 +0100

> Andrey Konovalov triggered a warning in the CAN RAW layer, which is
> fixed by a patch by me.

Pulled, thanks Marc.

^ permalink raw reply

* RE: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: David Laight @ 2016-12-07 17:09 UTC (permalink / raw)
  To: 'Paolo Abeni', Eric Dumazet
  Cc: David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481044098.7129.7.camel@redhat.com>

From: Paolo Abeni
> Sent: 06 December 2016 17:08
...
> @@ -79,6 +82,9 @@ struct udp_sock {
>  	int			(*gro_complete)(struct sock *sk,
>  						struct sk_buff *skb,
>  						int nhoff);
> +
> +	/* since we are prone to drops, avoid dirtying any sk cacheline */
> +	atomic_t		drops ____cacheline_aligned_in_smp;
>  };

Isn't that likely to create a large hole on systems with large cache lines?
(Same as any other use of ____cacheline_aligned_in_smp.)

	David


^ permalink raw reply

* Re: [PATCH v3 net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
From: Daniel Borkmann @ 2016-12-07 17:14 UTC (permalink / raw)
  To: David Miller, alexei.starovoitov
  Cc: kubakici, kafai, netdev, ast, bblanco, brouer, john.fastabend,
	saeedm, tariqt, kernel-team
In-Reply-To: <20161207.120413.939362482173997833.davem@davemloft.net>

On 12/07/2016 06:04 PM, David Miller wrote:
> From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Date: Wed, 7 Dec 2016 08:37:58 -0800
>
>> On Wed, Dec 07, 2016 at 11:41:12AM +0000, Jakub Kicinski wrote:
>>>> I see nothing wrong if this is exposed/made visible in the usual way through
>>>> ethtool -k as well. I guess at least that would be the expected way to query
>>>> for such driver capabilities.
>>>
>>> +1 on exposing this to user space.  Whether via ethtool -k or a
>>> separate XDP-specific netlink message is mostly a question of whether
>>> we expect the need to expose more complex capabilities than bits.
>>
>> I'm very much against using NETIF_F_ flags and exposing this to user space.
>> I see this xdp feature flag as temporary workaround until all drivers
>> support adjust_head() helper. It is very much a fundamental feature for xdp.
>> Without being able to add/remove headers the usability of xdp becomes very limited.
>>
>> If you guys dont like extra ndo_xdp command, I'd suggest to do
>> "if (prog->xdp_adjust_head)" check in the driver and if driver doesn't
>> support it yet, just fail XDP_SETUP_PROG command.
>> imo that will be more flexible interface, since in the future drivers
>> can fail on different combination of features and simple boolean flag
>> unlikely to serve us for long time.
>
> Indeed, if the eventual plan is to have all drivers be required to
> support a fundamental set of XDP features then exporting this in any
> way to userspace is not a good idea.

Agreed, if that is required anyway, then much better and simpler to just
return the -ENOTSUPP from the XDP_SETUP_PROG handling of each driver that
way.

^ permalink raw reply

* Re: [PATCH] [v3] net: phy: phy drivers should not set SUPPORTED_[Asym_]Pause
From: Timur Tabi @ 2016-12-07 17:19 UTC (permalink / raw)
  To: Niklas Cassel; +Cc: Florian Fainelli, David Miller, jon.mason, netdev
In-Reply-To: <CAD5ja61A3diBgYwscUSoxJqE1ydmzr+cQ9rR+8uZa0mw-0m_3Q@mail.gmail.com>

On 12/07/2016 03:13 AM, Niklas Cassel wrote:
> You might want to include drivers/net/phy/dp83848.c in your patch.
> Support for pause frames in that phy was recently added to netdev-next.

Thanks.  I feel bad that I'm reverting your patch just a few days after 
it was applied.

-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc.  Qualcomm Technologies, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply

* Re: [net-next][PATCH v2 18/18] RDS: IB: add missing connection cache usage info
From: Santosh Shilimkar @ 2016-12-07 17:20 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20161207.120536.1153607792891600896.davem@davemloft.net>

On 12/7/2016 9:05 AM, David Miller wrote:
> From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
> Date: Wed, 7 Dec 2016 08:44:04 -0800
>
>> On 12/7/2016 7:55 AM, David Miller wrote:
>>> From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
>>> Date: Tue,  6 Dec 2016 20:01:56 -0800
>>>
>>> What level of compatibility exists here?  If we run an old tool on a
>>> new
>>> kernel, or a new tool on an old kernel, does it work properly?
>>>
>> Tools repo carries a copy of the header and thats how the old tool and
>> new tools have been running with older/newer kernels. There are few
>> more
>> bits left before I can start using directly kernel header for newer
>> tools.
>>
>> Moreover this particular parameter is only used for verbose mode which
>> isn't used in default options.
>
> That doesn't really answer my question, I think.
>
Sorry for not being clear.

> Are you saying that one is required to run old tools on old kernels,
> and new tools on new kernels, and that's how you have this setup in
> your repo?
>
No.

> If so, that really isn't acceptable.  Both old and new tools must work
> with all kernel versions.
>
Older versions of the tools work on either kernel version. Older tools
don't parse this additional info since their copy of the header does not
carry some of these extra verbose fields. Newer/updated tools which
can parse this extra info need a newer or updated kernel which
supports and populates these fields.

As mentioned, this particular option used only in verbose mode so
am ok to drop this change if its still a concern.

Regards,
Santosh

^ permalink raw reply

* [PATCH net-next] udp: under rx pressure, try to condense skbs
From: Eric Dumazet @ 2016-12-07 17:19 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Paolo Abeni

From: Eric Dumazet <edumazet@google.com>

Under UDP flood, many softirq producers try to add packets to
UDP receive queue, and one user thread is burning one cpu trying
to dequeue packets as fast as possible.

Two parts of the per packet cost are :
- copying payload from kernel space to user space,
- freeing memory pieces associated with skb.

If socket is under pressure, softirq handler(s) can try to pull in
skb->head the payload of the packet if it fits.

Meaning the softirq handler(s) can free/reuse the page fragment
immediately, instead of letting udp_recvmsg() do this hundreds of usec
later, possibly from another node.


Additional gains :
- We reduce skb->truesize and thus can store more packets per SO_RCVBUF
- We avoid cache line misses at copyout() time and consume_skb() time,
and avoid one put_page() with potential alien freeing on NUMA hosts.

This comes at the cost of a copy, bounded to available tail room, which
is usually small. (We might have to fix GRO_MAX_HEAD which looks bigger
than necessary)

This patch gave me about 5 % increase in throughput in my tests.

skb_condense() helper could probably be used in other contexts.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
---
 include/linux/skbuff.h |    2 ++
 net/core/skbuff.c      |   28 ++++++++++++++++++++++++++++
 net/ipv4/udp.c         |   12 +++++++++++-
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c535fbccf2c7dbfae04cee393460e86d588c26b..0cd92b0f2af5fe5a7c153435b8dc758338180ae3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1966,6 +1966,8 @@ static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
 	return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
 }
 
+void skb_condense(struct sk_buff *skb);
+
 /**
  *	skb_headroom - bytes at buffer head
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b45cd1494243fc99686016949f4546dbba11f424..84151cf40aebb973bad5bee3ee4be0758084d83c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4931,3 +4931,31 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
 	return clone;
 }
 EXPORT_SYMBOL(pskb_extract);
+
+/**
+ * skb_condense - try to get rid of fragments/frag_list if possible
+ * @skb: buffer
+ *
+ * Can be used to save memory before skb is added to a busy queue.
+ * When the skb carries paged data that fits in the tail room of skb->head,
+ * all of it is pulled into the head so the frags can be released at once,
+ * and truesize is recomputed.
+ * Notes:
+ *	We do not reallocate skb->head thus can not fail.
+ *	Caller must re-evaluate skb->truesize if needed.
+ */
+void skb_condense(struct sk_buff *skb)
+{
+	/* No paged data: nothing to pull */
+	if (!skb->data_len)
+		return;
+
+	/* Paged data does not fit in the tail room of skb->head */
+	if (skb->data_len > skb->end - skb->tail)
+		return;
+
+	/* Leave cloned skbs untouched */
+	if (skb_cloned(skb))
+		return;
+
+	/* Everything fits: pull it so the page frag(s) are freed right now */
+	__pskb_pull_tail(skb, skb->data_len);
+
+	/* __pskb_pull_tail() does not adjust skb->truesize, so do it here */
+	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 16d88ba9ff1c402f77063cfb5eea2708d86da2fc..f5628ada47b53f0d92d08210e5d7e4132a107f73 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1199,7 +1199,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct sk_buff_head *list = &sk->sk_receive_queue;
 	int rmem, delta, amt, err = -ENOMEM;
-	int size = skb->truesize;
+	int size;
 
 	/* try to avoid the costly atomic add/sub pair when the receive
 	 * queue is full; always allow at least a packet
@@ -1208,6 +1208,16 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 	if (rmem > sk->sk_rcvbuf)
 		goto drop;
 
+	/* Under mem pressure, it might be helpful to help udp_recvmsg()
+	 * having linear skbs :
+	 * - Reduce memory overhead and thus increase receive queue capacity
+	 * - Less cache line misses at copyout() time
+	 * - Less work at consume_skb() (less alien page frag freeing)
+	 */
+	if (rmem > (sk->sk_rcvbuf >> 1))
+		skb_condense(skb);
+	size = skb->truesize;
+
 	/* we drop only if the receive buf is full and the receive
 	 * queue contains some other skb
 	 */

^ permalink raw reply related

* Re: [PATCH v2 iproute2/net-next 0/3] tc: flower: Support matching on ICMP
From: Stephen Hemminger @ 2016-12-07 17:21 UTC (permalink / raw)
  To: Simon Horman; +Cc: netdev, Jiri Pirko
In-Reply-To: <1481118843-10428-1-git-send-email-simon.horman@netronome.com>

On Wed,  7 Dec 2016 14:54:00 +0100
Simon Horman <simon.horman@netronome.com> wrote:

> Add support for matching on ICMP type and code to flower. This is modeled
> on existing support for matching on L4 ports.
> 
> The second patch provided a minor cleanup which is in keeping with
> the style used in the last patch.
> 
> This is marked as an RFC to match the same designation given to the
> corresponding kernel patches.
> 
> 
> Changes since v1:
> * Rebase
> * Do not run ntohs() on u8 entity
> 
> Simon Horman (3):
>   tc: flower: update headers for TCA_FLOWER_KEY_ICMP*
>   tc: flower: introduce enum flower_endpoint
>   tc: flower: support matching on ICMP type and code
> 
>  include/linux/pkt_cls.h |  10 ++++
>  man/man8/tc-flower.8    |  20 ++++++--
>  tc/f_flower.c           | 123 +++++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 135 insertions(+), 18 deletions(-)
> 

I am holding off on applying these to net-next until David applies the
kernel portion.

^ permalink raw reply

* Re: [PATCH v3 net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
From: Martin KaFai Lau @ 2016-12-07 17:26 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Jakub Kicinski, Daniel Borkmann, netdev, Alexei Starovoitov,
	Brenden Blanco, David Miller, Jesper Dangaard Brouer,
	John Fastabend, Saeed Mahameed, Tariq Toukan, Kernel Team
In-Reply-To: <20161207163756.GA33446@ast-mbp.thefacebook.com>

On Wed, Dec 07, 2016 at 08:37:58AM -0800, Alexei Starovoitov wrote:
> On Wed, Dec 07, 2016 at 11:41:12AM +0000, Jakub Kicinski wrote:
> > > I see nothing wrong if this is exposed/made visible in the usual way through
> > > ethtool -k as well. I guess at least that would be the expected way to query
> > > for such driver capabilities.
> >
> > +1 on exposing this to user space.  Whether via ethtool -k or a
> > separate XDP-specific netlink message is mostly a question of whether
> > we expect the need to expose more complex capabilities than bits.
>
> I'm very much against using NETIF_F_ flags and exposing this to user space.
> I see this xdp feature flag as temporary workaround until all drivers
> support adjust_head() helper. It is very much a fundamental feature for xdp.
> Without being able to add/remove headers the usability of xdp becomes very limited.
>
> If you guys dont like extra ndo_xdp command, I'd suggest to do
> "if (prog->xdp_adjust_head)" check in the driver and if driver doesn't
> support it yet, just fail XDP_SETUP_PROG command.
> imo that will be more flexible interface, since in the future drivers
> can fail on different combination of features and simple boolean flag
> unlikely to serve us for long time.
It makes sense that adjust_head() will eventually be supported by
all xdp-capable drivers.  If that is the case, let's check
prog->xdp_adjust_head inside the driver instead of adding
another ndo_xdp command which will become useless very soon.

^ permalink raw reply

* nfct_query hangs after multiple requests
From: Kirila Adamova @ 2016-12-07 17:27 UTC (permalink / raw)
  To: netdev@vger.kernel.org

Hi      

I am using nfct_query (libnetfilter_conntrack library) to get a connection from the conntrack table and then to update its connmark. This was working ok in a development environment, but when testing it in production with a lot of traffic, after around a minute, the daemon hangs on nfct_query and does not process any more data.      

Some background:      
- I am sending packets via NFLOG to the daemon (and setting a connmark 0x2/0x2)
- the daemon polls the NFLOG group and handles the packets via nflog_handle_packet      
- the callback registered with the nflog handle extracts the conntrack information from the packet header (L4 proto, src/dst ip, src/dst port)      
- an nf_conntrack pointer is created with this information      
- (calling another library which calls another callback)      
- if certain conditions are met      
-- register nfct callback -- nfct_callback_register(h, NFCT_T_ALL, my_nfct_callback, h)      
-- nfct_query with NFCT_Q_GET  to get the conntrack connection based on the ct data      
-- (in the nfct callback) check the connmark of the connection and run nfct query with NFCT_Q_UPDATE to update the connmark of that same connection      

The nfct_handle is opened at the start of the daemon and closed via signal handling at termination.      

After placing some debug prints in the code, I discovered that at some point nfct_query for NFCT_Q_GET is called, but it never enters the callback function.      

Debugging with strace showed the following:      
...      
recvfrom(4,"$\0\0\0\2\0\0\0h\4IX\22(\0\0\0\0\0\0\304\0\0\0\0\1\5\0h\4IX"..., 8192, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, [12]) = 36      
sendto(4,"", 0, 0, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0      
recvfrom(4,
---  and is hanging here.      

I am writing to this mailing list, hoping that somebody would have an idea how to proceed with debugging and what the issue might be. Obviously, it's the amount of connections. But there must be a way to handle them for longer than a minute. Once it hangs, it never resumes.      

Please let me know if you need any further information or part of the code.      

Versions used:      
libnetfilter_conntrack - 1.0.4      
libnetfilter_log  - 1.0.1      

Best regards      
Kirila

^ permalink raw reply

* Re: [PATCH 1/1] ixgbe: fcoe: return value of skb_linearize should be handled
From: Jeff Kirsher @ 2016-12-07 17:30 UTC (permalink / raw)
  To: Zhouyi Zhou, intel-wired-lan, netdev, linux-kernel; +Cc: Zhouyi Zhou
In-Reply-To: <1481096614-25295-1-git-send-email-zhouzhouyi@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 732 bytes --]

On Wed, 2016-12-07 at 15:43 +0800, Zhouyi Zhou wrote:
> Signed-off-by: Zhouyi Zhou <yizhouzhou@ict.ac.cn>
> Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
> Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com> 
> Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 6 +++++-
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +--
>  2 files changed, 6 insertions(+), 3 deletions(-)

Did Cong, Yuval and Eric give their Reviewed-by offline?  I see they made
comments and suggestions, but never saw them actually give you their Reviewed-
by.  You cannot automatically add their Reviewed-by, Signed-off-by, etc
just because someone provides feedback on your patch.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Eric Dumazet @ 2016-12-07 17:32 UTC (permalink / raw)
  To: David Laight
  Cc: 'Paolo Abeni', David Miller, netdev, Willem de Bruijn
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6DB0237532@AcuExch.aculab.com>

On Wed, 2016-12-07 at 17:09 +0000, David Laight wrote:
> From: Paolo Abeni
> > Sent: 06 December 2016 17:08
> ...
> > @@ -79,6 +82,9 @@ struct udp_sock {
> >  	int			(*gro_complete)(struct sock *sk,
> >  						struct sk_buff *skb,
> >  						int nhoff);
> > +
> > +	/* since we are prone to drops, avoid dirtying any sk cacheline */
> > +	atomic_t		drops ____cacheline_aligned_in_smp;
> >  };
> 
> Isn't that likely to create a large hole on systems with large cache lines.
> (Same as any other use of ____cacheline_aligned_in_smp.)

Yes, I would like to avoid that, unless we come to the conclusion it is
absolutely needed.

I feel that we could simply use a pointer, and allocate memory on
demand, since many sockets do not ever experience a drop.

The pointer could stay in a read mostly section.

We even could use per cpu or node counter for some heavy drop cases. 

^ permalink raw reply

* Re: [net-next][PATCH v2 18/18] RDS: IB: add missing connection cache usage info
From: David Miller @ 2016-12-07 17:36 UTC (permalink / raw)
  To: santosh.shilimkar; +Cc: netdev, linux-kernel
In-Reply-To: <6159757e-c799-ee7f-0ee0-c9b3534a4237@oracle.com>

From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Wed, 7 Dec 2016 09:20:17 -0800

> Newer/updated tools which can parse this extra info need a newer
> or updated kernel which supports and populates these fields.
> 
> As mentioned, this particular option is used only in verbose mode, so
> I am OK to drop this change if it's still a concern.

What does the newer tool do on an older kernel if it doesn't see
the fields?  Does it check the size of the structure given back
to it, and conditionally handle the older vs. the newer layout?

It must do this.

^ permalink raw reply

* Re: 4.9.0-rc8: tg3 dead after resume
From: Michael Chan @ 2016-12-07 17:37 UTC (permalink / raw)
  To: Billy Shuman; +Cc: Netdev, Siva Reddy Kallam
In-Reply-To: <CAHQNsodiku6Ln3y-=GzmmNLM0Emc2rEheFmc80OCuN91roojqA@mail.gmail.com>

On Wed, Dec 7, 2016 at 7:20 AM, Billy Shuman <wshuman3@gmail.com> wrote:
> After resume on 4.9.0-rc8 tg3 is dead.
>
> In logs I see:
> kernel: tg3 0000:44:00.0: phy probe failed, err -19
> kernel: tg3 0000:44:00.0: Problem fetching invariants of chip, aborting

-19 is -ENODEV which means tg3 cannot read the PHY ID.

If it's a true suspend/resume operation, the driver does not have to
go through probe during resume.  Please explain how you do
suspend/resume.

Did this work before?  There have been very few changes to tg3 recently.

>
> rmmod and modprobe does not fix the problem only a reboot resolves the issue.
>
> Billy

^ permalink raw reply

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Hannes Frederic Sowa @ 2016-12-07 17:37 UTC (permalink / raw)
  To: Eric Dumazet, David Laight
  Cc: Paolo Abeni, David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481131924.4930.40.camel@edumazet-glaptop3.roam.corp.google.com>

On Wed, Dec 7, 2016, at 18:32, Eric Dumazet wrote:
> On Wed, 2016-12-07 at 17:09 +0000, David Laight wrote:
> > From: Paolo Abeni
> > > Sent: 06 December 2016 17:08
> > ...
> > > @@ -79,6 +82,9 @@ struct udp_sock {
> > >  	int			(*gro_complete)(struct sock *sk,
> > >  						struct sk_buff *skb,
> > >  						int nhoff);
> > > +
> > > +	/* since we are prone to drops, avoid dirtying any sk cacheline */
> > > +	atomic_t		drops ____cacheline_aligned_in_smp;
> > >  };
> > 
> > Isn't that likely to create a large hole on systems with large cache lines.
> > (Same as any other use of ____cacheline_aligned_in_smp.)
> 
> Yes, I would like to avoid that, unless we come to the conclusion it is
> absolutely needed.
> 
> I feel that we could simply use a pointer, and allocate memory on
> demand, since many sockets do not ever experience a drop.
> 
> The pointer could stay in a read mostly section.
> 
> We even could use per cpu or node counter for some heavy drop cases. 

I had the same idea while discussing that with Paolo, merely using an
*atomic_t = kmalloc(sizeof(atomic_t)) out of band of the socket.

My fear was that those could be aggregated by the slab cache into one
cache line, causing even more heating on cachelines.

Bye,
Hannes

^ permalink raw reply

* Re: commit : ppp: add rtnetlink device creation support - breaks netcf on my machine.
From: Thomas Haller @ 2016-12-07 17:43 UTC (permalink / raw)
  To: Dan Williams, Guillaume Nault, Brad Campbell
  Cc: netdev, Thomas Graf, David Miller
In-Reply-To: <1481065966.11028.3.camel@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 642 bytes --]

On Tue, 2016-12-06 at 17:12 -0600, Dan Williams wrote:
> 
> > libnl1 rejects the IFLA_INFO_DATA attribute because it expects it
> > to
> > contain a sub-attribute. Since the payload size is zero it doesn't
> > match the policy and parsing fails.
> > 
> > There's no problem with libnl3 because its policy accepts empty
> > payloads for NLA_NESTED attributes (see libnl3 commit 4be02ace4826

Hi,

libnl1 is unmaintained these days. I don't think it makes sense to
backport that patch. The last upstream release was 3+ years ago, with
no upstream development since then.

IMHO netcf should drop libnl-1 support.

best,
Thomas

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] of: add optional naming of interfaces
From: Florian Fainelli @ 2016-12-07 17:49 UTC (permalink / raw)
  To: Volodymyr Bendiuga, robh+dt-DgEjT+Ai2ygdnm+yROfE0A,
	mark.rutland-5wv7dgnIgG8, frowand.list-Re5JQEeQqe8AvxtiuMwx3w,
	netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
	volodymyr.bendiuga-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Jonas Johansson, Mattias Walström
In-Reply-To: <1481116349-20678-1-git-send-email-volodymyr.bendiuga-qeDNsGSBLoYwFerOooGFRg@public.gmane.org>

On 12/07/2016 05:12 AM, Volodymyr Bendiuga wrote:
> From: Jonas Johansson <jonas.johansson-qeDNsGSBLoYwFerOooGFRg@public.gmane.org>
> 
> Signed-off-by: Mattias Walström <lazzer-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> Signed-off-by: Jonas Johansson <jonas.johansson-qeDNsGSBLoYwFerOooGFRg@public.gmane.org>

This does not belong to the Device Tree, there should be plenty of
information in user-space to make an educated device rename. I
definitely understand that some drivers (e.g., dsa) do actually get
their interface name from Device Tree directly (label property), but
this is probably the one and only case where this may be tolerated.

Besides, if you submit such a change, you would want to also provide a
consumer of that API to illustrate how this is used.
-- 
Florian
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Eric Dumazet @ 2016-12-07 17:52 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Paolo Abeni, David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481132267.1541189.811630457.167E9C56@webmail.messagingengine.com>

On Wed, 2016-12-07 at 18:37 +0100, Hannes Frederic Sowa wrote:

> I had the same idea while discussing that with Paolo, merely using an
> *atomic_t = kmalloc(sizeof(atomic_t)) out of band of the socket.
> 
> My fear was that those could be aggregated by the slab cache into one
> cache line, causing even more heating on cachelines.

For hot stuff, better use kmalloc(max_t(size_t, 
                                        L1_CACHE_BYTES,
                                        sizeof(...)) 
to avoid false sharing, unless this is per cpu data of course.

^ permalink raw reply

* Re: [PATCH] net/udp: do not touch skb->peeked unless really needed
From: Eric Dumazet @ 2016-12-07 17:55 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Laight, Paolo Abeni, David Miller, netdev, Willem de Bruijn
In-Reply-To: <1481132267.1541189.811630457.167E9C56@webmail.messagingengine.com>

On Wed, 2016-12-07 at 18:37 +0100, Hannes Frederic Sowa wrote:

> I had the same idea while discussing that with Paolo, merely using an
> *atomic_t = kmalloc(sizeof(atomic_t)) out of band of the socket.
> 
> My fear was that those could be aggregated by the slab cache into one
> cache line, causing even more heating on cachelines.

My exact idea was to let up to 4095 (or PAGE_SIZE - 1) increments be
done on the counter before switching to dynamically allocated memory.

( Some packets might be dropped by TCP sockets, not necessarily a sign
of an attack. just some spurious retransmits )

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox