From: John Fastabend <john.fastabend@gmail.com>
To: dborkman@redhat.com, fw@strlen.de, gerlitz.or@gmail.com,
hannes@stressinduktion.org
Cc: netdev@vger.kernel.org, john.ronciak@intel.com,
amirv@mellanox.com, eric.dumazet@gmail.com, danny.zhou@intel.com
Subject: [net-next PATCH v1 2/3] net: sched: add direct ring access via af_packet to ixgbe
Date: Sun, 05 Oct 2014 17:07:06 -0700 [thread overview]
Message-ID: <20141006000705.32055.35262.stgit@nitbit.x32> (raw)
In-Reply-To: <20141006000629.32055.2295.stgit@nitbit.x32>
This implements the necessary ndo ops to support the af_packet
interface to directly own and manipulate queues.
Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe.h | 3
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 23 ++
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 232 ++++++++++++++++++++++
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 1
4 files changed, 251 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 673d820..2f6eadf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -678,6 +678,9 @@ struct ixgbe_adapter {
struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];
+ /* Direct User Space Queues */
+ struct sock *sk_handles[MAX_RX_QUEUES];
+
/* DCB parameters */
struct ieee_pfc *ixgbe_ieee_pfc;
struct ieee_ets *ixgbe_ieee_ets;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index cff383b..01a6e55 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2581,12 +2581,17 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
return -EOPNOTSUPP;
+ if (fsp->ring_cookie > MAX_RX_QUEUES)
+ return -EINVAL;
+
/*
* Don't allow programming if the action is a queue greater than
- * the number of online Rx queues.
+ * the number of online Rx queues unless it is a user space
+ * queue.
*/
if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
- (fsp->ring_cookie >= adapter->num_rx_queues))
+ (fsp->ring_cookie >= adapter->num_rx_queues) &&
+ !adapter->sk_handles[fsp->ring_cookie])
return -EINVAL;
/* Don't allow indexes to exist outside of available space */
@@ -2663,12 +2668,18 @@ static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
/* apply mask and compute/store hash */
ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+ /* Set input action to reg_idx for driver owned queues otherwise
+ * use the absolute index for user space queues.
+ */
+ if (fsp->ring_cookie < adapter->num_rx_queues &&
+ fsp->ring_cookie != IXGBE_FDIR_DROP_QUEUE)
+ input->action = adapter->rx_ring[input->action]->reg_idx;
+
/* program filters to filter memory */
err = ixgbe_fdir_write_perfect_filter_82599(hw,
- &input->filter, input->sw_idx,
- (input->action == IXGBE_FDIR_DROP_QUEUE) ?
- IXGBE_FDIR_DROP_QUEUE :
- adapter->rx_ring[input->action]->reg_idx);
+ &input->filter,
+ input->sw_idx,
+ input->action);
if (err)
goto err_out_w_lock;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 06ef5a3..6506550 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -48,7 +48,9 @@
#include <linux/if_macvlan.h>
#include <linux/if_bridge.h>
#include <linux/prefetch.h>
+#include <linux/mm.h>
#include <scsi/fc/fc_fcoe.h>
+#include <linux/if_packet.h>
#include "ixgbe.h"
#include "ixgbe_common.h"
@@ -70,6 +72,8 @@ const char ixgbe_driver_version[] = DRV_VERSION;
static const char ixgbe_copyright[] =
"Copyright (c) 1999-2014 Intel Corporation.";
+/* Page-sized buffer filled with 0xdeadbeef that backs every user-mapped
+ * page other than the Rx/Tx descriptor pages; allocated on first use by
+ * ixgbe_ndo_qpair_page_map() and freed when the module exits.
+ */
+static unsigned int *dummy_page_buf;
+
static const struct ixgbe_info *ixgbe_info_tbl[] = {
[board_82598] = &ixgbe_82598_info,
[board_82599] = &ixgbe_82599_info,
@@ -3122,6 +3126,16 @@ static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
}
+/* Report whether at least one Rx queue slot is currently claimed by a
+ * socket, i.e. has a non-NULL entry in adapter->sk_handles[].
+ */
+static bool ixgbe_have_user_queues(struct ixgbe_adapter *adapter)
+{
+	int queue = 0;
+
+	while (queue < MAX_RX_QUEUES) {
+		if (adapter->sk_handles[queue] != NULL)
+			return true;
+		queue++;
+	}
+
+	return false;
+}
+
static void ixgbe_disable_rx_drop(struct ixgbe_adapter *adapter,
struct ixgbe_ring *ring)
{
@@ -3156,7 +3170,8 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
* and performance reasons.
*/
if (adapter->num_vfs || (adapter->num_rx_queues > 1 &&
- !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) {
+ !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en) ||
+ ixgbe_have_user_queues(adapter)) {
for (i = 0; i < adapter->num_rx_queues; i++)
ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]);
} else {
@@ -7812,6 +7827,210 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
kfree(fwd_adapter);
}
+/**
+ * ixgbe_ndo_split_queue_pairs - reserve a contiguous run of queue pairs
+ * @dev: network device whose queue pairs are being reserved
+ * @start_from: first queue pair index wanted, or PACKET_QPAIRS_START_ANY to
+ *              let the driver pick the first free contiguous run
+ * @qpairs_num: number of consecutive queue pairs to reserve
+ * @sk: socket recorded as owner of every reserved queue pair
+ *
+ * Queue pairs with an index below num_rx_queues or num_tx_queues belong to
+ * the driver and are never handed out.
+ *
+ * Returns the index of the first reserved queue pair on success, -EINVAL if
+ * the request is empty, out of range, overlaps driver-owned queues or no
+ * free run exists, and -EBUSY if a requested queue pair already has an owner.
+ *
+ * NOTE(review): sk_handles[] is read and written without any locking in this
+ * function; confirm the caller serializes these ndo calls.
+ **/
+static int ixgbe_ndo_split_queue_pairs(struct net_device *dev,
+				       unsigned int start_from,
+				       unsigned int qpairs_num,
+				       struct sock *sk)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	const unsigned int limit = min(MAX_RX_QUEUES, MAX_TX_QUEUES);
+	unsigned int first_free;
+	unsigned int qpair_index;
+
+	/* An empty or oversized request can never be satisfied. Rejecting it
+	 * here also keeps start_from + qpairs_num below from wrapping.
+	 */
+	if (!qpairs_num || qpairs_num > limit)
+		return -EINVAL;
+
+	/* lowest index that is owned by neither the Rx nor the Tx side of
+	 * the driver
+	 */
+	first_free = max(adapter->num_rx_queues, adapter->num_tx_queues);
+
+	/* allocate whatever qpairs are available */
+	if (start_from == PACKET_QPAIRS_START_ANY) {
+		unsigned int count = 0;
+		bool found = false;
+
+		for (qpair_index = first_free;
+		     qpair_index < limit;
+		     qpair_index++) {
+			if (adapter->sk_handles[qpair_index]) {
+				/* run interrupted, start counting again */
+				count = 0;
+				continue;
+			}
+
+			if (++count == qpairs_num) {
+				start_from = qpair_index - count + 1;
+				found = true;
+				break;
+			}
+		}
+
+		/* do not depend on the sentinel value failing the range
+		 * check below
+		 */
+		if (!found)
+			return -EINVAL;
+	}
+
+	/* otherwise the caller specified exact queues; the comparison is
+	 * written so that it cannot overflow
+	 */
+	if (start_from >= limit || qpairs_num > limit - start_from)
+		return -EINVAL;
+
+	/* If the qpairs are being used by the driver do not let user space
+	 * consume the queues. Also fail the request if a queue has already
+	 * been allocated to a socket.
+	 */
+	for (qpair_index = start_from;
+	     qpair_index < start_from + qpairs_num;
+	     qpair_index++) {
+		if (qpair_index < first_free)
+			return -EINVAL;
+
+		if (adapter->sk_handles[qpair_index] != NULL)
+			return -EBUSY;
+	}
+
+	/* remember the sk handle for each queue pair */
+	for (qpair_index = start_from;
+	     qpair_index < start_from + qpairs_num;
+	     qpair_index++)
+		adapter->sk_handles[qpair_index] = sk;
+
+	return start_from;
+}
+
+/**
+ * ixgbe_ndo_get_queue_pairs - report the queue pairs owned by a socket
+ * @dev: network device to query
+ * @start_from: set to the lowest queue pair index owned by @sk, or to 0
+ *              when @sk owns none
+ * @qpairs_num: set to the number of queue pairs owned by @sk
+ * @sk: socket whose queue pairs are looked up
+ *
+ * Always returns 0.
+ **/
+static int ixgbe_ndo_get_queue_pairs(struct net_device *dev,
+				     unsigned int *start_from,
+				     unsigned int *qpairs_num,
+				     struct sock *sk)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int qpair_index;
+
+	/* give both outputs a defined value even when nothing matches, so
+	 * the caller never reads an unset *start_from
+	 */
+	*start_from = 0;
+	*qpairs_num = 0;
+
+	for (qpair_index = adapter->num_tx_queues;
+	     qpair_index < MAX_RX_QUEUES;
+	     qpair_index++) {
+		if (adapter->sk_handles[qpair_index] != sk)
+			continue;
+
+		/* the first match is the start of the socket's range */
+		if (*qpairs_num == 0)
+			*start_from = qpair_index;
+		*qpairs_num = *qpairs_num + 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_ndo_return_queue_pairs - release every queue pair owned by a socket
+ * @dev: network device the queue pairs belong to
+ * @sk: socket whose queue pair reservations are dropped
+ *
+ * Clears each sk_handles[] entry that points at @sk. Always returns 0.
+ **/
+static int ixgbe_ndo_return_queue_pairs(struct net_device *dev, struct sock *sk)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int qpair_index;
+
+	/* sk_handles[] is dimensioned by MAX_RX_QUEUES, so bound the walk by
+	 * the same constant the array and ixgbe_ndo_get_queue_pairs() use
+	 */
+	for (qpair_index = adapter->num_tx_queues;
+	     qpair_index < MAX_RX_QUEUES;
+	     qpair_index++) {
+		if (adapter->sk_handles[qpair_index] == sk)
+			adapter->sk_handles[qpair_index] = NULL;
+	}
+
+	return 0;
+}
+
+/* The Rx descriptor region starts at offset 0x1000 and the Tx descriptor
+ * region at offset 0x6000 from the start of PCI BAR 0; each of them is
+ * mapped to user space as a single 4K page. */
+#define RX_DESC_ADDR_OFFSET 0x1000
+#define TX_DESC_ADDR_OFFSET 0x6000
+#define PAGE_SIZE_4K 4096
+
+/**
+ * ixgbe_ndo_qpair_map_region - describe the regions user space may map
+ * @dev: network device being queried
+ * @info: filled in with the BAR 0 size and the two mappable regions
+ *
+ * Reports no system memory regions and two device regions: one 4K page at
+ * RX_DESC_ADDR_OFFSET and one 4K page at TX_DESC_ADDR_OFFSET.
+ *
+ * Always returns 0.
+ **/
+static int
+ixgbe_ndo_qpair_map_region(struct net_device *dev,
+			   struct tpacket_dev_qpair_map_region_info *info)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	/* no need to map system memory to userspace for ixgbe */
+	info->tp_dev_sysm_sz = 0;
+	info->tp_num_sysm_map_regions = 0;
+
+	info->tp_dev_bar_sz = pci_resource_len(adapter->pdev, 0);
+	info->tp_num_map_regions = 2;
+
+	/* Report the 4K granularity that ixgbe_ndo_qpair_page_map() really
+	 * maps with, rather than the architecture's PAGE_SIZE.
+	 */
+	info->regions[0].page_offset = RX_DESC_ADDR_OFFSET;
+	info->regions[0].page_sz = PAGE_SIZE_4K;
+	info->regions[0].page_cnt = 1;
+	info->regions[1].page_offset = TX_DESC_ADDR_OFFSET;
+	info->regions[1].page_sz = PAGE_SIZE_4K;
+	info->regions[1].page_cnt = 1;
+
+	return 0;
+}
+
+/**
+ * ixgbe_ndo_get_device_desc_info - describe the device and its descriptors
+ * @dev: network device being queried
+ * @dev_info: filled in with the PCI identity, NUMA node, queue pair counts
+ *            and the Rx/Tx descriptor sizes and versions
+ *
+ * Always returns 0.
+ **/
+static int ixgbe_ndo_get_device_desc_info(struct net_device *dev,
+					  struct tpacket_dev_info *dev_info)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	/* descriptor layout */
+	dev_info->tp_rxdesc_ver = 1;
+	dev_info->tp_rxdesc_size = sizeof(union ixgbe_adv_rx_desc);
+	dev_info->tp_txdesc_ver = 1;
+	dev_info->tp_txdesc_size = sizeof(union ixgbe_adv_tx_desc);
+
+	/* queue pair accounting: "in use" is the larger of the driver's Rx
+	 * and Tx queue counts
+	 */
+	dev_info->tp_num_inuse_qpairs = max(adapter->num_rx_queues,
+					    adapter->num_tx_queues);
+	dev_info->tp_num_total_qpairs = min(MAX_RX_QUEUES, MAX_TX_QUEUES);
+
+	/* PCI identity and placement */
+	dev_info->tp_numa_node = dev_to_node(&dev->dev);
+	dev_info->tp_revision_id = adapter->hw.revision_id;
+	dev_info->tp_subsystem_vendor_id = adapter->hw.subsystem_vendor_id;
+	dev_info->tp_subsystem_device_id = adapter->hw.subsystem_device_id;
+	dev_info->tp_vendor_id = adapter->hw.vendor_id;
+	dev_info->tp_device_id = adapter->hw.device_id;
+
+	return 0;
+}
+
+/**
+ * ixgbe_ndo_qpair_page_map - populate a user space mapping of the device
+ * @vma: user space mapping to populate
+ * @dev: network device whose BAR 0 pages are being exposed
+ *
+ * Walks @vma in 4K steps. The pages at RX_DESC_ADDR_OFFSET and
+ * TX_DESC_ADDR_OFFSET from vm_start are mapped uncached onto the matching
+ * pages of PCI BAR 0; every other page is mapped onto a shared dummy page
+ * filled with 0xdeadbeef, using the vma's original protection bits.
+ *
+ * Returns 0 on success, -ENOMEM if the dummy page cannot be allocated and
+ * -EAGAIN if any page fails to map.
+ **/
+static int
+ixgbe_ndo_qpair_page_map(struct vm_area_struct *vma, struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	phys_addr_t phy_addr = pci_resource_start(adapter->pdev, 0);
+	unsigned long pfn_rx = (phy_addr + RX_DESC_ADDR_OFFSET) >> PAGE_SHIFT;
+	unsigned long pfn_tx = (phy_addr + TX_DESC_ADDR_OFFSET) >> PAGE_SHIFT;
+	pgprot_t cached_prot, uncached_prot;
+	unsigned long pfn_dummy;
+	unsigned long start;
+	unsigned int i;
+
+	/* NOTE(review): this lazy allocation is not serialized in this
+	 * function, and kzalloc() is assumed to return a page-aligned buffer
+	 * for a 4K request; confirm both against the caller and allocator.
+	 */
+	if (!dummy_page_buf) {
+		dummy_page_buf = kzalloc(PAGE_SIZE_4K, GFP_KERNEL);
+		if (!dummy_page_buf)
+			return -ENOMEM;
+
+		for (i = 0; i < PAGE_SIZE_4K / sizeof(unsigned int); i++)
+			dummy_page_buf[i] = 0xdeadbeef;
+	}
+
+	/* remap_pfn_range() takes a page frame number, so the physical
+	 * address must be shifted down by PAGE_SHIFT, not compared with it
+	 */
+	pfn_dummy = virt_to_phys(dummy_page_buf) >> PAGE_SHIFT;
+
+	cached_prot = vma->vm_page_prot;
+	uncached_prot = pgprot_noncached(cached_prot);
+	vma->vm_page_prot = uncached_prot;
+
+	/* assume the vm_start is 4K aligned address */
+	for (start = vma->vm_start;
+	     start < vma->vm_end;
+	     start += PAGE_SIZE_4K) {
+		unsigned long offset = start - vma->vm_start;
+		pgprot_t prot = uncached_prot;
+		unsigned long pfn;
+
+		if (offset == RX_DESC_ADDR_OFFSET) {
+			pfn = pfn_rx;
+		} else if (offset == TX_DESC_ADDR_OFFSET) {
+			pfn = pfn_tx;
+		} else {
+			/* everything else is backed by ordinary memory */
+			pfn = pfn_dummy;
+			prot = cached_prot;
+		}
+
+		if (remap_pfn_range(vma, start, pfn, PAGE_SIZE_4K, prot))
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_open = ixgbe_open,
.ndo_stop = ixgbe_close,
@@ -7856,6 +8075,12 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_bridge_getlink = ixgbe_ndo_bridge_getlink,
.ndo_dfwd_add_station = ixgbe_fwd_add,
.ndo_dfwd_del_station = ixgbe_fwd_del,
+ .ndo_split_queue_pairs = ixgbe_ndo_split_queue_pairs,
+ .ndo_get_queue_pairs = ixgbe_ndo_get_queue_pairs,
+ .ndo_return_queue_pairs = ixgbe_ndo_return_queue_pairs,
+ .ndo_get_device_qpair_map_region_info = ixgbe_ndo_qpair_map_region,
+ .ndo_get_device_desc_info = ixgbe_ndo_get_device_desc_info,
+ .ndo_direct_qpair_page_map = ixgbe_ndo_qpair_page_map,
};
/**
@@ -8054,7 +8279,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->back = adapter;
adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
- hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+ hw->pci_hw_addr = pci_resource_start(pdev, 0);
+
+ hw->hw_addr = ioremap(hw->pci_hw_addr,
pci_resource_len(pdev, 0));
adapter->io_addr = hw->hw_addr;
if (!hw->hw_addr) {
@@ -8705,6 +8932,7 @@ module_init(ixgbe_init_module);
**/
static void __exit ixgbe_exit_module(void)
{
+ kfree(dummy_page_buf);
#ifdef CONFIG_IXGBE_DCA
dca_unregister_notify(&dca_notifier);
#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index dfd55d8..26e9163 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3022,6 +3022,7 @@ struct ixgbe_mbx_info {
struct ixgbe_hw {
u8 __iomem *hw_addr;
+ phys_addr_t pci_hw_addr;
void *back;
struct ixgbe_mac_info mac;
struct ixgbe_addr_filter_info addr_ctrl;
next prev parent reply other threads:[~2014-10-06 0:07 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-06 0:06 [net-next PATCH v1 1/3] net: sched: af_packet support for direct ring access John Fastabend
2014-10-06 0:07 ` John Fastabend [this message]
2014-10-06 0:07 ` [net-next PATCH v1 3/3] net: packet: Document PACKET_DEV_QPAIR_SPLIT and friends John Fastabend
2014-10-06 0:29 ` [net-next PATCH v1 1/3] net: sched: af_packet support for direct ring access Florian Westphal
2014-10-06 1:09 ` David Miller
2014-10-06 1:18 ` John Fastabend
2014-10-06 1:12 ` John Fastabend
2014-10-06 9:49 ` Daniel Borkmann
2014-10-06 15:01 ` John Fastabend
2014-10-06 16:35 ` Jesper Dangaard Brouer
2014-10-06 17:03 ` Hannes Frederic Sowa
2014-10-06 20:37 ` John Fastabend
2014-10-06 23:26 ` Hannes Frederic Sowa
2014-10-07 18:59 ` Neil Horman
2014-10-08 17:20 ` John Fastabend
2014-10-09 13:36 ` [PATCH] af_packet: Add Doorbell transmit mode to AF_PACKET sockets Neil Horman
2014-10-09 15:01 ` John Fastabend
2014-10-09 16:05 ` Neil Horman
2014-10-06 16:55 ` [net-next PATCH v1 1/3] net: sched: af_packet support for direct ring access Stephen Hemminger
2014-10-06 20:42 ` John Fastabend
2014-10-06 21:42 ` David Miller
2014-10-07 4:25 ` John Fastabend
2014-10-07 4:24 ` Willem de Bruijn
2014-10-07 9:27 ` David Laight
2014-10-07 15:43 ` David Miller
2014-10-07 15:59 ` David Laight
2014-10-07 16:08 ` David Miller
2014-10-07 15:21 ` Zhou, Danny
2014-10-07 15:46 ` Willem de Bruijn
2014-10-07 15:55 ` John Fastabend
2014-10-07 16:06 ` Zhou, Danny
2014-10-07 16:05 ` David Miller
2014-10-10 3:49 ` Zhou, Danny
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141006000705.32055.35262.stgit@nitbit.x32 \
--to=john.fastabend@gmail.com \
--cc=amirv@mellanox.com \
--cc=danny.zhou@intel.com \
--cc=dborkman@redhat.com \
--cc=eric.dumazet@gmail.com \
--cc=fw@strlen.de \
--cc=gerlitz.or@gmail.com \
--cc=hannes@stressinduktion.org \
--cc=john.ronciak@intel.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.