From: Simon Horman <horms@verge.net.au>
To: e1000-devel@lists.sourceforge.net, netdev@vger.kernel.org
Cc: Arnd Bergmann <arndbergmann@googlemail.com>,
Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Subject: [rfc 4/4] igb: expose 82576 bandiwidth allocation
Date: Thu, 05 Nov 2009 11:58:51 +1100 [thread overview]
Message-ID: <20091105010628.148945886@vergenet.net> (raw)
In-Reply-To: 20091105005847.941190065@vergenet.net
[-- Attachment #1: igb-ba.patch --]
[-- Type: text/plain, Size: 12335 bytes --]
The 82576 has support for bandwidth allocation to VFs.
Contrary to the documentation in the 82576 datasheet v2.41 this
appears to work as follows:
* The ratio supplied is always proportional to 1Gbit/s,
regardless of the link speed.
* The ratio supplied is an upper-bound on bandwidth available
to the VF, not a minimum guarantee.
This patch exposes bandwidth control to userspace through a simple
per-device (PF) sysfs file, bandwidth_allocation.
* The file contains a whitespace delimited list of values, one per VF.
* The first value corresponds to the first VF and so on.
* Valid values are integers from 0 to 1000
* A value of 0 indicates that bandwidth_allocation is disabled.
* Other values indicate the allocated bandwidth, in 1/1000ths of a gigabit/s
e.g. The following for a PF with 4 VFs allocates ~20Mbit/s to VF 1,
~100Mbit/s to VF 2, and leaves the other 2 VFs with no allocation.
echo "20 100 0 0" > /sys/class/net/eth3/device/bandwidth_allocation
This interface is intended to allow testing of the hardware feature.
There are ongoing discussions about how to expose this feature
to user-space in a more generic way.
Signed-off-by: Simon Horman <horms@verge.net.au>
Index: net-next-2.6/drivers/net/igb/igb_main.c
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb_main.c 2009-11-05 04:55:06.000000000 +0900
+++ net-next-2.6/drivers/net/igb/igb_main.c 2009-11-05 05:12:54.000000000 +0900
@@ -47,6 +47,9 @@
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
+#ifdef CONFIG_PCI_IOV
+#include <linux/ctype.h>
+#endif
#include "igb.h"
#define DRV_VERSION "1.3.16-k2"
@@ -152,6 +155,15 @@ static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
"per physical function");
+
+static ssize_t igb_set_bandwidth_allocation(struct device *,
+ struct device_attribute *,
+ const char *, size_t);
+static ssize_t igb_show_bandwidth_allocation(struct device *,
+ struct device_attribute *,
+ char *);
+DEVICE_ATTR(bandwidth_allocation, S_IRUGO | S_IWUSR,
+ igb_show_bandwidth_allocation, igb_set_bandwidth_allocation);
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
@@ -1754,7 +1766,18 @@ static void __devinit igb_init_vf(struct
}
if (pci_enable_sriov(pdev, vfn))
- goto err;
+ goto err_kfree;
+
+ if (device_create_file(&pdev->dev, &dev_attr_bandwidth_allocation))
+ goto err_sriov;
+
+	/* kcalloc() hands back zeroed memory, so every VF starts out with
+	 * an allocation of 0 (bandwidth allocation disabled) and no
+	 * separate memset() is required.
+	 */
+	adapter->bandwidth_allocation = kcalloc(vfn,
+				sizeof(*adapter->bandwidth_allocation),
+				GFP_KERNEL);
+	if (!adapter->bandwidth_allocation)
+		goto err_file;
+
+ spin_lock_init(&adapter->bandwidth_allocation_lock);
dev_info(&pdev->dev, "%d vfs allocated\n", vfn);
for (i = 0; i < vfn; i++) {
@@ -1765,7 +1788,11 @@ static void __devinit igb_init_vf(struct
adapter->vfs_allocated_count = vfn;
return;
-err:
+err_file:
+ device_remove_file(&pdev->dev, &dev_attr_bandwidth_allocation);
+err_sriov:
+ pci_disable_sriov(pdev);
+err_kfree:
kfree(adapter->vf_data);
adapter->vf_data = NULL;
#endif /* CONFIG_PCI_IOV */
@@ -1781,6 +1808,7 @@ err:
static void igb_cleanup_vf(struct igb_adapter * adapter)
{
#ifdef CONFIG_PCI_IOV
+ struct pci_dev *pdev = adapter->pdev;
struct e1000_hw *hw = &adapter->hw;
if (!adapter->vf_data)
@@ -1797,6 +1825,9 @@ static void igb_cleanup_vf(struct igb_ad
wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
msleep(100);
dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+
+ device_remove_file(&pdev->dev, &dev_attr_bandwidth_allocation);
+ kfree(adapter->bandwidth_allocation);
#endif
}
@@ -2088,6 +2119,123 @@ void igb_configure_tx_ring(struct igb_ad
wr32(E1000_TXDCTL(reg_idx), txdctl);
}
+#ifdef CONFIG_PCI_IOV
+/* Turn off Tx bandwidth allocation for one VF: write the VF index to
+ * the VMBASEL select register, then write 0 to the VMBAC config
+ * register (which also leaves E1000_VMBAC_RC_ENA clear).
+ */
+static void igb_disable_bandwidth_allocation_vf(struct e1000_hw *hw, int vf)
+{
+ wr32(E1000_VMBASEL, vf);
+ wr32(E1000_VMBAC, 0);
+}
+
+/* Disable Tx bandwidth allocation on each of the
+ * adapter->vfs_allocated_count VFs, one VF at a time.
+ */
+static void igb_disable_bandwidth_allocation(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int vf = 0;
+
+	while (vf < adapter->vfs_allocated_count) {
+		igb_disable_bandwidth_allocation_vf(hw, vf);
+		vf++;
+	}
+}
+
+/**
+ * igb_enable_bandwidth_allocation_vf - program a Tx rate limit for one VF
+ * @hw: pointer to the HW structure
+ * @vf: index of the VF, written to VMBASEL to select it
+ * @allocation: allowed bandwidth in 1/1000ths of 1Gbit/s
+ *
+ * Converts @allocation into the fixed-point rate factor expected by
+ * VMBAC and writes it together with the rate-control enable bit.
+ * An @allocation of 0 cannot be expressed as a factor and is treated
+ * as a request to disable allocation for the VF.
+ */
+static void igb_enable_bandwidth_allocation_vf(struct e1000_hw *hw, int vf,
+					       unsigned int allocation)
+{
+	u32 rq;
+
+	/* Never divide by zero; 0 means "no allocation" */
+	if (!allocation) {
+		igb_disable_bandwidth_allocation_vf(hw, vf);
+		return;
+	}
+
+	/* Allocation is expressed as 1000ths of link speed [+]
+	 *
+	 * rq is calculated as 1 / (allocation / 1000) = 1000 / allocation
+	 *
+	 * E1000_VMBAC_RF_INT_SHIFT and E1000_VMBAC_RF_MASK are used
+	 * to marshal the result into the desired format: 23 bits of
+	 * which 14 are to the right of the binary point.
+	 *
+	 * [+] According to the 82576 v2.41 datasheet rq should
+	 *     be a ratio of the link speed, however, empirically
+	 *     it appears to always be a ratio of 1Gbit/s,
+	 *     even when the link is 100Mbit/s.
+	 */
+	rq = (1000 << E1000_VMBAC_RF_INT_SHIFT) / allocation;
+
+	/* Only 9 bits are left for the integer part, so the factor for
+	 * an allocation of 1 (1000) does not fit.  Masking would silently
+	 * wrap it to an unrelated factor; saturate at the largest factor
+	 * the field can hold (the lowest programmable rate) instead.
+	 */
+	if (rq > E1000_VMBAC_RF_MASK)
+		rq = E1000_VMBAC_RF_MASK;
+
+	wr32(E1000_VMBASEL, vf);
+	wr32(E1000_VMBAC, rq | E1000_VMBAC_RC_ENA);
+}
+
+/* Program Tx bandwidth allocation for every allocated VF from
+ * adapter->bandwidth_allocation[], or disable it for all VFs when no
+ * table is present or the link speed is neither 100Mbit/s nor 1Gbit/s.
+ *
+ * Both call sites visible in this patch invoke this function with
+ * adapter->bandwidth_allocation_lock held.
+ */
+static void igb_enable_bandwidth_allocation(struct igb_adapter *adapter)
+{
+ u32 i, reg;
+ struct e1000_hw *hw = &adapter->hw;
+
+ /* Only enable bandwidth_allocation if it has been set
+ * and the link speed is 100Mbit/s or 1Gbit/s */
+ if (!adapter->bandwidth_allocation ||
+ (adapter->link_speed != SPEED_100 &&
+ adapter->link_speed != SPEED_1000)) {
+ igb_disable_bandwidth_allocation(adapter);
+ return;
+ }
+
+ for (i = 0; i < adapter->vfs_allocated_count; i++) {
+ /* NOTE(review): both helpers called below write VMBASEL
+ * themselves, so this select looks redundant - confirm. */
+ wr32(E1000_VMBASEL, i);
+ if (adapter->bandwidth_allocation[i])
+ igb_enable_bandwidth_allocation_vf(hw, i,
+ adapter->bandwidth_allocation[i]);
+ else
+ igb_disable_bandwidth_allocation_vf(hw, i);
+
+ /* XXX:
+ *
+ * The 82576 datasheet, section 4.5.11.1.5.1 "Configuring Tx
+ * Bandwidth to VMs" states that the desired setting is:
+ * VMBAMMW.MMW_SIZE = 16 * MSS
+ *
+ * But isn't MSS a property of skbs that are using tso
+ * rather than adapters?
+ *
+ * If so, should we use the maximum value here? */
+ /* XXX: Should this go inside or outside the for loop ? */
+ reg = 64 * 16;
+ wr32(E1000_VMBAMMW, reg);
+ }
+}
+#endif
+
+/* Called from the watchdog while the link is up: acknowledge a
+ * pending link speed change reported in VMBACS and then (re)apply the
+ * per-VF bandwidth allocation.  Does nothing when no VF data has been
+ * set up or when CONFIG_PCI_IOV is not enabled.
+ */
+static void igb_check_bandwidth_allocation(struct igb_adapter *adapter)
+{
+#ifdef CONFIG_PCI_IOV
+	struct e1000_hw *hw = &adapter->hw;
+	u32 status;
+
+	if (!adapter->vf_data)
+		return;
+
+	/* Sequence from the 82576 datasheet, section 4.5.11.1.5.2
+	 * "Link Speed Change Procedure".  In practice SPEED_CHG has
+	 * never been observed to be set, so the branch below appears
+	 * to be unreachable on real hardware.
+	 */
+	status = rd32(E1000_VMBACS);
+	if (status & E1000_VMBACS_SPEED_CHG) {
+		int was_set = !!(status & E1000_VMBACS_VMBA_SET);
+
+		if (was_set)
+			igb_disable_bandwidth_allocation(adapter);
+
+		/* Write the status back with SPEED_CHG cleared */
+		wr32(E1000_VMBACS, status & ~E1000_VMBACS_SPEED_CHG);
+
+		/* Allocation was just disabled; do not re-enable it now */
+		if (was_set)
+			return;
+	}
+
+	spin_lock(&adapter->bandwidth_allocation_lock);
+	igb_enable_bandwidth_allocation(adapter);
+	spin_unlock(&adapter->bandwidth_allocation_lock);
+#endif
+}
+
/**
* igb_configure_tx - Configure transmit Unit after Reset
* @adapter: board private structure
@@ -2969,6 +3117,8 @@ static void igb_watchdog_task(struct wor
break;
}
+ igb_check_bandwidth_allocation(adapter);
+
netif_carrier_on(netdev);
igb_ping_all_vfs(adapter);
@@ -5854,4 +6004,101 @@ static void igb_vmm_control(struct igb_a
}
}
+#ifdef CONFIG_PCI_IOV
+/**
+ * igb_show_bandwidth_allocation - sysfs show for bandwidth_allocation
+ * @dev: device the attribute belongs to
+ * @attr: the bandwidth_allocation attribute (unused)
+ * @buf: output buffer, treated as PAGE_SIZE bytes long
+ *
+ * Emits the current allocation of every VF as a single line of space
+ * separated unsigned values, first VF first, terminated by a newline.
+ *
+ * Returns the number of bytes placed in @buf, or -ENOENT if no VF data
+ * has been set up.
+ */
+static ssize_t igb_show_bandwidth_allocation(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	ssize_t len = 0;
+	unsigned int i;
+
+	if (!adapter->vf_data)
+		return -ENOENT;
+
+	/* Track the write offset rather than re-scanning the buffer with
+	 * strlen()/strcat(), and bound every write.  The values are
+	 * unsigned, hence %u.
+	 */
+	for (i = 0; i < adapter->vfs_allocated_count; i++)
+		len += scnprintf(buf + len, PAGE_SIZE - len, "%s%u",
+				 i ? " " : "",
+				 adapter->bandwidth_allocation[i]);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+
+	return len;
+}
+
+/* simple_strtoul() wrapper that skips leading whitespace.
+ *
+ * When no number could be parsed, *endp is set back to the original
+ * @cp (before any whitespace was skipped), so a caller can detect
+ * "nothing parsed" by comparing *endp with the pointer it passed in.
+ */
+static unsigned long igb_strtoul(const char *cp, char **endp, unsigned int base)
+{
+	const char *start;
+	unsigned long value;
+
+	for (start = cp; isspace(*start); start++)
+		;
+
+	value = simple_strtoul(start, endp, base);
+	if (*endp == start)
+		*endp = (char *)cp;
+
+	return value;
+}
+
+/**
+ * igb_set_bandwidth_allocation - sysfs store for bandwidth_allocation
+ * @dev: device the attribute belongs to
+ * @attr: the bandwidth_allocation attribute (unused)
+ * @buf: whitespace delimited list of values, one per VF; treated as a
+ *       NUL-terminated string
+ * @count: number of bytes in @buf
+ *
+ * Each value is the bandwidth allowed to the corresponding VF in
+ * 1/1000ths of a gigabit/s; 0 disables allocation for that VF.  The
+ * list is parsed into a scratch array first so that the live table is
+ * only replaced, and the hardware only reprogrammed, when the whole
+ * input is valid.
+ *
+ * Returns @count on success, -ENOENT if no VF data has been set up,
+ * -ENOMEM if the scratch array cannot be allocated, or -EINVAL if
+ * @buf does not hold exactly one value in the range 0-1000 per VF.
+ */
+static ssize_t igb_set_bandwidth_allocation(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	unsigned int i, nvfs;
+	size_t len;
+	ssize_t status = -EINVAL;
+	unsigned int *values;
+	unsigned long x;
+	const char *p;
+	char *next_p;
+
+	if (!adapter->vf_data)
+		return -ENOENT;
+
+	nvfs = adapter->vfs_allocated_count;
+	len = nvfs * sizeof(*values);
+
+	values = kmalloc(len, GFP_KERNEL);
+	if (!values)
+		return -ENOMEM;
+
+	p = buf;
+	for (i = 0; i < nvfs; i++) {
+		x = igb_strtoul(p, &next_p, 10);
+		if (p == next_p) {
+			dev_err(dev, "not enough values\n");
+			goto err;
+		}
+		if (x > 1000) {
+			dev_err(dev, "value is too large\n");
+			goto err;
+		}
+		values[i] = x;
+		p = next_p;
+	}
+
+	/* Nothing but whitespace may follow the last value.  Checking
+	 * for the terminator also rejects non-numeric rubbish, which a
+	 * further igb_strtoul() call cannot tell apart from end of input.
+	 */
+	while (isspace(*p))
+		p++;
+	if (*p != '\0') {
+		dev_err(dev, "trailing rubbish\n");
+		goto err;
+	}
+
+	spin_lock(&adapter->bandwidth_allocation_lock);
+	memcpy(adapter->bandwidth_allocation, values, len);
+	igb_enable_bandwidth_allocation(adapter);
+	spin_unlock(&adapter->bandwidth_allocation_lock);
+
+	status = count;
+err:
+	kfree(values);
+	return status;
+}
+#endif /* CONFIG_PCI_IOV */
/* igb_main.c */
Index: net-next-2.6/drivers/net/igb/e1000_regs.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/e1000_regs.h 2009-11-05 03:07:08.000000000 +0900
+++ net-next-2.6/drivers/net/igb/e1000_regs.h 2009-11-05 05:01:35.000000000 +0900
@@ -308,6 +308,16 @@
#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) /* VLAN Virtual Machine
* Filter - RW */
+/* Tx Bandwidth Allocation to VM Registers */
+#define E1000_VMBACS 0x03600 /* VM Bandwidth Allocation
+ * Control & Status - RW */
+#define E1000_VMBAMMW 0x03670 /* VM Bandwidth Allocation
+ * Max Memory Window - RW */
+#define E1000_VMBASEL 0x03604 /* VM Bandwidth Allocation
+ * Select - RW */
+#define E1000_VMBAC 0x03608 /* VM Bandwidth Allocation
+ * Config - RW */
+
#define wr32(reg, value) (writel(value, hw->hw_addr + reg))
#define rd32(reg) (readl(hw->hw_addr + reg))
#define wrfl() ((void)rd32(E1000_STATUS))
Index: net-next-2.6/drivers/net/igb/e1000_defines.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/e1000_defines.h 2009-11-05 03:07:08.000000000 +0900
+++ net-next-2.6/drivers/net/igb/e1000_defines.h 2009-11-05 05:01:35.000000000 +0900
@@ -711,4 +711,13 @@
#define E1000_VFTA_ENTRY_MASK 0x7F
#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F
+/* VM Bandwidth Allocation Control & Status */
+#define E1000_VMBACS_VMBA_SET 0x00001000
+#define E1000_VMBACS_SPEED_CHG 0x80000000
+
+/* VM Bandwidth Allocation Config */
+#define E1000_VMBAC_RF_INT_SHIFT 14
+#define E1000_VMBAC_RF_MASK ((1<<23)-1) /* RF_DEC and RF_INT */
+#define E1000_VMBAC_RC_ENA 0x80000000
+
#endif
Index: net-next-2.6/drivers/net/igb/igb.h
===================================================================
--- net-next-2.6.orig/drivers/net/igb/igb.h 2009-11-05 03:07:08.000000000 +0900
+++ net-next-2.6/drivers/net/igb/igb.h 2009-11-05 05:01:35.000000000 +0900
@@ -315,6 +315,10 @@ struct igb_adapter {
u16 rx_ring_count;
unsigned int vfs_allocated_count;
struct vf_data_storage *vf_data;
+#ifdef CONFIG_PCI_IOV
+ unsigned int *bandwidth_allocation;
+ spinlock_t bandwidth_allocation_lock;
+#endif
};
#define IGB_FLAG_HAS_MSI (1 << 0)
------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day
trial. Simplify your report design, integration and deployment - and focus on
what you do best, core application coding. Discover what's new with
Crystal Reports now. http://p.sf.net/sfu/bobj-july
next prev parent reply other threads:[~2009-11-05 0:58 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-11-05 0:58 [rfc 0/4] igb: bandwidth allocation Simon Horman
2009-11-05 0:58 ` [rfc 1/4] igb: Add igb_cleanup_vf() Simon Horman
2009-11-05 0:58 ` [rfc 2/4] igb: Initialise adapter->vfs_allocated_count in igb_init_vf() Simon Horman
2009-11-05 0:58 ` [rfc 3/4] igb: Common error path in igb_init_vfs() Simon Horman
2009-11-05 0:58 ` Simon Horman [this message]
2009-11-05 23:00 ` [rfc 4/4] igb: expose 82576 bandiwidth allocation Alexander Duyck
2009-11-05 23:30 ` Simon Horman
2009-11-05 23:42 ` Alexander Duyck
2009-11-06 3:57 ` Simon Horman
2009-11-05 1:46 ` [rfc 0/4] igb: bandwidth allocation Jeff Kirsher
2009-11-05 2:21 ` Simon Horman
2009-11-14 8:01 ` Jeff Kirsher
2009-11-25 6:31 ` Simon Horman
2009-11-05 12:09 ` Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091105010628.148945886@vergenet.net \
--to=horms@verge.net.au \
--cc=arndbergmann@googlemail.com \
--cc=e1000-devel@lists.sourceforge.net \
--cc=jeffrey.t.kirsher@intel.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).