* [PATCHv3 NEXT 2/5] qlcnic: disable tx timeout recovery
From: amit.salecha @ 2010-07-14 6:33 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Amit Kumar Salecha
In-Reply-To: <1279089215-23231-1-git-send-email-amit.salecha@qlogic.com>
From: Amit Kumar Salecha <amit.salecha@qlogic.com>
Disable tx timeout recovery, if auto_fw_reset is disable
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic_main.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 4d18313..0fef8c3 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2581,7 +2581,8 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
if (adapter->need_fw_reset)
goto detach;
- if (adapter->reset_context) {
+ if (adapter->reset_context &&
+ auto_fw_reset == AUTO_FW_RESET_ENABLED) {
qlcnic_reset_hw_context(adapter);
adapter->netdev->trans_start = jiffies;
}
@@ -2594,7 +2595,8 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
qlcnic_dev_request_reset(adapter);
- clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state);
+ if ((auto_fw_reset == AUTO_FW_RESET_ENABLED))
+ clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state);
dev_info(&netdev->dev, "firmware hang detected\n");
--
1.6.0.2
^ permalink raw reply related
* [PATCHv3 NEXT 4/5] qlcnic: aer support
From: amit.salecha @ 2010-07-14 6:33 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Sucheta Chakraborty, Amit Kumar Salecha
In-Reply-To: <1279089215-23231-1-git-send-email-amit.salecha@qlogic.com>
From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Pci error recovery support added.
Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic.h | 1 +
drivers/net/qlcnic/qlcnic_main.c | 137 +++++++++++++++++++++++++++++++++++++-
2 files changed, 137 insertions(+), 1 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 02a50e6..0644642 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -911,6 +911,7 @@ struct qlcnic_mac_req {
#define __QLCNIC_DEV_UP 1
#define __QLCNIC_RESETTING 2
#define __QLCNIC_START_FW 4
+#define __QLCNIC_AER 5
#define QLCNIC_INTERRUPT_TEST 1
#define QLCNIC_LOOPBACK_TEST 2
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 0995f90..d5c94a3 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -34,6 +34,7 @@
#include <linux/ipv6.h>
#include <linux/inetdevice.h>
#include <linux/sysfs.h>
+#include <linux/aer.h>
MODULE_DESCRIPTION("QLogic 1/10 GbE Converged/Intelligent Ethernet Driver");
MODULE_LICENSE("GPL");
@@ -1306,6 +1307,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_disable_pdev;
pci_set_master(pdev);
+ pci_enable_pcie_error_reporting(pdev);
netdev = alloc_etherdev(sizeof(struct qlcnic_adapter));
if (!netdev) {
@@ -1437,6 +1439,7 @@ static void __devexit qlcnic_remove(struct pci_dev *pdev)
qlcnic_release_firmware(adapter);
+ pci_disable_pcie_error_reporting(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
@@ -2521,6 +2524,9 @@ static void
qlcnic_schedule_work(struct qlcnic_adapter *adapter,
work_func_t func, int delay)
{
+ if (test_bit(__QLCNIC_AER, &adapter->state))
+ return;
+
INIT_DELAYED_WORK(&adapter->fw_work, func);
schedule_delayed_work(&adapter->fw_work, round_jiffies_relative(delay));
}
@@ -2631,6 +2637,128 @@ reschedule:
qlcnic_schedule_work(adapter, qlcnic_fw_poll_work, FW_POLL_DELAY);
}
+static int qlcnic_is_first_func(struct pci_dev *pdev)
+{
+ struct pci_dev *oth_pdev;
+ int val = pdev->devfn;
+
+ while (val-- > 0) {
+ oth_pdev = pci_get_domain_bus_and_slot(pci_domain_nr
+ (pdev->bus), pdev->bus->number,
+ PCI_DEVFN(PCI_SLOT(pdev->devfn), val));
+
+ if (oth_pdev && (oth_pdev->current_state != PCI_D3cold))
+ return 0;
+ }
+ return 1;
+}
+
+static int qlcnic_attach_func(struct pci_dev *pdev)
+{
+ int err, first_func;
+ struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+
+ pdev->error_state = pci_channel_io_normal;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+
+ first_func = qlcnic_is_first_func(pdev);
+
+ if (qlcnic_api_lock(adapter))
+ return -EINVAL;
+
+ if (first_func) {
+ adapter->need_fw_reset = 1;
+ set_bit(__QLCNIC_START_FW, &adapter->state);
+ QLCWR32(adapter, QLCNIC_CRB_DEV_STATE, QLCNIC_DEV_INITIALIZING);
+ QLCDB(adapter, DRV, "Restarting fw\n");
+ }
+ qlcnic_api_unlock(adapter);
+
+ err = adapter->nic_ops->start_firmware(adapter);
+ if (err)
+ return err;
+
+ qlcnic_clr_drv_state(adapter);
+ qlcnic_setup_intr(adapter);
+
+ if (netif_running(netdev)) {
+ err = qlcnic_attach(adapter);
+ if (err) {
+ qlcnic_clr_all_drv_state(adapter);
+ clear_bit(__QLCNIC_AER, &adapter->state);
+ netif_device_attach(netdev);
+ return err;
+ }
+
+ err = qlcnic_up(adapter, netdev);
+ if (err)
+ goto done;
+
+ qlcnic_config_indev_addr(netdev, NETDEV_UP);
+ }
+ done:
+ netif_device_attach(netdev);
+ return err;
+}
+
+static pci_ers_result_t qlcnic_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ if (state == pci_channel_io_normal)
+ return PCI_ERS_RESULT_RECOVERED;
+
+ set_bit(__QLCNIC_AER, &adapter->state);
+ netif_device_detach(netdev);
+
+ cancel_delayed_work_sync(&adapter->fw_work);
+
+ if (netif_running(netdev))
+ qlcnic_down(adapter, netdev);
+
+ qlcnic_detach(adapter);
+ qlcnic_teardown_intr(adapter);
+
+ clear_bit(__QLCNIC_RESETTING, &adapter->state);
+
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t qlcnic_io_slot_reset(struct pci_dev *pdev)
+{
+ return qlcnic_attach_func(pdev) ? PCI_ERS_RESULT_DISCONNECT :
+ PCI_ERS_RESULT_RECOVERED;
+}
+
+static void qlcnic_io_resume(struct pci_dev *pdev)
+{
+ struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+
+ if (QLCRD32(adapter, QLCNIC_CRB_DEV_STATE) == QLCNIC_DEV_READY &&
+ test_and_clear_bit(__QLCNIC_AER, &adapter->state))
+ qlcnic_schedule_work(adapter, qlcnic_fw_poll_work,
+ FW_POLL_DELAY);
+}
+
+
static int
qlcnicvf_start_firmware(struct qlcnic_adapter *adapter)
{
@@ -3436,6 +3564,11 @@ static void
qlcnic_config_indev_addr(struct net_device *dev, unsigned long event)
{ }
#endif
+static struct pci_error_handlers qlcnic_err_handler = {
+ .error_detected = qlcnic_io_error_detected,
+ .slot_reset = qlcnic_io_slot_reset,
+ .resume = qlcnic_io_resume,
+};
static struct pci_driver qlcnic_driver = {
.name = qlcnic_driver_name,
@@ -3446,7 +3579,9 @@ static struct pci_driver qlcnic_driver = {
.suspend = qlcnic_suspend,
.resume = qlcnic_resume,
#endif
- .shutdown = qlcnic_shutdown
+ .shutdown = qlcnic_shutdown,
+ .err_handler = &qlcnic_err_handler
+
};
static int __init qlcnic_init_module(void)
--
1.6.0.2
^ permalink raw reply related
* [PATCHv3 NEXT 3/5] qlcnic: fix netdev notifier in error path
From: amit.salecha @ 2010-07-14 6:33 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Amit Kumar Salecha
In-Reply-To: <1279089215-23231-1-git-send-email-amit.salecha@qlogic.com>
From: Amit Kumar Salecha <amit.salecha@qlogic.com>
netdev notifier are not unregistered if pci_register_driver fails.
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic_main.c | 10 +++++++++-
1 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 0fef8c3..0995f90 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -3451,6 +3451,7 @@ static struct pci_driver qlcnic_driver = {
static int __init qlcnic_init_module(void)
{
+ int ret;
printk(KERN_INFO "%s\n", qlcnic_driver_string);
@@ -3459,8 +3460,15 @@ static int __init qlcnic_init_module(void)
register_inetaddr_notifier(&qlcnic_inetaddr_cb);
#endif
+ ret = pci_register_driver(&qlcnic_driver);
+ if (ret) {
+#ifdef CONFIG_INET
+ unregister_inetaddr_notifier(&qlcnic_inetaddr_cb);
+ unregister_netdevice_notifier(&qlcnic_netdev_cb);
+#endif
+ }
- return pci_register_driver(&qlcnic_driver);
+ return ret;
}
module_init(qlcnic_init_module);
--
1.6.0.2
^ permalink raw reply related
* [PATCHv3 NEXT 0/5]qlcnic: aer state
From: amit.salecha @ 2010-07-14 6:33 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman
Hi
Sending revised patches based upon your comment.
I was in under impression that using space after tab is illegal.
I like this type of indentation and will take care in future.
-Amit
^ permalink raw reply
* [PATCHv3 NEXT 1/5] qlcnic: fix pause params setting
From: amit.salecha @ 2010-07-14 6:33 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Rajesh Borundia, Amit Kumar Salecha
In-Reply-To: <1279089215-23231-1-git-send-email-amit.salecha@qlogic.com>
From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Turning off rx pause param and autoneg param is not supported so
return error in that case.
Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic_ethtool.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index baf5a52..7d6558e 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -578,8 +578,12 @@ qlcnic_set_pauseparam(struct net_device *netdev,
}
QLCWR32(adapter, QLCNIC_NIU_GB_PAUSE_CTL, val);
} else if (adapter->ahw.port_type == QLCNIC_XGBE) {
+ if (!pause->rx_pause || pause->autoneg)
+ return -EOPNOTSUPP;
+
if ((port < 0) || (port > QLCNIC_NIU_MAX_XG_PORTS))
return -EIO;
+
val = QLCRD32(adapter, QLCNIC_NIU_XG_PAUSE_CTL);
if (port == 0) {
if (pause->tx_pause)
--
1.6.0.2
^ permalink raw reply related
* Re: [PATCHv2 NEXT 5/5] qlcnic: restore NPAR config data after recovery
From: David Miller @ 2010-07-14 6:10 UTC (permalink / raw)
To: amit.salecha; +Cc: netdev, ameen.rahman, anirban.chakraborty, rajesh.borundia
In-Reply-To: <1279086893-22523-6-git-send-email-amit.salecha@qlogic.com>
From: amit.salecha@qlogic.com
Date: Tue, 13 Jul 2010 22:54:53 -0700
> + if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)
> + || !adapter->need_fw_reset)
> + return 0;
There is more improper indentation of a conditional here.
It should read:
if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
!adapter->need_fw_reset)
^ permalink raw reply
* Re: [PATCHv2 NEXT 2/5] qlcnic: disable tx timeout recovery
From: David Miller @ 2010-07-14 6:09 UTC (permalink / raw)
To: amit.salecha; +Cc: netdev, ameen.rahman
In-Reply-To: <1279086893-22523-3-git-send-email-amit.salecha@qlogic.com>
From: amit.salecha@qlogic.com
Date: Tue, 13 Jul 2010 22:54:50 -0700
>
> - if (adapter->reset_context) {
> + if (adapter->reset_context &&
> + auto_fw_reset == AUTO_FW_RESET_ENABLED) {
This new second line is improperly indented.
"auto_fw_reset" should line up with the first character
after the openning "(" on the previous line.
^ permalink raw reply
* Re: [PATCHv2 NEXT 1/5] qlcnic: fix pause params setting
From: David Miller @ 2010-07-14 6:09 UTC (permalink / raw)
To: amit.salecha; +Cc: netdev, ameen.rahman, rajesh.borundia
In-Reply-To: <1279086893-22523-2-git-send-email-amit.salecha@qlogic.com>
From: amit.salecha@qlogic.com
Date: Tue, 13 Jul 2010 22:54:49 -0700
> From: Rajesh Borundia <rajesh.borundia@qlogic.com>
>
> Turning off rx pause param and autoneg param is not supported so
> return error in that case.
>
> Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
> Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
> ---
> drivers/net/qlcnic/qlcnic_ethtool.c | 5 +++++
> 1 files changed, 5 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
> index baf5a52..8599993 100644
> --- a/drivers/net/qlcnic/qlcnic_ethtool.c
> +++ b/drivers/net/qlcnic/qlcnic_ethtool.c
> @@ -578,8 +578,13 @@ qlcnic_set_pauseparam(struct net_device *netdev,
> }
> QLCWR32(adapter, QLCNIC_NIU_GB_PAUSE_CTL, val);
> } else if (adapter->ahw.port_type == QLCNIC_XGBE) {
> +
> + if (!pause->rx_pause || pause->autoneg)
> + return -EOPNOTSUPP;
There is no reason to add an empty line at the beginning of this
code block, please remove it.
^ permalink raw reply
* [PATCHv2 NEXT 5/5] qlcnic: restore NPAR config data after recovery
From: amit.salecha @ 2010-07-14 5:54 UTC (permalink / raw)
To: davem
Cc: netdev, ameen.rahman, Anirban Chakraborty, Rajesh Borundia,
Amit Kumar Salecha
In-Reply-To: <1279086893-22523-1-git-send-email-amit.salecha@qlogic.com>
From: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
o NPAR configuration which is programmed in fw, need to
restore after fw recovery.
o Update version to 5.0.7
Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic.h | 7 ++-
drivers/net/qlcnic/qlcnic_ctx.c | 2 +
drivers/net/qlcnic/qlcnic_main.c | 86 +++++++++++++++++++++++++++++---------
3 files changed, 72 insertions(+), 23 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 0644642..e189477 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -51,8 +51,8 @@
#define _QLCNIC_LINUX_MAJOR 5
#define _QLCNIC_LINUX_MINOR 0
-#define _QLCNIC_LINUX_SUBVERSION 6
-#define QLCNIC_LINUX_VERSIONID "5.0.6"
+#define _QLCNIC_LINUX_SUBVERSION 7
+#define QLCNIC_LINUX_VERSIONID "5.0.7"
#define QLCNIC_DRV_IDC_VER 0x01
#define QLCNIC_VERSION_CODE(a, b, c) (((a) << 24) + ((b) << 16) + (c))
@@ -949,7 +949,6 @@ struct qlcnic_adapter {
u8 has_link_events;
u8 fw_type;
u16 tx_context_id;
- u16 mtu;
u16 is_up;
u16 link_speed;
@@ -1044,6 +1043,8 @@ struct qlcnic_pci_info {
struct qlcnic_npar_info {
u16 vlan_id;
+ u16 min_bw;
+ u16 max_bw;
u8 phy_port;
u8 type;
u8 active;
diff --git a/drivers/net/qlcnic/qlcnic_ctx.c b/drivers/net/qlcnic/qlcnic_ctx.c
index cdd44b4..cc5d861 100644
--- a/drivers/net/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/qlcnic/qlcnic_ctx.c
@@ -636,6 +636,8 @@ int qlcnic_get_nic_info(struct qlcnic_adapter *adapter,
QLCNIC_CDRP_CMD_GET_NIC_INFO);
if (err == QLCNIC_RCODE_SUCCESS) {
+ npar_info->pci_func = le16_to_cpu(nic_info->pci_func);
+ npar_info->op_mode = le16_to_cpu(nic_info->op_mode);
npar_info->phys_port = le16_to_cpu(nic_info->phys_port);
npar_info->switch_mode = le16_to_cpu(nic_info->switch_mode);
npar_info->max_tx_ques = le16_to_cpu(nic_info->max_tx_ques);
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 462cb6b..dc0b22b 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -507,6 +507,8 @@ qlcnic_init_pci_info(struct qlcnic_adapter *adapter)
adapter->npars[pfn].type = pci_info[i].type;
adapter->npars[pfn].phy_port = pci_info[i].default_port;
adapter->npars[pfn].mac_learning = DEFAULT_MAC_LEARN;
+ adapter->npars[pfn].min_bw = pci_info[i].tx_min_bw;
+ adapter->npars[pfn].max_bw = pci_info[i].tx_max_bw;
}
for (i = 0; i < QLCNIC_NIU_MAX_XG_PORTS; i++)
@@ -771,6 +773,50 @@ qlcnic_check_options(struct qlcnic_adapter *adapter)
}
static int
+qlcnic_reset_npar_config(struct qlcnic_adapter *adapter)
+{
+ int i, err = 0;
+ struct qlcnic_npar_info *npar;
+ struct qlcnic_info nic_info;
+
+ if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED)
+ || !adapter->need_fw_reset)
+ return 0;
+
+ if (adapter->op_mode == QLCNIC_MGMT_FUNC) {
+ /* Set the NPAR config data after FW reset */
+ for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) {
+ npar = &adapter->npars[i];
+ if (npar->type != QLCNIC_TYPE_NIC)
+ continue;
+ err = qlcnic_get_nic_info(adapter, &nic_info, i);
+ if (err)
+ goto err_out;
+ nic_info.min_tx_bw = npar->min_bw;
+ nic_info.max_tx_bw = npar->max_bw;
+ err = qlcnic_set_nic_info(adapter, &nic_info);
+ if (err)
+ goto err_out;
+
+ if (npar->enable_pm) {
+ err = qlcnic_config_port_mirroring(adapter,
+ npar->dest_npar, 1, i);
+ if (err)
+ goto err_out;
+
+ }
+ npar->mac_learning = DEFAULT_MAC_LEARN;
+ npar->host_vlan_tag = 0;
+ npar->promisc_mode = 0;
+ npar->discard_tagged = 0;
+ npar->vlan_id = 0;
+ }
+ }
+err_out:
+ return err;
+}
+
+static int
qlcnic_start_firmware(struct qlcnic_adapter *adapter)
{
int val, err, first_boot;
@@ -834,10 +880,9 @@ wait_init:
qlcnic_idc_debug_info(adapter, 1);
qlcnic_check_options(adapter);
-
- if (adapter->flags & QLCNIC_ESWITCH_ENABLED &&
- adapter->op_mode != QLCNIC_NON_PRIV_FUNC)
- qlcnic_dev_set_npar_ready(adapter);
+ if (qlcnic_reset_npar_config(adapter))
+ goto err_out;
+ qlcnic_dev_set_npar_ready(adapter);
adapter->need_fw_reset = 0;
@@ -2486,6 +2531,7 @@ qlcnic_dev_request_reset(struct qlcnic_adapter *adapter)
{
u32 state;
+ adapter->need_fw_reset = 1;
if (qlcnic_api_lock(adapter))
return;
@@ -2506,6 +2552,9 @@ qlcnic_dev_set_npar_ready(struct qlcnic_adapter *adapter)
{
u32 state;
+ if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
+ adapter->op_mode == QLCNIC_NON_PRIV_FUNC)
+ return;
if (qlcnic_api_lock(adapter))
return;
@@ -3019,7 +3068,7 @@ static struct bin_attribute bin_attr_mem = {
.write = qlcnic_sysfs_write_mem,
};
-int
+static int
validate_pm_config(struct qlcnic_adapter *adapter,
struct qlcnic_pm_func_cfg *pm_cfg, int count)
{
@@ -3118,7 +3167,7 @@ qlcnic_sysfs_read_pm_config(struct file *filp, struct kobject *kobj,
return size;
}
-int
+static int
validate_esw_config(struct qlcnic_adapter *adapter,
struct qlcnic_esw_func_cfg *esw_cfg, int count)
{
@@ -3154,9 +3203,8 @@ qlcnic_sysfs_write_esw_config(struct file *file, struct kobject *kobj,
struct device *dev = container_of(kobj, struct device, kobj);
struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
struct qlcnic_esw_func_cfg *esw_cfg;
- u8 id, discard_tagged, promsc_mode, mac_learn;
- u8 vlan_tagging, pci_func, vlan_id;
int count, rem, i, ret;
+ u8 id, pci_func;
count = size / sizeof(struct qlcnic_esw_func_cfg);
rem = size % sizeof(struct qlcnic_esw_func_cfg);
@@ -3171,17 +3219,13 @@ qlcnic_sysfs_write_esw_config(struct file *file, struct kobject *kobj,
for (i = 0; i < count; i++) {
pci_func = esw_cfg[i].pci_func;
id = adapter->npars[pci_func].phy_port;
- vlan_tagging = esw_cfg[i].host_vlan_tag;
- promsc_mode = esw_cfg[i].promisc_mode;
- mac_learn = esw_cfg[i].mac_learning;
- vlan_id = esw_cfg[i].vlan_id;
- discard_tagged = esw_cfg[i].discard_tagged;
- ret = qlcnic_config_switch_port(adapter, id, vlan_tagging,
- discard_tagged,
- promsc_mode,
- mac_learn,
- pci_func,
- vlan_id);
+ ret = qlcnic_config_switch_port(adapter, id,
+ esw_cfg[i].host_vlan_tag,
+ esw_cfg[i].discard_tagged,
+ esw_cfg[i].promisc_mode,
+ esw_cfg[i].mac_learning,
+ esw_cfg[i].pci_func,
+ esw_cfg[i].vlan_id);
if (ret)
return ret;
}
@@ -3227,7 +3271,7 @@ qlcnic_sysfs_read_esw_config(struct file *file, struct kobject *kobj,
return size;
}
-int
+static int
validate_npar_config(struct qlcnic_adapter *adapter,
struct qlcnic_npar_func_cfg *np_cfg, int count)
{
@@ -3282,6 +3326,8 @@ qlcnic_sysfs_write_npar_config(struct file *file, struct kobject *kobj,
ret = qlcnic_set_nic_info(adapter, &nic_info);
if (ret)
return ret;
+ adapter->npars[i].min_bw = nic_info.min_tx_bw;
+ adapter->npars[i].max_bw = nic_info.max_tx_bw;
}
return size;
--
1.6.0.2
^ permalink raw reply related
* [PATCHv2 NEXT 2/5] qlcnic: disable tx timeout recovery
From: amit.salecha @ 2010-07-14 5:54 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Amit Kumar Salecha
In-Reply-To: <1279086893-22523-1-git-send-email-amit.salecha@qlogic.com>
From: Amit Kumar Salecha <amit.salecha@qlogic.com>
Disable tx timeout recovery, if auto_fw_reset is disable
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic_main.c | 6 ++++--
1 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 4d18313..d511fd1 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2581,7 +2581,8 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
if (adapter->need_fw_reset)
goto detach;
- if (adapter->reset_context) {
+ if (adapter->reset_context &&
+ auto_fw_reset == AUTO_FW_RESET_ENABLED) {
qlcnic_reset_hw_context(adapter);
adapter->netdev->trans_start = jiffies;
}
@@ -2594,7 +2595,8 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
qlcnic_dev_request_reset(adapter);
- clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state);
+ if ((auto_fw_reset == AUTO_FW_RESET_ENABLED))
+ clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state);
dev_info(&netdev->dev, "firmware hang detected\n");
--
1.6.0.2
^ permalink raw reply related
* [PATCHv2 NEXT 4/5] qlcnic: aer support
From: amit.salecha @ 2010-07-14 5:54 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Sucheta Chakraborty, Amit Kumar Salecha
In-Reply-To: <1279086893-22523-1-git-send-email-amit.salecha@qlogic.com>
From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Pci error recovery support added.
Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic.h | 1 +
drivers/net/qlcnic/qlcnic_main.c | 137 +++++++++++++++++++++++++++++++++++++-
2 files changed, 137 insertions(+), 1 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic.h b/drivers/net/qlcnic/qlcnic.h
index 02a50e6..0644642 100644
--- a/drivers/net/qlcnic/qlcnic.h
+++ b/drivers/net/qlcnic/qlcnic.h
@@ -911,6 +911,7 @@ struct qlcnic_mac_req {
#define __QLCNIC_DEV_UP 1
#define __QLCNIC_RESETTING 2
#define __QLCNIC_START_FW 4
+#define __QLCNIC_AER 5
#define QLCNIC_INTERRUPT_TEST 1
#define QLCNIC_LOOPBACK_TEST 2
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index c8275f0..462cb6b 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -34,6 +34,7 @@
#include <linux/ipv6.h>
#include <linux/inetdevice.h>
#include <linux/sysfs.h>
+#include <linux/aer.h>
MODULE_DESCRIPTION("QLogic 1/10 GbE Converged/Intelligent Ethernet Driver");
MODULE_LICENSE("GPL");
@@ -1306,6 +1307,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_disable_pdev;
pci_set_master(pdev);
+ pci_enable_pcie_error_reporting(pdev);
netdev = alloc_etherdev(sizeof(struct qlcnic_adapter));
if (!netdev) {
@@ -1437,6 +1439,7 @@ static void __devexit qlcnic_remove(struct pci_dev *pdev)
qlcnic_release_firmware(adapter);
+ pci_disable_pcie_error_reporting(pdev);
pci_release_regions(pdev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
@@ -2521,6 +2524,9 @@ static void
qlcnic_schedule_work(struct qlcnic_adapter *adapter,
work_func_t func, int delay)
{
+ if (test_bit(__QLCNIC_AER, &adapter->state))
+ return;
+
INIT_DELAYED_WORK(&adapter->fw_work, func);
schedule_delayed_work(&adapter->fw_work, round_jiffies_relative(delay));
}
@@ -2631,6 +2637,128 @@ reschedule:
qlcnic_schedule_work(adapter, qlcnic_fw_poll_work, FW_POLL_DELAY);
}
+static int qlcnic_is_first_func(struct pci_dev *pdev)
+{
+ struct pci_dev *oth_pdev;
+ int val = pdev->devfn;
+
+ while (val-- > 0) {
+ oth_pdev = pci_get_domain_bus_and_slot(pci_domain_nr
+ (pdev->bus), pdev->bus->number,
+ PCI_DEVFN(PCI_SLOT(pdev->devfn), val));
+
+ if (oth_pdev && (oth_pdev->current_state != PCI_D3cold))
+ return 0;
+ }
+ return 1;
+}
+
+static int qlcnic_attach_func(struct pci_dev *pdev)
+{
+ int err, first_func;
+ struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+
+ pdev->error_state = pci_channel_io_normal;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return err;
+
+ pci_set_power_state(pdev, PCI_D0);
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+
+ first_func = qlcnic_is_first_func(pdev);
+
+ if (qlcnic_api_lock(adapter))
+ return -EINVAL;
+
+ if (first_func) {
+ adapter->need_fw_reset = 1;
+ set_bit(__QLCNIC_START_FW, &adapter->state);
+ QLCWR32(adapter, QLCNIC_CRB_DEV_STATE, QLCNIC_DEV_INITIALIZING);
+ QLCDB(adapter, DRV, "Restarting fw\n");
+ }
+ qlcnic_api_unlock(adapter);
+
+ err = adapter->nic_ops->start_firmware(adapter);
+ if (err)
+ return err;
+
+ qlcnic_clr_drv_state(adapter);
+ qlcnic_setup_intr(adapter);
+
+ if (netif_running(netdev)) {
+ err = qlcnic_attach(adapter);
+ if (err) {
+ qlcnic_clr_all_drv_state(adapter);
+ clear_bit(__QLCNIC_AER, &adapter->state);
+ netif_device_attach(netdev);
+ return err;
+ }
+
+ err = qlcnic_up(adapter, netdev);
+ if (err)
+ goto done;
+
+ qlcnic_config_indev_addr(netdev, NETDEV_UP);
+ }
+ done:
+ netif_device_attach(netdev);
+ return err;
+}
+
+static pci_ers_result_t qlcnic_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
+ struct net_device *netdev = adapter->netdev;
+
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ if (state == pci_channel_io_normal)
+ return PCI_ERS_RESULT_RECOVERED;
+
+ set_bit(__QLCNIC_AER, &adapter->state);
+ netif_device_detach(netdev);
+
+ cancel_delayed_work_sync(&adapter->fw_work);
+
+ if (netif_running(netdev))
+ qlcnic_down(adapter, netdev);
+
+ qlcnic_detach(adapter);
+ qlcnic_teardown_intr(adapter);
+
+ clear_bit(__QLCNIC_RESETTING, &adapter->state);
+
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t qlcnic_io_slot_reset(struct pci_dev *pdev)
+{
+ return qlcnic_attach_func(pdev) ? PCI_ERS_RESULT_DISCONNECT :
+ PCI_ERS_RESULT_RECOVERED;
+}
+
+static void qlcnic_io_resume(struct pci_dev *pdev)
+{
+ struct qlcnic_adapter *adapter = pci_get_drvdata(pdev);
+
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+
+ if ((QLCRD32(adapter, QLCNIC_CRB_DEV_STATE) == QLCNIC_DEV_READY) &&
+ (test_and_clear_bit(__QLCNIC_AER, &adapter->state)))
+ qlcnic_schedule_work(adapter, qlcnic_fw_poll_work,
+ FW_POLL_DELAY);
+}
+
+
static int
qlcnicvf_start_firmware(struct qlcnic_adapter *adapter)
{
@@ -3436,6 +3564,11 @@ static void
qlcnic_config_indev_addr(struct net_device *dev, unsigned long event)
{ }
#endif
+static struct pci_error_handlers qlcnic_err_handler = {
+ .error_detected = qlcnic_io_error_detected,
+ .slot_reset = qlcnic_io_slot_reset,
+ .resume = qlcnic_io_resume,
+};
static struct pci_driver qlcnic_driver = {
.name = qlcnic_driver_name,
@@ -3446,7 +3579,9 @@ static struct pci_driver qlcnic_driver = {
.suspend = qlcnic_suspend,
.resume = qlcnic_resume,
#endif
- .shutdown = qlcnic_shutdown
+ .shutdown = qlcnic_shutdown,
+ .err_handler = &qlcnic_err_handler
+
};
static int __init qlcnic_init_module(void)
--
1.6.0.2
^ permalink raw reply related
* [PATCHv2 NEXT 3/5] qlcnic: fix netdev notifier in error path
From: amit.salecha @ 2010-07-14 5:54 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Amit Kumar Salecha
In-Reply-To: <1279086893-22523-1-git-send-email-amit.salecha@qlogic.com>
From: Amit Kumar Salecha <amit.salecha@qlogic.com>
netdev notifier are not unregistered if pci_register_driver fails.
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic_main.c | 10 +++++++++-
1 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index d511fd1..c8275f0 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -3451,6 +3451,7 @@ static struct pci_driver qlcnic_driver = {
static int __init qlcnic_init_module(void)
{
+ int ret;
printk(KERN_INFO "%s\n", qlcnic_driver_string);
@@ -3459,8 +3460,15 @@ static int __init qlcnic_init_module(void)
register_inetaddr_notifier(&qlcnic_inetaddr_cb);
#endif
+ ret = pci_register_driver(&qlcnic_driver);
+ if (ret) {
+#ifdef CONFIG_INET
+ unregister_inetaddr_notifier(&qlcnic_inetaddr_cb);
+ unregister_netdevice_notifier(&qlcnic_netdev_cb);
+#endif
+ }
- return pci_register_driver(&qlcnic_driver);
+ return ret;
}
module_init(qlcnic_init_module);
--
1.6.0.2
^ permalink raw reply related
* [PATCHv2 NEXT 1/5] qlcnic: fix pause params setting
From: amit.salecha @ 2010-07-14 5:54 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman, Rajesh Borundia, Amit Kumar Salecha
In-Reply-To: <1279086893-22523-1-git-send-email-amit.salecha@qlogic.com>
From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Turning off rx pause param and autoneg param is not supported so
return error in that case.
Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: Amit Kumar Salecha <amit.salecha@qlogic.com>
---
drivers/net/qlcnic/qlcnic_ethtool.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/drivers/net/qlcnic/qlcnic_ethtool.c b/drivers/net/qlcnic/qlcnic_ethtool.c
index baf5a52..8599993 100644
--- a/drivers/net/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/qlcnic/qlcnic_ethtool.c
@@ -578,8 +578,13 @@ qlcnic_set_pauseparam(struct net_device *netdev,
}
QLCWR32(adapter, QLCNIC_NIU_GB_PAUSE_CTL, val);
} else if (adapter->ahw.port_type == QLCNIC_XGBE) {
+
+ if (!pause->rx_pause || pause->autoneg)
+ return -EOPNOTSUPP;
+
if ((port < 0) || (port > QLCNIC_NIU_MAX_XG_PORTS))
return -EIO;
+
val = QLCRD32(adapter, QLCNIC_NIU_XG_PAUSE_CTL);
if (port == 0) {
if (pause->tx_pause)
--
1.6.0.2
^ permalink raw reply related
* [PATCHv2 NEXT 0/5]qlcnic: aer state
From: amit.salecha @ 2010-07-14 5:54 UTC (permalink / raw)
To: davem; +Cc: netdev, ameen.rahman
Hi
Resending series of 5 to support aer and minor fixes.
Please ignore earlier patches, this fixes minor sparse warning
and version update.
Please apply them on net-next.
-Amit
^ permalink raw reply
* Re: [PATCH RFC] act_cpu: packet distributing
From: Eric Dumazet @ 2010-07-14 4:30 UTC (permalink / raw)
To: Changli Gao
Cc: Jamal Hadi Salim, David S. Miller, Patrick McHardy, Tom Herbert,
netdev
In-Reply-To: <AANLkTimPmXWyAspJqnya7aM_vDRNzDdJJf2bl7CYrAXN@mail.gmail.com>
Le mercredi 14 juillet 2010 à 12:17 +0800, Changli Gao a écrit :
> On Wed, Jul 14, 2010 at 11:41 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > Why doing the search again, in case skb->sk already set by another
> > module before you, like tproxy ?
> >
>
> Although it is unlikely skb->sk is non null, as tc is before
> netfilter, I will handle this case. Thanks.
>
In this case, provide the skb->sk already set case on tproxy, not in
act_cpu. But doing it on both will give a hint for future modules...
> >> +static int tcf_cpu(struct sk_buff *skb, struct tc_action *a,
> >> + struct tcf_result *res)
> >> +{
> >> + struct tcf_cpu *p = a->priv;
> >> + u32 type;
> >> + u32 value;
> >> + int cpu, action;
> >> + struct sk_buff *nskb;
> >> + unsigned int qtail;
> >> +
> >> + spin_lock(&p->tcf_lock);
> >
> > Ok, the big lock...
> >
> > We have a lockless TCP/UDP input path, and this modules adds a lock
> > again.
> >
> >> + p->tcf_tm.lastuse = jiffies;
> >> + p->tcf_bstats.bytes += qdisc_pkt_len(skb);
> >> + p->tcf_bstats.packets++;
> >> + type = p->type;
> >> + value = p->value;
> >> + action = p->tcf_action;
> >> + spin_unlock(&p->tcf_lock);
> >> +
> >
> > Please change all this crap (legacy crap, copied from other tc
> > modules), by modern one, using RCU and no locking in hot path.
> >
>
> Thanks, I'll try. It is a write critical section, and for me it is
> difficult to convert this lock to RCU. Could you show me some
> examples?
We can convert bytes/packets stats to percpu stats for example.
That might need a generic change.
Then, a normal RCU protection should be enough to fetch
type/value/action fields.
^ permalink raw reply
* Re: [PATCH RFC] act_cpu: packet distributing
From: Changli Gao @ 2010-07-14 4:17 UTC (permalink / raw)
To: Eric Dumazet
Cc: Jamal Hadi Salim, David S. Miller, Patrick McHardy, Tom Herbert,
netdev
In-Reply-To: <1279078875.2444.103.camel@edumazet-laptop>
On Wed, Jul 14, 2010 at 11:41 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le mercredi 14 juillet 2010 à 11:17 +0800, Changli Gao a écrit :
>> I want to know if I can assign the sk to skb as nf_tproxy_core does to avoid
>> the duplicate search later. Thanks.
>>
>
> I suggest we first correct bugs before adding new features.
>
> For me, tproxy is a high suspect at this moment, I would not use it on
> production machine.
>
>> act_cpu: packet distributing
>>
>> This TC action can be used to redirect packets to the CPU:
>> * specified by the cpuid option
>> * specified by the class minor ID obtained previously
>> * on which the corresponding application runs
>>
>> It supports the similar functions of RPS and RFS, but is more flexible.
>>
>
> Thins kind of claims is disgusting.
>
> No documentation, I have no idea how you setup the thing.
for example:
redirect packets to the CPU on which the corresponding application runs
tc qdisc add dev eth0 ingress
tc filter add dev eth0 parent ffff:0 protocol ip basic action cpu socket
redirect packets to special CPU.
tc filter add dev eth0 parent ffff:0 protocol ip basic action cpu cpuid 1
sport hash packet distributing among CPU 0-1.
tc filter add dev eth0 parent ffff:0 protocol ip handle 1 flow hash
keys proto-src divisor 2 action cpu map 1
>> +static inline void sock_save_cpu(struct sock *sk)
>> +{
>> +#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
>> + int cpu = get_cpu();
>> + if (sk->sk_cpu != cpu)
>> + sk->sk_cpu = cpu;
>> + put_cpu();
>
> sk->sk_cpu = raw_smp_processor_id();
Thanks.
>
> Why doing the search again, in case skb->sk already set by another
> module before you, like tproxy ?
>
Although it is unlikely skb->sk is non null, as tc is before
netfilter, I will handle this case. Thanks.
>> +static int tcf_cpu(struct sk_buff *skb, struct tc_action *a,
>> + struct tcf_result *res)
>> +{
>> + struct tcf_cpu *p = a->priv;
>> + u32 type;
>> + u32 value;
>> + int cpu, action;
>> + struct sk_buff *nskb;
>> + unsigned int qtail;
>> +
>> + spin_lock(&p->tcf_lock);
>
> Ok, the big lock...
>
> We have a lockless TCP/UDP input path, and this modules adds a lock
> again.
>
>> + p->tcf_tm.lastuse = jiffies;
>> + p->tcf_bstats.bytes += qdisc_pkt_len(skb);
>> + p->tcf_bstats.packets++;
>> + type = p->type;
>> + value = p->value;
>> + action = p->tcf_action;
>> + spin_unlock(&p->tcf_lock);
>> +
>
> Please change all this crap (legacy crap, copied from other tc
> modules), by modern one, using RCU and no locking in hot path.
>
Thanks, I'll try. It is a write critical section, and for me it is
difficult to convert this lock to RCU. Could you show me some
examples?
Thanks.
--
Regards,
Changli Gao(xiaosuo@gmail.com)
^ permalink raw reply
* Re: [PATCH] tproxy: nf_tproxy_assign_sock() can handle tw sockets
From: Felipe W Damasio @ 2010-07-14 3:51 UTC (permalink / raw)
To: Eric Dumazet
Cc: Avi Kivity, David Miller, Patrick McHardy, linux-kernel, netdev
In-Reply-To: <1279078985.2444.105.camel@edumazet-laptop>
Hi,
2010/7/14 Eric Dumazet <eric.dumazet@gmail.com>:
>> I can, but my bosses will kick my ass if I bring down the ISP again :)
>
> I have no guarantee at all, even if we find the bug.
Ok :-)
>> If you think it's the only way to find the problem I'll tell them that
>> I need to do it. In this case, please tell me what other config
>> options/tools I can use to get as much info as possible...since I'll
>> probably be able to test this only once more on the production
>> environment for debugging purposes.
>
> You really should try to setup a lab to trigger the bug, and not doing
> experiments on production :)
Right, I'm trying.
The thing is: The ISP is a 200Mbps network with 10,000 users. The
first time it took around 2 minutes to trigger the bug. The second
time it took around 17 minutes.
So I *think* it's some TCP flag with some weird content...but I can't
find out what it is so I can trigger it on the lab.
So my only guess is to enable every possible debug flag I can think of
to track the bug down on the production environment. Any hints here
would be appreciated :)
Cheers,
Felipe Damasio
^ permalink raw reply
* Re: [PATCH] tproxy: nf_tproxy_assign_sock() can handle tw sockets
From: Eric Dumazet @ 2010-07-14 3:43 UTC (permalink / raw)
To: Felipe W Damasio
Cc: Avi Kivity, David Miller, Patrick McHardy, linux-kernel, netdev
In-Reply-To: <AANLkTikkcaionMp3SJLgo3JmCjxQ7rPZv3-3_RbxCx_4@mail.gmail.com>
Le mercredi 14 juillet 2010 à 00:27 -0300, Felipe W Damasio a écrit :
> Hi Mr. Dumazet,
>
> 2010/7/14 Eric Dumazet <eric.dumazet@gmail.com>:
> > RDX being the sk pointer (and sk+0x38 contains the corrupted "sk_prot" value)
> > , we notice RBP contains same "sk" value + 0x200000 (2 Mbytes).
> >
> > (same remark on your initial bug report)
> >
> > Could you enable CONFIG_FRAME_POINTER=y in your config ?
>
> I can, but my bosses will kick my ass if I bring down the ISP again :)
>
I have no guarantee at all, even if we find the bug.
> If you think it's the only way to find the problem I'll tell them that
> I need to do it. In this case, please tell me what other config
> options/tools I can use to get as much info as possible...since I'll
> probably be able to test this only once more on the production
> environment for debugging purposes.
>
You really should try to setup a lab to trigger the bug, and not doing
experiments on production :)
^ permalink raw reply
* Re: [PATCH RFC] act_cpu: packet distributing
From: Eric Dumazet @ 2010-07-14 3:41 UTC (permalink / raw)
To: Changli Gao
Cc: Jamal Hadi Salim, David S. Miller, Patrick McHardy, Tom Herbert,
netdev
In-Reply-To: <1279077475-2956-1-git-send-email-xiaosuo@gmail.com>
Le mercredi 14 juillet 2010 à 11:17 +0800, Changli Gao a écrit :
> I want to know if I can assign the sk to skb as nf_tproxy_core does to avoid
> the duplicate search later. Thanks.
>
I suggest we first correct bugs before adding new features.
For me, tproxy is a high suspect at this moment, I would not use it on
production machine.
> act_cpu: packet distributing
>
> This TC action can be used to redirect packets to the CPU:
> * specified by the cpuid option
> * specified by the class minor ID obtained previously
> * on which the corresponding application runs
>
> It supports the similar functions of RPS and RFS, but is more flexible.
>
Thins kind of claims is disgusting.
No documentation, I have no idea how you setup the thing.
RPS still misses documentation, but activating it is easy.
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
> ----
> include/linux/netdevice.h | 2
> include/linux/tc_act/tc_cpu.h | 31 +++++
> include/net/sock.h | 24 +++
> include/net/tc_act/tc_cpu.h | 18 ++
> net/core/dev.c | 4
> net/core/sock.c | 1
> net/sched/Kconfig | 12 +
> net/sched/Makefile | 1
> net/sched/act_cpu.c | 260 ++++++++++++++++++++++++++++++++++++++++++
> 9 files changed, 350 insertions(+), 3 deletions(-)
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index c4fedf0..318d422 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1435,6 +1435,8 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
>
> DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>
> +int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail);
> +
> #define HAVE_NETIF_QUEUE
>
> extern void __netif_schedule(struct Qdisc *q);
> diff --git a/include/linux/tc_act/tc_cpu.h b/include/linux/tc_act/tc_cpu.h
> new file mode 100644
> index 0000000..2704607
> --- /dev/null
> +++ b/include/linux/tc_act/tc_cpu.h
> @@ -0,0 +1,31 @@
> +#ifndef __LINUX_TC_CPU_H
> +#define __LINUX_TC_CPU_H
> +
> +#include <linux/pkt_cls.h>
> +#include <linux/types.h>
> +
> +#define TCA_ACT_CPU 12
> +
> +enum {
> + TCA_CPU_UNSPEC,
> + TCA_CPU_PARMS,
> + TCA_CPU_TM,
> + __TCA_CPU_MAX
> +};
> +#define TCA_CPU_MAX (__TCA_CPU_MAX - 1)
> +
> +enum {
> + TCA_CPU_TYPE_MAP,
> + TCA_CPU_TYPE_CPUID,
> + TCA_CPU_TYPE_SOCKET,
> + __TCA_CPU_TYPE_MAX
> +};
> +#define TCA_CPU_TYPE_MAX (__TCA_CPU_TYPE_MAX - 1)
> +
> +struct tc_cpu {
> + tc_gen;
> + __u32 type;
> + __u32 value;
> +};
> +
> +#endif /* __LINUX_TC_CPU_H */
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 3100e71..7913158 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -200,6 +200,7 @@ struct sock_common {
> * @sk_rcvtimeo: %SO_RCVTIMEO setting
> * @sk_sndtimeo: %SO_SNDTIMEO setting
> * @sk_rxhash: flow hash received from netif layer
> + * @sk_cpu: the CPU on which the corresponding process works.
> * @sk_filter: socket filtering instructions
> * @sk_protinfo: private area, net family specific, when not using slab
> * @sk_timer: sock cleanup timer
> @@ -284,6 +285,9 @@ struct sock {
> #ifdef CONFIG_RPS
> __u32 sk_rxhash;
> #endif
> +#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
> + int sk_cpu;
> +#endif
> unsigned long sk_flags;
> unsigned long sk_lingertime;
> struct sk_buff_head sk_error_queue;
> @@ -639,7 +643,24 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
> return sk->sk_backlog_rcv(sk, skb);
> }
>
> -static inline void sock_rps_record_flow(const struct sock *sk)
> +static inline void sock_reset_cpu(struct sock *sk)
> +{
> +#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
> + sk->sk_cpu = nr_cpumask_bits;
> +#endif
> +}
> +
> +static inline void sock_save_cpu(struct sock *sk)
> +{
> +#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
> + int cpu = get_cpu();
> + if (sk->sk_cpu != cpu)
> + sk->sk_cpu = cpu;
> + put_cpu();
sk->sk_cpu = raw_smp_processor_id();
> +#endif
> +}
> +
> +static inline void sock_rps_record_flow(struct sock *sk)
> {
> #ifdef CONFIG_RPS
> struct rps_sock_flow_table *sock_flow_table;
> @@ -649,6 +670,7 @@ static inline void sock_rps_record_flow(const struct sock *sk)
> rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
> rcu_read_unlock();
> #endif
> + sock_save_cpu(sk);
> }
>
> static inline void sock_rps_reset_flow(const struct sock *sk)
> diff --git a/include/net/tc_act/tc_cpu.h b/include/net/tc_act/tc_cpu.h
> new file mode 100644
> index 0000000..0504bf0
> --- /dev/null
> +++ b/include/net/tc_act/tc_cpu.h
> @@ -0,0 +1,18 @@
> +#ifndef __NET_TC_CPU_H
> +#define __NET_TC_CPU_H
> +
> +#include <linux/types.h>
> +#include <net/act_api.h>
> +
> +struct tcf_cpu {
> + struct tcf_common common;
> + u32 type;
> + u32 value;
> +};
> +
> +static inline struct tcf_cpu *to_tcf_cpu(struct tcf_common *pc)
> +{
> + return container_of(pc, struct tcf_cpu, common);
> +}
> +
> +#endif /* __NET_TC_CPU_H */
> diff --git a/net/core/dev.c b/net/core/dev.c
> index e2b9fa2..45e8a21 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2443,8 +2443,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
> * enqueue_to_backlog is called to queue an skb to a per CPU backlog
> * queue (may be a remote CPU queue).
> */
> -static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
> - unsigned int *qtail)
> +int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail)
> {
> struct softnet_data *sd;
> unsigned long flags;
> @@ -2482,6 +2481,7 @@ enqueue:
> kfree_skb(skb);
> return NET_RX_DROP;
> }
> +EXPORT_SYMBOL(enqueue_to_backlog);
>
> /**
> * netif_rx - post buffer to the network code
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 363bc26..7a71e76 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1045,6 +1045,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
> if (!try_module_get(prot->owner))
> goto out_free_sec;
> sk_tx_queue_clear(sk);
> + sock_reset_cpu(sk);
> }
>
> return sk;
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index 2f691fb..a826758 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -427,6 +427,18 @@ config NET_CLS_ACT
> A recent version of the iproute2 package is required to use
> extended matches.
>
> +config NET_ACT_CPU
> + tristate "Packet distributing"
> + depends on NET_CLS_ACT
> + depends on RPS
> + ---help---
> + Say Y here to do packets distributing. With it, you can distribute
> + the packets among the CPUs on the system in any way as you like. It
> + only works in ingress.
> +
> + To compile this code as a module, choose M here: the
> + module will be called act_cpu.
> +
> config NET_ACT_POLICE
> tristate "Traffic Policing"
> depends on NET_CLS_ACT
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index f14e71b..a9e0b96 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -7,6 +7,7 @@ obj-y := sch_generic.o sch_mq.o
> obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
> obj-$(CONFIG_NET_CLS) += cls_api.o
> obj-$(CONFIG_NET_CLS_ACT) += act_api.o
> +obj-$(CONFIG_NET_ACT_CPU) += act_cpu.o
> obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
> obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
> obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
> diff --git a/net/sched/act_cpu.c b/net/sched/act_cpu.c
> new file mode 100644
> index 0000000..6b7d7fc
> --- /dev/null
> +++ b/net/sched/act_cpu.c
> @@ -0,0 +1,260 @@
> +/*
> + * Packet distributing actions
> + *
> + * Copyright (c) 2010- Changli Gao <xiaosuo@gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the Free
> + * Software Foundation; either version 2 of the License, or (at your option)
> + * any later version.
> + */
> +
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/gfp.h>
> +#include <net/net_namespace.h>
> +#include <net/netlink.h>
> +#include <net/pkt_sched.h>
> +#include <net/tcp.h>
> +#include <net/udp.h>
> +#include <linux/tc_act/tc_cpu.h>
> +#include <net/tc_act/tc_cpu.h>
> +
> +#define CPU_TAB_MASK 15
> +static struct tcf_common *tcf_cpu_ht[CPU_TAB_MASK + 1];
> +static u32 cpu_idx_gen;
> +static DEFINE_RWLOCK(cpu_lock);
> +
> +static struct tcf_hashinfo cpu_hash_info = {
> + .htab = tcf_cpu_ht,
> + .hmask = CPU_TAB_MASK,
> + .lock = &cpu_lock
> +};
> +
> +static const struct nla_policy cpu_policy[TCA_CPU_MAX + 1] = {
> + [TCA_CPU_PARMS] = { .len = sizeof(struct tc_cpu) },
> +};
> +
> +static int tcf_cpu_init(struct nlattr *nla, struct nlattr *est,
> + struct tc_action *a, int ovr, int bind)
> +{
> + struct nlattr *tb[TCA_CPU_MAX + 1];
> + struct tc_cpu *parm;
> + struct tcf_cpu *p;
> + struct tcf_common *pc;
> + int ret;
> +
> + if (nla == NULL)
> + return -EINVAL;
> + ret = nla_parse_nested(tb, TCA_CPU_MAX, nla, cpu_policy);
> + if (ret < 0)
> + return ret;
> + if (tb[TCA_CPU_PARMS] == NULL)
> + return -EINVAL;
> + parm = nla_data(tb[TCA_CPU_PARMS]);
> + if (parm->type > TCA_CPU_TYPE_MAX || parm->action != TC_ACT_STOLEN)
> + return -EINVAL;
> +
> + pc = tcf_hash_check(parm->index, a, bind, &cpu_hash_info);
> + if (!pc) {
> + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
> + &cpu_idx_gen, &cpu_hash_info);
> + if (IS_ERR(pc))
> + return PTR_ERR(pc);
> + ret = ACT_P_CREATED;
> + } else {
> + if (!ovr) {
> + tcf_hash_release(pc, bind, &cpu_hash_info);
> + return -EEXIST;
> + }
> + ret = 0;
> + }
> + p = to_tcf_cpu(pc);
> +
> + spin_lock_bh(&p->tcf_lock);
> + p->type = parm->type;
> + p->value = parm->value;
> + p->tcf_action = parm->action;
> + spin_unlock_bh(&p->tcf_lock);
> +
> + if (ret == ACT_P_CREATED)
> + tcf_hash_insert(pc, &cpu_hash_info);
> +
> + return ret;
> +}
> +
> +static int tcf_cpu_cleanup(struct tc_action *a, int bind)
> +{
> + struct tcf_cpu *p = a->priv;
> +
> + return tcf_hash_release(&p->common, bind, &cpu_hash_info);
> +}
> +
> +static int tcf_cpu_from_sock(struct sk_buff *skb)
> +{
> + struct iphdr *iph;
> + struct sock *sk;
> + struct {
> + __be16 source, dest;
> + } *ports;
> + int cpu;
> +
> + if (skb->dev == NULL || skb->protocol != __constant_htons(ETH_P_IP))
> + goto err;
> + if (!pskb_may_pull(skb, sizeof(*iph)))
> + goto err;
> + iph = (struct iphdr *) skb->data;
> + if (!pskb_may_pull(skb, iph->ihl * 4 + 4))
> + goto err;
> + ports = (void *) (skb->data + iph->ihl * 4);
Why doing the search again, in case skb->sk already set by another
module before you, like tproxy ?
> + switch (iph->protocol) {
> + case IPPROTO_TCP:
> + sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
> + ports->source, iph->daddr, ports->dest,
> + skb->dev->ifindex);
> + break;
> + case IPPROTO_UDP:
> + sk = udp4_lib_lookup(dev_net(skb->dev), iph->saddr,
> + ports->source, iph->daddr, ports->dest,
> + skb->dev->ifindex);
> + break;
> + default:
> + goto err;
> + }
> +
> + if (!sk)
> + goto err;
> + cpu = sk->sk_cpu;
> + if (sk->sk_protocol == IPPROTO_TCP && sk->sk_state == TCP_TIME_WAIT)
> + inet_twsk_put(inet_twsk(sk));
> + else
> + sock_put(sk);
> +
> + return cpu;
> +
> +err:
> + return smp_processor_id();
> +}
> +
> +static int tcf_cpu(struct sk_buff *skb, struct tc_action *a,
> + struct tcf_result *res)
> +{
> + struct tcf_cpu *p = a->priv;
> + u32 type;
> + u32 value;
> + int cpu, action;
> + struct sk_buff *nskb;
> + unsigned int qtail;
> +
> + spin_lock(&p->tcf_lock);
Ok, the big lock...
We have a lockless TCP/UDP input path, and this modules adds a lock
again.
> + p->tcf_tm.lastuse = jiffies;
> + p->tcf_bstats.bytes += qdisc_pkt_len(skb);
> + p->tcf_bstats.packets++;
> + type = p->type;
> + value = p->value;
> + action = p->tcf_action;
> + spin_unlock(&p->tcf_lock);
> +
Please change all this crap (legacy crap, copied from other tc
modules), by modern one, using RCU and no locking in hot path.
^ permalink raw reply
* Re: [PATCH] tproxy: nf_tproxy_assign_sock() can handle tw sockets
From: Felipe W Damasio @ 2010-07-14 3:27 UTC (permalink / raw)
To: Eric Dumazet
Cc: Avi Kivity, David Miller, Patrick McHardy, linux-kernel, netdev
In-Reply-To: <1279077678.2444.95.camel@edumazet-laptop>
Hi Mr. Dumazet,
2010/7/14 Eric Dumazet <eric.dumazet@gmail.com>:
> RDX being the sk pointer (and sk+0x38 contains the corrupted "sk_prot" value)
> , we notice RBP contains same "sk" value + 0x200000 (2 Mbytes).
>
> (same remark on your initial bug report)
>
> Could you enable CONFIG_FRAME_POINTER=y in your config ?
I can, but my bosses will kick my ass if I bring down the ISP again :)
If you think it's the only way to find the problem I'll tell them that
I need to do it. In this case, please tell me what other config
options/tools I can use to get as much info as possible...since I'll
probably be able to test this only once more on the production
environment for debugging purposes.
Cheers,
Felipe Damasio
^ permalink raw reply
* Re: [PATCH RFC] act_cpu: packet distributing
From: Changli Gao @ 2010-07-14 3:27 UTC (permalink / raw)
To: Jamal Hadi Salim
Cc: David S. Miller, Patrick McHardy, Tom Herbert, Eric Dumazet,
netdev, Changli Gao
In-Reply-To: <1279077475-2956-1-git-send-email-xiaosuo@gmail.com>
[-- Attachment #1: Type: text/plain, Size: 666 bytes --]
On Wed, Jul 14, 2010 at 11:17 AM, Changli Gao <xiaosuo@gmail.com> wrote:
> I want to know if I can assign the sk to skb as nf_tproxy_core does to avoid
> the duplicate search later. Thanks.
>
> act_cpu: packet distributing
>
> This TC action can be used to redirect packets to the CPU:
> * specified by the cpuid option
> * specified by the class minor ID obtained previously
> * on which the corresponding application runs
>
> It supports the similar functions of RPS and RFS, but is more flexible.
>
> Signed-off-by: Changli Gao <xiaosuo@gmail.com>
The iproute2 patch against 2.6.31 is attached
--
Regards,
Changli Gao(xiaosuo@gmail.com)
[-- Attachment #2: iproute2-act_cpu.diff --]
[-- Type: text/plain, Size: 5350 bytes --]
diff -urN iproute2-2.6.31.orig/include/linux/tc_act/tc_cpu.h iproute2-2.6.31/include/linux/tc_act/tc_cpu.h
--- iproute2-2.6.31.orig/include/linux/tc_act/tc_cpu.h 1970-01-01 08:00:00.000000000 +0800
+++ iproute2-2.6.31/include/linux/tc_act/tc_cpu.h 2010-07-02 16:57:10.000000000 +0800
@@ -0,0 +1,31 @@
+#ifndef __LINUX_TC_CPU_H
+#define __LINUX_TC_CPU_H
+
+#include <linux/pkt_cls.h>
+#include <linux/types.h>
+
+#define TCA_ACT_CPU 12
+
+enum {
+ TCA_CPU_UNSPEC,
+ TCA_CPU_PARMS,
+ TCA_CPU_TM,
+ __TCA_CPU_MAX
+};
+#define TCA_CPU_MAX (__TCA_CPU_MAX - 1)
+
+enum {
+ TCA_CPU_TYPE_MAP,
+ TCA_CPU_TYPE_CPUID,
+ TCA_CPU_TYPE_SOCKET,
+ __TCA_CPU_TYPE_MAX
+};
+#define TCA_CPU_TYPE_MAX (__TCA_CPU_TYPE_MAX - 1)
+
+struct tc_cpu {
+ tc_gen;
+ __u32 type;
+ __u32 value;
+};
+
+#endif /* __LINUX_TC_CPU_H */
diff -urN iproute2-2.6.31.orig/tc/Makefile iproute2-2.6.31/tc/Makefile
--- iproute2-2.6.31.orig/tc/Makefile 2010-07-14 11:21:05.000000000 +0800
+++ iproute2-2.6.31/tc/Makefile 2010-07-02 16:56:41.000000000 +0800
@@ -32,6 +32,7 @@
TCMODULES += m_gact.o
TCMODULES += m_mirred.o
TCMODULES += m_nat.o
+TCMODULES += m_cpu.o
TCMODULES += m_pedit.o
TCMODULES += m_skbedit.o
TCMODULES += p_ip.o
diff -urN iproute2-2.6.31.orig/tc/m_cpu.c iproute2-2.6.31/tc/m_cpu.c
--- iproute2-2.6.31.orig/tc/m_cpu.c 1970-01-01 08:00:00.000000000 +0800
+++ iproute2-2.6.31/tc/m_cpu.c 2010-07-12 16:38:56.000000000 +0800
@@ -0,0 +1,186 @@
+/*
+ * m_cpu.c Packet distributing module
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Changli Gao <xiaosuo@gmail.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include "utils.h"
+#include "tc_util.h"
+#include <linux/tc_act/tc_cpu.h>
+
+static void
+explain(void)
+{
+ fprintf(stderr, "Usage: ... cpu METHOD\n"
+ "METHOD := { CPUID-SPEC | MAP-SPEC | SOCKET-SPEC }\n"
+ "CPUID-SPEC := cpuid CPUID\n"
+ "MAP-SPEC := map OFFSET\n"
+ "SOCKET-SPEC := socket\n");
+}
+
+static void
+usage(void)
+{
+ explain();
+ exit(-1);
+}
+
+static int
+parse_cpu_args(int *argc_p, char ***argv_p,struct tc_cpu *sel)
+{
+ int argc = *argc_p;
+ char **argv = *argv_p;
+
+ if (argc <= 0)
+ return -1;
+
+ if (matches(*argv, "map") == 0)
+ sel->type = TCA_CPU_TYPE_MAP;
+ else if (matches(*argv, "cpuid") == 0)
+ sel->type = TCA_CPU_TYPE_CPUID;
+ else if (matches(*argv, "socket") == 0) {
+ sel->type = TCA_CPU_TYPE_SOCKET;
+ goto out;
+ } else
+ return -1;
+
+ NEXT_ARG();
+
+ if (get_u32(&sel->value, *argv, 10)) {
+ fprintf(stderr, "Cpu: Illegal \"%s\"\n",
+ sel->type == TCA_CPU_TYPE_CPUID ? "CPUID" : "OFFSET");
+ return -1;
+ }
+
+out:
+ argc--;
+ argv++;
+
+ *argc_p = argc;
+ *argv_p = argv;
+ return 0;
+}
+
+static int
+parse_cpu(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n)
+{
+ struct tc_cpu sel;
+ int argc = *argc_p;
+ char **argv = *argv_p;
+ struct rtattr *tail;
+
+ memset(&sel, 0, sizeof(sel));
+ sel.action = TC_ACT_STOLEN;
+
+ if (argc <= 0) {
+ explain();
+ return -1;
+ }
+
+ if (matches(*argv, "cpu") == 0) {
+ NEXT_ARG();
+ } else {
+ fprintf(stderr, "cpu bad arguments %s\n", *argv);
+ return -1;
+ }
+
+ if (matches(*argv, "help") == 0)
+ usage();
+ if (parse_cpu_args(&argc, &argv, &sel)) {
+ fprintf(stderr, "Illegal cpu construct (%s) \n", *argv);
+ explain();
+ return -1;
+ }
+
+ while (argc > 0) {
+ if (matches(*argv, "drop") == 0 ||
+ matches(*argv, "shot") == 0) {
+ /* I have set it above */
+ argc--;
+ argv++;
+ continue;
+ } else if (matches(*argv, "index") == 0) {
+ NEXT_ARG();
+ if (get_u32(&sel.index, *argv, 10)) {
+ fprintf(stderr, "Cpu: Illegal \"index\"\n");
+ return -1;
+ }
+ argc--;
+ argv++;
+ } else {
+ fprintf(stderr, "Invalid option: %s\n", *argv);
+ usage();
+ }
+ }
+
+ tail = NLMSG_TAIL(n);
+ addattr_l(n, MAX_MSG, tca_id, NULL, 0);
+ addattr_l(n, MAX_MSG, TCA_CPU_PARMS, &sel, sizeof(sel));
+ tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
+
+ *argc_p = argc;
+ *argv_p = argv;
+ return 0;
+}
+
+static int
+print_cpu(struct action_util *au,FILE * f, struct rtattr *arg)
+{
+ struct tc_cpu *sel;
+ struct rtattr *tb[TCA_CPU_MAX + 1];
+ char buf[12];
+ const static char *type_str[] = {
+ [TCA_CPU_TYPE_MAP] = "map",
+ [TCA_CPU_TYPE_CPUID] = "cpuid",
+ [TCA_CPU_TYPE_SOCKET] = "socket"
+ };
+
+ if (arg == NULL)
+ return -1;
+
+ parse_rtattr_nested(tb, TCA_CPU_MAX, arg);
+
+ if (tb[TCA_CPU_PARMS] == NULL) {
+ fprintf(f, "[NULL cpu parameters]");
+ return -1;
+ }
+ sel = RTA_DATA(tb[TCA_CPU_PARMS]);
+
+ if (sel->type != TCA_CPU_TYPE_SOCKET)
+ sprintf(buf, "%u", sel->value);
+ else
+ buf[0] = '\0';
+
+ fprintf(f, " cpu %s%s%s", type_str[sel->type],
+ sel->type == TCA_CPU_TYPE_SOCKET ? "" : " ", buf);
+
+ if (show_stats) {
+ if (tb[TCA_CPU_TM]) {
+ struct tcf_t *tm = RTA_DATA(tb[TCA_CPU_TM]);
+ print_tm(f,tm);
+ }
+ }
+
+ return 0;
+}
+
+struct action_util cpu_action_util = {
+ .id = "cpu",
+ .parse_aopt = parse_cpu,
+ .print_aopt = print_cpu,
+};
^ permalink raw reply
* Re: [PATCH] tproxy: nf_tproxy_assign_sock() can handle tw sockets
From: Eric Dumazet @ 2010-07-14 3:21 UTC (permalink / raw)
To: Felipe W Damasio
Cc: Avi Kivity, David Miller, Patrick McHardy, linux-kernel, netdev
In-Reply-To: <AANLkTilgxvDPpBJvwbw3PHZfXDaa2_6-TbJ8lujz5y3V@mail.gmail.com>
Le mardi 13 juillet 2010 à 17:55 -0300, Felipe W Damasio a écrit :
> Hi Mr. Dumazet,
>
> I used the patched kernel on the production machine and squid frooze again.
>
> This is the dmesg message:
>
>
> general protection fault: 0000 [#1] SMP
> last sysfs file: /sys/devices/pci0000:00/0000:00:1f.3/i2c-0/name
> CPU 1
> Modules linked in:
>
> Pid: 5533, comm: squid Not tainted 2.6.34 #6 DX58SO/
> RIP: 0010:[<ffffffff81369b2a>] [<ffffffff81369b2a>] sock_rfree+0x26/0x37
> RSP: 0018:ffff88042287fc20 EFLAGS: 00010206
> RAX: 66c86f938964c696 RBX: ffff88034e8f9a00 RCX: 0000000000000720
> RDX: ffff8803f0ce05c0 RSI: ffff8803d441960c RDI: ffff88034e8f9a00
> RBP: ffff8803f0ee05c0 R08: ffffea000dcb9998 R09: 0000000000000000
> R10: 000000000003d830 R11: ffff8803f0ee05c0 R12: 00000000000005a8
> R13: 00000000000005a8 R14: 0000000000004378 R15: 0000000000000000
> FS: 00007f4cf33ee710(0000) GS:ffff880001840000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000021d5fd0 CR3: 0000000422872000 CR4: 00000000000006e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process squid (pid: 5533, threadinfo ffff88042287e000, task ffff88042eb61a40)
> Stack:
> ffffffff8136ecda ffff88034e8f9a00 ffffffff8136ea8c ffff88034e8f9a00
> <0> ffffffff813ab142 00000000000000d0 ffffffff8136f9f9 000000000eec60e2
> <0> ffff88042eb61a40 ffff88042eb61a40 ffff88042eb61a40 00000000edca7300
> Call Trace:
> [<ffffffff8136ecda>] ? skb_release_head_state+0x6d/0xb7
> [<ffffffff8136ea8c>] ? __kfree_skb+0x9/0x7d
> [<ffffffff813ab142>] ? tcp_recvmsg+0x6a3/0x89a
> [<ffffffff8136f9f9>] ? __alloc_skb+0x5e/0x14e
> [<ffffffff81369dde>] ? sock_common_recvmsg+0x30/0x45
> [<ffffffff81367b0f>] ? sock_aio_read+0xdd/0xf1
> [<ffffffff813b6c97>] ? tcp_write_xmit+0x93e/0x96c
> [<ffffffff810ac500>] ? do_sync_read+0xb0/0xf2
> [<ffffffff810acf32>] ? vfs_read+0xb9/0xff
> [<ffffffff810ad034>] ? sys_read+0x45/0x6e
> [<ffffffff8100292b>] ? system_call_fastpath+0x16/0x1b
> Code: ff ff ff ff c3 48 8b 57 18 8b 87 d8 00 00 00 48 8d 8a ac 00 00
> 00 f0 29 82 ac 00 00 00 48 8b 57 18 8b 8f d8 00 00 00 48 8b 42 38 <48>
> 83 b8 b0 00 00 00 00 74 06 01 8a f4 00 00 00 c3 41 57 41 89
> RIP [<ffffffff81369b2a>] sock_rfree+0x26/0x37
> RSP <ffff88042287fc20>
> ---[ end trace 22e6ca9ef825c0e6 ]---
>
>
> Seems to be the same issue, right?
>
Exactly the same. Only RAX value is different, its another chain.
BTW, 0x720 is not skb->len like I said earlier, but skb->truesize, and
0x720 is OK on a 64 bit machine for a regular packet.
48 8b 57 18 mov 0x18(%rdi),%rdx skb->sk
8b 87 d8 00 00 00 mov 0xd8(%rdi),%eax skb->truesize
48 8d 8a ac 00 00 00 lea 0xac(%rdx),%rcx
f0 29 82 ac 00 00 00 lock sub %eax,0xac(%rdx)
48 8b 57 18 mov 0x18(%rdi),%rdx skb->sk
8b 8f d8 00 00 00 mov 0xd8(%rdi),%ecx skb->truesize
48 8b 42 38 mov 0x38(%rdx),%rax sk->sk_prot
<48> 83 b8 b0 00 00 00 00 cmpq $0x0,0xb0(%rax)
74 06 je .+6
01 8a fa 00 00 00 add %ecx,0xfa(%rdx)
One thing to notice are the RDX and RBP values:
RDX: ffff8803f0ce05c0
RBP: ffff8803f0ee05c0
RDX being the sk pointer (and sk+0x38 contains the corrupted "sk_prot" value)
, we notice RBP contains same "sk" value + 0x200000 (2 Mbytes).
(same remark on your initial bug report)
Could you enable CONFIG_FRAME_POINTER=y in your config ?
^ permalink raw reply
* [PATCH RFC] act_cpu: packet distributing
From: Changli Gao @ 2010-07-14 3:17 UTC (permalink / raw)
To: Jamal Hadi Salim
Cc: David S. Miller, Patrick McHardy, Tom Herbert, Eric Dumazet,
netdev, Changli Gao
I want to know if I can assign the sk to skb as nf_tproxy_core does to avoid
the duplicate search later. Thanks.
act_cpu: packet distributing
This TC action can be used to redirect packets to the CPU:
* specified by the cpuid option
* specified by the class minor ID obtained previously
* on which the corresponding application runs
It supports the similar functions of RPS and RFS, but is more flexible.
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
include/linux/netdevice.h | 2
include/linux/tc_act/tc_cpu.h | 31 +++++
include/net/sock.h | 24 +++
include/net/tc_act/tc_cpu.h | 18 ++
net/core/dev.c | 4
net/core/sock.c | 1
net/sched/Kconfig | 12 +
net/sched/Makefile | 1
net/sched/act_cpu.c | 260 ++++++++++++++++++++++++++++++++++++++++++
9 files changed, 350 insertions(+), 3 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c4fedf0..318d422 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1435,6 +1435,8 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
+int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail);
+
#define HAVE_NETIF_QUEUE
extern void __netif_schedule(struct Qdisc *q);
diff --git a/include/linux/tc_act/tc_cpu.h b/include/linux/tc_act/tc_cpu.h
new file mode 100644
index 0000000..2704607
--- /dev/null
+++ b/include/linux/tc_act/tc_cpu.h
@@ -0,0 +1,31 @@
+#ifndef __LINUX_TC_CPU_H
+#define __LINUX_TC_CPU_H
+
+#include <linux/pkt_cls.h>
+#include <linux/types.h>
+
+#define TCA_ACT_CPU 12
+
+enum {
+ TCA_CPU_UNSPEC,
+ TCA_CPU_PARMS,
+ TCA_CPU_TM,
+ __TCA_CPU_MAX
+};
+#define TCA_CPU_MAX (__TCA_CPU_MAX - 1)
+
+enum {
+ TCA_CPU_TYPE_MAP,
+ TCA_CPU_TYPE_CPUID,
+ TCA_CPU_TYPE_SOCKET,
+ __TCA_CPU_TYPE_MAX
+};
+#define TCA_CPU_TYPE_MAX (__TCA_CPU_TYPE_MAX - 1)
+
+struct tc_cpu {
+ tc_gen;
+ __u32 type;
+ __u32 value;
+};
+
+#endif /* __LINUX_TC_CPU_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 3100e71..7913158 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -200,6 +200,7 @@ struct sock_common {
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_rxhash: flow hash received from netif layer
+ * @sk_cpu: the CPU on which the corresponding process works.
* @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer
@@ -284,6 +285,9 @@ struct sock {
#ifdef CONFIG_RPS
__u32 sk_rxhash;
#endif
+#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
+ int sk_cpu;
+#endif
unsigned long sk_flags;
unsigned long sk_lingertime;
struct sk_buff_head sk_error_queue;
@@ -639,7 +643,24 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
return sk->sk_backlog_rcv(sk, skb);
}
-static inline void sock_rps_record_flow(const struct sock *sk)
+static inline void sock_reset_cpu(struct sock *sk)
+{
+#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
+ sk->sk_cpu = nr_cpumask_bits;
+#endif
+}
+
+static inline void sock_save_cpu(struct sock *sk)
+{
+#if defined(CONFIG_NET_ACT_CPU) || defined(CONFIG_NET_ACT_CPU_MODULE)
+ int cpu = get_cpu();
+ if (sk->sk_cpu != cpu)
+ sk->sk_cpu = cpu;
+ put_cpu();
+#endif
+}
+
+static inline void sock_rps_record_flow(struct sock *sk)
{
#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
@@ -649,6 +670,7 @@ static inline void sock_rps_record_flow(const struct sock *sk)
rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
rcu_read_unlock();
#endif
+ sock_save_cpu(sk);
}
static inline void sock_rps_reset_flow(const struct sock *sk)
diff --git a/include/net/tc_act/tc_cpu.h b/include/net/tc_act/tc_cpu.h
new file mode 100644
index 0000000..0504bf0
--- /dev/null
+++ b/include/net/tc_act/tc_cpu.h
@@ -0,0 +1,18 @@
+#ifndef __NET_TC_CPU_H
+#define __NET_TC_CPU_H
+
+#include <linux/types.h>
+#include <net/act_api.h>
+
+struct tcf_cpu {
+ struct tcf_common common;
+ u32 type;
+ u32 value;
+};
+
+static inline struct tcf_cpu *to_tcf_cpu(struct tcf_common *pc)
+{
+ return container_of(pc, struct tcf_cpu, common);
+}
+
+#endif /* __NET_TC_CPU_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index e2b9fa2..45e8a21 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2443,8 +2443,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
* enqueue_to_backlog is called to queue an skb to a per CPU backlog
* queue (may be a remote CPU queue).
*/
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
- unsigned int *qtail)
+int enqueue_to_backlog(struct sk_buff *skb, int cpu, unsigned int *qtail)
{
struct softnet_data *sd;
unsigned long flags;
@@ -2482,6 +2481,7 @@ enqueue:
kfree_skb(skb);
return NET_RX_DROP;
}
+EXPORT_SYMBOL(enqueue_to_backlog);
/**
* netif_rx - post buffer to the network code
diff --git a/net/core/sock.c b/net/core/sock.c
index 363bc26..7a71e76 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1045,6 +1045,7 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
if (!try_module_get(prot->owner))
goto out_free_sec;
sk_tx_queue_clear(sk);
+ sock_reset_cpu(sk);
}
return sk;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb..a826758 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -427,6 +427,18 @@ config NET_CLS_ACT
A recent version of the iproute2 package is required to use
extended matches.
+config NET_ACT_CPU
+ tristate "Packet distributing"
+ depends on NET_CLS_ACT
+ depends on RPS
+ ---help---
+ Say Y here to do packets distributing. With it, you can distribute
+ the packets among the CPUs on the system in any way as you like. It
+ only works in ingress.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_cpu.
+
config NET_ACT_POLICE
tristate "Traffic Policing"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71b..a9e0b96 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -7,6 +7,7 @@ obj-y := sch_generic.o sch_mq.o
obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
obj-$(CONFIG_NET_CLS) += cls_api.o
obj-$(CONFIG_NET_CLS_ACT) += act_api.o
+obj-$(CONFIG_NET_ACT_CPU) += act_cpu.o
obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
diff --git a/net/sched/act_cpu.c b/net/sched/act_cpu.c
new file mode 100644
index 0000000..6b7d7fc
--- /dev/null
+++ b/net/sched/act_cpu.c
@@ -0,0 +1,260 @@
+/*
+ * Packet distributing actions
+ *
+ * Copyright (c) 2010- Changli Gao <xiaosuo@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/tc_act/tc_cpu.h>
+#include <net/tc_act/tc_cpu.h>
+
+#define CPU_TAB_MASK 15
+static struct tcf_common *tcf_cpu_ht[CPU_TAB_MASK + 1];
+static u32 cpu_idx_gen;
+static DEFINE_RWLOCK(cpu_lock);
+
+static struct tcf_hashinfo cpu_hash_info = {
+ .htab = tcf_cpu_ht,
+ .hmask = CPU_TAB_MASK,
+ .lock = &cpu_lock
+};
+
+static const struct nla_policy cpu_policy[TCA_CPU_MAX + 1] = {
+ [TCA_CPU_PARMS] = { .len = sizeof(struct tc_cpu) },
+};
+
+static int tcf_cpu_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_CPU_MAX + 1];
+ struct tc_cpu *parm;
+ struct tcf_cpu *p;
+ struct tcf_common *pc;
+ int ret;
+
+ if (nla == NULL)
+ return -EINVAL;
+ ret = nla_parse_nested(tb, TCA_CPU_MAX, nla, cpu_policy);
+ if (ret < 0)
+ return ret;
+ if (tb[TCA_CPU_PARMS] == NULL)
+ return -EINVAL;
+ parm = nla_data(tb[TCA_CPU_PARMS]);
+ if (parm->type > TCA_CPU_TYPE_MAX || parm->action != TC_ACT_STOLEN)
+ return -EINVAL;
+
+ pc = tcf_hash_check(parm->index, a, bind, &cpu_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+ &cpu_idx_gen, &cpu_hash_info);
+ if (IS_ERR(pc))
+ return PTR_ERR(pc);
+ ret = ACT_P_CREATED;
+ } else {
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &cpu_hash_info);
+ return -EEXIST;
+ }
+ ret = 0;
+ }
+ p = to_tcf_cpu(pc);
+
+ spin_lock_bh(&p->tcf_lock);
+ p->type = parm->type;
+ p->value = parm->value;
+ p->tcf_action = parm->action;
+ spin_unlock_bh(&p->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &cpu_hash_info);
+
+ return ret;
+}
+
+static int tcf_cpu_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_cpu *p = a->priv;
+
+ return tcf_hash_release(&p->common, bind, &cpu_hash_info);
+}
+
+static int tcf_cpu_from_sock(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ struct sock *sk;
+ struct {
+ __be16 source, dest;
+ } *ports;
+ int cpu;
+
+ if (skb->dev == NULL || skb->protocol != __constant_htons(ETH_P_IP))
+ goto err;
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ goto err;
+ iph = (struct iphdr *) skb->data;
+ if (!pskb_may_pull(skb, iph->ihl * 4 + 4))
+ goto err;
+ ports = (void *) (skb->data + iph->ihl * 4);
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
+ ports->source, iph->daddr, ports->dest,
+ skb->dev->ifindex);
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(dev_net(skb->dev), iph->saddr,
+ ports->source, iph->daddr, ports->dest,
+ skb->dev->ifindex);
+ break;
+ default:
+ goto err;
+ }
+
+ if (!sk)
+ goto err;
+ cpu = sk->sk_cpu;
+ if (sk->sk_protocol == IPPROTO_TCP && sk->sk_state == TCP_TIME_WAIT)
+ inet_twsk_put(inet_twsk(sk));
+ else
+ sock_put(sk);
+
+ return cpu;
+
+err:
+ return smp_processor_id();
+}
+
+static int tcf_cpu(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_cpu *p = a->priv;
+ u32 type;
+ u32 value;
+ int cpu, action;
+ struct sk_buff *nskb;
+ unsigned int qtail;
+
+ spin_lock(&p->tcf_lock);
+ p->tcf_tm.lastuse = jiffies;
+ p->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ p->tcf_bstats.packets++;
+ type = p->type;
+ value = p->value;
+ action = p->tcf_action;
+ spin_unlock(&p->tcf_lock);
+
+ if (G_TC_AT(skb->tc_verd) & AT_EGRESS) {
+ if (net_ratelimit())
+ pr_notice("act_cpu only works in ingress!\n");
+ goto drop;
+ }
+
+ nskb = skb_act_clone(skb, GFP_ATOMIC, action);
+ if (nskb == NULL)
+ goto drop;
+ nskb->tc_verd = 0;
+ nskb->tc_verd = SET_TC_NCLS(nskb->tc_verd);
+
+ switch (type) {
+ case TCA_CPU_TYPE_MAP:
+ cpu = TC_H_MIN(res->classid) - value;
+ break;
+ case TCA_CPU_TYPE_CPUID:
+ cpu = value;
+ break;
+ case TCA_CPU_TYPE_SOCKET:
+ cpu = tcf_cpu_from_sock(nskb);
+ break;
+ default:
+ kfree_skb(nskb);
+ goto drop;
+ }
+ if (cpu >= nr_cpumask_bits || !cpu_online(cpu))
+ cpu = smp_processor_id();
+
+ if (enqueue_to_backlog(nskb, cpu, &qtail) != NET_RX_SUCCESS)
+ goto drop;
+
+ return action;
+
+drop:
+ spin_lock(&p->tcf_lock);
+ p->tcf_qstats.drops++;
+ spin_unlock(&p->tcf_lock);
+ return TC_ACT_SHOT;
+}
+
+static int tcf_cpu_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+ int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_cpu *p = a->priv;
+ struct tc_cpu opt;
+ struct tcf_t t;
+
+ opt.index = p->tcf_index;
+ opt.action = p->tcf_action;
+ opt.refcnt = p->tcf_refcnt - ref;
+ opt.bindcnt = p->tcf_bindcnt - bind;
+ opt.type = p->type;
+ opt.value = p->value;
+ NLA_PUT(skb, TCA_CPU_PARMS, sizeof(opt), &opt);
+ t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+ NLA_PUT(skb, TCA_CPU_TM, sizeof(t), &t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_cpu_ops = {
+ .kind = "cpu",
+ .hinfo = &cpu_hash_info,
+ .type = TCA_ACT_CPU,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_cpu,
+ .dump = tcf_cpu_dump,
+ .cleanup = tcf_cpu_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_cpu_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_AUTHOR("Changli Gao <xiaosuo@gmail.com>");
+MODULE_DESCRIPTION("Packet distributing actions");
+MODULE_LICENSE("GPL");
+
+static int __init cpu_init_module(void)
+{
+ return tcf_register_action(&act_cpu_ops);
+}
+
+static void __exit cpu_cleanup_module(void)
+{
+ tcf_unregister_action(&act_cpu_ops);
+}
+
+module_init(cpu_init_module);
+module_exit(cpu_cleanup_module);
^ permalink raw reply related
* [PATCH 2.6.35-rc1] net-next: vmxnet3 fixes [5/5] Respect the interrupt type in VM configuration
From: Shreyas Bhatewara @ 2010-07-14 0:51 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel, pv-drivers
In-Reply-To: <alpine.LRH.2.00.1007070232300.5939@localhost.localdomain>
Do not ignore interrupt type in VM configuration
When interrupt type is not auto, do not ignore the interrupt type set from
VM configuration.
Driver may not always respect the interrupt type in configuration but it
will certainly try to. Eg: if MSIx is configured and enabling MSIx fails
in driver, it fall back on MSI and then on INTx.
Signed-off-by: Shreyas Bhatewara <sbhatewara@vmware.com>
---
drivers/net/vmxnet3/vmxnet3_drv.c | 13 ++++++++++---
drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 +-
drivers/net/vmxnet3/vmxnet3_int.h | 6 +++---
3 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index ffd6a9b..74fe3f0 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2355,9 +2355,13 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
adapter->intr.mask_mode = (cfg >> 2) & 0x3;
if (adapter->intr.type == VMXNET3_IT_AUTO) {
- int err;
+ adapter->intr.type = VMXNET3_IT_MSIX;
+ }
#ifdef CONFIG_PCI_MSI
+ if (adapter->intr.type == VMXNET3_IT_MSIX) {
+ int err;
+
adapter->intr.msix_entries[0].entry = 0;
err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
VMXNET3_LINUX_MAX_MSIX_VECT);
@@ -2366,15 +2370,18 @@ vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
adapter->intr.type = VMXNET3_IT_MSIX;
return;
}
-#endif
+ adapter->intr.type = VMXNET3_IT_MSI;
+ }
+ if (adapter->intr.type == VMXNET3_IT_MSI) {
+ int err;
err = pci_enable_msi(adapter->pdev);
if (!err) {
adapter->intr.num_intrs = 1;
- adapter->intr.type = VMXNET3_IT_MSI;
return;
}
}
+#endif /* CONFIG_PCI_MSI */
adapter->intr.type = VMXNET3_IT_INTX;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index de1ba14..52128c7 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -291,7 +291,7 @@ vmxnet3_set_flags(struct net_device *netdev, u32 data)
/* update harware LRO capability accordingly */
if (lro_requested)
- adapter->shared->devRead.misc.uptFeatures &= UPT1_F_LRO;
+ adapter->shared->devRead.misc.uptFeatures |= UPT1_F_LRO;
else
adapter->shared->devRead.misc.uptFeatures &=
~UPT1_F_LRO;
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index bbed330..d17fae6 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -68,10 +68,10 @@
/*
* Version numbers
*/
-#define VMXNET3_DRIVER_VERSION_STRING "1.0.5.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING "1.0.13.0-k"
/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM 0x01000500
+#define VMXNET3_DRIVER_VERSION_NUM 0x01000B00
/*
@@ -150,7 +150,7 @@ vmxnet3_cmd_ring_desc_avail(struct vmxnet3_cmd_ring *ring)
}
static inline bool
-vmxnet3_cmd_ring_desc_empty(struct vmxnet3_cmd_ring *ring)
+vmxnet3_cmd_ring_desc_empty(const struct vmxnet3_cmd_ring *ring)
{
return (ring->next2comp == ring->next2fill);
}
^ permalink raw reply related
* [PATCH 2.6.35-rc1] net-next: vmxnet3 fixes [4/5] Do not reset when the device is not opened
From: Shreyas Bhatewara @ 2010-07-14 0:49 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel, pv-drivers, ronghua, matthieu
In-Reply-To: <alpine.LRH.2.00.1007070229030.5939@localhost.localdomain>
Do not reset when the device is not opened
If a reset is scheduled, and the device goes thru close and open, it
may happen that reset and open may run in parallel.
The reset code now bails out if the device is not opened.
Signed-off-by: Ronghua Zang <ronghua@vmware.com>
Signed-off-by: Matthieu Bucchianeri <matthieu@vmware.com>
Signed-off-by: Shreyas Bhatewara <sbhatewara@vmware.com>
---
drivers/net/vmxnet3/vmxnet3_drv.c | 5 +++--
1 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 1e31d40..ffd6a9b 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2417,8 +2417,9 @@ vmxnet3_reset_work(struct work_struct *data)
if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state))
return;
- /* if the device is closed, we must leave it alone */
- if (netif_running(adapter->netdev)) {
+ /* if the device is closed or is being opened, we must leave it alone */
+ if (netif_running(adapter->netdev) &&
+ (adapter->netdev->flags & IFF_UP)) {
printk(KERN_INFO "%s: resetting\n", adapter->netdev->name);
vmxnet3_quiesce_dev(adapter);
vmxnet3_reset_dev(adapter);
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox