* [RFC 2/2] net: Add support for NTB virtual ethernet device
From: Jon Mason @ 2012-07-13 21:45 UTC (permalink / raw)
To: linux-kernel; +Cc: netdev, linux-pci, Dave Jiang
In-Reply-To: <1342215900-3358-1-git-send-email-jon.mason@intel.com>
A virtual ethernet device that uses the NTB transport API to send/receive data.
Signed-off-by: Jon Mason <jon.mason@intel.com>
---
drivers/net/Kconfig | 4 +
drivers/net/Makefile | 1 +
drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 416 insertions(+), 0 deletions(-)
create mode 100644 drivers/net/ntb_netdev.c
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c2bd80..9bf8a71 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -178,6 +178,10 @@ config NETPOLL_TRAP
config NET_POLL_CONTROLLER
def_bool NETPOLL
+config NTB_NETDEV
+ tristate "Virtual Ethernet over NTB"
+ depends on NTB
+
config RIONET
tristate "RapidIO Ethernet over messaging driver support"
depends on RAPIDIO
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3d375ca..9890148 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH) += usb/
obj-$(CONFIG_USB_CDC_PHONET) += usb/
obj-$(CONFIG_HYPERV_NET) += hyperv/
+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
new file mode 100644
index 0000000..bcbd9d4
--- /dev/null
+++ b/drivers/net/ntb_netdev.c
@@ -0,0 +1,411 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel PCIe NTB Network Linux driver
+ *
+ * Contact Information:
+ * Jon Mason <jon.mason@intel.com>
+ */
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/module.h>
+#include <linux/ntb.h>
+
+#define NTB_NETDEV_VER "0.4"
+
+MODULE_DESCRIPTION(KBUILD_MODNAME);
+MODULE_VERSION(NTB_NETDEV_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+struct ntb_netdev {
+ struct net_device *ndev;
+ struct ntb_transport_qp *qp;
+};
+
+#define NTB_TX_TIMEOUT_MS 1000
+#define NTB_RXQ_SIZE 100
+
+static struct net_device *netdev;
+
+static void ntb_netdev_event_handler(int status)
+{
+ struct ntb_netdev *dev = netdev_priv(netdev);
+
+ pr_debug("%s: Event %x, Link %x\n", KBUILD_MODNAME, status,
+ ntb_transport_link_query(dev->qp));
+
+ /* Currently, only link status event is supported */
+ if (status)
+ netif_carrier_on(netdev);
+ else
+ netif_carrier_off(netdev);
+}
+
+static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp)
+{
+ struct net_device *ndev = netdev;
+ struct sk_buff *skb;
+ int len, rc;
+
+ while ((skb = ntb_transport_rx_dequeue(qp, &len))) {
+ pr_debug("%s: %d byte payload received\n", __func__, len);
+
+ skb_put(skb, len);
+ skb->protocol = eth_type_trans(skb, ndev);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ if (netif_rx(skb) == NET_RX_DROP) {
+ ndev->stats.rx_errors++;
+ ndev->stats.rx_dropped++;
+ } else {
+ ndev->stats.rx_packets++;
+ ndev->stats.rx_bytes += len;
+ }
+
+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
+ if (!skb) {
+ ndev->stats.rx_errors++;
+ ndev->stats.rx_frame_errors++;
+ pr_err("%s: No skb\n", __func__);
+ break;
+ }
+
+ rc = ntb_transport_rx_enqueue(qp, skb, skb->data,
+ ndev->mtu + ETH_HLEN);
+ if (rc) {
+ ndev->stats.rx_errors++;
+ ndev->stats.rx_fifo_errors++;
+ pr_err("%s: error re-enqueuing\n", __func__);
+ break;
+ }
+ }
+}
+
+static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp)
+{
+ struct net_device *ndev = netdev;
+ struct sk_buff *skb;
+ int len;
+
+ while ((skb = ntb_transport_tx_dequeue(qp, &len))) {
+ ndev->stats.tx_packets++;
+ ndev->stats.tx_bytes += skb->len;
+ dev_kfree_skb(skb);
+ }
+
+ if (netif_queue_stopped(ndev))
+ netif_wake_queue(ndev);
+}
+
+static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *ndev)
+{
+ struct ntb_netdev *dev = netdev_priv(ndev);
+ int rc;
+
+ pr_debug("%s: ntb_transport_tx_enqueue\n", KBUILD_MODNAME);
+
+ rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
+ if (rc)
+ goto err;
+
+ return NETDEV_TX_OK;
+
+err:
+ ndev->stats.tx_dropped++;
+ ndev->stats.tx_errors++;
+ netif_stop_queue(ndev);
+ return NETDEV_TX_BUSY;
+}
+
+static int ntb_netdev_open(struct net_device *ndev)
+{
+ struct ntb_netdev *dev = netdev_priv(ndev);
+ struct sk_buff *skb;
+ int rc, i, len;
+
+ /* Add some empty rx bufs */
+ for (i = 0; i < NTB_RXQ_SIZE; i++) {
+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
+ if (!skb) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
+ ndev->mtu + ETH_HLEN);
+ if (rc == -EINVAL)
+ goto err;
+ }
+
+ netif_carrier_off(ndev);
+ ntb_transport_link_up(dev->qp);
+
+ return 0;
+
+err:
+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
+ kfree(skb);
+ return rc;
+}
+
+static int ntb_netdev_close(struct net_device *ndev)
+{
+ struct ntb_netdev *dev = netdev_priv(ndev);
+ struct sk_buff *skb;
+ int len;
+
+ ntb_transport_link_down(dev->qp);
+
+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
+ kfree(skb);
+
+ return 0;
+}
+
+static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
+{
+ struct ntb_netdev *dev = netdev_priv(ndev);
+ struct sk_buff *skb;
+ int len, rc;
+
+ if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN)
+ return -EINVAL;
+
+ if (!netif_running(ndev)) {
+ ndev->mtu = new_mtu;
+ return 0;
+ }
+
+ /* Bring down the link and dispose of posted rx entries */
+ ntb_transport_link_down(dev->qp);
+
+ if (ndev->mtu < new_mtu) {
+ int i;
+
+ for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++)
+ kfree(skb);
+
+ for (; i; i--) {
+ skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
+ if (!skb) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
+ new_mtu + ETH_HLEN);
+ if (rc) {
+ kfree(skb);
+ goto err;
+ }
+ }
+ }
+
+ ndev->mtu = new_mtu;
+
+ ntb_transport_link_up(dev->qp);
+
+ return 0;
+
+err:
+ ntb_transport_link_down(dev->qp);
+
+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
+ kfree(skb);
+
+ pr_err("Error changing MTU, device inoperable\n");
+ return rc;
+}
+
+static void ntb_netdev_tx_timeout(struct net_device *ndev)
+{
+ if (netif_running(ndev))
+ netif_wake_queue(ndev);
+}
+
+static const struct net_device_ops ntb_netdev_ops = {
+ .ndo_open = ntb_netdev_open,
+ .ndo_stop = ntb_netdev_close,
+ .ndo_start_xmit = ntb_netdev_start_xmit,
+ .ndo_change_mtu = ntb_netdev_change_mtu,
+ .ndo_tx_timeout = ntb_netdev_tx_timeout,
+ .ndo_set_mac_address = eth_mac_addr,
+};
+
+static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+ strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
+}
+
+static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
+ "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
+ "rx_frame_errors", "rx_fifo_errors",
+ "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
+};
+
+static int ntb_get_stats_count(__attribute__((unused)) struct net_device *dev)
+{
+ return ARRAY_SIZE(ntb_nic_stats);
+}
+
+static int ntb_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ntb_get_stats_count(dev);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
+ u32 sset, u8 *data)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
+ }
+}
+
+static void
+ntb_get_ethtool_stats(struct net_device *dev,
+ __attribute__((unused)) struct ethtool_stats *stats,
+ u64 *data)
+{
+ int i = 0;
+
+ data[i++] = dev->stats.rx_packets;
+ data[i++] = dev->stats.rx_bytes;
+ data[i++] = dev->stats.rx_errors;
+ data[i++] = dev->stats.rx_dropped;
+ data[i++] = dev->stats.rx_length_errors;
+ data[i++] = dev->stats.rx_frame_errors;
+ data[i++] = dev->stats.rx_fifo_errors;
+ data[i++] = dev->stats.tx_packets;
+ data[i++] = dev->stats.tx_bytes;
+ data[i++] = dev->stats.tx_errors;
+ data[i++] = dev->stats.tx_dropped;
+}
+
+static const struct ethtool_ops ntb_ethtool_ops = {
+ .get_drvinfo = ntb_get_drvinfo,
+ .get_sset_count = ntb_get_sset_count,
+ .get_strings = ntb_get_strings,
+ .get_ethtool_stats = ntb_get_ethtool_stats,
+ .get_link = ethtool_op_get_link,
+};
+
+static int __init ntb_netdev_init_module(void)
+{
+ struct ntb_netdev *dev;
+ int rc;
+
+ pr_info("%s: Probe\n", KBUILD_MODNAME);
+
+ netdev = alloc_etherdev(sizeof(struct ntb_netdev));
+ if (!netdev)
+ return -ENOMEM;
+
+ dev = netdev_priv(netdev);
+ dev->ndev = netdev;
+ netdev->features = NETIF_F_HIGHDMA;
+
+ netdev->hw_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
+
+ random_ether_addr(netdev->perm_addr);
+ memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
+
+ netdev->netdev_ops = &ntb_netdev_ops;
+ SET_ETHTOOL_OPS(netdev, &ntb_ethtool_ops);
+
+ dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
+ ntb_netdev_tx_handler,
+ ntb_netdev_event_handler);
+ if (!dev->qp) {
+ rc = -EIO;
+ goto err;
+ }
+
+ netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
+
+ rc = register_netdev(netdev);
+ if (rc)
+ goto err1;
+
+ pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
+ return 0;
+
+err1:
+ ntb_transport_free_queue(dev->qp);
+err:
+ free_netdev(netdev);
+ return rc;
+}
+module_init(ntb_netdev_init_module);
+
+static void __exit ntb_netdev_exit_module(void)
+{
+ struct ntb_netdev *dev = netdev_priv(netdev);
+
+ unregister_netdev(netdev);
+ ntb_transport_free_queue(dev->qp);
+ free_netdev(netdev);
+
+ pr_info("%s: Driver removed\n", KBUILD_MODNAME);
+}
+module_exit(ntb_netdev_exit_module);
--
1.7.5.4
^ permalink raw reply related
* Re: [Ksummit-2012-discuss] Organising Mini Summits within the Kernel Summit
From: Ben Hutchings @ 2012-07-13 22:35 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: James Bottomley, ksummit-2012-discuss, netdev
In-Reply-To: <20120711084417.4a8132ff@nehalam.linuxnetplumber.net>
On Wed, 2012-07-11 at 08:44 -0700, Stephen Hemminger wrote:
> On Wed, 11 Jul 2012 09:09:15 +0100
> James Bottomley <James.Bottomley@HansenPartnership.com> wrote:
>
> > Hi All,
> >
> > We have set aside the second day of the kernel summit (Tuesday 28
> > August) as mini-summit day. So far we have only the PCI mini summit on
> > this day, so if you can think of other topics, please send them to the
> > kernel summit discuss list:
> >
> > ksummit-2012-discuss@lists.linux-foundation.org
> >
> > Looking at the available rooms, we think we can run about four or five
> > mini summits.
> >
> > As an added incentive, mini summit organisers get to pick who they
> > invite and all the people they pick will get an automatic invitation to
> > the third day of the kernel summit (but not the core first day) and the
> > evening events.
> >
> > James
>
> Is there enough interest to have a networking mini-summit?
I would be interested.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [RFC 2/2] net: Add support for NTB virtual ethernet device
From: Jiri Pirko @ 2012-07-13 23:14 UTC (permalink / raw)
To: Jon Mason; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <1342215900-3358-2-git-send-email-jon.mason@intel.com>
Fri, Jul 13, 2012 at 11:45:00PM CEST, jon.mason@intel.com wrote:
>A virtual ethernet device that uses the NTB transport API to send/receive data.
>
>Signed-off-by: Jon Mason <jon.mason@intel.com>
>---
> drivers/net/Kconfig | 4 +
> drivers/net/Makefile | 1 +
> drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 416 insertions(+), 0 deletions(-)
> create mode 100644 drivers/net/ntb_netdev.c
>
>diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
>index 0c2bd80..9bf8a71 100644
>--- a/drivers/net/Kconfig
>+++ b/drivers/net/Kconfig
>@@ -178,6 +178,10 @@ config NETPOLL_TRAP
> config NET_POLL_CONTROLLER
> def_bool NETPOLL
>
>+config NTB_NETDEV
>+ tristate "Virtual Ethernet over NTB"
>+ depends on NTB
>+
> config RIONET
> tristate "RapidIO Ethernet over messaging driver support"
> depends on RAPIDIO
>diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>index 3d375ca..9890148 100644
>--- a/drivers/net/Makefile
>+++ b/drivers/net/Makefile
>@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH) += usb/
> obj-$(CONFIG_USB_CDC_PHONET) += usb/
>
> obj-$(CONFIG_HYPERV_NET) += hyperv/
>+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
>diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
>new file mode 100644
>index 0000000..bcbd9d4
>--- /dev/null
>+++ b/drivers/net/ntb_netdev.c
>@@ -0,0 +1,411 @@
>+/*
>+ * This file is provided under a dual BSD/GPLv2 license. When using or
>+ * redistributing this file, you may do so under either license.
>+ *
>+ * GPL LICENSE SUMMARY
>+ *
>+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
>+ *
>+ * This program is free software; you can redistribute it and/or modify
>+ * it under the terms of version 2 of the GNU General Public License as
>+ * published by the Free Software Foundation.
>+ *
>+ * This program is distributed in the hope that it will be useful, but
>+ * WITHOUT ANY WARRANTY; without even the implied warranty of
>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>+ * General Public License for more details.
>+ *
>+ * You should have received a copy of the GNU General Public License
>+ * along with this program; if not, write to the Free Software
>+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
>+ * The full GNU General Public License is included in this distribution
>+ * in the file called LICENSE.GPL.
>+ *
>+ * BSD LICENSE
>+ *
>+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
>+ *
>+ * Redistribution and use in source and binary forms, with or without
>+ * modification, are permitted provided that the following conditions
>+ * are met:
>+ *
>+ * * Redistributions of source code must retain the above copyright
>+ * notice, this list of conditions and the following disclaimer.
>+ * * Redistributions in binary form must reproduce the above copy
>+ * notice, this list of conditions and the following disclaimer in
>+ * the documentation and/or other materials provided with the
>+ * distribution.
>+ * * Neither the name of Intel Corporation nor the names of its
>+ * contributors may be used to endorse or promote products derived
>+ * from this software without specific prior written permission.
>+ *
>+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+ *
>+ * Intel PCIe NTB Network Linux driver
>+ *
>+ * Contact Information:
>+ * Jon Mason <jon.mason@intel.com>
>+ */
>+#include <linux/etherdevice.h>
>+#include <linux/ethtool.h>
>+#include <linux/module.h>
>+#include <linux/ntb.h>
>+
>+#define NTB_NETDEV_VER "0.4"
Is it really necessary to provide this in-file versioning? Doesn't
kernel version itself do the trick?
>+
>+MODULE_DESCRIPTION(KBUILD_MODNAME);
>+MODULE_VERSION(NTB_NETDEV_VER);
>+MODULE_LICENSE("Dual BSD/GPL");
>+MODULE_AUTHOR("Intel Corporation");
>+
>+struct ntb_netdev {
>+ struct net_device *ndev;
>+ struct ntb_transport_qp *qp;
>+};
>+
>+#define NTB_TX_TIMEOUT_MS 1000
>+#define NTB_RXQ_SIZE 100
>+
>+static struct net_device *netdev;
>+
>+static void ntb_netdev_event_handler(int status)
>+{
>+ struct ntb_netdev *dev = netdev_priv(netdev);
>+
>+ pr_debug("%s: Event %x, Link %x\n", KBUILD_MODNAME, status,
>+ ntb_transport_link_query(dev->qp));
>+
>+ /* Currently, only link status event is supported */
>+ if (status)
>+ netif_carrier_on(netdev);
>+ else
>+ netif_carrier_off(netdev);
>+}
>+
>+static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp)
>+{
>+ struct net_device *ndev = netdev;
>+ struct sk_buff *skb;
>+ int len, rc;
>+
>+ while ((skb = ntb_transport_rx_dequeue(qp, &len))) {
>+ pr_debug("%s: %d byte payload received\n", __func__, len);
>+
>+ skb_put(skb, len);
>+ skb->protocol = eth_type_trans(skb, ndev);
>+ skb->ip_summed = CHECKSUM_NONE;
>+
>+ if (netif_rx(skb) == NET_RX_DROP) {
>+ ndev->stats.rx_errors++;
>+ ndev->stats.rx_dropped++;
>+ } else {
>+ ndev->stats.rx_packets++;
>+ ndev->stats.rx_bytes += len;
>+ }
>+
>+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
>+ if (!skb) {
>+ ndev->stats.rx_errors++;
>+ ndev->stats.rx_frame_errors++;
>+ pr_err("%s: No skb\n", __func__);
>+ break;
>+ }
>+
>+ rc = ntb_transport_rx_enqueue(qp, skb, skb->data,
>+ ndev->mtu + ETH_HLEN);
>+ if (rc) {
>+ ndev->stats.rx_errors++;
>+ ndev->stats.rx_fifo_errors++;
>+ pr_err("%s: error re-enqueuing\n", __func__);
>+ break;
>+ }
>+ }
>+}
>+
>+static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp)
>+{
>+ struct net_device *ndev = netdev;
>+ struct sk_buff *skb;
>+ int len;
>+
>+ while ((skb = ntb_transport_tx_dequeue(qp, &len))) {
>+ ndev->stats.tx_packets++;
>+ ndev->stats.tx_bytes += skb->len;
>+ dev_kfree_skb(skb);
>+ }
>+
>+ if (netif_queue_stopped(ndev))
>+ netif_wake_queue(ndev);
>+}
>+
>+static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
>+ struct net_device *ndev)
>+{
>+ struct ntb_netdev *dev = netdev_priv(ndev);
>+ int rc;
>+
>+ pr_debug("%s: ntb_transport_tx_enqueue\n", KBUILD_MODNAME);
>+
>+ rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
>+ if (rc)
>+ goto err;
>+
>+ return NETDEV_TX_OK;
>+
>+err:
>+ ndev->stats.tx_dropped++;
>+ ndev->stats.tx_errors++;
>+ netif_stop_queue(ndev);
>+ return NETDEV_TX_BUSY;
>+}
>+
>+static int ntb_netdev_open(struct net_device *ndev)
>+{
>+ struct ntb_netdev *dev = netdev_priv(ndev);
>+ struct sk_buff *skb;
>+ int rc, i, len;
>+
>+ /* Add some empty rx bufs */
>+ for (i = 0; i < NTB_RXQ_SIZE; i++) {
>+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
>+ if (!skb) {
>+ rc = -ENOMEM;
>+ goto err;
>+ }
>+
>+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
>+ ndev->mtu + ETH_HLEN);
>+ if (rc == -EINVAL)
>+ goto err;
>+ }
>+
>+ netif_carrier_off(ndev);
>+ ntb_transport_link_up(dev->qp);
>+
>+ return 0;
>+
>+err:
>+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
>+ kfree(skb);
>+ return rc;
>+}
>+
>+static int ntb_netdev_close(struct net_device *ndev)
>+{
>+ struct ntb_netdev *dev = netdev_priv(ndev);
>+ struct sk_buff *skb;
>+ int len;
>+
>+ ntb_transport_link_down(dev->qp);
>+
>+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
>+ kfree(skb);
>+
>+ return 0;
>+}
>+
>+static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
>+{
>+ struct ntb_netdev *dev = netdev_priv(ndev);
>+ struct sk_buff *skb;
>+ int len, rc;
>+
>+ if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN)
>+ return -EINVAL;
>+
>+ if (!netif_running(ndev)) {
>+ ndev->mtu = new_mtu;
>+ return 0;
>+ }
>+
>+ /* Bring down the link and dispose of posted rx entries */
>+ ntb_transport_link_down(dev->qp);
>+
>+ if (ndev->mtu < new_mtu) {
>+ int i;
>+
>+ for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++)
>+ kfree(skb);
>+
>+ for (; i; i--) {
>+ skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
>+ if (!skb) {
>+ rc = -ENOMEM;
>+ goto err;
>+ }
>+
>+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
>+ new_mtu + ETH_HLEN);
>+ if (rc) {
>+ kfree(skb);
>+ goto err;
>+ }
>+ }
>+ }
>+
>+ ndev->mtu = new_mtu;
>+
>+ ntb_transport_link_up(dev->qp);
>+
>+ return 0;
>+
>+err:
>+ ntb_transport_link_down(dev->qp);
>+
>+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
>+ kfree(skb);
>+
>+ pr_err("Error changing MTU, device inoperable\n");
Would be maybe better to use netdev_err here (and on similar other
places)
Also, it might be good to provide rollback in case any of
netdev_alloc_skb() fails.
>+ return rc;
>+}
>+
>+static void ntb_netdev_tx_timeout(struct net_device *ndev)
>+{
>+ if (netif_running(ndev))
>+ netif_wake_queue(ndev);
>+}
>+
>+static const struct net_device_ops ntb_netdev_ops = {
>+ .ndo_open = ntb_netdev_open,
>+ .ndo_stop = ntb_netdev_close,
>+ .ndo_start_xmit = ntb_netdev_start_xmit,
>+ .ndo_change_mtu = ntb_netdev_change_mtu,
>+ .ndo_tx_timeout = ntb_netdev_tx_timeout,
>+ .ndo_set_mac_address = eth_mac_addr,
Does your device support mac change while it's up and running?
>+};
>+
>+static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
>+ struct ethtool_drvinfo *info)
>+{
>+ strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
>+ strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
>+}
>+
>+static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
>+ "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
>+ "rx_frame_errors", "rx_fifo_errors",
>+ "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
>+};
>+
>+static int ntb_get_stats_count(__attribute__((unused)) struct net_device *dev)
>+{
>+ return ARRAY_SIZE(ntb_nic_stats);
>+}
>+
>+static int ntb_get_sset_count(struct net_device *dev, int sset)
>+{
>+ switch (sset) {
>+ case ETH_SS_STATS:
>+ return ntb_get_stats_count(dev);
>+ default:
>+ return -EOPNOTSUPP;
>+ }
>+}
>+
>+static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
>+ u32 sset, u8 *data)
>+{
>+ switch (sset) {
>+ case ETH_SS_STATS:
>+ memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
>+ }
>+}
>+
>+static void
>+ntb_get_ethtool_stats(struct net_device *dev,
>+ __attribute__((unused)) struct ethtool_stats *stats,
>+ u64 *data)
>+{
>+ int i = 0;
>+
>+ data[i++] = dev->stats.rx_packets;
>+ data[i++] = dev->stats.rx_bytes;
>+ data[i++] = dev->stats.rx_errors;
>+ data[i++] = dev->stats.rx_dropped;
>+ data[i++] = dev->stats.rx_length_errors;
>+ data[i++] = dev->stats.rx_frame_errors;
>+ data[i++] = dev->stats.rx_fifo_errors;
>+ data[i++] = dev->stats.tx_packets;
>+ data[i++] = dev->stats.tx_bytes;
>+ data[i++] = dev->stats.tx_errors;
>+ data[i++] = dev->stats.tx_dropped;
>+}
>+
>+static const struct ethtool_ops ntb_ethtool_ops = {
>+ .get_drvinfo = ntb_get_drvinfo,
>+ .get_sset_count = ntb_get_sset_count,
>+ .get_strings = ntb_get_strings,
>+ .get_ethtool_stats = ntb_get_ethtool_stats,
>+ .get_link = ethtool_op_get_link,
>+};
>+
>+static int __init ntb_netdev_init_module(void)
>+{
>+ struct ntb_netdev *dev;
>+ int rc;
>+
>+ pr_info("%s: Probe\n", KBUILD_MODNAME);
>+
>+ netdev = alloc_etherdev(sizeof(struct ntb_netdev));
I might be missing something but this place (module init) does not seems
like a good place to do alloc_etherdev(). Do you want to support only
one netdevice instance?
Anyway, I think that using "static netdev" should be avoided in any case.
>+ if (!netdev)
>+ return -ENOMEM;
>+
>+ dev = netdev_priv(netdev);
>+ dev->ndev = netdev;
>+ netdev->features = NETIF_F_HIGHDMA;
>+
>+ netdev->hw_features = netdev->features;
>+ netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
>+
>+ random_ether_addr(netdev->perm_addr);
>+ memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
>+
>+ netdev->netdev_ops = &ntb_netdev_ops;
>+ SET_ETHTOOL_OPS(netdev, &ntb_ethtool_ops);
>+
>+ dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
>+ ntb_netdev_tx_handler,
>+ ntb_netdev_event_handler);
>+ if (!dev->qp) {
>+ rc = -EIO;
>+ goto err;
>+ }
>+
>+ netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
>+
>+ rc = register_netdev(netdev);
>+ if (rc)
>+ goto err1;
>+
>+ pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
>+ return 0;
>+
>+err1:
>+ ntb_transport_free_queue(dev->qp);
>+err:
>+ free_netdev(netdev);
>+ return rc;
>+}
>+module_init(ntb_netdev_init_module);
>+
>+static void __exit ntb_netdev_exit_module(void)
>+{
>+ struct ntb_netdev *dev = netdev_priv(netdev);
>+
>+ unregister_netdev(netdev);
>+ ntb_transport_free_queue(dev->qp);
>+ free_netdev(netdev);
>+
>+ pr_info("%s: Driver removed\n", KBUILD_MODNAME);
>+}
>+module_exit(ntb_netdev_exit_module);
>--
>1.7.5.4
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: resurrecting tcphealth
From: Stephen Hemminger @ 2012-07-13 23:55 UTC (permalink / raw)
To: Piotr Sawuk; +Cc: netdev, linux-kernel
In-Reply-To: <e9caf38359467bfa8a1e2ac86f6ef2cc.squirrel@webmail.univie.ac.at>
I am not sure if the is really necessary since the most
of the stats are available elsewhere.
Here are some comments on getting the simplified to match
the kernel style.
>
> static inline struct tcp_sock *tcp_sk(const struct sock *sk)
>diff -rub A/net/ipv4/tcp_input.c B/net/ipv4/tcp_input.c
>--- A/net/ipv4/tcp_input.c 2012-06-22 20:37:50.000000000 +0200
>+++ B/net/ipv4/tcp_input.c 2012-07-06 10:12:12.000000000 +0200
>@@ -4414,6 +4415,8 @@
> }
>
> if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
>+ /* Course retransmit inefficiency- this packet has been received twice. */
>+ tp->dup_pkts_recv++;
I don't understand that comment, could you use a better sentence please?
>
> tp->rx_opt.saw_tstamp = 0;
>
>+ /*
>+ * Tcp health monitoring is interested in
>+ * total per-connection packet arrivals.
>+ * This is in the fast path, but is quick.
>+ */
>+ tp->pkts_recv++;
>+
Comment seems bigger justification than necessary for simple
operation.
>diff -rub A/net/ipv4/tcp_ipv4.c B/net/ipv4/tcp_ipv4.c
>--- A/net/ipv4/tcp_ipv4.c 2012-06-22 20:37:50.000000000 +0200
>+++ B/net/ipv4/tcp_ipv4.c 2012-07-11 09:34:22.000000000 +0200
>@@ -2533,6 +2533,82 @@
> return 0;
> }
>
>+
>+/*
>+ * Output /proc/net/tcphealth
>+ */
>+#define LINESZ 128
>+
>+int tcp_health_seq_show(struct seq_file *seq, void *v)
>+{
>+ int len, num;
>+ char srcIP[32], destIP[32];
Unnecessary see below
>+
>+ unsigned long SmoothedRttEstimate,
>+ AcksSent, DupAcksSent, PktsRecv, DupPktsRecv;
Do not use CamelCase in kernel code.
>+ struct tcp_iter_state *st;
>+
>+ if (v == SEQ_START_TOKEN) {
>+ seq_printf(seq,
>+ "TCP Health Monitoring (established connections only)\n"
>+ " -Duplicate ACKs indicate lost or reordered packets on the
>connection.\n"
>+ " -Duplicate Packets Received signal a slow and badly inefficient
>connection.\n"
>+ " -RttEst estimates how long future packets will take on a round trip
>over the connection.\n"
>+ "id Local Address Remote Address RttEst(ms) AcksSent "
Header seems excessive, just put one line of header please.
>+ "DupAcksSent PktsRecv DupPktsRecv\n");
>+ goto out;
>+ }
>+
>+ /* Loop through established TCP connections */
>+ st = seq->private;
>+
>+
>+ if (st->state == TCP_SEQ_STATE_ESTABLISHED)
>+ {
>+/* ; //insert read-lock here */
Don't think you need read-lock
>+ const struct tcp_sock *tp = tcp_sk(v);
>+ const struct inet_sock *inet = inet_sk(v);
>+ __be32 dest = inet->inet_daddr;
>+ __be32 src = inet->inet_rcv_saddr;
>+ __u16 destp = ntohs(inet->inet_dport);
>+ __u16 srcp = ntohs(inet->inet_sport);
>+
These temp variables aren't redundant.
>+ num = st->num;
>+ SmoothedRttEstimate = (tp->srtt >> 3);
>+ AcksSent = tp->acks_sent;
>+ DupAcksSent = tp->dup_acks_sent;
>+ PktsRecv = tp->pkts_recv;
>+ DupPktsRecv = tp->dup_pkts_recv;
>+
>+ sprintf(srcIP, "%lu.%lu.%lu.%lu:%u",
>+ ((src >> 24) & 0xFF), ((src >> 16) & 0xFF), ((src >> 8) & 0xFF), (src &
>0xFF),
>+ srcp);
>+ sprintf(destIP, "%3d.%3d.%3d.%3d:%u",
>+ ((dest >> 24) & 0xFF), ((dest >> 16) & 0xFF), ((dest >> 8) & 0xFF),
>(dest & 0xFF),
>+ destp);
>+
>+ seq_printf(seq, "%d: %-21s %-21s "
>+ "%8lu %8lu %8lu %8lu %8lu%n",
>+ num,
>+ srcIP,
>+ destIP,
>+ SmoothedRttEstimate,
>+ AcksSent,
>+ DupAcksSent,
>+ PktsRecv,
>+ DupPktsRecv,
>+
>+ &len
>+ );
>+
Kernel has %pI4 to print IP addresses.
seq_printf(seq, "%d: %-21pI4 %-21pI4 "
"%8lu %8lu %8lu %8lu %8lu\n",
num,
&inet->inet_rcv_saddr,
&inet->inet_daddr,
tp->srtt >> 3,
tp->acks_sent,
tp->dup_acks_sent,
tp->pkts_recv,
tp->dup_pkts_recv);
>+ seq_printf(seq, "%*s\n", LINESZ - 1 - len, "");
This padding of line is bogus, just print variable length line.
Are you trying to make it fixed length record file?
^ permalink raw reply
* Re: [RFC 1/2] PCI-Express Non-Transparent Bridge Support
From: Stephen Hemminger @ 2012-07-14 0:00 UTC (permalink / raw)
To: Jon Mason; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <1342215900-3358-1-git-send-email-jon.mason@intel.com>
On Fri, 13 Jul 2012 14:44:59 -0700
Jon Mason <jon.mason@intel.com> wrote:
> A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus
> connecting 2 systems, providing electrical isolation between the two subsystems.
> A non-transparent bridge is functionally similar to a transparent bridge except
> that both sides of the bridge have their own independent address domains. The
> host on one side of the bridge will not have the visibility of the complete
> memory or I/O space on the other side of the bridge. To communicate across the
> non-transparent bridge, each NTB endpoint has one (or more) apertures exposed to
> the local system. Writes to these apertures are mirrored to memory on the
> remote system. Communications can also occur through the use of doorbell
> registers that initiate interrupts to the alternate domain, and scratch-pad
> registers accessible from both sides.
>
> The NTB device driver is needed to configure these memory windows, doorbell, and
> scratch-pad registers as well as use them in such a way as they can be turned
> into a viable communication channel to the remote system. ntb_hw.[ch]
> determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away
> the underlying hardware to provide access and a common interface to the doorbell
> registers, scratch pads, and memory windows. These hardware interfaces are
> exported so that other, non-mainlined kernel drivers can access these.
> ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to setup a
> communication channel(s) and provide a reliable way of transferring data from
> one side to the other, which it then exports so that "client" drivers can access
> them. These client drivers are used to provide a standard kernel interface
> (i.e., Ethernet device) to NTB, such that Linux can transfer data from one
> system to the other in a standard way.
>
> Signed-off-by: Jon Mason <jon.mason@intel.com>
> +
> +static int max_num_cbs = 2;
> +module_param(max_num_cbs, uint, 0644);
> +MODULE_PARM_DESC(max_num_cbs, "Maximum number of NTB transport connections");
Rather than making it a fixed size, could you dynamically set these up
with rtnl_link_ops?
> +static struct ntb_device *ntbdev;
What about multiple boards in system?
> +/**
> + * ntb_hw_link_status() - return the hardware link status
> + * @ndev: pointer to ntb_device instance
> + *
> + * Returns true if the hardware is connected to the remote system
> + *
> + * RETURNS: true or false based on the hardware link state
> + */
> +bool ntb_hw_link_status(struct ntb_device *ndev)
> +{
> + return ndev->link_status == NTB_LINK_UP;
> +}
> +EXPORT_SYMBOL(ntb_hw_link_status);
Why isn't this inline in some header?
> +/**
> + * ntb_query_pdev() - return the pci_dev pointer
> + * @ndev: pointer to ntb_device instance
> + *
> + * Given the ntb pointer return the pci_dev pointerfor the NTB hardware device
> + *
> + * RETURNS: a pointer to the ntb pci_dev
> + */
> +struct pci_dev *ntb_query_pdev(struct ntb_device *ndev)
> +{
> + return ndev->pdev;
> +}
> +EXPORT_SYMBOL(ntb_query_pdev);
> +
> +/**
> + * ntb_query_max_cbs() - return the maximum number of callback tuples
> + * @ndev: pointer to ntb_device instance
> + *
> + * The number of callbacks can vary depending on the platform and MSI-X/MSI
> + * enablement
> + *
> + * RETURNS: the maximum number of callback tuples (3, 15, or 33)
> + */
> +unsigned int ntb_query_max_cbs(struct ntb_device *ndev)
> +{
> + return ndev->max_cbs > max_num_cbs ? max_num_cbs : ndev->max_cbs;
> +}
> +EXPORT_SYMBOL(ntb_query_max_cbs);
> +
> +/**
> + * ntb_register_event_callback() - register event callback
> + * @ndev: pointer to ntb_device instance
> + * @func: callback function to register
> + *
> + * This function registers a callback for any HW driver events such as link
> + * up/down, power management notices and etc.
> + *
> + * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
> + */
> +int ntb_register_event_callback(struct ntb_device *ndev,
> + void (*func)(void *handle, unsigned int event))
> +{
> + if (ndev->event_cb)
> + return -EINVAL;
> +
> + ndev->event_cb = func;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(ntb_register_event_callback);
> +
> +/**
> + * ntb_unregister_event_callback() - unregisters the event callback
> + * @ndev: pointer to ntb_device instance
> + *
> + * This function unregisters the existing callback from transport
> + */
> +void ntb_unregister_event_callback(struct ntb_device *ndev)
> +{
> + ndev->event_cb = NULL;
> +}
> +EXPORT_SYMBOL(ntb_unregister_event_callback);
> +
^ permalink raw reply
* [PATCH net-next v2 3/8] tipc: use standard printk shortcut macros (pr_err etc.)
From: Paul Gortmaker @ 2012-07-13 23:53 UTC (permalink / raw)
To: davem; +Cc: netdev, joe, Erik Hugne, Jon Maloy, Paul Gortmaker
In-Reply-To: <1342111201-9426-4-git-send-email-paul.gortmaker@windriver.com>
From: Erik Hugne <erik.hugne@ericsson.com>
All messages should go directly to the kernel log. The TIPC
specific error, warning, info and debug trace macro's are
removed and all references replaced with pr_err, pr_warn,
pr_info and pr_debug.
Commonly used sub-strings are explicitly declared as a const
char to reduce .text size.
Note that this means the debug messages (changed to pr_debug),
are now enabled through dynamic debugging, instead of a TIPC
specific Kconfig option (TIPC_DEBUG). The latter will be
phased out completely
Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
[PG: use pr_fmt as suggested by Joe Perches <joe@perches.com>]
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
[just resending the 3/8 patch, since the others are unchanged, aside
from minimal trivial context refresh.]
net/tipc/bcast.c | 2 +-
net/tipc/bearer.c | 52 ++++++++++++----------
net/tipc/config.c | 6 +--
net/tipc/core.c | 13 +++---
net/tipc/core.h | 12 +----
net/tipc/discover.c | 4 +-
net/tipc/handler.c | 4 +-
net/tipc/link.c | 116 +++++++++++++++++++++++++-----------------------
net/tipc/name_distr.c | 25 ++++++-----
net/tipc/name_table.c | 40 ++++++++---------
net/tipc/net.c | 8 ++--
net/tipc/netlink.c | 2 +-
net/tipc/node.c | 22 ++++-----
net/tipc/node_subscr.c | 3 +-
net/tipc/port.c | 8 ++--
net/tipc/ref.c | 10 ++---
net/tipc/socket.c | 10 ++---
net/tipc/subscr.c | 14 +++---
18 files changed, 177 insertions(+), 174 deletions(-)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d9df34f..fef3689 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -880,7 +880,7 @@ void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port)
if (!item->next) {
item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
if (!item->next) {
- warn("Incomplete multicast delivery, no memory\n");
+ pr_warn("Incomplete multicast delivery, no memory\n");
return;
}
item->next->next = NULL;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 86b703f..1840e1f 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -123,7 +123,7 @@ int tipc_register_media(struct tipc_media *m_ptr)
exit:
write_unlock_bh(&tipc_net_lock);
if (res)
- warn("Media <%s> registration error\n", m_ptr->name);
+ pr_warn("Media <%s> registration error\n", m_ptr->name);
return res;
}
@@ -418,12 +418,12 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
int res = -EINVAL;
if (!tipc_own_addr) {
- warn("Bearer <%s> rejected, not supported in standalone mode\n",
- name);
+ pr_warn("Bearer <%s> rejected, not supported in standalone mode\n",
+ name);
return -ENOPROTOOPT;
}
if (!bearer_name_validate(name, &b_names)) {
- warn("Bearer <%s> rejected, illegal name\n", name);
+ pr_warn("Bearer <%s> rejected, illegal name\n", name);
return -EINVAL;
}
if (tipc_addr_domain_valid(disc_domain) &&
@@ -435,12 +435,13 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
res = 0; /* accept specified node in own cluster */
}
if (res) {
- warn("Bearer <%s> rejected, illegal discovery domain\n", name);
+ pr_warn("Bearer <%s> rejected, illegal discovery domain\n",
+ name);
return -EINVAL;
}
if ((priority > TIPC_MAX_LINK_PRI) &&
(priority != TIPC_MEDIA_LINK_PRI)) {
- warn("Bearer <%s> rejected, illegal priority\n", name);
+ pr_warn("Bearer <%s> rejected, illegal priority\n", name);
return -EINVAL;
}
@@ -448,8 +449,8 @@ int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority)
m_ptr = tipc_media_find(b_names.media_name);
if (!m_ptr) {
- warn("Bearer <%s> rejected, media <%s> not registered\n", name,
- b_names.media_name);
+ pr_warn("Bearer <%s> rejected, media <%s> not registered\n",
+ name, b_names.media_name);
goto exit;
}
@@ -465,24 +466,25 @@ restart:
continue;
}
if (!strcmp(name, tipc_bearers[i].name)) {
- warn("Bearer <%s> rejected, already enabled\n", name);
+ pr_warn("Bearer <%s> rejected, already enabled\n",
+ name);
goto exit;
}
if ((tipc_bearers[i].priority == priority) &&
(++with_this_prio > 2)) {
if (priority-- == 0) {
- warn("Bearer <%s> rejected, duplicate priority\n",
- name);
+ pr_warn("Bearer <%s> rejected, duplicate priority\n",
+ name);
goto exit;
}
- warn("Bearer <%s> priority adjustment required %u->%u\n",
- name, priority + 1, priority);
+ pr_warn("Bearer <%s> priority adjustment required %u->%u\n",
+ name, priority + 1, priority);
goto restart;
}
}
if (bearer_id >= MAX_BEARERS) {
- warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
- name, MAX_BEARERS);
+ pr_warn("Bearer <%s> rejected, bearer limit reached (%u)\n",
+ name, MAX_BEARERS);
goto exit;
}
@@ -490,7 +492,8 @@ restart:
strcpy(b_ptr->name, name);
res = m_ptr->enable_bearer(b_ptr);
if (res) {
- warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res);
+ pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
+ name, -res);
goto exit;
}
@@ -508,12 +511,13 @@ restart:
res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain);
if (res) {
bearer_disable(b_ptr);
- warn("Bearer <%s> rejected, discovery object creation failed\n",
- name);
+ pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
+ name);
goto exit;
}
- info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
- name, tipc_addr_string_fill(addr_string, disc_domain), priority);
+ pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
+ name,
+ tipc_addr_string_fill(addr_string, disc_domain), priority);
exit:
write_unlock_bh(&tipc_net_lock);
return res;
@@ -531,12 +535,12 @@ int tipc_block_bearer(const char *name)
read_lock_bh(&tipc_net_lock);
b_ptr = tipc_bearer_find(name);
if (!b_ptr) {
- warn("Attempt to block unknown bearer <%s>\n", name);
+ pr_warn("Attempt to block unknown bearer <%s>\n", name);
read_unlock_bh(&tipc_net_lock);
return -EINVAL;
}
- info("Blocking bearer <%s>\n", name);
+ pr_info("Blocking bearer <%s>\n", name);
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
list_splice_init(&b_ptr->cong_links, &b_ptr->links);
@@ -562,7 +566,7 @@ static void bearer_disable(struct tipc_bearer *b_ptr)
struct tipc_link *l_ptr;
struct tipc_link *temp_l_ptr;
- info("Disabling bearer <%s>\n", b_ptr->name);
+ pr_info("Disabling bearer <%s>\n", b_ptr->name);
spin_lock_bh(&b_ptr->lock);
b_ptr->blocked = 1;
b_ptr->media->disable_bearer(b_ptr);
@@ -584,7 +588,7 @@ int tipc_disable_bearer(const char *name)
write_lock_bh(&tipc_net_lock);
b_ptr = tipc_bearer_find(name);
if (b_ptr == NULL) {
- warn("Attempt to disable unknown bearer <%s>\n", name);
+ pr_warn("Attempt to disable unknown bearer <%s>\n", name);
res = -EINVAL;
} else {
bearer_disable(b_ptr);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index c5712a3..7978fdd 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -432,7 +432,7 @@ static void cfg_named_msg_event(void *userdata,
if ((size < sizeof(*req_hdr)) ||
(size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) ||
(ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) {
- warn("Invalid configuration message discarded\n");
+ pr_warn("Invalid configuration message discarded\n");
return;
}
@@ -478,7 +478,7 @@ int tipc_cfg_init(void)
return 0;
failed:
- err("Unable to create configuration service\n");
+ pr_err("Unable to create configuration service\n");
return res;
}
@@ -494,7 +494,7 @@ void tipc_cfg_reinit(void)
seq.lower = seq.upper = tipc_own_addr;
res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq);
if (res)
- err("Unable to reinitialize configuration service\n");
+ pr_err("Unable to reinitialize configuration service\n");
}
void tipc_cfg_stop(void)
diff --git a/net/tipc/core.c b/net/tipc/core.c
index f7b9523..3689cb4 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -34,14 +34,13 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/module.h>
-
#include "core.h"
#include "ref.h"
#include "name_table.h"
#include "subscr.h"
#include "config.h"
+#include <linux/module.h>
#ifndef CONFIG_TIPC_PORTS
#define CONFIG_TIPC_PORTS 8191
@@ -162,9 +161,9 @@ static int __init tipc_init(void)
int res;
if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
- warn("Unable to create log buffer\n");
+ pr_warn("Unable to create log buffer\n");
- info("Activated (version " TIPC_MOD_VER ")\n");
+ pr_info("Activated (version " TIPC_MOD_VER ")\n");
tipc_own_addr = 0;
tipc_remote_management = 1;
@@ -175,9 +174,9 @@ static int __init tipc_init(void)
res = tipc_core_start();
if (res)
- err("Unable to start in single node mode\n");
+ pr_err("Unable to start in single node mode\n");
else
- info("Started in single node mode\n");
+ pr_info("Started in single node mode\n");
return res;
}
@@ -185,7 +184,7 @@ static void __exit tipc_exit(void)
{
tipc_core_stop_net();
tipc_core_stop();
- info("Deactivated\n");
+ pr_info("Deactivated\n");
}
module_init(tipc_init);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 2a9bb99..c376ec0 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -37,6 +37,8 @@
#ifndef _TIPC_CORE_H
#define _TIPC_CORE_H
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/tipc.h>
#include <linux/tipc_config.h>
#include <linux/types.h>
@@ -89,13 +91,6 @@ void tipc_printf(struct print_buf *, const char *fmt, ...);
#define TIPC_OUTPUT TIPC_LOG
#endif
-#define err(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
- KERN_ERR "TIPC: " fmt, ## arg)
-#define warn(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
- KERN_WARNING "TIPC: " fmt, ## arg)
-#define info(fmt, arg...) tipc_printf(TIPC_OUTPUT, \
- KERN_NOTICE "TIPC: " fmt, ## arg)
-
#ifdef CONFIG_TIPC_DEBUG
/*
@@ -105,15 +100,12 @@ void tipc_printf(struct print_buf *, const char *fmt, ...);
#define DBG_OUTPUT TIPC_LOG
#endif
-#define dbg(fmt, arg...) tipc_printf(DBG_OUTPUT, KERN_DEBUG fmt, ## arg);
-
#define msg_dbg(msg, txt) tipc_msg_dbg(DBG_OUTPUT, msg, txt);
void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *);
#else
-#define dbg(fmt, arg...) do {} while (0)
#define msg_dbg(msg, txt) do {} while (0)
#define tipc_msg_dbg(buf, msg, txt) do {} while (0)
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index ae054cf..2f91f37 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -106,8 +106,8 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
tipc_printbuf_init(&pb, media_addr_str, sizeof(media_addr_str));
tipc_media_addr_printf(&pb, media_addr);
tipc_printbuf_validate(&pb);
- warn("Duplicate %s using %s seen on <%s>\n",
- node_addr_str, media_addr_str, b_ptr->name);
+ pr_warn("Duplicate %s using %s seen on <%s>\n", node_addr_str,
+ media_addr_str, b_ptr->name);
}
/**
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index 9c6f22f..7a52d39 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -57,14 +57,14 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument)
struct queue_item *item;
if (!handler_enabled) {
- err("Signal request ignored by handler\n");
+ pr_err("Signal request ignored by handler\n");
return -ENOPROTOOPT;
}
spin_lock_bh(&qitem_lock);
item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC);
if (!item) {
- err("Signal queue out of memory\n");
+ pr_err("Signal queue out of memory\n");
spin_unlock_bh(&qitem_lock);
return -ENOMEM;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f6bf483..e543b9f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -41,6 +41,12 @@
#include "discover.h"
#include "config.h"
+/*
+ * Error message prefixes
+ */
+static const char *link_co_err = "Link changeover error, ";
+static const char *link_rst_msg = "Resetting link ";
+static const char *link_unk_evt = "Unknown link event ";
/*
* Out-of-range value for link session numbers
@@ -300,20 +306,20 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
if (n_ptr->link_cnt >= 2) {
tipc_addr_string_fill(addr_string, n_ptr->addr);
- err("Attempt to establish third link to %s\n", addr_string);
+ pr_err("Attempt to establish third link to %s\n", addr_string);
return NULL;
}
if (n_ptr->links[b_ptr->identity]) {
tipc_addr_string_fill(addr_string, n_ptr->addr);
- err("Attempt to establish second link on <%s> to %s\n",
- b_ptr->name, addr_string);
+ pr_err("Attempt to establish second link on <%s> to %s\n",
+ b_ptr->name, addr_string);
return NULL;
}
l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
if (!l_ptr) {
- warn("Link creation failed, no memory\n");
+ pr_warn("Link creation failed, no memory\n");
return NULL;
}
@@ -371,7 +377,7 @@ struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
void tipc_link_delete(struct tipc_link *l_ptr)
{
if (!l_ptr) {
- err("Attempt to delete non-existent link\n");
+ pr_err("Attempt to delete non-existent link\n");
return;
}
@@ -632,8 +638,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_set_timer(l_ptr, cont_intv / 4);
break;
case RESET_MSG:
- info("Resetting link <%s>, requested by peer\n",
- l_ptr->name);
+ pr_info("%s<%s>, requested by peer\n", link_rst_msg,
+ l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
@@ -642,7 +648,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_set_timer(l_ptr, cont_intv);
break;
default:
- err("Unknown link event %u in WW state\n", event);
+ pr_err("%s%u in WW state\n", link_unk_evt, event);
}
break;
case WORKING_UNKNOWN:
@@ -654,8 +660,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_set_timer(l_ptr, cont_intv);
break;
case RESET_MSG:
- info("Resetting link <%s>, requested by peer "
- "while probing\n", l_ptr->name);
+ pr_info("%s<%s>, requested by peer while probing\n",
+ link_rst_msg, l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_RESET;
l_ptr->fsm_msg_cnt = 0;
@@ -680,8 +686,8 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
l_ptr->fsm_msg_cnt++;
link_set_timer(l_ptr, cont_intv / 4);
} else { /* Link has failed */
- warn("Resetting link <%s>, peer not responding\n",
- l_ptr->name);
+ pr_warn("%s<%s>, peer not responding\n",
+ link_rst_msg, l_ptr->name);
tipc_link_reset(l_ptr);
l_ptr->state = RESET_UNKNOWN;
l_ptr->fsm_msg_cnt = 0;
@@ -692,7 +698,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
}
break;
default:
- err("Unknown link event %u in WU state\n", event);
+ pr_err("%s%u in WU state\n", link_unk_evt, event);
}
break;
case RESET_UNKNOWN:
@@ -726,7 +732,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_set_timer(l_ptr, cont_intv);
break;
default:
- err("Unknown link event %u in RU state\n", event);
+ pr_err("%s%u in RU state\n", link_unk_evt, event);
}
break;
case RESET_RESET:
@@ -751,11 +757,11 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
link_set_timer(l_ptr, cont_intv);
break;
default:
- err("Unknown link event %u in RR state\n", event);
+ pr_err("%s%u in RR state\n", link_unk_evt, event);
}
break;
default:
- err("Unknown link state %u/%u\n", l_ptr->state, event);
+ pr_err("Unknown link state %u/%u\n", l_ptr->state, event);
}
}
@@ -856,7 +862,8 @@ int tipc_link_send_buf(struct tipc_link *l_ptr, struct sk_buff *buf)
}
kfree_skb(buf);
if (imp > CONN_MANAGER) {
- warn("Resetting link <%s>, send queue full", l_ptr->name);
+ pr_warn("%s<%s>, send queue full", link_rst_msg,
+ l_ptr->name);
tipc_link_reset(l_ptr);
}
return dsz;
@@ -1409,8 +1416,8 @@ static void link_reset_all(unsigned long addr)
tipc_node_lock(n_ptr);
- warn("Resetting all links to %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+ pr_warn("Resetting all links to %s\n",
+ tipc_addr_string_fill(addr_string, n_ptr->addr));
for (i = 0; i < MAX_BEARERS; i++) {
if (n_ptr->links[i]) {
@@ -1428,7 +1435,7 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
{
struct tipc_msg *msg = buf_msg(buf);
- warn("Retransmission failure on link <%s>\n", l_ptr->name);
+ pr_warn("Retransmission failure on link <%s>\n", l_ptr->name);
if (l_ptr->addr) {
/* Handle failure on standard link */
@@ -1440,21 +1447,23 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
struct tipc_node *n_ptr;
char addr_string[16];
- info("Msg seq number: %u, ", msg_seqno(msg));
- info("Outstanding acks: %lu\n",
- (unsigned long) TIPC_SKB_CB(buf)->handle);
+ pr_info("Msg seq number: %u, ", msg_seqno(msg));
+ pr_cont("Outstanding acks: %lu\n",
+ (unsigned long) TIPC_SKB_CB(buf)->handle);
n_ptr = tipc_bclink_retransmit_to();
tipc_node_lock(n_ptr);
tipc_addr_string_fill(addr_string, n_ptr->addr);
- info("Broadcast link info for %s\n", addr_string);
- info("Supportable: %d, ", n_ptr->bclink.supportable);
- info("Supported: %d, ", n_ptr->bclink.supported);
- info("Acked: %u\n", n_ptr->bclink.acked);
- info("Last in: %u, ", n_ptr->bclink.last_in);
- info("Oos state: %u, ", n_ptr->bclink.oos_state);
- info("Last sent: %u\n", n_ptr->bclink.last_sent);
+ pr_info("Broadcast link info for %s\n", addr_string);
+ pr_info("Supportable: %d, Supported: %d, Acked: %u\n",
+ n_ptr->bclink.supportable,
+ n_ptr->bclink.supported,
+ n_ptr->bclink.acked);
+ pr_info("Last in: %u, Oos state: %u, Last sent: %u\n",
+ n_ptr->bclink.last_in,
+ n_ptr->bclink.oos_state,
+ n_ptr->bclink.last_sent);
tipc_k_signal((Handler)link_reset_all, (unsigned long)n_ptr->addr);
@@ -1479,8 +1488,8 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *buf,
l_ptr->retransm_queue_head = msg_seqno(msg);
l_ptr->retransm_queue_size = retransmits;
} else {
- err("Unexpected retransmit on link %s (qsize=%d)\n",
- l_ptr->name, l_ptr->retransm_queue_size);
+ pr_err("Unexpected retransmit on link %s (qsize=%d)\n",
+ l_ptr->name, l_ptr->retransm_queue_size);
}
return;
} else {
@@ -2074,8 +2083,9 @@ static void link_recv_proto_msg(struct tipc_link *l_ptr, struct sk_buff *buf)
if (msg_linkprio(msg) &&
(msg_linkprio(msg) != l_ptr->priority)) {
- warn("Resetting link <%s>, priority change %u->%u\n",
- l_ptr->name, l_ptr->priority, msg_linkprio(msg));
+ pr_warn("%s<%s>, priority change %u->%u\n",
+ link_rst_msg, l_ptr->name, l_ptr->priority,
+ msg_linkprio(msg));
l_ptr->priority = msg_linkprio(msg);
tipc_link_reset(l_ptr); /* Enforce change to take effect */
break;
@@ -2139,15 +2149,13 @@ static void tipc_link_tunnel(struct tipc_link *l_ptr,
tunnel = l_ptr->owner->active_links[selector & 1];
if (!tipc_link_is_up(tunnel)) {
- warn("Link changeover error, "
- "tunnel link no longer available\n");
+ pr_warn("%stunnel link no longer available\n", link_co_err);
return;
}
msg_set_size(tunnel_hdr, length + INT_H_SIZE);
buf = tipc_buf_acquire(length + INT_H_SIZE);
if (!buf) {
- warn("Link changeover error, "
- "unable to send tunnel msg\n");
+ pr_warn("%sunable to send tunnel msg\n", link_co_err);
return;
}
skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
@@ -2173,8 +2181,7 @@ void tipc_link_changeover(struct tipc_link *l_ptr)
return;
if (!l_ptr->owner->permit_changeover) {
- warn("Link changeover error, "
- "peer did not permit changeover\n");
+ pr_warn("%speer did not permit changeover\n", link_co_err);
return;
}
@@ -2192,8 +2199,8 @@ void tipc_link_changeover(struct tipc_link *l_ptr)
msg_set_size(&tunnel_hdr, INT_H_SIZE);
tipc_link_send_buf(tunnel, buf);
} else {
- warn("Link changeover error, "
- "unable to send changeover msg\n");
+ pr_warn("%sunable to send changeover msg\n",
+ link_co_err);
}
return;
}
@@ -2246,8 +2253,8 @@ void tipc_link_send_duplicate(struct tipc_link *l_ptr, struct tipc_link *tunnel)
msg_set_size(&tunnel_hdr, length + INT_H_SIZE);
outbuf = tipc_buf_acquire(length + INT_H_SIZE);
if (outbuf == NULL) {
- warn("Link changeover error, "
- "unable to send duplicate msg\n");
+ pr_warn("%sunable to send duplicate msg\n",
+ link_co_err);
return;
}
skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
@@ -2298,8 +2305,8 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
if (!dest_link)
goto exit;
if (dest_link == *l_ptr) {
- err("Unexpected changeover message on link <%s>\n",
- (*l_ptr)->name);
+ pr_err("Unexpected changeover message on link <%s>\n",
+ (*l_ptr)->name);
goto exit;
}
*l_ptr = dest_link;
@@ -2310,7 +2317,7 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
goto exit;
*buf = buf_extract(tunnel_buf, INT_H_SIZE);
if (*buf == NULL) {
- warn("Link changeover error, duplicate msg dropped\n");
+ pr_warn("%sduplicate msg dropped\n", link_co_err);
goto exit;
}
kfree_skb(tunnel_buf);
@@ -2319,8 +2326,8 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
/* First original message ?: */
if (tipc_link_is_up(dest_link)) {
- info("Resetting link <%s>, changeover initiated by peer\n",
- dest_link->name);
+ pr_info("%s<%s>, changeover initiated by peer\n", link_rst_msg,
+ dest_link->name);
tipc_link_reset(dest_link);
dest_link->exp_msg_count = msg_count;
if (!msg_count)
@@ -2333,8 +2340,7 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
/* Receive original message */
if (dest_link->exp_msg_count == 0) {
- warn("Link switchover error, "
- "got too many tunnelled messages\n");
+ pr_warn("%sgot too many tunnelled messages\n", link_co_err);
goto exit;
}
dest_link->exp_msg_count--;
@@ -2346,7 +2352,7 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
kfree_skb(tunnel_buf);
return 1;
} else {
- warn("Link changeover error, original msg dropped\n");
+ pr_warn("%soriginal msg dropped\n", link_co_err);
}
}
exit:
@@ -2367,7 +2373,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
while (msgcount--) {
obuf = buf_extract(buf, pos);
if (obuf == NULL) {
- warn("Link unable to unbundle message(s)\n");
+ pr_warn("Link unable to unbundle message(s)\n");
break;
}
pos += align(msg_size(buf_msg(obuf)));
@@ -2538,7 +2544,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
set_fragm_size(pbuf, fragm_sz);
set_expected_frags(pbuf, exp_fragm_cnt - 1);
} else {
- dbg("Link unable to reassemble fragmented message\n");
+ pr_debug("Link unable to reassemble fragmented message\n");
kfree_skb(fbuf);
return -1;
}
@@ -3060,5 +3066,5 @@ print_state:
tipc_printf(buf, "\n");
tipc_printbuf_validate(buf);
- info("%s", print_area);
+ pr_info("%s", print_area);
}
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 158318e..55d3928 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -161,7 +161,7 @@ void tipc_named_publish(struct publication *publ)
buf = named_prepare_buf(PUBLICATION, ITEM_SIZE, 0);
if (!buf) {
- warn("Publication distribution failure\n");
+ pr_warn("Publication distribution failure\n");
return;
}
@@ -186,7 +186,7 @@ void tipc_named_withdraw(struct publication *publ)
buf = named_prepare_buf(WITHDRAWAL, ITEM_SIZE, 0);
if (!buf) {
- warn("Withdrawal distribution failure\n");
+ pr_warn("Withdrawal distribution failure\n");
return;
}
@@ -213,7 +213,7 @@ static void named_distribute(struct list_head *message_list, u32 node,
rest -= left;
buf = named_prepare_buf(PUBLICATION, left, node);
if (!buf) {
- warn("Bulk publication failure\n");
+ pr_warn("Bulk publication failure\n");
return;
}
item = (struct distr_item *)msg_data(buf_msg(buf));
@@ -283,9 +283,10 @@ static void named_purge_publ(struct publication *publ)
write_unlock_bh(&tipc_nametbl_lock);
if (p != publ) {
- err("Unable to remove publication from failed node\n"
- "(type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
- publ->type, publ->lower, publ->node, publ->ref, publ->key);
+ pr_err("Unable to remove publication from failed node\n"
+ " (type=%u, lower=%u, node=0x%x, ref=%u, key=%u)\n",
+ publ->type, publ->lower, publ->node, publ->ref,
+ publ->key);
}
kfree(p);
@@ -329,14 +330,14 @@ void tipc_named_recv(struct sk_buff *buf)
tipc_nodesub_unsubscribe(&publ->subscr);
kfree(publ);
} else {
- err("Unable to remove publication by node 0x%x\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- msg_orignode(msg),
- ntohl(item->type), ntohl(item->lower),
- ntohl(item->ref), ntohl(item->key));
+ pr_err("Unable to remove publication by node 0x%x\n"
+ " (type=%u, lower=%u, ref=%u, key=%u)\n",
+ msg_orignode(msg), ntohl(item->type),
+ ntohl(item->lower), ntohl(item->ref),
+ ntohl(item->key));
}
} else {
- warn("Unrecognized name table message received\n");
+ pr_warn("Unrecognized name table message received\n");
}
item++;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index cade0ac..c8b0b5c 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -126,7 +126,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper,
{
struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC);
if (publ == NULL) {
- warn("Publication creation failure, no memory\n");
+ pr_warn("Publication creation failure, no memory\n");
return NULL;
}
@@ -163,7 +163,7 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea
struct sub_seq *sseq = tipc_subseq_alloc(1);
if (!nseq || !sseq) {
- warn("Name sequence creation failed, no memory\n");
+ pr_warn("Name sequence creation failed, no memory\n");
kfree(nseq);
kfree(sseq);
return NULL;
@@ -263,8 +263,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
/* Lower end overlaps existing entry => need an exact match */
if ((sseq->lower != lower) || (sseq->upper != upper)) {
- warn("Cannot publish {%u,%u,%u}, overlap error\n",
- type, lower, upper);
+ pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
+ type, lower, upper);
return NULL;
}
@@ -286,8 +286,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
/* Fail if upper end overlaps into an existing entry */
if ((inspos < nseq->first_free) &&
(upper >= nseq->sseqs[inspos].lower)) {
- warn("Cannot publish {%u,%u,%u}, overlap error\n",
- type, lower, upper);
+ pr_warn("Cannot publish {%u,%u,%u}, overlap error\n",
+ type, lower, upper);
return NULL;
}
@@ -296,8 +296,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
struct sub_seq *sseqs = tipc_subseq_alloc(nseq->alloc * 2);
if (!sseqs) {
- warn("Cannot publish {%u,%u,%u}, no memory\n",
- type, lower, upper);
+ pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
+ type, lower, upper);
return NULL;
}
memcpy(sseqs, nseq->sseqs,
@@ -309,8 +309,8 @@ static struct publication *tipc_nameseq_insert_publ(struct name_seq *nseq,
info = kzalloc(sizeof(*info), GFP_ATOMIC);
if (!info) {
- warn("Cannot publish {%u,%u,%u}, no memory\n",
- type, lower, upper);
+ pr_warn("Cannot publish {%u,%u,%u}, no memory\n",
+ type, lower, upper);
return NULL;
}
@@ -492,8 +492,8 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper,
if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) ||
(lower > upper)) {
- dbg("Failed to publish illegal {%u,%u,%u} with scope %u\n",
- type, lower, upper, scope);
+ pr_debug("Failed to publish illegal {%u,%u,%u} with scope %u\n",
+ type, lower, upper, scope);
return NULL;
}
@@ -668,8 +668,8 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
struct publication *publ;
if (table.local_publ_count >= tipc_max_publications) {
- warn("Publication failed, local publication limit reached (%u)\n",
- tipc_max_publications);
+ pr_warn("Publication failed, local publication limit reached (%u)\n",
+ tipc_max_publications);
return NULL;
}
@@ -702,9 +702,9 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key)
return 1;
}
write_unlock_bh(&tipc_nametbl_lock);
- err("Unable to remove local publication\n"
- "(type=%u, lower=%u, ref=%u, key=%u)\n",
- type, lower, ref, key);
+ pr_err("Unable to remove local publication\n"
+ "(type=%u, lower=%u, ref=%u, key=%u)\n",
+ type, lower, ref, key);
return 0;
}
@@ -725,8 +725,8 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s)
tipc_nameseq_subscribe(seq, s);
spin_unlock_bh(&seq->lock);
} else {
- warn("Failed to create subscription for {%u,%u,%u}\n",
- s->seq.type, s->seq.lower, s->seq.upper);
+ pr_warn("Failed to create subscription for {%u,%u,%u}\n",
+ s->seq.type, s->seq.lower, s->seq.upper);
}
write_unlock_bh(&tipc_nametbl_lock);
}
@@ -942,7 +942,7 @@ void tipc_nametbl_stop(void)
for (i = 0; i < tipc_nametbl_size; i++) {
if (hlist_empty(&table.types[i]))
continue;
- err("tipc_nametbl_stop(): orphaned hash chain detected\n");
+ pr_err("nametbl_stop(): orphaned hash chain detected\n");
break;
}
kfree(table.types);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 7c236c8..5b5cea2 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -184,9 +184,9 @@ int tipc_net_start(u32 addr)
tipc_cfg_reinit();
- info("Started in network mode\n");
- info("Own node address %s, network identity %u\n",
- tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+ pr_info("Started in network mode\n");
+ pr_info("Own node address %s, network identity %u\n",
+ tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
return 0;
}
@@ -202,5 +202,5 @@ void tipc_net_stop(void)
list_for_each_entry_safe(node, t_node, &tipc_node_list, list)
tipc_node_delete(node);
write_unlock_bh(&tipc_net_lock);
- info("Left network mode\n");
+ pr_info("Left network mode\n");
}
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 7bda8e3..47a839d 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -90,7 +90,7 @@ int tipc_netlink_start(void)
res = genl_register_family_with_ops(&tipc_genl_family,
&tipc_genl_ops, 1);
if (res) {
- err("Failed to register netlink interface\n");
+ pr_err("Failed to register netlink interface\n");
return res;
}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index d4fd341..d21db20 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -105,7 +105,7 @@ struct tipc_node *tipc_node_create(u32 addr)
n_ptr = kzalloc(sizeof(*n_ptr), GFP_ATOMIC);
if (!n_ptr) {
spin_unlock_bh(&node_create_lock);
- warn("Node creation failed, no memory\n");
+ pr_warn("Node creation failed, no memory\n");
return NULL;
}
@@ -151,8 +151,8 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
n_ptr->working_links++;
- info("Established link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ pr_info("Established link <%s> on network plane %c\n",
+ l_ptr->name, l_ptr->b_ptr->net_plane);
if (!active[0]) {
active[0] = active[1] = l_ptr;
@@ -160,7 +160,7 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
return;
}
if (l_ptr->priority < active[0]->priority) {
- info("New link <%s> becomes standby\n", l_ptr->name);
+ pr_info("New link <%s> becomes standby\n", l_ptr->name);
return;
}
tipc_link_send_duplicate(active[0], l_ptr);
@@ -168,9 +168,9 @@ void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
active[0] = l_ptr;
return;
}
- info("Old link <%s> becomes standby\n", active[0]->name);
+ pr_info("Old link <%s> becomes standby\n", active[0]->name);
if (active[1] != active[0])
- info("Old link <%s> becomes standby\n", active[1]->name);
+ pr_info("Old link <%s> becomes standby\n", active[1]->name);
active[0] = active[1] = l_ptr;
}
@@ -211,11 +211,11 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
n_ptr->working_links--;
if (!tipc_link_is_active(l_ptr)) {
- info("Lost standby link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->b_ptr->net_plane);
+ pr_info("Lost standby link <%s> on network plane %c\n",
+ l_ptr->name, l_ptr->b_ptr->net_plane);
return;
}
- info("Lost link <%s> on network plane %c\n",
+ pr_info("Lost link <%s> on network plane %c\n",
l_ptr->name, l_ptr->b_ptr->net_plane);
active = &n_ptr->active_links[0];
@@ -290,8 +290,8 @@ static void node_lost_contact(struct tipc_node *n_ptr)
char addr_string[16];
u32 i;
- info("Lost contact with %s\n",
- tipc_addr_string_fill(addr_string, n_ptr->addr));
+ pr_info("Lost contact with %s\n",
+ tipc_addr_string_fill(addr_string, n_ptr->addr));
/* Flush broadcast link info associated with lost node */
if (n_ptr->bclink.supported) {
diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c
index 7a27344..5e34b01 100644
--- a/net/tipc/node_subscr.c
+++ b/net/tipc/node_subscr.c
@@ -51,7 +51,8 @@ void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr,
node_sub->node = tipc_node_find(addr);
if (!node_sub->node) {
- warn("Node subscription rejected, unknown node 0x%x\n", addr);
+ pr_warn("Node subscription rejected, unknown node 0x%x\n",
+ addr);
return;
}
node_sub->handle_node_down = handle_down;
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 70bf78b..2cbac39 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -191,7 +191,7 @@ void tipc_port_recv_mcast(struct sk_buff *buf, struct tipc_port_list *dp)
struct sk_buff *b = skb_clone(buf, GFP_ATOMIC);
if (b == NULL) {
- warn("Unable to deliver multicast message(s)\n");
+ pr_warn("Unable to deliver multicast message(s)\n");
goto exit;
}
if ((index == 0) && (cnt != 0))
@@ -221,12 +221,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
p_ptr = kzalloc(sizeof(*p_ptr), GFP_ATOMIC);
if (!p_ptr) {
- warn("Port creation failed, no memory\n");
+ pr_warn("Port creation failed, no memory\n");
return NULL;
}
ref = tipc_ref_acquire(p_ptr, &p_ptr->lock);
if (!ref) {
- warn("Port creation failed, reference table exhausted\n");
+ pr_warn("Port creation failed, ref. table exhausted\n");
kfree(p_ptr);
return NULL;
}
@@ -906,7 +906,7 @@ int tipc_createport(void *usr_handle,
up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC);
if (!up_ptr) {
- warn("Port creation failed, no memory\n");
+ pr_warn("Port creation failed, no memory\n");
return -ENOMEM;
}
p_ptr = tipc_createport_raw(NULL, port_dispatcher, port_wakeup,
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 5cada0e..2a2a938 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -153,11 +153,11 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
struct reference *entry = NULL;
if (!object) {
- err("Attempt to acquire reference to non-existent object\n");
+ pr_err("Attempt to acquire ref. to non-existent obj\n");
return 0;
}
if (!tipc_ref_table.entries) {
- err("Reference table not found during acquisition attempt\n");
+ pr_err("Ref. table not found in acquisition attempt\n");
return 0;
}
@@ -211,7 +211,7 @@ void tipc_ref_discard(u32 ref)
u32 index_mask;
if (!tipc_ref_table.entries) {
- err("Reference table not found during discard attempt\n");
+ pr_err("Ref. table not found during discard attempt\n");
return;
}
@@ -222,11 +222,11 @@ void tipc_ref_discard(u32 ref)
write_lock_bh(&ref_table_lock);
if (!entry->object) {
- err("Attempt to discard reference to non-existent object\n");
+ pr_err("Attempt to discard ref. to non-existent obj\n");
goto exit;
}
if (entry->ref != ref) {
- err("Attempt to discard non-existent reference\n");
+ pr_err("Attempt to discard non-existent reference\n");
goto exit;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1ebb49f..09dc5b9 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -34,12 +34,12 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/export.h>
-#include <net/sock.h>
-
#include "core.h"
#include "port.h"
+#include <linux/export.h>
+#include <net/sock.h>
+
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
@@ -1787,13 +1787,13 @@ int tipc_socket_init(void)
res = proto_register(&tipc_proto, 1);
if (res) {
- err("Failed to register TIPC protocol type\n");
+ pr_err("Failed to register TIPC protocol type\n");
goto out;
}
res = sock_register(&tipc_family_ops);
if (res) {
- err("Failed to register TIPC socket type\n");
+ pr_err("Failed to register TIPC socket type\n");
proto_unregister(&tipc_proto);
goto out;
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index f976e9c..5ed5965 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -305,8 +305,8 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
/* Refuse subscription if global limit exceeded */
if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
- warn("Subscription rejected, subscription limit reached (%u)\n",
- tipc_max_subscriptions);
+ pr_warn("Subscription rejected, limit reached (%u)\n",
+ tipc_max_subscriptions);
subscr_terminate(subscriber);
return NULL;
}
@@ -314,7 +314,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
/* Allocate subscription object */
sub = kmalloc(sizeof(*sub), GFP_ATOMIC);
if (!sub) {
- warn("Subscription rejected, no memory\n");
+ pr_warn("Subscription rejected, no memory\n");
subscr_terminate(subscriber);
return NULL;
}
@@ -328,7 +328,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
if ((!(sub->filter & TIPC_SUB_PORTS) ==
!(sub->filter & TIPC_SUB_SERVICE)) ||
(sub->seq.lower > sub->seq.upper)) {
- warn("Subscription rejected, illegal request\n");
+ pr_warn("Subscription rejected, illegal request\n");
kfree(sub);
subscr_terminate(subscriber);
return NULL;
@@ -440,7 +440,7 @@ static void subscr_named_msg_event(void *usr_handle,
/* Create subscriber object */
subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC);
if (subscriber == NULL) {
- warn("Subscriber rejected, no memory\n");
+ pr_warn("Subscriber rejected, no memory\n");
return;
}
INIT_LIST_HEAD(&subscriber->subscription_list);
@@ -458,7 +458,7 @@ static void subscr_named_msg_event(void *usr_handle,
NULL,
&subscriber->port_ref);
if (subscriber->port_ref == 0) {
- warn("Subscriber rejected, unable to create port\n");
+ pr_warn("Subscriber rejected, unable to create port\n");
kfree(subscriber);
return;
}
@@ -517,7 +517,7 @@ int tipc_subscr_start(void)
return 0;
failed:
- err("Failed to create subscription service\n");
+ pr_err("Failed to create subscription service\n");
return res;
}
--
1.7.9.7
^ permalink raw reply related
* Re: [RFC 2/2] net: Add support for NTB virtual ethernet device
From: Stephen Hemminger @ 2012-07-14 0:08 UTC (permalink / raw)
To: Jon Mason; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <1342215900-3358-2-git-send-email-jon.mason@intel.com>
On Fri, 13 Jul 2012 14:45:00 -0700
Jon Mason <jon.mason@intel.com> wrote:
> A virtual ethernet device that uses the NTB transport API to send/receive data.
>
> Signed-off-by: Jon Mason <jon.mason@intel.com>
> ---
> drivers/net/Kconfig | 4 +
> drivers/net/Makefile | 1 +
> drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 416 insertions(+), 0 deletions(-)
> create mode 100644 drivers/net/ntb_netdev.c
> +static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
> + struct ethtool_drvinfo *info)
> +{
> + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
> + strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
> +}
> +
> +static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
> + "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
> + "rx_frame_errors", "rx_fifo_errors",
> + "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
> +};
> +
> +static int ntb_get_stats_count(__attribute__((unused)) struct net_device *dev)
> +{
> + return ARRAY_SIZE(ntb_nic_stats);
> +}
> +
> +static int ntb_get_sset_count(struct net_device *dev, int sset)
> +{
> + switch (sset) {
> + case ETH_SS_STATS:
> + return ntb_get_stats_count(dev);
> + default:
> + return -EOPNOTSUPP;
> + }
> +}
> +
> +static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
> + u32 sset, u8 *data)
> +{
> + switch (sset) {
> + case ETH_SS_STATS:
> + memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
> + }
> +}
> +
> +static void
> +ntb_get_ethtool_stats(struct net_device *dev,
> + __attribute__((unused)) struct ethtool_stats *stats,
> + u64 *data)
> +{
> + int i = 0;
> +
> + data[i++] = dev->stats.rx_packets;
> + data[i++] = dev->stats.rx_bytes;
> + data[i++] = dev->stats.rx_errors;
> + data[i++] = dev->stats.rx_dropped;
> + data[i++] = dev->stats.rx_length_errors;
> + data[i++] = dev->stats.rx_frame_errors;
> + data[i++] = dev->stats.rx_fifo_errors;
> + data[i++] = dev->stats.tx_packets;
> + data[i++] = dev->stats.tx_bytes;
> + data[i++] = dev->stats.tx_errors;
> + data[i++] = dev->stats.tx_dropped;
> +}
These statistics add no value over existing network stats.
Don't implement ethtool stats unless device has something more
interesting to say.
> +static const struct ethtool_ops ntb_ethtool_ops = {
> + .get_drvinfo = ntb_get_drvinfo,
> + .get_sset_count = ntb_get_sset_count,
> + .get_strings = ntb_get_strings,
> + .get_ethtool_stats = ntb_get_ethtool_stats,
> + .get_link = ethtool_op_get_link,
> +};
If you want to implement bonding or bridging then implementing
get_settings would help.
> +static int __init ntb_netdev_init_module(void)
> +{
> + struct ntb_netdev *dev;
> + int rc;
> +
> + pr_info("%s: Probe\n", KBUILD_MODNAME);
Useless message
> + netdev = alloc_etherdev(sizeof(struct ntb_netdev));
> + if (!netdev)
> + return -ENOMEM;
> +
> + dev = netdev_priv(netdev);
> + dev->ndev = netdev;
> + netdev->features = NETIF_F_HIGHDMA;
> +
> + netdev->hw_features = netdev->features;
> + netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
> +
> + random_ether_addr(netdev->perm_addr);
> + memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
> +
> + netdev->netdev_ops = &ntb_netdev_ops;
> + SET_ETHTOOL_OPS(netdev, &ntb_ethtool_ops);
> +
> + dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
> + ntb_netdev_tx_handler,
> + ntb_netdev_event_handler);
> + if (!dev->qp) {
> + rc = -EIO;
> + goto err;
> + }
> +
> + netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
> +
> + rc = register_netdev(netdev);
> + if (rc)
> + goto err1;
> +
> + pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
> + return 0;
> +
> +err1:
> + ntb_transport_free_queue(dev->qp);
> +err:
> + free_netdev(netdev);
> + return rc;
> +}
> +module_init(ntb_netdev_init_module);
> +
> +static void __exit ntb_netdev_exit_module(void)
> +{
> + struct ntb_netdev *dev = netdev_priv(netdev);
> +
> + unregister_netdev(netdev);
> + ntb_transport_free_queue(dev->qp);
> + free_netdev(netdev);
> +
> + pr_info("%s: Driver removed\n", KBUILD_MODNAME);
> +}
> +module_exit(ntb_netdev_exit_module);
^ permalink raw reply
* Re: [RFC 1/2] PCI-Express Non-Transparent Bridge Support
From: Stephen Hemminger @ 2012-07-14 0:13 UTC (permalink / raw)
To: Jon Mason; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <1342215900-3358-1-git-send-email-jon.mason@intel.com>
On Fri, 13 Jul 2012 14:44:59 -0700
Jon Mason <jon.mason@intel.com> wrote:
> A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus
> connecting 2 systems, providing electrical isolation between the two subsystems.
> A non-transparent bridge is functionally similar to a transparent bridge except
> that both sides of the bridge have their own independent address domains. The
> host on one side of the bridge will not have the visibility of the complete
> memory or I/O space on the other side of the bridge. To communicate across the
> non-transparent bridge, each NTB endpoint has one (or more) apertures exposed to
> the local system. Writes to these apertures are mirrored to memory on the
> remote system. Communications can also occur through the use of doorbell
> registers that initiate interrupts to the alternate domain, and scratch-pad
> registers accessible from both sides.
>
> The NTB device driver is needed to configure these memory windows, doorbell, and
> scratch-pad registers as well as use them in such a way as they can be turned
> into a viable communication channel to the remote system. ntb_hw.[ch]
> determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away
> the underlying hardware to provide access and a common interface to the doorbell
> registers, scratch pads, and memory windows. These hardware interfaces are
> exported so that other, non-mainlined kernel drivers can access these.
> ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to setup a
> communication channel(s) and provide a reliable way of transferring data from
> one side to the other, which it then exports so that "client" drivers can access
> them. These client drivers are used to provide a standard kernel interface
> (i.e., Ethernet device) to NTB, such that Linux can transfer data from one
> system to the other in a standard way.
>
> Signed-off-by: Jon Mason <jon.mason@intel.com>
This driver does some reimplementing of standard type operations is this
because you are trying to use the same code on multiple platforms?
Example:
+
+static void ntb_list_add_head(spinlock_t *lock, struct list_head *entry,
+ struct list_head *list)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(lock, flags);
+ list_add(entry, list);
+ spin_unlock_irqrestore(lock, flags);
+}
+
+static void ntb_list_add_tail(spinlock_t *lock, struct list_head *entry,
+ struct list_head *list)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(lock, flags);
+ list_add_tail(entry, list);
+ spin_unlock_irqrestore(lock, flags);
+}
Which are used on skb's and yet we already have sk_buff_head with locking?
I know you probably are committed to this API, but is there some way to
reuse existing shared memory used by virtio-net between two ports?
^ permalink raw reply
* Re: resurrecting tcphealth
From: valdis.kletnieks @ 2012-07-14 1:31 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: Piotr Sawuk, netdev, linux-kernel
In-Reply-To: <20120713165544.6767ea8f@nehalam.linuxnetplumber.net>
[-- Attachment #1: Type: text/plain, Size: 368 bytes --]
On Fri, 13 Jul 2012 16:55:44 -0700, Stephen Hemminger said:
> >+ /* Course retransmit inefficiency- this packet has been received twice. */
> >+ tp->dup_pkts_recv++;
>
> I don't understand that comment, could you use a better sentence please?
I think what was intended was:
/* Curse you, retransmit inefficiency! This packet has been received at least twice */
[-- Attachment #2: Type: application/pgp-signature, Size: 865 bytes --]
^ permalink raw reply
* Re: [PATCH] mac802154: fix sparse warning for mac802154_slave_get_priv
From: Alexander Smirnov @ 2012-07-14 3:42 UTC (permalink / raw)
To: Silviu-Mihai Popescu
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
davem@davemloft.net, Silviu-Mihai Popescu
In-Reply-To: <1342211770-4219-1-git-send-email-silviupopescu1990@gmail.com>
> Make sparse happy by fixing the following error:
> * symbol 'mac802154_slave_get_priv' was not declared. Should it be static?
>
> Signed-off-by: Silviu-Mihai Popescu <silviupopescu1990@gmail.com>
> ---
> net/mac802154/mib.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
Should be already fixed, please try the latest net-next tree.
^ permalink raw reply
* Re: [RFC 2/2] net: Add support for NTB virtual ethernet device
From: Jon Mason @ 2012-07-14 5:50 UTC (permalink / raw)
To: Jiri Pirko; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <20120713231403.GA1712@minipsycho.orion>
On Sat, Jul 14, 2012 at 01:14:03AM +0200, Jiri Pirko wrote:
> Fri, Jul 13, 2012 at 11:45:00PM CEST, jon.mason@intel.com wrote:
> >A virtual ethernet device that uses the NTB transport API to send/receive data.
> >
> >Signed-off-by: Jon Mason <jon.mason@intel.com>
> >---
> > drivers/net/Kconfig | 4 +
> > drivers/net/Makefile | 1 +
> > drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 416 insertions(+), 0 deletions(-)
> > create mode 100644 drivers/net/ntb_netdev.c
> >
> >diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> >index 0c2bd80..9bf8a71 100644
> >--- a/drivers/net/Kconfig
> >+++ b/drivers/net/Kconfig
> >@@ -178,6 +178,10 @@ config NETPOLL_TRAP
> > config NET_POLL_CONTROLLER
> > def_bool NETPOLL
> >
> >+config NTB_NETDEV
> >+ tristate "Virtual Ethernet over NTB"
> >+ depends on NTB
> >+
> > config RIONET
> > tristate "RapidIO Ethernet over messaging driver support"
> > depends on RAPIDIO
> >diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> >index 3d375ca..9890148 100644
> >--- a/drivers/net/Makefile
> >+++ b/drivers/net/Makefile
> >@@ -69,3 +69,4 @@ obj-$(CONFIG_USB_IPHETH) += usb/
> > obj-$(CONFIG_USB_CDC_PHONET) += usb/
> >
> > obj-$(CONFIG_HYPERV_NET) += hyperv/
> >+obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
> >diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
> >new file mode 100644
> >index 0000000..bcbd9d4
> >--- /dev/null
> >+++ b/drivers/net/ntb_netdev.c
> >@@ -0,0 +1,411 @@
> >+/*
> >+ * This file is provided under a dual BSD/GPLv2 license. When using or
> >+ * redistributing this file, you may do so under either license.
> >+ *
> >+ * GPL LICENSE SUMMARY
> >+ *
> >+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
> >+ *
> >+ * This program is free software; you can redistribute it and/or modify
> >+ * it under the terms of version 2 of the GNU General Public License as
> >+ * published by the Free Software Foundation.
> >+ *
> >+ * This program is distributed in the hope that it will be useful, but
> >+ * WITHOUT ANY WARRANTY; without even the implied warranty of
> >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> >+ * General Public License for more details.
> >+ *
> >+ * You should have received a copy of the GNU General Public License
> >+ * along with this program; if not, write to the Free Software
> >+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> >+ * The full GNU General Public License is included in this distribution
> >+ * in the file called LICENSE.GPL.
> >+ *
> >+ * BSD LICENSE
> >+ *
> >+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
> >+ *
> >+ * Redistribution and use in source and binary forms, with or without
> >+ * modification, are permitted provided that the following conditions
> >+ * are met:
> >+ *
> >+ * * Redistributions of source code must retain the above copyright
> >+ * notice, this list of conditions and the following disclaimer.
> >+ * * Redistributions in binary form must reproduce the above copy
> >+ * notice, this list of conditions and the following disclaimer in
> >+ * the documentation and/or other materials provided with the
> >+ * distribution.
> >+ * * Neither the name of Intel Corporation nor the names of its
> >+ * contributors may be used to endorse or promote products derived
> >+ * from this software without specific prior written permission.
> >+ *
> >+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> >+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> >+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> >+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> >+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> >+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> >+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> >+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> >+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> >+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> >+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> >+ *
> >+ * Intel PCIe NTB Network Linux driver
> >+ *
> >+ * Contact Information:
> >+ * Jon Mason <jon.mason@intel.com>
> >+ */
> >+#include <linux/etherdevice.h>
> >+#include <linux/ethtool.h>
> >+#include <linux/module.h>
> >+#include <linux/ntb.h>
> >+
> >+#define NTB_NETDEV_VER "0.4"
>
> Is it really necessary to provide this in-file versioning? Doesn't
> kernel version itself do the trick?
Not necessarily. This may be distributed as a package outside of the kernel and the version is useful for debug.
>
> >+
> >+MODULE_DESCRIPTION(KBUILD_MODNAME);
> >+MODULE_VERSION(NTB_NETDEV_VER);
> >+MODULE_LICENSE("Dual BSD/GPL");
> >+MODULE_AUTHOR("Intel Corporation");
> >+
> >+struct ntb_netdev {
> >+ struct net_device *ndev;
> >+ struct ntb_transport_qp *qp;
> >+};
> >+
> >+#define NTB_TX_TIMEOUT_MS 1000
> >+#define NTB_RXQ_SIZE 100
> >+
> >+static struct net_device *netdev;
> >+
> >+static void ntb_netdev_event_handler(int status)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(netdev);
> >+
> >+ pr_debug("%s: Event %x, Link %x\n", KBUILD_MODNAME, status,
> >+ ntb_transport_link_query(dev->qp));
> >+
> >+ /* Currently, only link status event is supported */
> >+ if (status)
> >+ netif_carrier_on(netdev);
> >+ else
> >+ netif_carrier_off(netdev);
> >+}
> >+
> >+static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp)
> >+{
> >+ struct net_device *ndev = netdev;
> >+ struct sk_buff *skb;
> >+ int len, rc;
> >+
> >+ while ((skb = ntb_transport_rx_dequeue(qp, &len))) {
> >+ pr_debug("%s: %d byte payload received\n", __func__, len);
> >+
> >+ skb_put(skb, len);
> >+ skb->protocol = eth_type_trans(skb, ndev);
> >+ skb->ip_summed = CHECKSUM_NONE;
> >+
> >+ if (netif_rx(skb) == NET_RX_DROP) {
> >+ ndev->stats.rx_errors++;
> >+ ndev->stats.rx_dropped++;
> >+ } else {
> >+ ndev->stats.rx_packets++;
> >+ ndev->stats.rx_bytes += len;
> >+ }
> >+
> >+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
> >+ if (!skb) {
> >+ ndev->stats.rx_errors++;
> >+ ndev->stats.rx_frame_errors++;
> >+ pr_err("%s: No skb\n", __func__);
> >+ break;
> >+ }
> >+
> >+ rc = ntb_transport_rx_enqueue(qp, skb, skb->data,
> >+ ndev->mtu + ETH_HLEN);
> >+ if (rc) {
> >+ ndev->stats.rx_errors++;
> >+ ndev->stats.rx_fifo_errors++;
> >+ pr_err("%s: error re-enqueuing\n", __func__);
> >+ break;
> >+ }
> >+ }
> >+}
> >+
> >+static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp)
> >+{
> >+ struct net_device *ndev = netdev;
> >+ struct sk_buff *skb;
> >+ int len;
> >+
> >+ while ((skb = ntb_transport_tx_dequeue(qp, &len))) {
> >+ ndev->stats.tx_packets++;
> >+ ndev->stats.tx_bytes += skb->len;
> >+ dev_kfree_skb(skb);
> >+ }
> >+
> >+ if (netif_queue_stopped(ndev))
> >+ netif_wake_queue(ndev);
> >+}
> >+
> >+static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
> >+ struct net_device *ndev)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ int rc;
> >+
> >+ pr_debug("%s: ntb_transport_tx_enqueue\n", KBUILD_MODNAME);
> >+
> >+ rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
> >+ if (rc)
> >+ goto err;
> >+
> >+ return NETDEV_TX_OK;
> >+
> >+err:
> >+ ndev->stats.tx_dropped++;
> >+ ndev->stats.tx_errors++;
> >+ netif_stop_queue(ndev);
> >+ return NETDEV_TX_BUSY;
> >+}
> >+
> >+static int ntb_netdev_open(struct net_device *ndev)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ struct sk_buff *skb;
> >+ int rc, i, len;
> >+
> >+ /* Add some empty rx bufs */
> >+ for (i = 0; i < NTB_RXQ_SIZE; i++) {
> >+ skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
> >+ if (!skb) {
> >+ rc = -ENOMEM;
> >+ goto err;
> >+ }
> >+
> >+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
> >+ ndev->mtu + ETH_HLEN);
> >+ if (rc == -EINVAL)
> >+ goto err;
> >+ }
> >+
> >+ netif_carrier_off(ndev);
> >+ ntb_transport_link_up(dev->qp);
> >+
> >+ return 0;
> >+
> >+err:
> >+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
> >+ kfree(skb);
> >+ return rc;
> >+}
> >+
> >+static int ntb_netdev_close(struct net_device *ndev)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ struct sk_buff *skb;
> >+ int len;
> >+
> >+ ntb_transport_link_down(dev->qp);
> >+
> >+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
> >+ kfree(skb);
> >+
> >+ return 0;
> >+}
> >+
> >+static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(ndev);
> >+ struct sk_buff *skb;
> >+ int len, rc;
> >+
> >+ if (new_mtu > ntb_transport_max_size(dev->qp) - ETH_HLEN)
> >+ return -EINVAL;
> >+
> >+ if (!netif_running(ndev)) {
> >+ ndev->mtu = new_mtu;
> >+ return 0;
> >+ }
> >+
> >+ /* Bring down the link and dispose of posted rx entries */
> >+ ntb_transport_link_down(dev->qp);
> >+
> >+ if (ndev->mtu < new_mtu) {
> >+ int i;
> >+
> >+ for (i = 0; (skb = ntb_transport_rx_remove(dev->qp, &len)); i++)
> >+ kfree(skb);
> >+
> >+ for (; i; i--) {
> >+ skb = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
> >+ if (!skb) {
> >+ rc = -ENOMEM;
> >+ goto err;
> >+ }
> >+
> >+ rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data,
> >+ new_mtu + ETH_HLEN);
> >+ if (rc) {
> >+ kfree(skb);
> >+ goto err;
> >+ }
> >+ }
> >+ }
> >+
> >+ ndev->mtu = new_mtu;
> >+
> >+ ntb_transport_link_up(dev->qp);
> >+
> >+ return 0;
> >+
> >+err:
> >+ ntb_transport_link_down(dev->qp);
> >+
> >+ while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
> >+ kfree(skb);
> >+
> >+ pr_err("Error changing MTU, device inoperable\n");
>
> Would be maybe better to use netdev_err here (and on similar other
> places)
Good point
>
> Also, it might be good to provide rollback in case any of
> netdev_alloc_skb() fails.
>
> >+ return rc;
> >+}
> >+
> >+static void ntb_netdev_tx_timeout(struct net_device *ndev)
> >+{
> >+ if (netif_running(ndev))
> >+ netif_wake_queue(ndev);
> >+}
> >+
> >+static const struct net_device_ops ntb_netdev_ops = {
> >+ .ndo_open = ntb_netdev_open,
> >+ .ndo_stop = ntb_netdev_close,
> >+ .ndo_start_xmit = ntb_netdev_start_xmit,
> >+ .ndo_change_mtu = ntb_netdev_change_mtu,
> >+ .ndo_tx_timeout = ntb_netdev_tx_timeout,
> >+ .ndo_set_mac_address = eth_mac_addr,
>
> Does your device support mac change while it's up and running?
It's virtual ethernet, so there is no hardware limitation, only what is acceptable for the remote side to receive.
>
> >+};
> >+
> >+static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
> >+ struct ethtool_drvinfo *info)
> >+{
> >+ strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
> >+ strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
> >+}
> >+
> >+static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
> >+ "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
> >+ "rx_frame_errors", "rx_fifo_errors",
> >+ "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
> >+};
> >+
> >+static int ntb_get_stats_count(__attribute__((unused)) struct net_device *dev)
> >+{
> >+ return ARRAY_SIZE(ntb_nic_stats);
> >+}
> >+
> >+static int ntb_get_sset_count(struct net_device *dev, int sset)
> >+{
> >+ switch (sset) {
> >+ case ETH_SS_STATS:
> >+ return ntb_get_stats_count(dev);
> >+ default:
> >+ return -EOPNOTSUPP;
> >+ }
> >+}
> >+
> >+static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
> >+ u32 sset, u8 *data)
> >+{
> >+ switch (sset) {
> >+ case ETH_SS_STATS:
> >+ memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
> >+ }
> >+}
> >+
> >+static void
> >+ntb_get_ethtool_stats(struct net_device *dev,
> >+ __attribute__((unused)) struct ethtool_stats *stats,
> >+ u64 *data)
> >+{
> >+ int i = 0;
> >+
> >+ data[i++] = dev->stats.rx_packets;
> >+ data[i++] = dev->stats.rx_bytes;
> >+ data[i++] = dev->stats.rx_errors;
> >+ data[i++] = dev->stats.rx_dropped;
> >+ data[i++] = dev->stats.rx_length_errors;
> >+ data[i++] = dev->stats.rx_frame_errors;
> >+ data[i++] = dev->stats.rx_fifo_errors;
> >+ data[i++] = dev->stats.tx_packets;
> >+ data[i++] = dev->stats.tx_bytes;
> >+ data[i++] = dev->stats.tx_errors;
> >+ data[i++] = dev->stats.tx_dropped;
> >+}
> >+
> >+static const struct ethtool_ops ntb_ethtool_ops = {
> >+ .get_drvinfo = ntb_get_drvinfo,
> >+ .get_sset_count = ntb_get_sset_count,
> >+ .get_strings = ntb_get_strings,
> >+ .get_ethtool_stats = ntb_get_ethtool_stats,
> >+ .get_link = ethtool_op_get_link,
> >+};
> >+
> >+static int __init ntb_netdev_init_module(void)
> >+{
> >+ struct ntb_netdev *dev;
> >+ int rc;
> >+
> >+ pr_info("%s: Probe\n", KBUILD_MODNAME);
> >+
> >+ netdev = alloc_etherdev(sizeof(struct ntb_netdev));
>
> I might be missing something but this place (module init) does not seems
> like a good place to do alloc_etherdev(). Do you want to support only
> one netdevice instance?
>
> Anyway, I think that using "static netdev" should be avoided in any case.
>
It would fail the probe if there is no underlying ntb hardware, but it would make sense to check for that before allocing the etherdev.
Thanks for the comments!
> >+ if (!netdev)
> >+ return -ENOMEM;
> >+
> >+ dev = netdev_priv(netdev);
> >+ dev->ndev = netdev;
> >+ netdev->features = NETIF_F_HIGHDMA;
> >+
> >+ netdev->hw_features = netdev->features;
> >+ netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
> >+
> >+ random_ether_addr(netdev->perm_addr);
> >+ memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
> >+
> >+ netdev->netdev_ops = &ntb_netdev_ops;
> >+ SET_ETHTOOL_OPS(netdev, &ntb_ethtool_ops);
> >+
> >+ dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
> >+ ntb_netdev_tx_handler,
> >+ ntb_netdev_event_handler);
> >+ if (!dev->qp) {
> >+ rc = -EIO;
> >+ goto err;
> >+ }
> >+
> >+ netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
> >+
> >+ rc = register_netdev(netdev);
> >+ if (rc)
> >+ goto err1;
> >+
> >+ pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
> >+ return 0;
> >+
> >+err1:
> >+ ntb_transport_free_queue(dev->qp);
> >+err:
> >+ free_netdev(netdev);
> >+ return rc;
> >+}
> >+module_init(ntb_netdev_init_module);
> >+
> >+static void __exit ntb_netdev_exit_module(void)
> >+{
> >+ struct ntb_netdev *dev = netdev_priv(netdev);
> >+
> >+ unregister_netdev(netdev);
> >+ ntb_transport_free_queue(dev->qp);
> >+ free_netdev(netdev);
> >+
> >+ pr_info("%s: Driver removed\n", KBUILD_MODNAME);
> >+}
> >+module_exit(ntb_netdev_exit_module);
> >--
> >1.7.5.4
> >
> >--
> >To unsubscribe from this list: send the line "unsubscribe netdev" in
> >the body of a message to majordomo@vger.kernel.org
> >More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [RFC 2/2] net: Add support for NTB virtual ethernet device
From: Jon Mason @ 2012-07-14 5:55 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <20120713170826.09210b80@nehalam.linuxnetplumber.net>
On Fri, Jul 13, 2012 at 05:08:26PM -0700, Stephen Hemminger wrote:
> On Fri, 13 Jul 2012 14:45:00 -0700
> Jon Mason <jon.mason@intel.com> wrote:
>
> > A virtual ethernet device that uses the NTB transport API to send/receive data.
> >
> > Signed-off-by: Jon Mason <jon.mason@intel.com>
> > ---
> > drivers/net/Kconfig | 4 +
> > drivers/net/Makefile | 1 +
> > drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 416 insertions(+), 0 deletions(-)
> > create mode 100644 drivers/net/ntb_netdev.c
>
>
> > +static void ntb_get_drvinfo(__attribute__((unused)) struct net_device *dev,
> > + struct ethtool_drvinfo *info)
> > +{
> > + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
> > + strlcpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
> > +}
> > +
> > +static const char ntb_nic_stats[][ETH_GSTRING_LEN] = {
> > + "rx_packets", "rx_bytes", "rx_errors", "rx_dropped", "rx_length_errors",
> > + "rx_frame_errors", "rx_fifo_errors",
> > + "tx_packets", "tx_bytes", "tx_errors", "tx_dropped",
> > +};
> > +
> > +static int ntb_get_stats_count(__attribute__((unused)) struct net_device *dev)
> > +{
> > + return ARRAY_SIZE(ntb_nic_stats);
> > +}
> > +
> > +static int ntb_get_sset_count(struct net_device *dev, int sset)
> > +{
> > + switch (sset) {
> > + case ETH_SS_STATS:
> > + return ntb_get_stats_count(dev);
> > + default:
> > + return -EOPNOTSUPP;
> > + }
> > +}
> > +
> > +static void ntb_get_strings(__attribute__((unused)) struct net_device *dev,
> > + u32 sset, u8 *data)
> > +{
> > + switch (sset) {
> > + case ETH_SS_STATS:
> > + memcpy(data, *ntb_nic_stats, sizeof(ntb_nic_stats));
> > + }
> > +}
> > +
> > +static void
> > +ntb_get_ethtool_stats(struct net_device *dev,
> > + __attribute__((unused)) struct ethtool_stats *stats,
> > + u64 *data)
> > +{
> > + int i = 0;
> > +
> > + data[i++] = dev->stats.rx_packets;
> > + data[i++] = dev->stats.rx_bytes;
> > + data[i++] = dev->stats.rx_errors;
> > + data[i++] = dev->stats.rx_dropped;
> > + data[i++] = dev->stats.rx_length_errors;
> > + data[i++] = dev->stats.rx_frame_errors;
> > + data[i++] = dev->stats.rx_fifo_errors;
> > + data[i++] = dev->stats.tx_packets;
> > + data[i++] = dev->stats.tx_bytes;
> > + data[i++] = dev->stats.tx_errors;
> > + data[i++] = dev->stats.tx_dropped;
> > +}
>
> These statistics add no value over existing network stats.
> Don't implement ethtool stats unless device has something more
> interesting to say.
Fair enough
>
> > +static const struct ethtool_ops ntb_ethtool_ops = {
> > + .get_drvinfo = ntb_get_drvinfo,
> > + .get_sset_count = ntb_get_sset_count,
> > + .get_strings = ntb_get_strings,
> > + .get_ethtool_stats = ntb_get_ethtool_stats,
> > + .get_link = ethtool_op_get_link,
> > +};
>
> If you want to implement bonding or bridging then implementing
> get_settings would help.
Will do.
> > +static int __init ntb_netdev_init_module(void)
> > +{
> > + struct ntb_netdev *dev;
> > + int rc;
> > +
> > + pr_info("%s: Probe\n", KBUILD_MODNAME);
>
> Useless message
True, will remove.
Thanks for the comments!
> > + netdev = alloc_etherdev(sizeof(struct ntb_netdev));
> > + if (!netdev)
> > + return -ENOMEM;
> > +
> > + dev = netdev_priv(netdev);
> > + dev->ndev = netdev;
> > + netdev->features = NETIF_F_HIGHDMA;
> > +
> > + netdev->hw_features = netdev->features;
> > + netdev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
> > +
> > + random_ether_addr(netdev->perm_addr);
> > + memcpy(netdev->dev_addr, netdev->perm_addr, netdev->addr_len);
> > +
> > + netdev->netdev_ops = &ntb_netdev_ops;
> > + SET_ETHTOOL_OPS(netdev, &ntb_ethtool_ops);
> > +
> > + dev->qp = ntb_transport_create_queue(ntb_netdev_rx_handler,
> > + ntb_netdev_tx_handler,
> > + ntb_netdev_event_handler);
> > + if (!dev->qp) {
> > + rc = -EIO;
> > + goto err;
> > + }
> > +
> > + netdev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
> > +
> > + rc = register_netdev(netdev);
> > + if (rc)
> > + goto err1;
> > +
> > + pr_info("%s: %s created\n", KBUILD_MODNAME, netdev->name);
> > + return 0;
> > +
> > +err1:
> > + ntb_transport_free_queue(dev->qp);
> > +err:
> > + free_netdev(netdev);
> > + return rc;
> > +}
> > +module_init(ntb_netdev_init_module);
> > +
> > +static void __exit ntb_netdev_exit_module(void)
> > +{
> > + struct ntb_netdev *dev = netdev_priv(netdev);
> > +
> > + unregister_netdev(netdev);
> > + ntb_transport_free_queue(dev->qp);
> > + free_netdev(netdev);
> > +
> > + pr_info("%s: Driver removed\n", KBUILD_MODNAME);
> > +}
> > +module_exit(ntb_netdev_exit_module);
>
^ permalink raw reply
* Re: pull request: wireless-next 2012-07-12
From: David Miller @ 2012-07-14 6:05 UTC (permalink / raw)
To: linville-2XuSBdqkA4R54TAoqtyWWQ
Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20120712181539.GB25494-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Date: Thu, 12 Jul 2012 14:15:40 -0400
> Several drivers see updates: mwifiex, ath9k, iwlwifi, brcmsmac,
> wlcore/wl12xx/wl18xx, and a handful of others. The bcma bus got a
> lot of attention from Hauke Mehrtens. The cfg80211 component gets
> a flurry of patches for multi-channel support, and the mac80211
> component gets the first few VHT (11ac) and 60GHz (11ad) patches.
> This also includes the removal of the iwmc3200 drivers, since the
> hardware never became available to normal people.
>
> Additionally, the NFC subsystem gets a series of updates. According to
> Samuel, "Here are the interesting bits:
>
> - A better error management for the HCI stack.
> - An LLCP "late" binding implementation for a better NFC SAP usage. SAPs are
> now reserved only when there's a client for it.
> - Support for Sony RC-S360 (a.k.a. PaSoRi) pn533 based dongle. We can read and
> write NFC tags and also establish a p2p link with this dongle now.
> - A few LLCP fixes."
>
> Finally, this includes another pull of the fixes from the wireless
> tree in order to resolve some merge issues.
Pulled, thanks John.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH] MAINTAINERS: reflect actual changes in IEEE 802.15.4 maintainership
From: Dmitry Eremin-Solenikov @ 2012-07-14 6:15 UTC (permalink / raw)
To: linux-kernel
Cc: netdev, David S. Miller, Dmitry Eremin-Solenikov,
Alexander Smirnov
As the life flows, developers priorities shifts a bit. Reflect actual
changes in the maintainership of IEEE 802.15.4 code: Sergey mostly
stopped cared about this piece of code. Most of the work recently was
done by Alexander, so put him to the MAINTAINERS file to reflect his
status and to ease the life of respective patches.
Also add new net/mac802154/ directory to the list of maintained files.
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
MAINTAINERS | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 150a29f..f03c703 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3403,13 +3403,14 @@ S: Supported
F: drivers/idle/i7300_idle.c
IEEE 802.15.4 SUBSYSTEM
+M: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
M: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
-M: Sergey Lapin <slapin@ossfans.org>
L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers)
W: http://apps.sourceforge.net/trac/linux-zigbee
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git
S: Maintained
F: net/ieee802154/
+F: net/mac802154/
F: drivers/ieee802154/
IIO SUBSYSTEM AND DRIVERS
--
1.7.10.4
^ permalink raw reply related
* Re: [RFC 1/2] PCI-Express Non-Transparent Bridge Support
From: Jon Mason @ 2012-07-14 6:19 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <20120713171344.1066d3b1@nehalam.linuxnetplumber.net>
On Fri, Jul 13, 2012 at 05:13:44PM -0700, Stephen Hemminger wrote:
> On Fri, 13 Jul 2012 14:44:59 -0700
> Jon Mason <jon.mason@intel.com> wrote:
>
> > A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus
> > connecting 2 systems, providing electrical isolation between the two subsystems.
> > A non-transparent bridge is functionally similar to a transparent bridge except
> > that both sides of the bridge have their own independent address domains. The
> > host on one side of the bridge will not have the visibility of the complete
> > memory or I/O space on the other side of the bridge. To communicate across the
> > non-transparent bridge, each NTB endpoint has one (or more) apertures exposed to
> > the local system. Writes to these apertures are mirrored to memory on the
> > remote system. Communications can also occur through the use of doorbell
> > registers that initiate interrupts to the alternate domain, and scratch-pad
> > registers accessible from both sides.
> >
> > The NTB device driver is needed to configure these memory windows, doorbell, and
> > scratch-pad registers as well as use them in such a way as they can be turned
> > into a viable communication channel to the remote system. ntb_hw.[ch]
> > determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away
> > the underlying hardware to provide access and a common interface to the doorbell
> > registers, scratch pads, and memory windows. These hardware interfaces are
> > exported so that other, non-mainlined kernel drivers can access these.
> > ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to setup a
> > communication channel(s) and provide a reliable way of transferring data from
> > one side to the other, which it then exports so that "client" drivers can access
> > them. These client drivers are used to provide a standard kernel interface
> > (i.e., Ethernet device) to NTB, such that Linux can transfer data from one
> > system to the other in a standard way.
> >
> > Signed-off-by: Jon Mason <jon.mason@intel.com>
>
> This driver does some reimplementing of standard type operations is this
> because you are trying to use the same code on multiple platforms?
>
> Example:
> +
> +static void ntb_list_add_head(spinlock_t *lock, struct list_head *entry,
> + struct list_head *list)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(lock, flags);
> + list_add(entry, list);
> + spin_unlock_irqrestore(lock, flags);
> +}
> +
> +static void ntb_list_add_tail(spinlock_t *lock, struct list_head *entry,
> + struct list_head *list)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(lock, flags);
> + list_add_tail(entry, list);
> + spin_unlock_irqrestore(lock, flags);
> +}
>
> Which are used on skb's and yet we already have sk_buff_head with locking?
>
> I know you probably are committed to this API, but is there some way to
> reuse existing shared memory used by virtio-net between two ports?
>
>
The intention is to be able to have multiple client drivers/virtual devices that are able to use NTB as the transport to the remote system. This is the reason why a void* is passed into the transport instead of skb*, making all of the extra book keeping necessary. Currently, only the virtual Ethernet has been done, which may be part of the confusion. I'd like to be able to find a way to have the virtio devices use ntb (and save me the work of reinventing the wheel), but step one is getting this code accepted :)
Thanks,
Jon
^ permalink raw reply
* [net 0/2][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2012-07-14 7:47 UTC (permalink / raw)
To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann
This series contains fixes to e1000e.
The following are changes since commit 7ac2908e4b2edaec60e9090ddb4d9ceb76c05e7d:
sch_sfb: Fix missing NULL check
and are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net master
Bruce Allan (1):
e1000e: fix test for PHY being accessible on 82577/8/9 and I217
Tushar Dave (1):
e1000e: Correct link check logic for 82571 serdes
drivers/net/ethernet/intel/e1000e/82571.c | 3 ++
drivers/net/ethernet/intel/e1000e/ich8lan.c | 42 ++++++++++++++++++++-------
2 files changed, 35 insertions(+), 10 deletions(-)
--
1.7.10.4
^ permalink raw reply
* [net 2/2] e1000e: fix test for PHY being accessible on 82577/8/9 and I217
From: Jeff Kirsher @ 2012-07-14 7:47 UTC (permalink / raw)
To: davem; +Cc: Bruce Allan, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1342252063-27023-1-git-send-email-jeffrey.t.kirsher@intel.com>
From: Bruce Allan <bruce.w.allan@intel.com>
Occasionally, the PHY can be initially inaccessible when the first read of
a PHY register, e.g. PHY_ID1, happens (signified by the returned value
0xFFFF) but subsequent accesses of the PHY work as expected. Add a retry
counter similar to how it is done in the generic e1000_get_phy_id().
Also, when the PHY is completely inaccessible (i.e. when subsequent reads
of the PHY_IDx registers returns all F's) and the MDIO access mode must be
set to slow before attempting to read the PHY ID again, the functions that
do these latter two actions expect the SW/FW/HW semaphore is not already
set so the semaphore must be released before and re-acquired after calling
them otherwise there is an unnecessarily inordinate amount of delay during
device initialization.
Reported-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/ich8lan.c | 42 ++++++++++++++++++++-------
1 file changed, 32 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 238ab2f..e3a7b07 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -325,24 +325,46 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val)
**/
static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
{
- u16 phy_reg;
- u32 phy_id;
+ u16 phy_reg = 0;
+ u32 phy_id = 0;
+ s32 ret_val;
+ u16 retry_count;
+
+ for (retry_count = 0; retry_count < 2; retry_count++) {
+ ret_val = e1e_rphy_locked(hw, PHY_ID1, &phy_reg);
+ if (ret_val || (phy_reg == 0xFFFF))
+ continue;
+ phy_id = (u32)(phy_reg << 16);
- e1e_rphy_locked(hw, PHY_ID1, &phy_reg);
- phy_id = (u32)(phy_reg << 16);
- e1e_rphy_locked(hw, PHY_ID2, &phy_reg);
- phy_id |= (u32)(phy_reg & PHY_REVISION_MASK);
+ ret_val = e1e_rphy_locked(hw, PHY_ID2, &phy_reg);
+ if (ret_val || (phy_reg == 0xFFFF)) {
+ phy_id = 0;
+ continue;
+ }
+ phy_id |= (u32)(phy_reg & PHY_REVISION_MASK);
+ break;
+ }
if (hw->phy.id) {
if (hw->phy.id == phy_id)
return true;
- } else {
- if ((phy_id != 0) && (phy_id != PHY_REVISION_MASK))
- hw->phy.id = phy_id;
+ } else if (phy_id) {
+ hw->phy.id = phy_id;
+ hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK);
return true;
}
- return false;
+ /*
+ * In case the PHY needs to be in mdio slow mode,
+ * set slow mode and try to get the PHY id again.
+ */
+ hw->phy.ops.release(hw);
+ ret_val = e1000_set_mdio_slow_mode_hv(hw);
+ if (!ret_val)
+ ret_val = e1000e_get_phy_id(hw);
+ hw->phy.ops.acquire(hw);
+
+ return !ret_val;
}
/**
--
1.7.10.4
^ permalink raw reply related
* Re: resurrecting tcphealth
From: Piotr Sawuk @ 2012-07-14 7:56 UTC (permalink / raw)
To: netdev; +Cc: linux-kernel
On Sa, 14.07.2012, 03:31, valdis.kletnieks@vt.edu wrote:
> On Fri, 13 Jul 2012 16:55:44 -0700, Stephen Hemminger said:
>
>> >+ /* Course retransmit inefficiency- this packet has been received
>> twice. */
>> >+ tp->dup_pkts_recv++;
>> I don't understand that comment, could you use a better sentence please?
>
> I think what was intended was:
>
> /* Curse you, retransmit inefficiency! This packet has been received at
least twice */
>
LOL, no. I think "course retransmit" is short for "course-grained timeout
caused retransmit" but I can't be sure since I'm not the author of these
lines. I'll replace that comment with the non-shorthand version though.
however, I think the real comment here should be:
/*A perceived shortcoming of the standard TCP implementation: A
TCP receiver can get duplicate packets from the sender because it cannot
acknowledge packets that arrive out of order. These duplicates would happen
when the sender mistakenly thinks some packets have been lost by the network
because it does not receive acks for them but in reality they were
successfully received out of order. Since the receiver has no way of letting
the sender know about the receipt of these packets, they could potentially
be re-sent and re-received at the receiver. Not only do duplicate packets
waste precious Internet bandwidth but they hurt performance because the
sender mistakenly detects congestion from packet losses. The SACK TCP
extension speci\fcally addresses this issue. A large number of duplicate
packets received would indicate a signi\fcant bene\ft to the wide adoption of
SACK. The duplicatepacketsreceived metric is computed at the
receiver and counts these packets on a per-connection basis.*/
as copied from his thesis at [1]. also in the thesis he writes:
In our limited experiment, the results indicated no duplicate packets were
received on any connection in the 18 hour run. This leads us to several
conclusions. Since duplicate ACKs were seen on many connections we know that
some packets were lost or reordered, but unACKed reordered packets never
caused a /coursegrainedtimeouts/ on our connections. Only these timeouts
will cause duplicate packets to be received since less severe out-of-order
conditions will be resolved with fast retransmits. The lack of course
timeouts
may be due to the quality of UCSD's ActiveWeb network or the paucity of
large gaps between received packet groups. It should be noted that Linux 2.2
implements fast retransmits for up to two packet gaps, thus reducing the
need for course grained timeouts due to the lack of SACK.
[1] https://sacerdoti.org/tcphealth/tcphealth-paper.pdf
^ permalink raw reply
* Re: PROBLEM: Silent data corruption when using sendfile()
From: Hillf Danton @ 2012-07-14 8:04 UTC (permalink / raw)
To: Johannes Truschnigg
Cc: linux-kernel, Eric Dumazet, Willy Tarreau, Linux-Netdev
In-Reply-To: <20120713171835.GA26052@vault.local>
On Sat, Jul 14, 2012 at 1:18 AM, Johannes Truschnigg
<johannes@truschnigg.info> wrote:
> Hello good people of linux-kernel.
>
> I've been bothered by silent data corruption from my personal fileserver - no
> matter the Layer 7 protocol used, huge transfers sporadically ended up damaged
> in-flight. I used Samba/CIFS, NFS(v4, via TCP), Apache httpd 2.2, thttpd,
> python and netcat to verify this.
>
> I think I managed to track down the culprit: as soon as I disable sendfile()
> for all programs that support such a configuration (netcat, afaik, won't ever
> use sendfile() to transmit data over a socket, so the problem was never
> reproducible there in the first place), everything reverts to perfect and
> proper working condition.
>
> I've been experiencing this problem with vanilla kernel releases from the 3.3
> up until 3.4.0 series. I do not know if it also occurs with earlier releases,
> but I can verify if that is useful. I set up the environment for a minimal
> kind of testcase (a large ISO image file available from the server's local
> filesystem, as well as from a mounted NFS export - once via lo, and once via
> br0/eth0), and proceeded to do the following:
>
> i=0; for i in {1..100}
> do
> echo "pass $i:"; sync; echo 3 > /proc/sys/vm/drop_caches
> cmp -b /mnt/nfs-test/lo/tmp/X15-65741.iso /srv/files/pub/tmp/X15-65741.iso
> done
>
> I then rotated the source of the data, and tested the network-mount against
> the loopback-mount, as well as the network-mount against the local filesystem.
>
> Computing the file's md5sum in a loop whilst dropping caches after each
> iteration by reading it directly from its location in the filesystem produces
> the very same hash every time - I therefore think it's safe to assume the
> corruption is introduced when traversing the networking stack. The hash also
> does not change if I repeadetly compute the md5sum of the file as transferred
> by, e. g., Apache httpd or smbd with sendfile explicitly disabled.
>
> Please take a look at the attachment to see the actual output of the above
> script. It does not matter if I do an actual transfer over the network from my
> server to one of its clients (I verified the problem with two different client
> machines, one even running Windows), or if the server is both source and
> destination of the transfer - as long as sendfile is involed, some of the data
> will always become garbled sooner or later. That also leads me to believe that
> my internetworking devices (my switch in particular) is working just fine;
> testing bulky transfers from one host to another confirms this insofar as thus
> all data makes it through unscathed.
>
> As soon as I switch off sendfile-support (in, e. g. Samba or Apache httpd), I
> can run a series of thousands and more transfers, and not experience any
> corruption at all. Whenever the data gets fubared, there is no hint at
> anything fishy going on in the debug ringbuffer - curruption takes place in
> total silence.
>
> The system in question has an Intel Pro/1000 PCI-e NIC for doing the networked
> file transfers, and is backed by a md RAID5-Array with LVM2 on top. The 4GB of
> system memory (ECC-enabled UDIMM) are operating in S4ECD4ED mode as reported
> by EDAC, and there are no reported errors. The CPU I have installed is an AMD
> Athlon II X2 245e on an ASUS M4A88TD-M/USB3 Motherboard. It's running Gentoo
> for amd64. The box can run prime96 in torture mode and linpack just fine for
> days - I'm therefore assuming the hardware to be working correctly.
>
> I have attached my kernel's config (from 3.4.0, as that's the image that I
> have running right now) attached for sake of completeness, as well as some
> information for you to see how I tested, and what these tests actually
> produced. If you need any other information to help track this down, please
> let me know.
>
> If you decide to answer please keep me CC'd, as I'm not subscribed to this
> list.
>
> Just in case the numerous attachments get scrubbed/removed, I've also uploaded
> them to http://johannes.truschnigg.info/tmp/sendfile_data_corruption/
>
> Thanks for reading, and have a nice weekend everyone :)
>
Is the above corruption related to the one below?
On Tue, Jul 3, 2012 at 8:02 AM, Willy Tarreau <w@1wt.eu> wrote:
>
> In fact it has been true zero copy in 2.6.25 until we faced a large
> amount of data corruption and the zero copy was disabled in 2.6.25.X.
> Since then it remained that way until you brought your patches to
> re-instantiate it.
^ permalink raw reply
* Re: [RFC PATCH] tun: don't zeroize sock->file on detach
From: Al Viro @ 2012-07-14 8:15 UTC (permalink / raw)
To: Stanislav Kinsbursky; +Cc: davem, netdev, ruanzhijie, linux-kernel
In-Reply-To: <20120711114753.24395.53193.stgit@localhost6.localdomain6>
On Wed, Jul 11, 2012 at 03:48:20PM +0400, Stanislav Kinsbursky wrote:
> This is a fix for bug, introduced in 3.4 kernel by commit
> 1ab5ecb90cb6a3df1476e052f76a6e8f6511cb3d, which, among other things, replaced
> simple sock_put() by sk_release_kernel(). Below is sequence, which leads to
> oops for non-persistent devices:
>
> tun_chr_close()
> tun_detach() <== tun->socket.file = NULL
> tun_free_netdev()
> sk_release_sock()
> sock_release(sock->file == NULL)
> iput(SOCK_INODE(sock)) <== dereference on NULL pointer
>
> This patch just removes zeroing of socket's file from __tun_detach().
> sock_release() will do this.
>
> Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
> ---
> drivers/net/tun.c | 1 -
> 1 files changed, 0 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 987aeef..c1639f3 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -185,7 +185,6 @@ static void __tun_detach(struct tun_struct *tun)
> netif_tx_lock_bh(tun->dev);
> netif_carrier_off(tun->dev);
> tun->tfile = NULL;
> - tun->socket.file = NULL;
> netif_tx_unlock_bh(tun->dev);
ACK, but I have to say that I don't like the entire area. The games around sock->file
in general tend to be really nasty. Examples:
1) net/9p/trans_fd.c:p9_socket_open():
we come there with freshly created and connected struct socket in *csocket
we do sock_map_fd() and bugger off if it fails
we do get_file(csocket->file) twice and, having grabbed the references, close
the damn fd.
What happens if that races with close() on the same fd before we get to those get_file()?
We hit sock_close(), which calls sock_release(), which clears csocket->file. Boom -
atomic_inc_long(&NULL->f_count) is not going to do us any good. Outright bug, mitigated
only by the fact that all callchains to that place go through mount(2), so you have elevated
privs anyway.
2) with this sucker we hit an interesting interplay with vhost; note that the total effect
of tun_get_socket() does *not* include any refcount changes. Nor should it - the caller
has a valid reference to struct file, after all. Eventually the caller proceeds to drop
the same reference, by doing fput(sock->file). And it will be the same struct file, but
proving that takes a lot of digging through the tun.c guts; the crucial observation is that
we never get to __tun_detach() as long as we have a reference to opened (cdev) file that
has been successfully attached at some point and that ones that hadn't been attached at
all wouldn't have passed through tun_get_socket(). IOW, it works, but it's brittle as hell.
Unless I've missed something in the analysis and it's really broken, that is.
Frankly, I would prefer to keep the reference to struct file for vhost explicitly in vhost
data structures. Would be less dependent on the guts of tun/macvtap/whatnot that way...
3) iscsi goes as far as allocating fake struct file (with kzalloc(), and $DEITY help you
if you ever call fput() on that), presumably for the sake of sctp. The only place in sctp
stack I see looking at sock->file is
/* in-kernel sockets don't generally have a file allocated to them
* if all they do is call sock_create_kern().
*/
if (sk->sk_socket->file)
f_flags = sk->sk_socket->file->f_flags;
timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
in __sctp_connect() and AFAICS we could bloody well have left it NULL - we leave ->f_flags
zero in that code anyway and that's what __sctp_connect() will presume on NULL ->file.
I'm not familiar enough with sctp or iscsi, but at the first look it seems to be asking
for removal of all those games with ->file in the latter.
I really wonder if we have a single legitimate case for anything other than sock_alloc_file()
setting sock->file. Anyone?
^ permalink raw reply
* Re: PROBLEM: Silent data corruption when using sendfile()
From: Eric Dumazet @ 2012-07-14 8:20 UTC (permalink / raw)
To: Hillf Danton
Cc: Johannes Truschnigg, linux-kernel, Willy Tarreau, Linux-Netdev
In-Reply-To: <CAJd=RBAntSubDBbJ292SzeoN4hTwBQ_Q23jt+Y6i-+vfrQ5EHQ@mail.gmail.com>
On Sat, 2012-07-14 at 16:04 +0800, Hillf Danton wrote:
> On Sat, Jul 14, 2012 at 1:18 AM, Johannes Truschnigg
> <johannes@truschnigg.info> wrote:
> > Hello good people of linux-kernel.
> >
> > I've been bothered by silent data corruption from my personal fileserver - no
> > matter the Layer 7 protocol used, huge transfers sporadically ended up damaged
> > in-flight. I used Samba/CIFS, NFS(v4, via TCP), Apache httpd 2.2, thttpd,
> > python and netcat to verify this.
> >
> > I think I managed to track down the culprit: as soon as I disable sendfile()
> > for all programs that support such a configuration (netcat, afaik, won't ever
> > use sendfile() to transmit data over a socket, so the problem was never
> > reproducible there in the first place), everything reverts to perfect and
> > proper working condition.
> >
> > I've been experiencing this problem with vanilla kernel releases from the 3.3
> > up until 3.4.0 series. I do not know if it also occurs with earlier releases,
> > but I can verify if that is useful. I set up the environment for a minimal
> > kind of testcase (a large ISO image file available from the server's local
> > filesystem, as well as from a mounted NFS export - once via lo, and once via
> > br0/eth0), and proceeded to do the following:
> >
> > i=0; for i in {1..100}
> > do
> > echo "pass $i:"; sync; echo 3 > /proc/sys/vm/drop_caches
> > cmp -b /mnt/nfs-test/lo/tmp/X15-65741.iso /srv/files/pub/tmp/X15-65741.iso
> > done
> >
> > I then rotated the source of the data, and tested the network-mount against
> > the loopback-mount, as well as the network-mount against the local filesystem.
> >
> > Computing the file's md5sum in a loop whilst dropping caches after each
> > iteration by reading it directly from its location in the filesystem produces
> > the very same hash every time - I therefore think it's safe to assume the
> > corruption is introduced when traversing the networking stack. The hash also
> > does not change if I repeadetly compute the md5sum of the file as transferred
> > by, e. g., Apache httpd or smbd with sendfile explicitly disabled.
> >
> > Please take a look at the attachment to see the actual output of the above
> > script. It does not matter if I do an actual transfer over the network from my
> > server to one of its clients (I verified the problem with two different client
> > machines, one even running Windows), or if the server is both source and
> > destination of the transfer - as long as sendfile is involed, some of the data
> > will always become garbled sooner or later. That also leads me to believe that
> > my internetworking devices (my switch in particular) is working just fine;
> > testing bulky transfers from one host to another confirms this insofar as thus
> > all data makes it through unscathed.
> >
> > As soon as I switch off sendfile-support (in, e. g. Samba or Apache httpd), I
> > can run a series of thousands and more transfers, and not experience any
> > corruption at all. Whenever the data gets fubared, there is no hint at
> > anything fishy going on in the debug ringbuffer - curruption takes place in
> > total silence.
> >
> > The system in question has an Intel Pro/1000 PCI-e NIC for doing the networked
> > file transfers, and is backed by a md RAID5-Array with LVM2 on top. The 4GB of
> > system memory (ECC-enabled UDIMM) are operating in S4ECD4ED mode as reported
> > by EDAC, and there are no reported errors. The CPU I have installed is an AMD
> > Athlon II X2 245e on an ASUS M4A88TD-M/USB3 Motherboard. It's running Gentoo
> > for amd64. The box can run prime96 in torture mode and linpack just fine for
> > days - I'm therefore assuming the hardware to be working correctly.
> >
> > I have attached my kernel's config (from 3.4.0, as that's the image that I
> > have running right now) attached for sake of completeness, as well as some
> > information for you to see how I tested, and what these tests actually
> > produced. If you need any other information to help track this down, please
> > let me know.
> >
> > If you decide to answer please keep me CC'd, as I'm not subscribed to this
> > list.
> >
> > Just in case the numerous attachments get scrubbed/removed, I've also uploaded
> > them to http://johannes.truschnigg.info/tmp/sendfile_data_corruption/
> >
> > Thanks for reading, and have a nice weekend everyone :)
> >
>
> Is the above corruption related to the one below?
>
>
> On Tue, Jul 3, 2012 at 8:02 AM, Willy Tarreau <w@1wt.eu> wrote:
> >
> > In fact it has been true zero copy in 2.6.25 until we faced a large
> > amount of data corruption and the zero copy was disabled in 2.6.25.X.
> > Since then it remained that way until you brought your patches to
> > re-instantiate it.
Might be, or not (could be a NIC bug)
Please Johannes could you try latest kernel tree ?
^ permalink raw reply
* Re: resurrecting tcphealth
From: Eric Dumazet @ 2012-07-14 8:27 UTC (permalink / raw)
To: Piotr Sawuk; +Cc: netdev, linux-kernel
In-Reply-To: <cc6495b92f1df180c1ad43057ceb0b98.squirrel@webmail.univie.ac.at>
On Sat, 2012-07-14 at 09:56 +0200, Piotr Sawuk wrote:
> On Sa, 14.07.2012, 03:31, valdis.kletnieks@vt.edu wrote:
> > On Fri, 13 Jul 2012 16:55:44 -0700, Stephen Hemminger said:
> >
> >> >+ /* Course retransmit inefficiency- this packet has been received
> >> twice. */
> >> >+ tp->dup_pkts_recv++;
> >> I don't understand that comment, could you use a better sentence please?
> >
> > I think what was intended was:
> >
> > /* Curse you, retransmit inefficiency! This packet has been received at
> least twice */
> >
>
> LOL, no. I think "course retransmit" is short for "course-grained timeout
> caused retransmit" but I can't be sure since I'm not the author of these
> lines. I'll replace that comment with the non-shorthand version though.
> however, I think the real comment here should be:
>
> /*A perceived shortcoming of the standard TCP implementation: A
> TCP receiver can get duplicate packets from the sender because it cannot
> acknowledge packets that arrive out of order. These duplicates would happen
> when the sender mistakenly thinks some packets have been lost by the network
> because it does not receive acks for them but in reality they were
> successfully received out of order. Since the receiver has no way of letting
> the sender know about the receipt of these packets, they could potentially
> be re-sent and re-received at the receiver. Not only do duplicate packets
> waste precious Internet bandwidth but they hurt performance because the
> sender mistakenly detects congestion from packet losses. The SACK TCP
> extension speci\fcally addresses this issue. A large number of duplicate
> packets received would indicate a signi\fcant bene\ft to the wide adoption of
> SACK. The duplicatepacketsreceived metric is computed at the
> receiver and counts these packets on a per-connection basis.*/
>
> as copied from his thesis at [1]. also in the thesis he writes:
>
> In our limited experiment, the results indicated no duplicate packets were
> received on any connection in the 18 hour run. This leads us to several
> conclusions. Since duplicate ACKs were seen on many connections we know that
> some packets were lost or reordered, but unACKed reordered packets never
> caused a /coursegrainedtimeouts/ on our connections. Only these timeouts
> will cause duplicate packets to be received since less severe out-of-order
> conditions will be resolved with fast retransmits. The lack of course
> timeouts
> may be due to the quality of UCSD's ActiveWeb network or the paucity of
> large gaps between received packet groups. It should be noted that Linux 2.2
> implements fast retransmits for up to two packet gaps, thus reducing the
> need for course grained timeouts due to the lack of SACK.
>
> [1] https://sacerdoti.org/tcphealth/tcphealth-paper.pdf
Not sure how pertinent is this paper today in 2012
I would prefer you add global counters, instead of per tcp counters that
most applications wont use at all.
Example of a more useful patch : add a counter of packets queued in Out
Of Order queue ( in tcp_data_queue_ofo() )
"netstat -s" will display the total count, without any changes in
userland tools/applications.
^ permalink raw reply
* Re: [RFC 2/2] net: Add support for NTB virtual ethernet device
From: Jiri Pirko @ 2012-07-14 8:30 UTC (permalink / raw)
To: Jon Mason; +Cc: linux-kernel, netdev, linux-pci, Dave Jiang
In-Reply-To: <20120714055034.GB4808@jonmason-lab>
Sat, Jul 14, 2012 at 07:50:35AM CEST, jon.mason@intel.com wrote:
>On Sat, Jul 14, 2012 at 01:14:03AM +0200, Jiri Pirko wrote:
>> Fri, Jul 13, 2012 at 11:45:00PM CEST, jon.mason@intel.com wrote:
>> >A virtual ethernet device that uses the NTB transport API to send/receive data.
>> >
>> >Signed-off-by: Jon Mason <jon.mason@intel.com>
>> >---
>> > drivers/net/Kconfig | 4 +
>> > drivers/net/Makefile | 1 +
>> > drivers/net/ntb_netdev.c | 411 ++++++++++++++++++++++++++++++++++++++++++++++
>> > 3 files changed, 416 insertions(+), 0 deletions(-)
>> > create mode 100644 drivers/net/ntb_netdev.c
<snip>
>> >+
>> >+static const struct net_device_ops ntb_netdev_ops = {
>> >+ .ndo_open = ntb_netdev_open,
>> >+ .ndo_stop = ntb_netdev_close,
>> >+ .ndo_start_xmit = ntb_netdev_start_xmit,
>> >+ .ndo_change_mtu = ntb_netdev_change_mtu,
>> >+ .ndo_tx_timeout = ntb_netdev_tx_timeout,
>> >+ .ndo_set_mac_address = eth_mac_addr,
>>
>> Does your device support mac change while it's up and running?
>
>It's virtual ethernet, so there is no hardware limitation, only what is acceptable for the remote side to receive.
In that case, it would be good to do:
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
This enables mac change in eth_mac_addr() when iface is running.
<snip>
>> >+
>> >+static int __init ntb_netdev_init_module(void)
>> >+{
>> >+ struct ntb_netdev *dev;
>> >+ int rc;
>> >+
>> >+ pr_info("%s: Probe\n", KBUILD_MODNAME);
>> >+
>> >+ netdev = alloc_etherdev(sizeof(struct ntb_netdev));
>>
>> I might be missing something but this place (module init) does not seems
>> like a good place to do alloc_etherdev(). Do you want to support only
>> one netdevice instance?
>>
>> Anyway, I think that using "static netdev" should be avoided in any case.
>>
>
>It would fail the probe if there is no underlying ntb hardware, but it would make sense to check for that before allocing the etherdev.
But isn't there possible to have multiple ntb hardware devices? It would make
sense to register ntb device here with ntb core and let the core call
probe which would actually create new netdev.
Is there a limitation that one underlying ntb hardware ~ one ntb netdevice?
Thanks,
Jiri
^ permalink raw reply
* Re: PROBLEM: Silent data corruption when using sendfile()
From: Willy Tarreau @ 2012-07-14 8:31 UTC (permalink / raw)
To: Eric Dumazet
Cc: Hillf Danton, Johannes Truschnigg, linux-kernel, Linux-Netdev
In-Reply-To: <1342254042.3265.9017.camel@edumazet-glaptop>
On Sat, Jul 14, 2012 at 10:20:41AM +0200, Eric Dumazet wrote:
> > On Tue, Jul 3, 2012 at 8:02 AM, Willy Tarreau <w@1wt.eu> wrote:
> > >
> > > In fact it has been true zero copy in 2.6.25 until we faced a large
> > > amount of data corruption and the zero copy was disabled in 2.6.25.X.
> > > Since then it remained that way until you brought your patches to
> > > re-instantiate it.
>
> Might be, or not (could be a NIC bug)
I may be wrong but what I recall from this bug was an issue when
forwarding TCP between two NICs, related to linear vs non-linear
data (I have memories of something around data not yet ACKed being
replaced before being retransmitted but I may be wrong). Anyway,
the way it was fixed consisted in simply disabling the zero-copy
code path. So this should be something different from what Johannes
reports. Maybe a regression since then though.
> Please Johannes could you try latest kernel tree ?
It would be useful, especially given the amount of changes you performed
in this area in latest version, it could be very possible that this new
bug got fixed as a side effect !
Regards,
Willy
^ permalink raw reply
* [net 1/2] e1000e: Correct link check logic for 82571 serdes
From: Jeff Kirsher @ 2012-07-14 8:34 UTC (permalink / raw)
To: davem
Cc: Tushar Dave, netdev, gospo, sassmann, stable, dnelson,
bruce.w.allan, Jeff Kirsher
From: Tushar Dave <tushar.n.dave@intel.com>
SYNCH bit and IV bit of RXCW register are sticky. Before examining these bits,
RXCW should be read twice to filter out one-time false events and have correct
values for these bits. Incorrect values of these bits in link check logic can
cause weird link stability issues if auto-negotiation fails.
CC: stable <stable@vger.kernel.org> [2.6.38+]
Reported-by: Dean Nelson <dnelson@redhat.com>
Signed-off-by: Tushar Dave <tushar.n.dave@intel.com>
Reviewed-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
drivers/net/ethernet/intel/e1000e/82571.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 36db4df..1f063dc 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -1572,6 +1572,9 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
ctrl = er32(CTRL);
status = er32(STATUS);
rxcw = er32(RXCW);
+ /* SYNCH bit and IV bit are sticky */
+ udelay(10);
+ rxcw = er32(RXCW);
if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) {
--
1.7.10.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox