Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCHv2 2/4] caif-u5500: CAIF shared memory transport protocol
From: Sjur Braendeland @ 2010-10-27 18:34 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Russel King, linux-arm-kernel, linus.walleij,
	stefan.xk.nilsson, Sjur Braendeland
In-Reply-To: <1288204482-2742-1-git-send-email-sjur.brandeland@stericsson.com>

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
---

>> +             if (!in_irq())
>> +                     spin_lock_bh(&pshm_drv->lock);
>> +             else
>> +                     spin_lock_irqsave(&pshm_drv->lock, flags);
>
>You should never have contextual based locking schemes like this,
>it's not just ugly it's also deadlock prone.
Ok, thanks - I have fixed this.
Currently the code is running in IRQ context anyway.

 drivers/net/caif/caif_shmcore.c |  744 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 744 insertions(+), 0 deletions(-)
 create mode 100644 drivers/net/caif/caif_shmcore.c

diff --git a/drivers/net/caif/caif_shmcore.c b/drivers/net/caif/caif_shmcore.c
new file mode 100644
index 0000000..19f9c06
--- /dev/null
+++ b/drivers/net/caif/caif_shmcore.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Authors:  Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com,
+ *           Daniel Martensson / daniel.martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":" __func__ "():" fmt
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+
+#include <net/caif/caif_device.h>
+#include <net/caif/caif_shm.h>
+
+#define NR_TX_BUF		6
+#define NR_RX_BUF		6
+#define TX_BUF_SZ		0x2000
+#define RX_BUF_SZ		0x2000
+
+#define CAIF_NEEDED_HEADROOM	32
+
+#define CAIF_FLOW_ON		1
+#define CAIF_FLOW_OFF		0
+
+#define LOW_WATERMARK		3
+#define HIGH_WATERMARK		4
+
+/* Maximum number of CAIF buffers per shared memory buffer. */
+#define SHM_MAX_FRMS_PER_BUF	10
+
+/*
+ * Size in bytes of the descriptor area
+ * (With end of descriptor signalling)
+ */
+#define SHM_CAIF_DESC_SIZE	((SHM_MAX_FRMS_PER_BUF + 1) * \
+					sizeof(struct shm_pck_desc))
+
+/*
+ * Offset to the first CAIF frame within a shared memory buffer.
+ * Aligned on 32 bytes.
+ */
+#define SHM_CAIF_FRM_OFS	(SHM_CAIF_DESC_SIZE + (SHM_CAIF_DESC_SIZE % 32))
+
+/* Number of bytes for CAIF shared memory header. */
+#define SHM_HDR_LEN		1
+
+/* Number of padding bytes for the complete CAIF frame. */
+#define SHM_FRM_PAD_LEN		4
+
+#define CAIF_MAX_MTU		4096
+
+#define SHM_SET_FULL(x)	(((x+1) & 0x0F) << 0)
+#define SHM_GET_FULL(x)	(((x >> 0) & 0x0F) - 1)
+
+#define SHM_SET_EMPTY(x)	(((x+1) & 0x0F) << 4)
+#define SHM_GET_EMPTY(x)	(((x >> 4) & 0x0F) - 1)
+
+#define SHM_FULL_MASK		(0x0F << 0)
+#define SHM_EMPTY_MASK		(0x0F << 4)
+
+struct shm_pck_desc {
+	/*
+	 * Offset from start of shared memory area to start of
+	 * shared memory CAIF frame.
+	 */
+	u32 frm_ofs;
+	u32 frm_len;
+};
+
+struct buf_list {
+	unsigned char *desc_vptr;
+	u32 phy_addr;
+	u32 index;
+	u32 len;
+	u32 frames;
+	u32 frm_ofs;
+	struct list_head list;
+};
+
+struct shm_caif_frm {
+	/* Number of bytes of padding before the CAIF frame. */
+	u8 hdr_ofs;
+};
+
+struct shmdrv_layer {
+	/* caif_dev_common must always be first in the structure*/
+	struct caif_dev_common cfdev;
+
+	u32 shm_tx_addr;
+	u32 shm_rx_addr;
+	u32 shm_base_addr;
+	u32 tx_empty_available;
+	spinlock_t lock;
+
+	struct list_head tx_empty_list;
+	struct list_head tx_pend_list;
+	struct list_head tx_full_list;
+	struct list_head rx_empty_list;
+	struct list_head rx_pend_list;
+	struct list_head rx_full_list;
+
+	struct workqueue_struct *pshm_tx_workqueue;
+	struct workqueue_struct *pshm_rx_workqueue;
+
+	struct work_struct shm_tx_work;
+	struct work_struct shm_rx_work;
+
+	struct sk_buff_head sk_qhead;
+	struct shmdev_layer *pshm_dev;
+};
+
+static int shm_netdev_open(struct net_device *shm_netdev)
+{
+	netif_wake_queue(shm_netdev);
+	return 0;
+}
+
+static int shm_netdev_close(struct net_device *shm_netdev)
+{
+	netif_stop_queue(shm_netdev);
+	return 0;
+}
+
+int caif_shmdrv_rx_cb(u32 mbx_msg, void *priv)
+{
+	struct buf_list *pbuf;
+	struct shmdrv_layer *pshm_drv;
+	struct list_head *pos;
+	u32 avail_emptybuff = 0;
+	unsigned long flags = 0;
+
+	pshm_drv = (struct shmdrv_layer *)priv;
+
+	/* Check for received buffers. */
+	if (mbx_msg & SHM_FULL_MASK) {
+		int idx;
+
+		spin_lock_irqsave(&pshm_drv->lock, flags);
+
+		/* Check whether we have any outstanding buffers. */
+		if (list_empty(&pshm_drv->rx_empty_list)) {
+
+			/* Release spin lock. */
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+			/* We print even in IRQ context... */
+			pr_warn("No empty Rx buffers to fill: "
+					"mbx_msg:%x\n", mbx_msg);
+
+			/* Bail out. */
+			goto err_sync;
+		}
+
+		pbuf =
+			list_entry(pshm_drv->rx_empty_list.next,
+					struct buf_list, list);
+		idx = pbuf->index;
+
+		/* Check buffer synchronization. */
+		if (idx != SHM_GET_FULL(mbx_msg)) {
+
+			/* We print even in IRQ context... */
+			pr_warn(
+			"phyif_shm_mbx_msg_cb: RX full out of sync:"
+			" idx:%d, msg:%x SHM_GET_FULL(mbx_msg):%x\n",
+				idx, mbx_msg, SHM_GET_FULL(mbx_msg));
+
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+			/* Bail out. */
+			goto err_sync;
+		}
+
+		list_del_init(&pbuf->list);
+		list_add_tail(&pbuf->list, &pshm_drv->rx_full_list);
+
+		spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+		/* Schedule RX work queue. */
+		if (!work_pending(&pshm_drv->shm_rx_work))
+			queue_work(pshm_drv->pshm_rx_workqueue,
+						&pshm_drv->shm_rx_work);
+	}
+
+	/* Check for emptied buffers. */
+	if (mbx_msg & SHM_EMPTY_MASK) {
+		int idx;
+
+		spin_lock_irqsave(&pshm_drv->lock, flags);
+
+		/* Check whether we have any outstanding buffers. */
+		if (list_empty(&pshm_drv->tx_full_list)) {
+
+			/* We print even in IRQ context... */
+			pr_warn("No TX to empty: msg:%x\n", mbx_msg);
+
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+			/* Bail out. */
+			goto err_sync;
+		}
+
+		pbuf =
+			list_entry(pshm_drv->tx_full_list.next,
+					struct buf_list, list);
+		idx = pbuf->index;
+
+		/* Check buffer synchronization. */
+		if (idx != SHM_GET_EMPTY(mbx_msg)) {
+
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+			/* We print even in IRQ context... */
+			pr_warn("TX empty "
+				"out of sync:idx:%d, msg:%x\n", idx, mbx_msg);
+
+			/* Bail out. */
+			goto err_sync;
+		}
+		list_del_init(&pbuf->list);
+
+		/* Reset buffer parameters. */
+		pbuf->frames = 0;
+		pbuf->frm_ofs = SHM_CAIF_FRM_OFS;
+
+		list_add_tail(&pbuf->list, &pshm_drv->tx_empty_list);
+
+		/* Check the available no. of buffers in the empty list */
+		list_for_each(pos, &pshm_drv->tx_empty_list)
+			avail_emptybuff++;
+
+		/* Check whether we have to wake up the transmitter. */
+		if ((avail_emptybuff > HIGH_WATERMARK) &&
+					(!pshm_drv->tx_empty_available)) {
+			pshm_drv->tx_empty_available = 1;
+			pshm_drv->cfdev.flowctrl
+					(pshm_drv->pshm_dev->pshm_netdev,
+								CAIF_FLOW_ON);
+
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+			/* Schedule the work queue. if required */
+			if (!work_pending(&pshm_drv->shm_tx_work))
+				queue_work(pshm_drv->pshm_tx_workqueue,
+							&pshm_drv->shm_tx_work);
+		} else
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+	}
+
+	return 0;
+
+err_sync:
+	return -EIO;
+}
+
+static void shm_rx_work_func(struct work_struct *rx_work)
+{
+	struct shmdrv_layer *pshm_drv;
+	struct buf_list *pbuf;
+	unsigned long flags = 0;
+	struct sk_buff *skb;
+	char *p;
+	int ret;
+
+	pshm_drv = container_of(rx_work, struct shmdrv_layer, shm_rx_work);
+
+	while (1) {
+
+		struct shm_pck_desc *pck_desc;
+
+		spin_lock_irqsave(&pshm_drv->lock, flags);
+
+		/* Check for received buffers. */
+		if (list_empty(&pshm_drv->rx_full_list)) {
+			spin_unlock_irqrestore(&pshm_drv->lock, flags);
+			break;
+		}
+
+		pbuf =
+			list_entry(pshm_drv->rx_full_list.next, struct buf_list,
+					list);
+		list_del_init(&pbuf->list);
+
+		/* Retrieve pointer to start of the packet descriptor area. */
+		pck_desc = (struct shm_pck_desc *) pbuf->desc_vptr;
+
+		/*
+		 * Check whether descriptor contains a CAIF shared memory
+		 * frame.
+		 */
+		while (pck_desc->frm_ofs) {
+			unsigned int frm_buf_ofs;
+			unsigned int frm_pck_ofs;
+			unsigned int frm_pck_len;
+			/*
+			 * Check whether offset is within buffer limits
+			 * (lower).
+			 */
+			if (pck_desc->frm_ofs <
+				(pbuf->phy_addr - pshm_drv->shm_base_addr))
+				break;
+			/*
+			 * Check whether offset is within buffer limits
+			 * (higher).
+			 */
+			if (pck_desc->frm_ofs >
+				((pbuf->phy_addr - pshm_drv->shm_base_addr) +
+					pbuf->len))
+				break;
+
+			/* Calculate offset from start of buffer. */
+			frm_buf_ofs =
+				pck_desc->frm_ofs - (pbuf->phy_addr -
+						pshm_drv->shm_base_addr);
+
+			/*
+			 * Calculate offset and length of CAIF packet while
+			 * taking care of the shared memory header.
+			 */
+			frm_pck_ofs =
+				frm_buf_ofs + SHM_HDR_LEN +
+				(*(pbuf->desc_vptr + frm_buf_ofs));
+			frm_pck_len =
+				(pck_desc->frm_len - SHM_HDR_LEN -
+				(*(pbuf->desc_vptr + frm_buf_ofs)));
+
+			/* Check whether CAIF packet is within buffer limits */
+			if ((frm_pck_ofs + pck_desc->frm_len) > pbuf->len)
+				break;
+
+			/* Get a suitable CAIF packet and copy in data. */
+			skb = netdev_alloc_skb(pshm_drv->pshm_dev->pshm_netdev,
+							frm_pck_len + 1);
+			BUG_ON(skb == NULL);
+
+			p = skb_put(skb, frm_pck_len);
+			memcpy(p, pbuf->desc_vptr + frm_pck_ofs, frm_pck_len);
+
+			skb->protocol = htons(ETH_P_CAIF);
+			skb_reset_mac_header(skb);
+			skb->dev = pshm_drv->pshm_dev->pshm_netdev;
+
+			/* Push received packet up the stack. */
+			ret = netif_rx_ni(skb);
+
+			if (!ret) {
+				pshm_drv->pshm_dev->pshm_netdev->stats.
+								rx_packets++;
+				pshm_drv->pshm_dev->pshm_netdev->stats.
+						rx_bytes += pck_desc->frm_len;
+			} else
+				++pshm_drv->pshm_dev->pshm_netdev->stats.
+								rx_dropped;
+			/* Move to next packet descriptor. */
+			pck_desc++;
+		}
+
+		list_add_tail(&pbuf->list, &pshm_drv->rx_pend_list);
+
+		spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+	}
+
+	/* Schedule the work queue. if required */
+	if (!work_pending(&pshm_drv->shm_tx_work))
+		queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work);
+
+}
+
+static void shm_tx_work_func(struct work_struct *tx_work)
+{
+	u32 mbox_msg;
+	unsigned int frmlen, avail_emptybuff, append = 0;
+	unsigned long flags = 0;
+	struct buf_list *pbuf = NULL;
+	struct shmdrv_layer *pshm_drv;
+	struct shm_caif_frm *frm;
+	struct sk_buff *skb;
+	struct shm_pck_desc *pck_desc;
+	struct list_head *pos;
+
+	pshm_drv = container_of(tx_work, struct shmdrv_layer, shm_tx_work);
+
+	do {
+		/* Initialize mailbox message. */
+		mbox_msg = 0x00;
+		avail_emptybuff = 0;
+
+		spin_lock_irqsave(&pshm_drv->lock, flags);
+
+		/* Check for pending receive buffers. */
+		if (!list_empty(&pshm_drv->rx_pend_list)) {
+
+			pbuf = list_entry(pshm_drv->rx_pend_list.next,
+						struct buf_list, list);
+
+			list_del_init(&pbuf->list);
+			list_add_tail(&pbuf->list, &pshm_drv->rx_empty_list);
+			/*
+			 * Value index is never changed,
+			 * so read access should be safe.
+			 */
+			mbox_msg |= SHM_SET_EMPTY(pbuf->index);
+		}
+
+		skb = skb_peek(&pshm_drv->sk_qhead);
+
+		if (skb == NULL)
+			goto send_msg;
+
+		/* Check the available no. of buffers in the empty list */
+		list_for_each(pos, &pshm_drv->tx_empty_list)
+			avail_emptybuff++;
+
+		if ((avail_emptybuff < LOW_WATERMARK) &&
+					pshm_drv->tx_empty_available) {
+			/* Update blocking condition. */
+			pshm_drv->tx_empty_available = 0;
+			pshm_drv->cfdev.flowctrl
+					(pshm_drv->pshm_dev->pshm_netdev,
+					CAIF_FLOW_OFF);
+		}
+		/*
+		 * We simply return back to the caller if we do not have space
+		 * either in Tx pending list or Tx empty list. In this case,
+		 * we hold the received skb in the skb list, waiting to
+		 * be transmitted once Tx buffers become available
+		 */
+		if (list_empty(&pshm_drv->tx_empty_list))
+			goto send_msg;
+
+		/* Get the first free Tx buffer. */
+		pbuf = list_entry(pshm_drv->tx_empty_list.next,
+						struct buf_list, list);
+		do {
+			if (append) {
+				skb = skb_peek(&pshm_drv->sk_qhead);
+				if (skb == NULL)
+					break;
+			}
+
+			frm = (struct shm_caif_frm *)
+					(pbuf->desc_vptr + pbuf->frm_ofs);
+
+			frm->hdr_ofs = 0;
+			frmlen = 0;
+			frmlen += SHM_HDR_LEN + frm->hdr_ofs + skb->len;
+
+			/* Add tail padding if needed. */
+			if (frmlen % SHM_FRM_PAD_LEN)
+				frmlen += SHM_FRM_PAD_LEN -
+						(frmlen % SHM_FRM_PAD_LEN);
+
+			/*
+			 * Verify that packet, header and additional padding
+			 * can fit within the buffer frame area.
+			 */
+			if (frmlen >= (pbuf->len - pbuf->frm_ofs))
+				break;
+
+			if (!append) {
+				list_del_init(&pbuf->list);
+				append = 1;
+			}
+
+			skb = skb_dequeue(&pshm_drv->sk_qhead);
+			/* Copy in CAIF frame. */
+			skb_copy_bits(skb, 0, pbuf->desc_vptr +
+					pbuf->frm_ofs + SHM_HDR_LEN +
+						frm->hdr_ofs, skb->len);
+
+			pshm_drv->pshm_dev->pshm_netdev->stats.tx_packets++;
+			pshm_drv->pshm_dev->pshm_netdev->stats.tx_bytes +=
+									frmlen;
+			dev_kfree_skb(skb);
+
+			/* Fill in the shared memory packet descriptor area. */
+			pck_desc = (struct shm_pck_desc *) (pbuf->desc_vptr);
+			/* Forward to current frame. */
+			pck_desc += pbuf->frames;
+			pck_desc->frm_ofs = (pbuf->phy_addr -
+						pshm_drv->shm_base_addr) +
+								pbuf->frm_ofs;
+			pck_desc->frm_len = frmlen;
+			/* Terminate packet descriptor area. */
+			pck_desc++;
+			pck_desc->frm_ofs = 0;
+			/* Update buffer parameters. */
+			pbuf->frames++;
+			pbuf->frm_ofs += frmlen + (frmlen % 32);
+
+		} while (pbuf->frames < SHM_MAX_FRMS_PER_BUF);
+
+		/* Assign buffer as full. */
+		list_add_tail(&pbuf->list, &pshm_drv->tx_full_list);
+		append = 0;
+		mbox_msg |= SHM_SET_FULL(pbuf->index);
+send_msg:
+		spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+		if (mbox_msg)
+			pshm_drv->pshm_dev->pshmdev_mbxsend
+					(pshm_drv->pshm_dev->shm_id, mbox_msg);
+	} while (mbox_msg);
+}
+
+static int shm_netdev_tx(struct sk_buff *skb, struct net_device *shm_netdev)
+{
+	struct shmdrv_layer *pshm_drv;
+	unsigned long flags = 0;
+
+	pshm_drv = netdev_priv(shm_netdev);
+
+	spin_lock_irqsave(&pshm_drv->lock, flags);
+
+	skb_queue_tail(&pshm_drv->sk_qhead, skb);
+
+	spin_unlock_irqrestore(&pshm_drv->lock, flags);
+
+	/* Schedule Tx work queue. for deferred processing of skbs*/
+	if (!work_pending(&pshm_drv->shm_tx_work))
+		queue_work(pshm_drv->pshm_tx_workqueue, &pshm_drv->shm_tx_work);
+
+	return 0;
+}
+
+static const struct net_device_ops netdev_ops = {
+	.ndo_open = shm_netdev_open,
+	.ndo_stop = shm_netdev_close,
+	.ndo_start_xmit = shm_netdev_tx,
+};
+
+static void shm_netdev_setup(struct net_device *pshm_netdev)
+{
+	struct shmdrv_layer *pshm_drv;
+	pshm_netdev->netdev_ops = &netdev_ops;
+
+	pshm_netdev->mtu = CAIF_MAX_MTU;
+	pshm_netdev->type = ARPHRD_CAIF;
+	pshm_netdev->hard_header_len = CAIF_NEEDED_HEADROOM;
+	pshm_netdev->tx_queue_len = 0;
+	pshm_netdev->destructor = free_netdev;
+
+	pshm_drv = netdev_priv(pshm_netdev);
+
+	/* Initialize structures in a clean state. */
+	memset(pshm_drv, 0, sizeof(struct shmdrv_layer));
+
+	pshm_drv->cfdev.link_select = CAIF_LINK_LOW_LATENCY;
+}
+
+int caif_shmcore_probe(struct shmdev_layer *pshm_dev)
+{
+	int result, j;
+	struct shmdrv_layer *pshm_drv = NULL;
+
+	pshm_dev->pshm_netdev = alloc_netdev(sizeof(struct shmdrv_layer),
+						"cfshm%d", shm_netdev_setup);
+	if (!pshm_dev->pshm_netdev)
+		return -ENOMEM;
+
+	pshm_drv = netdev_priv(pshm_dev->pshm_netdev);
+	pshm_drv->pshm_dev = pshm_dev;
+
+	/*
+	 * Initialization starts with the verification of the
+	 * availability of MBX driver by calling its setup function.
+	 * MBX driver must be available by this time for proper
+	 * functioning of SHM driver.
+	 */
+	if ((pshm_dev->pshmdev_mbxsetup
+				(caif_shmdrv_rx_cb, pshm_dev, pshm_drv)) != 0) {
+		pr_warn("Could not config. SHM Mailbox,"
+				" Bailing out.....\n");
+		free_netdev(pshm_dev->pshm_netdev);
+		return -ENODEV;
+	}
+
+	skb_queue_head_init(&pshm_drv->sk_qhead);
+
+	pr_info("SHM DEVICE[%d] PROBED BY DRIVER, NEW SHM DRIVER"
+			" INSTANCE AT pshm_drv =0x%p\n",
+			pshm_drv->pshm_dev->shm_id, pshm_drv);
+
+	if (pshm_dev->shm_total_sz <
+			(NR_TX_BUF * TX_BUF_SZ + NR_RX_BUF * RX_BUF_SZ)) {
+
+		pr_warn("ERROR, Amount of available"
+				" Phys. SHM cannot accomodate current SHM "
+				"driver configuration, Bailing out ...\n");
+		free_netdev(pshm_dev->pshm_netdev);
+		return -ENOMEM;
+	}
+
+	pshm_drv->shm_base_addr = pshm_dev->shm_base_addr;
+	pshm_drv->shm_tx_addr = pshm_drv->shm_base_addr;
+
+	if (pshm_dev->shm_loopback)
+		pshm_drv->shm_rx_addr = pshm_drv->shm_tx_addr;
+	else
+		pshm_drv->shm_rx_addr = pshm_dev->shm_base_addr +
+						(NR_TX_BUF * TX_BUF_SZ);
+
+	INIT_LIST_HEAD(&pshm_drv->tx_empty_list);
+	INIT_LIST_HEAD(&pshm_drv->tx_pend_list);
+	INIT_LIST_HEAD(&pshm_drv->tx_full_list);
+
+	INIT_LIST_HEAD(&pshm_drv->rx_empty_list);
+	INIT_LIST_HEAD(&pshm_drv->rx_pend_list);
+	INIT_LIST_HEAD(&pshm_drv->rx_full_list);
+
+	INIT_WORK(&pshm_drv->shm_tx_work, shm_tx_work_func);
+	INIT_WORK(&pshm_drv->shm_rx_work, shm_rx_work_func);
+
+	pshm_drv->pshm_tx_workqueue =
+				create_singlethread_workqueue("shm_tx_work");
+	pshm_drv->pshm_rx_workqueue =
+				create_singlethread_workqueue("shm_rx_work");
+
+	for (j = 0; j < NR_TX_BUF; j++) {
+		struct buf_list *tx_buf =
+				kmalloc(sizeof(struct buf_list), GFP_KERNEL);
+
+		if (tx_buf == NULL) {
+			pr_warn("ERROR, Could not"
+					" allocate dynamic mem. for tx_buf,"
+					" Bailing out ...\n");
+			free_netdev(pshm_dev->pshm_netdev);
+			return -ENOMEM;
+		}
+		tx_buf->index = j;
+		tx_buf->phy_addr = pshm_drv->shm_tx_addr + (TX_BUF_SZ * j);
+		tx_buf->len = TX_BUF_SZ;
+		tx_buf->frames = 0;
+		tx_buf->frm_ofs = SHM_CAIF_FRM_OFS;
+
+		if (pshm_dev->shm_loopback)
+			tx_buf->desc_vptr = (char *)tx_buf->phy_addr;
+		else
+			tx_buf->desc_vptr =
+					ioremap(tx_buf->phy_addr, TX_BUF_SZ);
+
+		list_add_tail(&tx_buf->list, &pshm_drv->tx_empty_list);
+	}
+
+	for (j = 0; j < NR_RX_BUF; j++) {
+		struct buf_list *rx_buf =
+				kmalloc(sizeof(struct buf_list), GFP_KERNEL);
+
+		if (rx_buf == NULL) {
+			pr_warn("ERROR, Could not"
+					" allocate dynamic mem.for rx_buf,"
+					" Bailing out ...\n");
+			free_netdev(pshm_dev->pshm_netdev);
+			return -ENOMEM;
+		}
+		rx_buf->index = j;
+		rx_buf->phy_addr = pshm_drv->shm_rx_addr + (RX_BUF_SZ * j);
+		rx_buf->len = RX_BUF_SZ;
+
+		if (pshm_dev->shm_loopback)
+			rx_buf->desc_vptr = (char *)rx_buf->phy_addr;
+		else
+			rx_buf->desc_vptr =
+					ioremap(rx_buf->phy_addr, RX_BUF_SZ);
+		list_add_tail(&rx_buf->list, &pshm_drv->rx_empty_list);
+	}
+
+	pshm_drv->tx_empty_available = 1;
+	result = register_netdev(pshm_dev->pshm_netdev);
+	if (result)
+		pr_warn("ERROR[%d], SHM could not, "
+			"register with NW FRMWK Bailing out ...\n", result);
+
+	return result;
+}
+
+void caif_shmcore_remove(struct net_device *pshm_netdev)
+{
+	struct buf_list *pbuf;
+	struct shmdrv_layer *pshm_drv = NULL;
+
+	pshm_drv = netdev_priv(pshm_netdev);
+
+	while (!(list_empty(&pshm_drv->tx_pend_list))) {
+		pbuf =
+			list_entry(pshm_drv->tx_pend_list.next,
+					struct buf_list, list);
+
+		list_del(&pbuf->list);
+		kfree(pbuf);
+	}
+
+	while (!(list_empty(&pshm_drv->tx_full_list))) {
+		pbuf =
+			list_entry(pshm_drv->tx_full_list.next,
+					struct buf_list, list);
+		list_del(&pbuf->list);
+		kfree(pbuf);
+	}
+
+	while (!(list_empty(&pshm_drv->tx_empty_list))) {
+		pbuf =
+			list_entry(pshm_drv->tx_empty_list.next,
+					struct buf_list, list);
+		list_del(&pbuf->list);
+		kfree(pbuf);
+	}
+
+	while (!(list_empty(&pshm_drv->rx_full_list))) {
+		pbuf =
+			list_entry(pshm_drv->tx_full_list.next,
+				struct buf_list, list);
+		list_del(&pbuf->list);
+		kfree(pbuf);
+	}
+
+	while (!(list_empty(&pshm_drv->rx_pend_list))) {
+		pbuf =
+			list_entry(pshm_drv->tx_pend_list.next,
+				struct buf_list, list);
+		list_del(&pbuf->list);
+		kfree(pbuf);
+	}
+
+	while (!(list_empty(&pshm_drv->rx_empty_list))) {
+		pbuf =
+			list_entry(pshm_drv->rx_empty_list.next,
+				struct buf_list, list);
+		list_del(&pbuf->list);
+		kfree(pbuf);
+	}
+
+	/* Destroy work queues. */
+	destroy_workqueue(pshm_drv->pshm_tx_workqueue);
+	destroy_workqueue(pshm_drv->pshm_rx_workqueue);
+
+	unregister_netdev(pshm_netdev);
+}
-- 
1.6.3.3


^ permalink raw reply related

* [PATCHv2 1/4] caif-u5500: Adding shared memory include
From: Sjur Braendeland @ 2010-10-27 18:34 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Russel King, linux-arm-kernel, linus.walleij,
	stefan.xk.nilsson, Amarnath Revanna, Sjur Braendeland
In-Reply-To: <20101027.110732.48492064.davem@davemloft.net>

From: Amarnath Revanna <amarnath.bangalore.revanna@stericsson.com>

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
---
 include/net/caif/caif_shm.h |   26 ++++++++++++++++++++++++++
 1 files changed, 26 insertions(+), 0 deletions(-)
 create mode 100644 include/net/caif/caif_shm.h

diff --git a/include/net/caif/caif_shm.h b/include/net/caif/caif_shm.h
new file mode 100644
index 0000000..5bcce55
--- /dev/null
+++ b/include/net/caif/caif_shm.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author: Amarnath Revanna / amarnath.bangalore.revanna@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef CAIF_SHM_H_
+#define CAIF_SHM_H_
+
+struct shmdev_layer {
+	u32 shm_base_addr;
+	u32 shm_total_sz;
+	u32 shm_id;
+	u32 shm_loopback;
+	void *hmbx;
+	int (*pshmdev_mbxsend) (u32 shm_id, u32 mbx_msg);
+	int (*pshmdev_mbxsetup) (void *pshmdrv_cb,
+				struct shmdev_layer *pshm_dev, void *pshm_drv);
+	struct net_device *pshm_netdev;
+};
+
+extern int caif_shmcore_probe(struct shmdev_layer *pshm_dev);
+extern void caif_shmcore_remove(struct net_device *pshm_netdev);
+
+#endif
-- 
1.6.3.3


^ permalink raw reply related

* Re: [Security] TIPC security issues
From: David Miller @ 2010-10-27 18:34 UTC (permalink / raw)
  To: drosenberg; +Cc: torvalds, jon.maloy, allan.stephens, netdev, security
In-Reply-To: <1288203979.1836.2.camel@dan>

From: Dan Rosenberg <drosenberg@vsecurity.com>
Date: Wed, 27 Oct 2010 14:26:19 -0400

> The proposed fix is a start, but it's not sufficient to completely fix
> the problem.  What if the total of the iovecs wraps around back to 0?
> The total size will be returned as a small number, but large amounts of
> data will be copied into the allocated buffer since the individual
> iovecs can have arbitrary sizes.

The calculated length total is what should be used by the calling function
to decide how much to copy.

Sorry, I assumed the TIPC doing was sane like the rest of the networking.
:-(

I'll fix this up.


^ permalink raw reply

* Re: [Security] TIPC security issues
From: Paul Gortmaker @ 2010-10-27 18:27 UTC (permalink / raw)
  To: David Miller
  Cc: torvalds, drosenberg, jon.maloy, allan.stephens, netdev, security
In-Reply-To: <20101027.105047.183059900.davem@davemloft.net>

On Wed, Oct 27, 2010 at 1:50 PM, David Miller <davem@davemloft.net> wrote:
> From: Linus Torvalds <torvalds@linux-foundation.org>
> Date: Wed, 27 Oct 2010 10:37:46 -0700
>
>> If you _really_ care deeply, then some packet-oriented protocol can
>> just have its own private packet size limit (which would be way less
>> than 2GB), and then just look at the total size and say "oh, the total
>> size is bigger than my limit, so I'll just error out". Then, the fact
>> that verify_iovec() may have truncated the message to 2GB-1 doesn't
>> matter at all.
>>
>> (Practically speaking, I bet all packet-oriented protocols already
>> have a limit that is enforced by simply allocation patterns, so I
>> don't think it's actually a problem even now)
>
> This is, as it turns out, effectively what the TIPC socket layer
> already does.
>
> Most of the send calls that propagate down to this code adding up the
> iov_len lengths gets passed a maximum packet size.
>
> Anyways, here is what I came up with to kill this specific bug in
> TIPC:
>
> From 39dc17049a5ed989bab8997945a048ffddf48387 Mon Sep 17 00:00:00 2001
> From: David S. Miller <davem@davemloft.net>
> Date: Wed, 27 Oct 2010 10:46:59 -0700
> Subject: [PATCH] tipc: Fix iov_len handling in message send path.
>
> Use size_t to add together iov_len's from the iovec, error
> out with -EMSGSIZE if total is greater than INT_MAX.
>
> Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
>  net/tipc/msg.c |   13 ++++++++++---
>  1 files changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> index ecb532f..b29eb8b 100644
> --- a/net/tipc/msg.c
> +++ b/net/tipc/msg.c
> @@ -76,11 +76,15 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
>
>  int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
>  {
> -       int dsz = 0;
> +       size_t dsz = 0;
>        int i;
>
>        for (i = 0; i < num_sect; i++)
>                dsz += msg_sect[i].iov_len;
> +
> +       if (dsz > INT_MAX)
> +               return -EMSGSIZE;
> +

I've got some patches from Al that I'm pre-reviewing/testing
before spamming everyone with them (will send within 24h, if
that is OK).   Anyway, in the above, does it make sense to
check for the overflow incrementally, i.e. something like this?

-       for (i = 0; i < num_sect; i++)
-               dsz += msg_sect[i].iov_len;
+       for (i = 0; i < num_sect; i++) {
+               len = msg_sect[i].iov_len;
+               if (len > LONG_MAX - dsz)
+                       return LONG_MAX;
+               dsz += len;
+       }

(where len and dsz are both size_t).

Thanks,
Paul.

>        return dsz;
>  }
>
> @@ -93,12 +97,15 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
>  */
>
>  int tipc_msg_build(struct tipc_msg *hdr,
> -                           struct iovec const *msg_sect, u32 num_sect,
> -                           int max_size, int usrmem, struct sk_buff** buf)
> +                     struct iovec const *msg_sect, u32 num_sect,
> +                     int max_size, int usrmem, struct sk_buff** buf)
>  {
>        int dsz, sz, hsz, pos, res, cnt;
>
>        dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
> +       if (dsz < 0)
> +               return dsz;
> +
>        if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
>                *buf = NULL;
>                return -EINVAL;
> --
> 1.7.3.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [Security] TIPC security issues
From: Dan Rosenberg @ 2010-10-27 18:26 UTC (permalink / raw)
  To: David Miller; +Cc: torvalds, jon.maloy, allan.stephens, netdev, security
In-Reply-To: <20101027.105047.183059900.davem@davemloft.net>

The proposed fix is a start, but it's not sufficient to completely fix
the problem.  What if the total of the iovecs wraps around back to 0?
The total size will be returned as a small number, but large amounts of
data will be copied into the allocated buffer since the individual
iovecs can have arbitrary sizes.

-Dan

On Wed, 2010-10-27 at 10:50 -0700, David Miller wrote:
> From: Linus Torvalds <torvalds@linux-foundation.org>
> Date: Wed, 27 Oct 2010 10:37:46 -0700
> 
> > If you _really_ care deeply, then some packet-oriented protocol can
> > just have its own private packet size limit (which would be way less
> > than 2GB), and then just look at the total size and say "oh, the total
> > size is bigger than my limit, so I'll just error out". Then, the fact
> > that verify_iovec() may have truncated the message to 2GB-1 doesn't
> > matter at all.
> > 
> > (Practically speaking, I bet all packet-oriented protocols already
> > have a limit that is enforced by simply allocation patterns, so I
> > don't think it's actually a problem even now)
> 
> This is, as it turns out, effectively what the TIPC socket layer
> already does.
> 
> Most of the send calls that propagate down to this code adding up the
> iov_len lengths gets passed a maximum packet size.
> 
> Anyways, here is what I came up with to kill this specific bug in
> TIPC:
> 
> From 39dc17049a5ed989bab8997945a048ffddf48387 Mon Sep 17 00:00:00 2001
> From: David S. Miller <davem@davemloft.net>
> Date: Wed, 27 Oct 2010 10:46:59 -0700
> Subject: [PATCH] tipc: Fix iov_len handling in message send path.
> 
> Use size_t to add together iov_len's from the iovec, error
> out with -EMSGSIZE if total is greater than INT_MAX.
> 
> Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> ---
>  net/tipc/msg.c |   13 ++++++++++---
>  1 files changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/net/tipc/msg.c b/net/tipc/msg.c
> index ecb532f..b29eb8b 100644
> --- a/net/tipc/msg.c
> +++ b/net/tipc/msg.c
> @@ -76,11 +76,15 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
>  
>  int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
>  {
> -	int dsz = 0;
> +	size_t dsz = 0;
>  	int i;
>  
>  	for (i = 0; i < num_sect; i++)
>  		dsz += msg_sect[i].iov_len;
> +
> +	if (dsz > INT_MAX)
> +		return -EMSGSIZE;
> +
>  	return dsz;
>  }
>  
> @@ -93,12 +97,15 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
>   */
>  
>  int tipc_msg_build(struct tipc_msg *hdr,
> -			    struct iovec const *msg_sect, u32 num_sect,
> -			    int max_size, int usrmem, struct sk_buff** buf)
> +		      struct iovec const *msg_sect, u32 num_sect,
> +		      int max_size, int usrmem, struct sk_buff** buf)
>  {
>  	int dsz, sz, hsz, pos, res, cnt;
>  
>  	dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
> +	if (dsz < 0)
> +		return dsz;
> +
>  	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
>  		*buf = NULL;
>  		return -EINVAL;
> -- 
> 1.7.3.2



^ permalink raw reply

* Re: [PATCH] fib_rules: __rcu annotates ctarget
From: David Miller @ 2010-10-27 18:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1288121095.2652.9.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 26 Oct 2010 21:24:55 +0200

> Adds __rcu annotation to (struct fib_rule)->ctarget
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH] inetpeer: __rcu annotations
From: David Miller @ 2010-10-27 18:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1288086938.3169.55.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 26 Oct 2010 11:55:38 +0200

> Adds __rcu annotations to inetpeer
> 	(struct inet_peer)->avl_left
> 	(struct inet_peer)->avl_right
> 
> This is a tedious cleanup, but removes one smp_wmb() from link_to_pool()
> since we now use more self documenting rcu_assign_pointer().
> 
> Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in
> all cases we dont need a memory barrier.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH] net: add __rcu annotations to protocol
From: David Miller @ 2010-10-27 18:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1288076548.3296.89.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 26 Oct 2010 09:02:28 +0200

> Add __rcu annotations to :
>         struct net_protocol *inet_protos
>         struct net_protocol *inet6_protos
> 
> And use appropriate casts to reduce sparse warnings if
> CONFIG_SPARSE_RCU_POINTER=y
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH] ipv4: add __rcu annotations to routes.c
From: David Miller @ 2010-10-27 18:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1288076527.3296.88.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 26 Oct 2010 09:02:07 +0200

> Add __rcu annotations to :
>         (struct dst_entry)->rt_next
>         (struct rt_hash_bucket)->chain
> 
> And use appropriate rcu primitives to reduce sparse warnings if
> CONFIG_SPARSE_RCU_POINTER=y
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH] tunnels: add __rcu annotations
From: David Miller @ 2010-10-27 18:22 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1288076486.3296.85.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 26 Oct 2010 09:01:26 +0200

> Add __rcu annotations to :
>         (struct ip_tunnel)->prl
>         (struct ip_tunnel_prl_entry)->next
>         (struct xfrm_tunnel)->next
> 	struct xfrm_tunnel *tunnel4_handlers
> 	struct xfrm_tunnel *tunnel64_handlers
> 
> And use appropriate rcu primitives to reduce sparse warnings if
> CONFIG_SPARSE_RCU_POINTER=y
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH] e1000e: add netpoll support for MSI/MSI-X IRQ modes
From: David Miller @ 2010-10-27 18:20 UTC (permalink / raw)
  To: dongdong.deng
  Cc: jesse, jeffrey.t.kirsher, bruce.w.allan, alexander.h.duyck,
	carolyn.wyborny, donald.c.skidmore, gregory.v.rose,
	peter.p.waskiewicz.jr, john.ronciak, e1000-devel, netdev
In-Reply-To: <1288072476-20720-1-git-send-email-dongdong.deng@windriver.com>


Intel folks, I assume you guys will look at this and integrate it.

Thanks.

^ permalink raw reply

* Re: [net-2.6 PATCH 1/1] qlge: bugfix: Restoring the vlan setting.
From: David Miller @ 2010-10-27 18:19 UTC (permalink / raw)
  To: ron.mercer; +Cc: netdev, jitendra.kalsaria, ying.lok
In-Reply-To: <1288191492-1967-1-git-send-email-ron.mercer@qlogic.com>

From: Ron Mercer <ron.mercer@qlogic.com>
Date: Wed, 27 Oct 2010 07:58:12 -0700

> Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
> Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next-2.6] be2net: Schedule/Destroy worker thread in probe()/remove() rather than open()/close()
From: David Miller @ 2010-10-27 18:14 UTC (permalink / raw)
  To: somnath.kotur; +Cc: netdev
In-Reply-To: <20101026090102.GA2460@emulex.com>

From: Somnath Kotur <somnath.kotur@emulex.com>
Date: Tue, 26 Oct 2010 14:31:03 +0530

> When async mcc compls are rcvd on an i/f that is down (and so interrupts are disabled)
> they just lie unprocessed in the compl queue.The compl queue can eventually get filled
> up and cause the BE to lock up.The fix is to use be_worker to reap mcc compls when the
> i/f is down.be_worker is now launched in be_probe() and canceled in be_remove().
> 
> Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>

Applied.

^ permalink raw reply

* Re: [patch 1/1] [PATCH BUG_FIX] ipv6: fix refcnt problem related to POSTDAD state
From: David Miller @ 2010-10-27 18:10 UTC (permalink / raw)
  To: herbert; +Cc: ursula.braun, netdev, linux-s390, vosburgh
In-Reply-To: <20101025230132.GA5474@gondor.apana.org.au>

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 25 Oct 2010 16:01:32 -0700

> On Mon, Oct 25, 2010 at 11:06:43AM +0200, Ursula Braun wrote:
>> Subject: [patch 1/1] [PATCH BUG_FIX] ipv6: fix refcnt problem related to POSTDAD state
>> 
>> From: Ursula Braun <ursula.braun@de.ibm.com>
>> 
>> After running this bonding setup script
>>     modprobe bonding miimon=100 mode=0 max_bonds=1
>>     ifconfig bond0 10.1.1.1/16
>>     ifenslave bond0 eth1
>>     ifenslave bond0 eth3
>> on s390 with qeth-driven slaves, modprobe -r fails with this message
>>     unregister_netdevice: waiting for bond0 to become free. Usage count = 1
>> due to twice detection of duplicate address.
>> Problem is caused by a missing decrease of ifp->refcnt in addrconf_dad_failure.
>> An extra call of in6_ifa_put(ifp) solves it.
>> Problem has been introduced with commit f2344a131bccdbfc5338e17fa71a807dee7944fa.
>> 
>> Signed-off-by: Ursula Braun <ursula.braun@de.ibm.com>
>> Cc: David S. Miller <davem@davemloft.net>
> 
> Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
> 
> Thanks for catching this!

Applied, thanks everyone.

^ permalink raw reply

* Re: [PATCH 2/4] caif-u5500: CAIF shared memory transport protocol
From: David Miller @ 2010-10-27 18:07 UTC (permalink / raw)
  To: sjur.brandeland
  Cc: linux, sjurbr, netdev, linux-arm-kernel, linus.walleij,
	stefan.xk.nilsson, amarnath.bangalore.revanna
In-Reply-To: <1287774235-11151-3-git-send-email-sjur.brandeland@stericsson.com>

From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 22 Oct 2010 21:03:53 +0200

> +		if (!in_irq())
> +			spin_lock_bh(&pshm_drv->lock);
> +		else
> +			spin_lock_irqsave(&pshm_drv->lock, flags);

You should never have contextual based locking schemes like this,
it's not just ugly it's also deadlock prone.

^ permalink raw reply

* Re: [PATCH net-next-2.6] net: NETIF_F_HW_CSUM does not imply FCoE CRC offload
From: David Miller @ 2010-10-27 18:01 UTC (permalink / raw)
  To: bhutchings; +Cc: netdev, yi.zou
In-Reply-To: <1287758306.2316.35.camel@achroite.uk.solarflarecom.com>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 22 Oct 2010 15:38:26 +0100

> NETIF_F_HW_CSUM indicates the ability to update an TCP/IP-style 16-bit
> checksum with the checksum of an arbitrary part of the packet data,
> whereas the FCoE CRC is something entirely different.
> 
> Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
> Cc: stable@kernel.org [2.6.32+]

Applied, thanks a lot Ben.

^ permalink raw reply

* Re: [PATCH net-next-2.6] net: Fix some corner cases in dev_can_checksum()
From: David Miller @ 2010-10-27 18:00 UTC (permalink / raw)
  To: bhutchings; +Cc: jesse, netdev
In-Reply-To: <1287756739.2316.11.camel@achroite.uk.solarflarecom.com>

From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 22 Oct 2010 15:12:19 +0100

> dev_can_checksum() incorrectly returns true in these cases:
> 
> 1. The skb has both out-of-band and in-band VLAN tags and the device
>    supports checksum offload for the encapsulated protocol but only with
>    one layer of encapsulation.
> 2. The skb has a VLAN tag and the device supports generic checksumming
>    but not in conjunction with VLAN encapsulation.
> 
> Rearrange the VLAN tag checks to avoid these.
> 
> Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>

Applied, thanks for fixing this Ben.

We really do need to figure out how to handle all of the various
"super vlan" encapsulation schemes (Cisco's RFC5517, 802.1ad,
802.1ah), and their offloads.

In fact, 802.1ad and 802.1ah use completely different framing than
the current VLAN bits.

^ permalink raw reply

* Re: tap0 device stopped working in 2.6.36 (ok in 2.6.35)
From: David Miller @ 2010-10-27 17:52 UTC (permalink / raw)
  To: nolan; +Cc: jim876, netdev
In-Reply-To: <1288201734.26640.43.camel@voxel>

From: Nolan Leake <nolan@cumulusnetworks.com>
Date: Wed, 27 Oct 2010 10:48:54 -0700

> I have no idea why ipv6 vetos the upping of a link-down interface, while
> ipv4 doesn't care.
> 
> If this is all intended behavior, then I guess I'll need to make the old
> "tap devices are always link-up" mode the default, and add a way for
> newer software to opt-in into correct link-state reporting.
> 
> David (CC'd), could you comment on this?

If ipv6 cannot send multicast packets for neighbour and router
discovery, which it must do in order to function properly over the
device, the interface is unusable.

^ permalink raw reply

* Re: [Security] TIPC security issues
From: David Miller @ 2010-10-27 17:50 UTC (permalink / raw)
  To: torvalds; +Cc: drosenberg, jon.maloy, allan.stephens, netdev, security
In-Reply-To: <AANLkTinHVXDJbgJ0zBVRnr8R4sx=NtZCaFgbFC+8DE+q@mail.gmail.com>

From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Oct 2010 10:37:46 -0700

> If you _really_ care deeply, then some packet-oriented protocol can
> just have its own private packet size limit (which would be way less
> than 2GB), and then just look at the total size and say "oh, the total
> size is bigger than my limit, so I'll just error out". Then, the fact
> that verify_iovec() may have truncated the message to 2GB-1 doesn't
> matter at all.
> 
> (Practically speaking, I bet all packet-oriented protocols already
> have a limit that is enforced by simply allocation patterns, so I
> don't think it's actually a problem even now)

This is, as it turns out, effectively what the TIPC socket layer
already does.

Most of the send calls that propagate down to this code adding up the
iov_len lengths gets passed a maximum packet size.

Anyways, here is what I came up with to kill this specific bug in
TIPC:

>From 39dc17049a5ed989bab8997945a048ffddf48387 Mon Sep 17 00:00:00 2001
From: David S. Miller <davem@davemloft.net>
Date: Wed, 27 Oct 2010 10:46:59 -0700
Subject: [PATCH] tipc: Fix iov_len handling in message send path.

Use size_t to add together iov_len's from the iovec, error
out with -EMSGSIZE if total is greater than INT_MAX.

Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c |   13 ++++++++++---
 1 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ecb532f..b29eb8b 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -76,11 +76,15 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
 
 int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
 {
-	int dsz = 0;
+	size_t dsz = 0;
 	int i;
 
 	for (i = 0; i < num_sect; i++)
 		dsz += msg_sect[i].iov_len;
+
+	if (dsz > INT_MAX)
+		return -EMSGSIZE;
+
 	return dsz;
 }
 
@@ -93,12 +97,15 @@ int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
  */
 
 int tipc_msg_build(struct tipc_msg *hdr,
-			    struct iovec const *msg_sect, u32 num_sect,
-			    int max_size, int usrmem, struct sk_buff** buf)
+		      struct iovec const *msg_sect, u32 num_sect,
+		      int max_size, int usrmem, struct sk_buff** buf)
 {
 	int dsz, sz, hsz, pos, res, cnt;
 
 	dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
+	if (dsz < 0)
+		return dsz;
+
 	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
 		*buf = NULL;
 		return -EINVAL;
-- 
1.7.3.2


^ permalink raw reply related

* Re: tap0 device stopped working in 2.6.36 (ok in 2.6.35)
From: Nolan Leake @ 2010-10-27 17:48 UTC (permalink / raw)
  To: Jim; +Cc: netdev, David Miller
In-Reply-To: <4CC84EAD.7040506@xs4all.nl>

On Wed, 2010-10-27 at 18:09 +0200, Jim wrote:
> Not exactly, VirtualBox calls it "bridged adapter", it 'bridges' the
> guest machine to the tap0 interface on the host for so called host-only
> networking.
> See eg. http://forums.virtualbox.org/viewtopic.php?f=1&t=165

OK, so you have the tap0 device, and you assign an IP to it and run
dhcpd on it.  Understood.  Thank you for the explanatory link.

> And this sequence is now simply failing
>   tunctl -t tap0 -u tuxuser
>   ifconfig tap0 10.0.0.1 up

The link is not ready until some process has attached to the tap device.
tunctl simply attaches and then immediately detaches, leaving it
link-down until the virtualbox process starts and attaches.

But this doesn't cause the problem for me!  I suspect that is because I
am running an ipv4 only kernel; the "ADDRCONF(NETDEV_UP): tap0: link is
not ready" error comes from net/ipv6/addrconf.c.

I have no idea why ipv6 vetos the upping of a link-down interface, while
ipv4 doesn't care.

If this is all intended behavior, then I guess I'll need to make the old
"tap devices are always link-up" mode the default, and add a way for
newer software to opt-in into correct link-state reporting.

David (CC'd), could you comment on this?

Thanks,
Nolan

^ permalink raw reply

* Re: [Security] TIPC security issues
From: Linus Torvalds @ 2010-10-27 17:37 UTC (permalink / raw)
  To: David Miller; +Cc: drosenberg, jon.maloy, allan.stephens, netdev, security
In-Reply-To: <20101027.102940.112580564.davem@davemloft.net>

On Wed, Oct 27, 2010 at 10:29 AM, David Miller <davem@davemloft.net> wrote:
>
> But for a datagram socket, we have to have a one-to-one correspondance
> between write() calls and packets on the wire.  So we'd either need to
> accept the entire write() length or fail it with an error.

I disagree. We had that exact issue with regular file read/write: in
theory, POSIX says that you should never do a partial write to a
regular file.

And the thing is, WE SIMPLY DON'T CARE. If somebody does a 2GB+ IO,
they damn well need to accept that it's not going to be some atomic
single event. It doesn't matter _how_ much actual real memory you
have, it's just stupid to even care about that situation. It's not
something any real app actually can reasonably ever expect to work, so
rather than say "we have to do it right or error out", you should just
see it as a "it's a stupid situation, we can do whatever the hell we
want, because anybody who cares is a f*cking moron that we don't care
about".

If you _really_ care deeply, then some packet-oriented protocol can
just have its own private packet size limit (which would be way less
than 2GB), and then just look at the total size and say "oh, the total
size is bigger than my limit, so I'll just error out". Then, the fact
that verify_iovec() may have truncated the message to 2GB-1 doesn't
matter at all.

(Practically speaking, I bet all packet-oriented protocols already
have a limit that is enforced by simply allocation patterns, so I
don't think it's actually a problem even now)

                   Linus

^ permalink raw reply

* Re: [Security] TIPC security issues
From: David Miller @ 2010-10-27 17:29 UTC (permalink / raw)
  To: torvalds; +Cc: drosenberg, jon.maloy, allan.stephens, netdev, security
In-Reply-To: <AANLkTi=V93A660+YS8C2TvC13kGUcJpFgjPHUvONd_WW@mail.gmail.com>

From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 21 Oct 2010 17:31:12 -0700

> Something like the appended UNTESTED patch. NOTE! it actually makes
> "verify_iovec()" *change* the iovec if it grows too big.

Ok I thought a bit about this patch.

How we can behave here depends upon the socket type.

For example, for a stream socket such as TCP a partial write is OK and
we could truncate the iovec like this.  That's fine.

But for a datagram socket, we have to have a one-to-one correspondance
between write() calls and packets on the wire.  So we'd either need to
accept the entire write() length or fail it with an error.

verify_iovec() (currently) doesn't have the socket type information
available, so it's not able to key off of that right now.

I agree that cutting off these cases at a high level would be the
thing to do long term, but right now verify_iovec() isn't positioned
such that we can do it just yet.

For now I'm going to look into specifically fixing the TIPC case and
also think longer term about another way to address this at a high
level.

^ permalink raw reply

* [PATCH] bonding: Fix lockdep warning after bond_vlan_rx_register()
From: Jarek Poplawski @ 2010-10-27 17:08 UTC (permalink / raw)
  To: David Miller; +Cc: Eric Dumazet, netdev, Jesse Gross, Jay Vosburgh
In-Reply-To: <1288184416.2709.109.camel@edumazet-laptop>

On Wed, Oct 27, 2010 at 03:00:16PM +0200, Eric Dumazet wrote:
> 
> Indeed this is the right fix, I wonder why I did not catch it before ?
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks,
Jarek P.
------------------------>

Fix lockdep warning:
[   52.991402] ======================================================
[   52.991511] [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ]
[   52.991569] 2.6.36-04573-g4b60626-dirty #65
[   52.991622] ------------------------------------------------------
[   52.991696] ip/4842 [HC0[0]:SC0[4]:HE1:SE0] is trying to acquire:
[   52.991758]  (&bond->lock){++++..}, at: [<efe4d300>] bond_set_multicast_list+0x60/0x2c0 [bonding]
[   52.991966] 
[   52.991967] and this task is already holding:
[   52.992008]  (&bonding_netdev_addr_lock_key){+.....}, at: [<c04e5530>] dev_mc_sync+0x50/0xa0
[   52.992008] which would create a new lock dependency:
[   52.992008]  (&bonding_netdev_addr_lock_key){+.....} -> (&bond->lock){++++..}
[   52.992008] 
[   52.992008] but this new dependency connects a SOFTIRQ-irq-safe lock:
[   52.992008]  (&(&mc->mca_lock)->rlock){+.-...}
[   52.992008] ... which became SOFTIRQ-irq-safe at:
[   52.992008]   [<c0272beb>] __lock_acquire+0x96b/0x1960
[   52.992008]   [<c027415e>] lock_acquire+0x7e/0xf0
[   52.992008]   [<c05f356d>] _raw_spin_lock_bh+0x3d/0x50
[   52.992008]   [<c0584e40>] mld_ifc_timer_expire+0xf0/0x280
[   52.992008]   [<c024cee6>] run_timer_softirq+0x146/0x310
[   52.992008]   [<c024591d>] __do_softirq+0xad/0x1c0
[   52.992008] 
[   52.992008] to a SOFTIRQ-irq-unsafe lock:
[   52.992008]  (&bond->lock){++++..}
[   52.992008] ... which became SOFTIRQ-irq-unsafe at:
[   52.992008] ...  [<c0272c3b>] __lock_acquire+0x9bb/0x1960
[   52.992008]   [<c027415e>] lock_acquire+0x7e/0xf0
[   52.992008]   [<c05f36b8>] _raw_write_lock+0x38/0x50
[   52.992008]   [<efe4cbe4>] bond_vlan_rx_register+0x24/0x70 [bonding]
[   52.992008]   [<c0598010>] register_vlan_dev+0xc0/0x280
[   52.992008]   [<c0599f3a>] vlan_newlink+0xaa/0xd0
[   52.992008]   [<c04ed4b4>] rtnl_newlink+0x404/0x490
[   52.992008]   [<c04ece35>] rtnetlink_rcv_msg+0x1e5/0x220
[   52.992008]   [<c050424e>] netlink_rcv_skb+0x8e/0xb0
[   52.992008]   [<c04ecbac>] rtnetlink_rcv+0x1c/0x30
[   52.992008]   [<c0503bfb>] netlink_unicast+0x24b/0x290
[   52.992008]   [<c0503e37>] netlink_sendmsg+0x1f7/0x310
[   52.992008]   [<c04cd41c>] sock_sendmsg+0xac/0xe0
[   52.992008]   [<c04ceb80>] sys_sendmsg+0x130/0x230
[   52.992008]   [<c04cf04e>] sys_socketcall+0xde/0x280
[   52.992008]   [<c0202d10>] sysenter_do_call+0x12/0x36
[   52.992008] 
[   52.992008] other info that might help us debug this:
...
[ Full info at netdev: Wed, 27 Oct 2010 12:24:30 +0200
  Subject: [BUG net-2.6 vlan/bonding] lockdep splats ]

Use BH variant of write_lock(&bond->lock) (as elsewhere in bond_main)
to prevent this dependency.

Fixes commit f35188faa0fbabefac476536994f4b6f3677380f [v2.6.36]

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jay Vosburgh <fubar@us.ibm.com>
---

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index beb3b7c..bdb68a6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -493,9 +493,9 @@ static void bond_vlan_rx_register(struct net_device *bond_dev,
 	struct slave *slave;
 	int i;
 
-	write_lock(&bond->lock);
+	write_lock_bh(&bond->lock);
 	bond->vlgrp = grp;
-	write_unlock(&bond->lock);
+	write_unlock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
 		struct net_device *slave_dev = slave->dev;

^ permalink raw reply related

* Re: [PATCH 5/5] tcp: ipv4 listen state scaled
From: Dmitry Popov @ 2010-10-27 16:44 UTC (permalink / raw)
  To: Alexey Kuznetsov; +Cc: netdev
In-Reply-To: <20101027150434.GA13121@ms2.inr.ac.ru>

On Wed, Oct 27, 2010 at 7:04 PM, Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> wrote:
> Hello!
>
> It looks like there is at least one hole here.
>
> You take lock, check syn table and drop lock in tcp_v4_hnd_req().
> Then you immediately enter tcp_v4_conn_request() and grab lock again.
> Oops, in the tiny hole while lock was dropped the request can be already
> created (even funnier, the whole socket can be already created and even accepted).
> So, if you drop lock, you have to restart the whole tcp_v4_rcv_listen()
> (which seems to be impossible without additional tricks)
>
> Alexey
>

Hello, Alexey!

Yes, it may happen, but I don't see any problem. On 2 same SYN-packets
we will add 2 requests to syn table. Yes, it's not so good, but
nothing criminal, no?

Regards,
Dmitry.

^ permalink raw reply

* E-Mail Quote zu aktualisieren
From: khorst @ 2010-10-27 16:09 UTC (permalink / raw)


Sie haben die Lagerung für Ihr Postfach überschritten wird.

Sie werden nicht in der Lage zu senden oder zu empfangen, bis Sie neue
E-Mail Ihre E-Mail Quote zu aktualisieren.

Kopieren Sie den untenstehenden Link und füllen Sie das Formular, um Ihr
Konto zu aktualisieren.

http://www.do.my/e-mailaccountactivation/

System-Administrator
192.168.0.1











^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox