Netdev List
 help / color / mirror / Atom feed
* [net-next RFC V3 0/6] Multiqueue support in tun/tap
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

Hello All:

This is an update of multiqueue support in tun/tap from V2. Please consider to
merge.

The main idea for this series is to let tun/tap device to benefit from
multiqueue network cards and multi-cores host by letting it to be able to
transmit and receive packets from mmultiple sockets/queues. This series allows
multiple sockets to be attached and detached to the tun/tap devices. Userspace
could utilize this parallism to achiveve higher throughput.

Some quick overview of the design:

- Moving socket from tun_device to tun_file.
- Allowing multiple sockets to be attached to a tun/tap devices.
- Using RCU to synchronize the data path and system call.
- A simple hash based queue selecting algorithm is used to choose the tx queue.
- Two new ioctls were added for the usespace to attach and detach socket to the device.
- ABI compatibility were maintained, userspace that only use one queue won't
 need any changes.

Performance test:

This series were originally designed to serve as the backend of the multiqueue
virito-net in kvm guest. But the design is generic enough to let it to be reused
by any other type of userspace.

Since I would post a series of multiqueue virtio-net as RFC, so I would post the
performance result in that thread. To summarize the performance, the multiqueue
improves the transaction in the TCP_RR test but have some regression in small
packets transmission in TCP_STREAM test.

Martin test the multiqueue tap for their userspace, and he see an improvement in
terms of packets per second.

References:
- V2 of multiqueue tun/tap, http://lwn.net/Articles/459270/
- V1 of multiqueue tun/tap, http://www.mail-archive.com/kvm@vger.kernel.org/msg59479.html

Changes from V2:

- Rebase to the latest net-next
- Fix netdev leak when tun_attach fails
- Fix return value of TUNSETOWNER
- Purge the receive queue in socket destructor
- Enable multiqueue tun (V1 and V2 only allows mq to be eanbled for tap
- Add per-queue u64 statistics
- Fix wrong BUG_ON() check in tun_detach()
- Check numqueues instead of tfile[0] in tun_set_iff() to let tunctl -d works
  correctly
- Set numqueues to MAX_TAP_QUEUES during tun_detach_all() to prevent the
  attaching.

Changes from V1:

- Simplify the sockets array management by not leaving NULL in the slot.
- Optimization on the tx queue selecting.
- Fix the bug in tun_deatch_all()

Jason Wang (6):
  tuntap: move socket to tun_file
  tuntap: categorize ioctl
  tuntap: introduce multiqueue flags
  tuntap: multiqueue support
  tuntap: per queue 64 bit stats
  tuntap: add ioctls to attach or detach a file form tuntap device

 drivers/net/tun.c      |  797 ++++++++++++++++++++++++++++++------------------
 include/linux/if_tun.h |    5 +
 2 files changed, 503 insertions(+), 299 deletions(-)

^ permalink raw reply

* [PATCH 1/6] tuntap: move socket to tun_file
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

This patch moves socket structure from tun_device and to tun_file in order to
let it possbile for multiple sockets to be attached to tun/tap device. The
reference between tap device and socket was setup during TUNSETIFF as
usual.

After this patch, we can go further towards multiqueue tun/tap support by
storing an array of pointers of tun_file in tun_device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |  352 +++++++++++++++++++++++++++--------------------------
 1 files changed, 181 insertions(+), 171 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 987aeef..1f27789 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -108,9 +108,16 @@ struct tap_filter {
 };
 
 struct tun_file {
+	struct sock sk;
+	struct socket socket;
+	struct socket_wq wq;
+	int vnet_hdr_sz;
+	struct tap_filter txflt;
 	atomic_t count;
 	struct tun_struct *tun;
 	struct net *net;
+	struct fasync_struct *fasync;
+	unsigned int flags;
 };
 
 struct tun_sock;
@@ -125,29 +132,12 @@ struct tun_struct {
 	netdev_features_t	set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
 			  NETIF_F_TSO6|NETIF_F_UFO)
-	struct fasync_struct	*fasync;
-
-	struct tap_filter       txflt;
-	struct socket		socket;
-	struct socket_wq	wq;
-
-	int			vnet_hdr_sz;
 
 #ifdef TUN_DEBUG
 	int debug;
 #endif
 };
 
-struct tun_sock {
-	struct sock		sk;
-	struct tun_struct	*tun;
-};
-
-static inline struct tun_sock *tun_sk(struct sock *sk)
-{
-	return container_of(sk, struct tun_sock, sk);
-}
-
 static int tun_attach(struct tun_struct *tun, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
@@ -168,10 +158,9 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	err = 0;
 	tfile->tun = tun;
 	tun->tfile = tfile;
-	tun->socket.file = file;
 	netif_carrier_on(tun->dev);
 	dev_hold(tun->dev);
-	sock_hold(tun->socket.sk);
+	sock_hold(&tfile->sk);
 	atomic_inc(&tfile->count);
 
 out:
@@ -181,15 +170,15 @@ out:
 
 static void __tun_detach(struct tun_struct *tun)
 {
+	struct tun_file *tfile = tun->tfile;
 	/* Detach from net device */
 	netif_tx_lock_bh(tun->dev);
 	netif_carrier_off(tun->dev);
 	tun->tfile = NULL;
-	tun->socket.file = NULL;
 	netif_tx_unlock_bh(tun->dev);
 
 	/* Drop read queue */
-	skb_queue_purge(&tun->socket.sk->sk_receive_queue);
+	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
 
 	/* Drop the extra count on the net device */
 	dev_put(tun->dev);
@@ -348,19 +337,12 @@ static void tun_net_uninit(struct net_device *dev)
 	/* Inform the methods they need to stop using the dev.
 	 */
 	if (tfile) {
-		wake_up_all(&tun->wq.wait);
+		wake_up_all(&tfile->wq.wait);
 		if (atomic_dec_and_test(&tfile->count))
 			__tun_detach(tun);
 	}
 }
 
-static void tun_free_netdev(struct net_device *dev)
-{
-	struct tun_struct *tun = netdev_priv(dev);
-
-	sk_release_kernel(tun->socket.sk);
-}
-
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
@@ -379,24 +361,26 @@ static int tun_net_close(struct net_device *dev)
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile = tun->tfile;
 
 	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
 
 	/* Drop packet if interface is not attached */
-	if (!tun->tfile)
+	if (!tfile)
 		goto drop;
 
 	/* Drop if the filter does not like it.
 	 * This is a noop if the filter is disabled.
 	 * Filter can be enabled only for the TAP devices. */
-	if (!check_filter(&tun->txflt, skb))
+	if (!check_filter(&tfile->txflt, skb))
 		goto drop;
 
-	if (tun->socket.sk->sk_filter &&
-	    sk_filter(tun->socket.sk, skb))
+	if (tfile->socket.sk->sk_filter &&
+	    sk_filter(tfile->socket.sk, skb))
 		goto drop;
 
-	if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) {
+	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
+	    >= dev->tx_queue_len) {
 		if (!(tun->flags & TUN_ONE_QUEUE)) {
 			/* Normal queueing mode. */
 			/* Packet scheduler handles dropping of further packets. */
@@ -417,12 +401,12 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_orphan(skb);
 
 	/* Enqueue packet */
-	skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb);
+	skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);
 
 	/* Notify and wake up reader process */
-	if (tun->flags & TUN_FASYNC)
-		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-	wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
+	if (tfile->flags & TUN_FASYNC)
+		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
 				   POLLRDNORM | POLLRDBAND);
 	return NETDEV_TX_OK;
 
@@ -550,11 +534,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	if (!tun)
 		return POLLERR;
 
-	sk = tun->socket.sk;
+	sk = tfile->socket.sk;
 
 	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
 
-	poll_wait(file, &tun->wq.wait, wait);
+	poll_wait(file, &tfile->wq.wait, wait);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
@@ -573,11 +557,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 
 /* prepad is the amount to reserve at front.  len is length after that.
  * linear is a hint as to how much to copy (usually headers). */
-static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
+static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
 				     size_t prepad, size_t len,
 				     size_t linear, int noblock)
 {
-	struct sock *sk = tun->socket.sk;
+	struct sock *sk = tfile->socket.sk;
 	struct sk_buff *skb;
 	int err;
 
@@ -601,7 +585,7 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 }
 
 /* Get packet from user space buffer */
-static ssize_t tun_get_user(struct tun_struct *tun,
+static ssize_t tun_get_user(struct tun_file *tfile,
 			    const struct iovec *iv, size_t count,
 			    int noblock)
 {
@@ -610,8 +594,10 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 	size_t len = count, align = NET_SKB_PAD;
 	struct virtio_net_hdr gso = { 0 };
 	int offset = 0;
+	struct tun_struct *tun = NULL;
+	bool drop = false, error = false;
 
-	if (!(tun->flags & TUN_NO_PI)) {
+	if (!(tfile->flags & TUN_NO_PI)) {
 		if ((len -= sizeof(pi)) > count)
 			return -EINVAL;
 
@@ -620,8 +606,9 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 		offset += sizeof(pi);
 	}
 
-	if (tun->flags & TUN_VNET_HDR) {
-		if ((len -= tun->vnet_hdr_sz) > count)
+	if (tfile->flags & TUN_VNET_HDR) {
+		len -= tfile->vnet_hdr_sz;
+		if (len > count)
 			return -EINVAL;
 
 		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
@@ -633,41 +620,43 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 
 		if (gso.hdr_len > len)
 			return -EINVAL;
-		offset += tun->vnet_hdr_sz;
+		offset += tfile->vnet_hdr_sz;
 	}
 
-	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
+	if ((tfile->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
 		align += NET_IP_ALIGN;
 		if (unlikely(len < ETH_HLEN ||
 			     (gso.hdr_len && gso.hdr_len < ETH_HLEN)))
 			return -EINVAL;
 	}
 
-	skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
+	skb = tun_alloc_skb(tfile, align, len, gso.hdr_len, noblock);
+
 	if (IS_ERR(skb)) {
 		if (PTR_ERR(skb) != -EAGAIN)
-			tun->dev->stats.rx_dropped++;
-		return PTR_ERR(skb);
+			drop = true;
+		count = PTR_ERR(skb);
+		goto err;
 	}
 
 	if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
-		tun->dev->stats.rx_dropped++;
+		drop = true;
 		kfree_skb(skb);
-		return -EFAULT;
+		count = -EFAULT;
+		goto err;
 	}
 
 	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
 		if (!skb_partial_csum_set(skb, gso.csum_start,
 					  gso.csum_offset)) {
-			tun->dev->stats.rx_frame_errors++;
-			kfree_skb(skb);
-			return -EINVAL;
+			error = true;
+			goto err_free;
 		}
 	}
 
-	switch (tun->flags & TUN_TYPE_MASK) {
+	switch (tfile->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
-		if (tun->flags & TUN_NO_PI) {
+		if (tfile->flags & TUN_NO_PI) {
 			switch (skb->data[0] & 0xf0) {
 			case 0x40:
 				pi.proto = htons(ETH_P_IP);
@@ -676,18 +665,15 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 				pi.proto = htons(ETH_P_IPV6);
 				break;
 			default:
-				tun->dev->stats.rx_dropped++;
-				kfree_skb(skb);
-				return -EINVAL;
+				drop = true;
+				goto err_free;
 			}
 		}
 
 		skb_reset_mac_header(skb);
 		skb->protocol = pi.proto;
-		skb->dev = tun->dev;
 		break;
 	case TUN_TAP_DEV:
-		skb->protocol = eth_type_trans(skb, tun->dev);
 		break;
 	}
 
@@ -704,9 +690,8 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 			break;
 		default:
-			tun->dev->stats.rx_frame_errors++;
-			kfree_skb(skb);
-			return -EINVAL;
+			error = true;
+			goto err_free;
 		}
 
 		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
@@ -714,9 +699,8 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 
 		skb_shinfo(skb)->gso_size = gso.gso_size;
 		if (skb_shinfo(skb)->gso_size == 0) {
-			tun->dev->stats.rx_frame_errors++;
-			kfree_skb(skb);
-			return -EINVAL;
+			error = true;
+			goto err_free;
 		}
 
 		/* Header must be checked, and gso_segs computed. */
@@ -724,11 +708,38 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 		skb_shinfo(skb)->gso_segs = 0;
 	}
 
-	netif_rx_ni(skb);
+	tun = __tun_get(tfile);
+	if (!tun)
+		return -EBADFD;
 
+	switch (tfile->flags & TUN_TYPE_MASK) {
+	case TUN_TUN_DEV:
+		skb->dev = tun->dev;
+		break;
+	case TUN_TAP_DEV:
+		skb->protocol = eth_type_trans(skb, tun->dev);
+		break;
+	}
+
+	netif_rx_ni(skb);
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
+	tun_put(tun);
+	return count;
+
+err_free:
+	count = -EINVAL;
+	kfree_skb(skb);
+err:
+	tun = __tun_get(tfile);
+	if (!tun)
+		return -EBADFD;
 
+	if (drop)
+		tun->dev->stats.rx_dropped++;
+	if (error)
+		tun->dev->stats.rx_frame_errors++;
+	tun_put(tun);
 	return count;
 }
 
@@ -736,30 +747,25 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 			      unsigned long count, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct tun_struct *tun = tun_get(file);
+	struct tun_file *tfile = file->private_data;
 	ssize_t result;
 
-	if (!tun)
-		return -EBADFD;
-
-	tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
-
-	result = tun_get_user(tun, iv, iov_length(iv, count),
+	result = tun_get_user(tfile, iv, iov_length(iv, count),
 			      file->f_flags & O_NONBLOCK);
 
-	tun_put(tun);
 	return result;
 }
 
 /* Put packet to the user space buffer */
-static ssize_t tun_put_user(struct tun_struct *tun,
+static ssize_t tun_put_user(struct tun_file *tfile,
 			    struct sk_buff *skb,
 			    const struct iovec *iv, int len)
 {
+	struct tun_struct *tun = NULL;
 	struct tun_pi pi = { 0, skb->protocol };
 	ssize_t total = 0;
 
-	if (!(tun->flags & TUN_NO_PI)) {
+	if (!(tfile->flags & TUN_NO_PI)) {
 		if ((len -= sizeof(pi)) < 0)
 			return -EINVAL;
 
@@ -773,9 +779,10 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 		total += sizeof(pi);
 	}
 
-	if (tun->flags & TUN_VNET_HDR) {
+	if (tfile->flags & TUN_VNET_HDR) {
 		struct virtio_net_hdr gso = { 0 }; /* no info leak */
-		if ((len -= tun->vnet_hdr_sz) < 0)
+		len -= tfile->vnet_hdr_sz;
+		if (len < 0)
 			return -EINVAL;
 
 		if (skb_is_gso(skb)) {
@@ -818,7 +825,7 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 		if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total,
 					       sizeof(gso))))
 			return -EFAULT;
-		total += tun->vnet_hdr_sz;
+		total += tfile->vnet_hdr_sz;
 	}
 
 	len = min_t(int, skb->len, len);
@@ -826,29 +833,33 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
 	total += skb->len;
 
-	tun->dev->stats.tx_packets++;
-	tun->dev->stats.tx_bytes += len;
+	tun = __tun_get(tfile);
+	if (tun) {
+		tun->dev->stats.tx_packets++;
+		tun->dev->stats.tx_bytes += len;
+		tun_put(tun);
+	}
 
 	return total;
 }
 
-static ssize_t tun_do_read(struct tun_struct *tun,
+static ssize_t tun_do_read(struct tun_file *tfile,
 			   struct kiocb *iocb, const struct iovec *iv,
 			   ssize_t len, int noblock)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
 	ssize_t ret = 0;
-
-	tun_debug(KERN_INFO, tun, "tun_chr_read\n");
+	struct tun_struct *tun = NULL;
 
 	if (unlikely(!noblock))
-		add_wait_queue(&tun->wq.wait, &wait);
+		add_wait_queue(&tfile->wq.wait, &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
+		skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue);
 		/* Read frames from the queue */
-		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
+		if (!skb) {
 			if (noblock) {
 				ret = -EAGAIN;
 				break;
@@ -857,25 +868,38 @@ static ssize_t tun_do_read(struct tun_struct *tun,
 				ret = -ERESTARTSYS;
 				break;
 			}
+
+			tun = __tun_get(tfile);
+			if (!tun) {
+				ret = -EIO;
+				break;
+			}
 			if (tun->dev->reg_state != NETREG_REGISTERED) {
 				ret = -EIO;
+				tun_put(tun);
 				break;
 			}
+			tun_put(tun);
 
 			/* Nothing to read, let's sleep */
 			schedule();
 			continue;
 		}
-		netif_wake_queue(tun->dev);
 
-		ret = tun_put_user(tun, skb, iv, len);
+		tun = __tun_get(tfile);
+		if (tun) {
+			netif_wake_queue(tun->dev);
+			tun_put(tun);
+		}
+
+		ret = tun_put_user(tfile, skb, iv, len);
 		kfree_skb(skb);
 		break;
 	}
 
 	current->state = TASK_RUNNING;
 	if (unlikely(!noblock))
-		remove_wait_queue(&tun->wq.wait, &wait);
+		remove_wait_queue(&tfile->wq.wait, &wait);
 
 	return ret;
 }
@@ -885,21 +909,17 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 {
 	struct file *file = iocb->ki_filp;
 	struct tun_file *tfile = file->private_data;
-	struct tun_struct *tun = __tun_get(tfile);
 	ssize_t len, ret;
 
-	if (!tun)
-		return -EBADFD;
 	len = iov_length(iv, count);
 	if (len < 0) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
+	ret = tun_do_read(tfile, iocb, iv, len, file->f_flags & O_NONBLOCK);
 	ret = min_t(ssize_t, ret, len);
 out:
-	tun_put(tun);
 	return ret;
 }
 
@@ -911,7 +931,7 @@ static void tun_setup(struct net_device *dev)
 	tun->group = -1;
 
 	dev->ethtool_ops = &tun_ethtool_ops;
-	dev->destructor = tun_free_netdev;
+	dev->destructor = free_netdev;
 }
 
 /* Trivial set of netlink ops to allow deleting tun or tap
@@ -931,7 +951,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
 
 static void tun_sock_write_space(struct sock *sk)
 {
-	struct tun_struct *tun;
+	struct tun_file *tfile = NULL;
 	wait_queue_head_t *wqueue;
 
 	if (!sock_writeable(sk))
@@ -945,37 +965,38 @@ static void tun_sock_write_space(struct sock *sk)
 		wake_up_interruptible_sync_poll(wqueue, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
-	tun = tun_sk(sk)->tun;
-	kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
-}
-
-static void tun_sock_destruct(struct sock *sk)
-{
-	free_netdev(tun_sk(sk)->tun->dev);
+	tfile = container_of(sk, struct tun_file, sk);
+	kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
 }
 
 static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 		       struct msghdr *m, size_t total_len)
 {
-	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
-	return tun_get_user(tun, m->msg_iov, total_len,
-			    m->msg_flags & MSG_DONTWAIT);
+	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+	ssize_t result;
+
+	result = tun_get_user(tfile, m->msg_iov, total_len,
+			      m->msg_flags & MSG_DONTWAIT);
+	return result;
 }
 
 static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
 		       struct msghdr *m, size_t total_len,
 		       int flags)
 {
-	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	int ret;
+
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
 		return -EINVAL;
-	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+
+	ret = tun_do_read(tfile, iocb, m->msg_iov, total_len,
 			  flags & MSG_DONTWAIT);
 	if (ret > total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
 	}
+
 	return ret;
 }
 
@@ -996,7 +1017,7 @@ static const struct proto_ops tun_socket_ops = {
 static struct proto tun_proto = {
 	.name		= "tun",
 	.owner		= THIS_MODULE,
-	.obj_size	= sizeof(struct tun_sock),
+	.obj_size	= sizeof(struct tun_file),
 };
 
 static int tun_flags(struct tun_struct *tun)
@@ -1047,8 +1068,8 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
 
 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
-	struct sock *sk;
 	struct tun_struct *tun;
+	struct tun_file *tfile = file->private_data;
 	struct net_device *dev;
 	int err;
 
@@ -1069,7 +1090,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		     (tun->group != -1 && !in_egroup_p(tun->group))) &&
 		    !capable(CAP_NET_ADMIN))
 			return -EPERM;
-		err = security_tun_dev_attach(tun->socket.sk);
+		err = security_tun_dev_attach(tfile->socket.sk);
 		if (err < 0)
 			return err;
 
@@ -1113,25 +1134,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun = netdev_priv(dev);
 		tun->dev = dev;
 		tun->flags = flags;
-		tun->txflt.count = 0;
-		tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
-		err = -ENOMEM;
-		sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
-		if (!sk)
-			goto err_free_dev;
-
-		sk_change_net(sk, net);
-		tun->socket.wq = &tun->wq;
-		init_waitqueue_head(&tun->wq.wait);
-		tun->socket.ops = &tun_socket_ops;
-		sock_init_data(&tun->socket, sk);
-		sk->sk_write_space = tun_sock_write_space;
-		sk->sk_sndbuf = INT_MAX;
-
-		tun_sk(sk)->tun = tun;
-
-		security_tun_dev_post_create(sk);
+		security_tun_dev_post_create(&tfile->sk);
 
 		tun_net_init(dev);
 
@@ -1141,15 +1145,13 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		err = register_netdevice(tun->dev);
 		if (err < 0)
-			goto err_free_sk;
+			goto err_free_dev;
 
 		if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
 		    device_create_file(&tun->dev->dev, &dev_attr_owner) ||
 		    device_create_file(&tun->dev->dev, &dev_attr_group))
 			pr_err("Failed to create tun sysfs files\n");
 
-		sk->sk_destruct = tun_sock_destruct;
-
 		err = tun_attach(tun, file);
 		if (err < 0)
 			goto failed;
@@ -1172,6 +1174,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	else
 		tun->flags &= ~TUN_VNET_HDR;
 
+	/* Cache flags from tun device */
+	tfile->flags = tun->flags;
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
 	 */
@@ -1181,11 +1185,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	strcpy(ifr->ifr_name, tun->dev->name);
 	return 0;
 
- err_free_sk:
-	tun_free_netdev(dev);
- err_free_dev:
+err_free_dev:
 	free_netdev(dev);
- failed:
+failed:
 	return err;
 }
 
@@ -1357,9 +1359,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	case TUNSETTXFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
 			break;
-		ret = update_filter(&tun->txflt, (void __user *)arg);
+		ret = update_filter(&tfile->txflt, (void __user *)arg);
 		break;
 
 	case SIOCGIFHWADDR:
@@ -1379,7 +1381,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		break;
 
 	case TUNGETSNDBUF:
-		sndbuf = tun->socket.sk->sk_sndbuf;
+		sndbuf = tfile->socket.sk->sk_sndbuf;
 		if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
 			ret = -EFAULT;
 		break;
@@ -1390,11 +1392,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			break;
 		}
 
-		tun->socket.sk->sk_sndbuf = sndbuf;
+		tfile->socket.sk->sk_sndbuf = sndbuf;
 		break;
 
 	case TUNGETVNETHDRSZ:
-		vnet_hdr_sz = tun->vnet_hdr_sz;
+		vnet_hdr_sz = tfile->vnet_hdr_sz;
 		if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
 			ret = -EFAULT;
 		break;
@@ -1409,27 +1411,27 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			break;
 		}
 
-		tun->vnet_hdr_sz = vnet_hdr_sz;
+		tfile->vnet_hdr_sz = vnet_hdr_sz;
 		break;
 
 	case TUNATTACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
 			break;
 		ret = -EFAULT;
 		if (copy_from_user(&fprog, argp, sizeof(fprog)))
 			break;
 
-		ret = sk_attach_filter(&fprog, tun->socket.sk);
+		ret = sk_attach_filter(&fprog, tfile->socket.sk);
 		break;
 
 	case TUNDETACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
-		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
 			break;
-		ret = sk_detach_filter(tun->socket.sk);
+		ret = sk_detach_filter(tfile->socket.sk);
 		break;
 
 	default:
@@ -1481,43 +1483,50 @@ static long tun_chr_compat_ioctl(struct file *file,
 
 static int tun_chr_fasync(int fd, struct file *file, int on)
 {
-	struct tun_struct *tun = tun_get(file);
-	int ret;
-
-	if (!tun)
-		return -EBADFD;
-
-	tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on);
+	struct tun_file *tfile = file->private_data;
+	int ret = fasync_helper(fd, file, on, &tfile->fasync);
 
-	if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
+	if (ret < 0)
 		goto out;
 
 	if (on) {
 		ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
 		if (ret)
 			goto out;
-		tun->flags |= TUN_FASYNC;
+		tfile->flags |= TUN_FASYNC;
 	} else
-		tun->flags &= ~TUN_FASYNC;
+		tfile->flags &= ~TUN_FASYNC;
 	ret = 0;
 out:
-	tun_put(tun);
 	return ret;
 }
 
 static int tun_chr_open(struct inode *inode, struct file * file)
 {
+	struct net *net = current->nsproxy->net_ns;
 	struct tun_file *tfile;
 
 	DBG1(KERN_INFO, "tunX: tun_chr_open\n");
 
-	tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
+	tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
+					&tun_proto);
 	if (!tfile)
 		return -ENOMEM;
-	atomic_set(&tfile->count, 0);
+
 	tfile->tun = NULL;
-	tfile->net = get_net(current->nsproxy->net_ns);
+	tfile->net = net;
+	tfile->txflt.count = 0;
+	tfile->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+	tfile->socket.wq = &tfile->wq;
+	init_waitqueue_head(&tfile->wq.wait);
+	tfile->socket.file = file;
+	tfile->socket.ops = &tun_socket_ops;
+	sock_init_data(&tfile->socket, &tfile->sk);
+
+	tfile->sk.sk_write_space = tun_sock_write_space;
+	tfile->sk.sk_sndbuf = INT_MAX;
 	file->private_data = tfile;
+
 	return 0;
 }
 
@@ -1541,14 +1550,14 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 				unregister_netdevice(dev);
 			rtnl_unlock();
 		}
-	}
 
-	tun = tfile->tun;
-	if (tun)
-		sock_put(tun->socket.sk);
+		/* drop the reference that netdevice holds */
+		sock_put(&tfile->sk);
 
-	put_net(tfile->net);
-	kfree(tfile);
+	}
+
+	/* drop the reference that file holds */
+	sock_put(&tfile->sk);
 
 	return 0;
 }
@@ -1676,13 +1685,14 @@ static void tun_cleanup(void)
 struct socket *tun_get_socket(struct file *file)
 {
 	struct tun_struct *tun;
+	struct tun_file *tfile = file->private_data;
 	if (file->f_op != &tun_fops)
 		return ERR_PTR(-EINVAL);
 	tun = tun_get(file);
 	if (!tun)
 		return ERR_PTR(-EBADFD);
 	tun_put(tun);
-	return &tun->socket;
+	return &tfile->socket;
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH 2/6] tuntap: categorize ioctl
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

As we've moved socket related structure to file->private_data, we can optimizes
the ioctls that only touch socket out of tun_chr_ioctl() as it don't need hold
rtnl lock.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |   52 ++++++++++++++++++++++++++++++++++------------------
 1 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 1f27789..8233b0a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1248,10 +1248,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
-	struct sock_fprog fprog;
 	struct ifreq ifr;
-	int sndbuf;
-	int vnet_hdr_sz;
 	int ret;
 
 	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
@@ -1356,14 +1353,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = set_offload(tun, arg);
 		break;
 
-	case TUNSETTXFILTER:
-		/* Can be set only for TAPs */
-		ret = -EINVAL;
-		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
-			break;
-		ret = update_filter(&tfile->txflt, (void __user *)arg);
-		break;
-
 	case SIOCGIFHWADDR:
 		/* Get hw address */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
@@ -1380,6 +1369,37 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
 		break;
 
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+unlock:
+	rtnl_unlock();
+	if (tun)
+		tun_put(tun);
+	return ret;
+}
+
+static long __tun_socket_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg, int ifreq_len)
+{
+	struct tun_file *tfile = file->private_data;
+	void __user *argp = (void __user *)arg;
+	struct sock_fprog fprog;
+	int sndbuf;
+	int vnet_hdr_sz;
+	int ret = 0;
+
+	switch (cmd) {
+	case TUNSETTXFILTER:
+		/* Can be set only for TAPs */
+		ret = -EINVAL;
+		if ((tfile->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
+			break;
+		ret = update_filter(&tfile->txflt, (void __user *)arg);
+		break;
+
 	case TUNGETSNDBUF:
 		sndbuf = tfile->socket.sk->sk_sndbuf;
 		if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
@@ -1435,21 +1455,17 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		break;
 
 	default:
-		ret = -EINVAL;
+		ret = __tun_chr_ioctl(file, cmd, arg, ifreq_len);
 		break;
 	}
 
-unlock:
-	rtnl_unlock();
-	if (tun)
-		tun_put(tun);
 	return ret;
 }
 
 static long tun_chr_ioctl(struct file *file,
 			  unsigned int cmd, unsigned long arg)
 {
-	return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq));
+	return __tun_socket_ioctl(file, cmd, arg, sizeof(struct ifreq));
 }
 
 #ifdef CONFIG_COMPAT
@@ -1477,7 +1493,7 @@ static long tun_chr_compat_ioctl(struct file *file,
 	 * driver are compatible though, we don't need to convert the
 	 * contents.
 	 */
-	return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq));
+	return __tun_socket_ioctl(file, cmd, arg, sizeof(struct compat_ifreq));
 }
 #endif /* CONFIG_COMPAT */
 
-- 
1.7.1

^ permalink raw reply related

* [PATCH 3/6] tuntap: introduce multiqueue flags
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

Add flags to be used by creating multiqueue tuntap device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/linux/if_tun.h |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 06b1829..c92a291 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -34,6 +34,7 @@
 #define TUN_ONE_QUEUE	0x0080
 #define TUN_PERSIST 	0x0100	
 #define TUN_VNET_HDR 	0x0200
+#define TUN_TAP_MQ      0x0400
 
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
@@ -61,6 +62,7 @@
 #define IFF_ONE_QUEUE	0x2000
 #define IFF_VNET_HDR	0x4000
 #define IFF_TUN_EXCL	0x8000
+#define IFF_MULTI_QUEUE 0x0100
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
-- 
1.7.1

^ permalink raw reply related

* [PATCH 5/6] tuntap: per queue 64 bit stats
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

As we've added multiqueue support for tun/tap, this patch convert the statistics
to use per-queue 64 bit statistics.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |  105 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 83 insertions(+), 22 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5c26757..37e62d3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -64,6 +64,7 @@
 #include <linux/nsproxy.h>
 #include <linux/virtio_net.h>
 #include <linux/rcupdate.h>
+#include <linux/u64_stats_sync.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
@@ -109,6 +110,19 @@ struct tap_filter {
 
 #define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
 
+struct tun_queue_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	rx_syncp;
+	struct u64_stats_sync	tx_syncp;
+	u32			rx_dropped;
+	u32			tx_dropped;
+	u32			rx_frame_errors;
+	u32			tx_fifo_errors;
+};
+
 struct tun_file {
 	struct sock sk;
 	struct socket socket;
@@ -119,6 +133,7 @@ struct tun_file {
 	struct tun_struct __rcu *tun;
 	struct net *net;
 	struct fasync_struct *fasync;
+	struct tun_queue_stats stats;
 	unsigned int flags;
 	u16 queue_index;
 };
@@ -134,6 +149,7 @@ struct tun_struct {
 
 	struct net_device	*dev;
 	netdev_features_t	set_features;
+	struct tun_queue_stats	stats;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
 			  NETIF_F_TSO6|NETIF_F_UFO)
 
@@ -463,7 +479,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 			/* We won't see all dropped packets individually, so overrun
 			 * error is more appropriate. */
-			dev->stats.tx_fifo_errors++;
+			tfile->stats.tx_fifo_errors++;
 		} else {
 			/* Single queue mode or multi queue mode.
 			 * Driver handles dropping of all packets itself. */
@@ -488,7 +504,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 drop:
 	rcu_read_unlock();
-	dev->stats.tx_dropped++;
+	if (tfile)
+		tfile->stats.tx_dropped++;
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -538,6 +555,56 @@ static void tun_poll_controller(struct net_device *dev)
 	return;
 }
 #endif
+
+static struct rtnl_link_stats64 *tun_net_stats(struct net_device *dev,
+					       struct rtnl_link_stats64 *stats)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile;
+	struct tun_queue_stats *qstats;
+	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+	u32 rx_dropped = 0, tx_dropped = 0,
+	    rx_frame_errors = 0, tx_fifo_errors = 0;
+	unsigned int start;
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < tun->numqueues; i++) {
+		tfile = rcu_dereference(tun->tfiles[i]);
+		qstats = &tfile->stats;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&qstats->rx_syncp);
+			rx_packets = qstats->rx_packets;
+			rx_bytes = qstats->rx_bytes;
+		} while (u64_stats_fetch_retry_bh(&qstats->rx_syncp, start));
+
+		do {
+			start = u64_stats_fetch_begin_bh(&qstats->tx_syncp);
+			tx_packets = qstats->tx_packets;
+			tx_bytes = qstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&qstats->tx_syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes	+= rx_bytes;
+		stats->tx_packets += tx_packets;
+		stats->tx_bytes	+= tx_bytes;
+		/* following fileds are u32, no need syncp */
+		rx_dropped += qstats->rx_dropped;
+		tx_dropped += qstats->tx_dropped;
+		rx_frame_errors += qstats->rx_frame_errors;
+		tx_fifo_errors += qstats->tx_fifo_errors;
+	}
+	rcu_read_unlock();
+
+	stats->rx_dropped = rx_dropped;
+	stats->tx_dropped = tx_dropped;
+	stats->rx_frame_errors = rx_frame_errors;
+	stats->tx_fifo_errors = tx_fifo_errors;
+
+	return stats;
+}
+
 static const struct net_device_ops tun_netdev_ops = {
 	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
@@ -545,6 +612,7 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_start_xmit		= tun_net_xmit,
 	.ndo_change_mtu		= tun_net_change_mtu,
 	.ndo_fix_features	= tun_net_fix_features,
+	.ndo_get_stats64	= tun_net_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tun_poll_controller,
 #endif
@@ -560,6 +628,7 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_set_rx_mode	= tun_net_mclist,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_get_stats64	= tun_net_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tun_poll_controller,
 #endif
@@ -808,30 +877,25 @@ static ssize_t tun_get_user(struct tun_file *tfile,
 		skb->protocol = eth_type_trans(skb, tun->dev);
 		break;
 	}
-	tun->dev->stats.rx_packets++;
-	tun->dev->stats.rx_bytes += len;
 	rcu_read_unlock();
 
 	netif_rx_ni(skb);
 
+	u64_stats_update_begin(&tfile->stats.rx_syncp);
+	tfile->stats.rx_packets++;
+	tfile->stats.rx_bytes += len;
+	u64_stats_update_end(&tfile->stats.rx_syncp);
+
 	return count;
 
 err_free:
 	count = -EINVAL;
 	kfree_skb(skb);
 err:
-	rcu_read_lock();
-	tun = rcu_dereference(tfile->tun);
-	if (!tun) {
-		rcu_read_unlock();
-		return -EBADFD;
-	}
-
 	if (drop)
-		tun->dev->stats.rx_dropped++;
+		tfile->stats.rx_dropped++;
 	if (error)
-		tun->dev->stats.rx_frame_errors++;
-	rcu_read_unlock();
+		tfile->stats.rx_frame_errors++;
 	return count;
 }
 
@@ -853,7 +917,6 @@ static ssize_t tun_put_user(struct tun_file *tfile,
 			    struct sk_buff *skb,
 			    const struct iovec *iv, int len)
 {
-	struct tun_struct *tun = NULL;
 	struct tun_pi pi = { 0, skb->protocol };
 	ssize_t total = 0;
 
@@ -925,13 +988,10 @@ static ssize_t tun_put_user(struct tun_file *tfile,
 	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
 	total += skb->len;
 
-	rcu_read_lock();
-	tun = rcu_dereference(tfile->tun);
-	if (tun) {
-		tun->dev->stats.tx_packets++;
-		tun->dev->stats.tx_bytes += len;
-	}
-	rcu_read_unlock();
+	u64_stats_update_begin(&tfile->stats.tx_syncp);
+	tfile->stats.tx_packets++;
+	tfile->stats.tx_bytes += total;
+	u64_stats_update_end(&tfile->stats.tx_syncp);
 
 	return total;
 }
@@ -1650,6 +1710,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	tfile->socket.file = file;
 	tfile->socket.ops = &tun_socket_ops;
 	sock_init_data(&tfile->socket, &tfile->sk);
+	memset(&tfile->stats, 0, sizeof(struct tun_queue_stats));
 
 	tfile->sk.sk_write_space = tun_sock_write_space;
 	tfile->sk.sk_destruct = tun_sock_destruct;
-- 
1.7.1

^ permalink raw reply related

* [PATCH 6/6] tuntap: add ioctls to attach or detach a file form tuntap device
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

This patch introduces two new ioctls which is used to attach and detach a socket
from tuntap devices:

1) TUNATTACHQUEUE which is used to attach a socket to tuntap device.
2) TUNDETACHQUEUE which is used to detach a socket from tuntap device. It allows
                  a socket to be detached from the device temporarily and could
                  be re-attached again by TUNATTACHQUEUE.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c      |   25 ++++++++++++++++++++++---
 include/linux/if_tun.h |    3 +++
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 37e62d3..25d5e1f 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1411,11 +1411,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 {
 	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun;
+	struct net_device *dev = NULL;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
 	int ret;
 
-	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+	if (cmd == TUNSETIFF || cmd == TUNATTACHQUEUE || _IOC_TYPE(cmd) == 0x89)
 		if (copy_from_user(&ifr, argp, ifreq_len))
 			return -EFAULT;
 
@@ -1424,7 +1425,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		 * This is needed because we never checked for invalid flags on
 		 * TUNSETIFF. */
 		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-				IFF_VNET_HDR,
+				IFF_VNET_HDR | IFF_MULTI_QUEUE,
 				(unsigned int __user*)argp);
 	}
 
@@ -1440,6 +1441,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			return -EFAULT;
 		return ret;
 	}
+	if (cmd == TUNDETACHQUEUE)
+		return tun_detach(tfile, false);
 
 	rtnl_lock();
 
@@ -1447,7 +1450,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 
 	ret = -EBADFD;
 	tun = rcu_dereference(tfile->tun);
-	if (!tun)
+	if (!tun && cmd != TUNATTACHQUEUE)
 		goto unlock;
 	else
 		ret = 0;
@@ -1463,6 +1466,22 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			ret = -EFAULT;
 		goto out;
 
+	case TUNATTACHQUEUE:
+		dev = __dev_get_by_name(tfile->net, ifr.ifr_name);
+		if (!dev || (dev->netdev_ops != &tap_netdev_ops &&
+				dev->netdev_ops != &tun_netdev_ops))
+			ret = -EINVAL;
+		else if (ifr.ifr_flags &
+			~(IFF_TAP | IFF_TUN | IFF_NO_PI | IFF_VNET_HDR)) {
+			/* ignore illegal flag */
+			ret = -EINVAL;
+		} else {
+			tun = netdev_priv(dev);
+			tfile->flags = tun->flags;
+			ret = tun_attach(tun, file);
+		}
+		break;
+
 	case TUNSETNOCSUM:
 		/* Disable/Enable checksum */
 
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index c92a291..d3f24d8 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -54,6 +54,9 @@
 #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNATTACHQUEUE  _IOW('T', 217, int)
+#define TUNDETACHQUEUE  _IOW('T', 218, int)
+
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
1.7.1

^ permalink raw reply related

* RE: [PATCH] net-next: ipv6: ndisc: allocate a ndisc socket per inet6_dev
From: Eric Dumazet @ 2012-06-25 12:02 UTC (permalink / raw)
  To: Menny_Hamburger; +Cc: netdev
In-Reply-To: <D8C50530D6022F40A817A35C40CC06A70B34E34DA7@DUBX7MCDUB01.EMEA.DELL.COM>

On Mon, 2012-06-25 at 12:08 +0100, Menny_Hamburger@Dell.com wrote:
> I'm sorry for not responding on your post.

...

> I really want to understand how this fixes our problem.
> This fix will make the skb allocations succeed, but what mechanism releases the stuck socket associated with the bad device?

There is no stuck socket, I don't know why you believe this.

And I don't want to spend time writing a changelog if the patch doesn't
solve the problem, especially knowing you didn't even test it.

net/ipv4/arp.c arp_create() doesn't use a 'global socket' to attach skbs
on a socket.

This kind of protection makes sense for UDP / ICMP sockets, not for ARP
or ND resolution.

If you don't understand my patch, don't post your own, thats really lost
time for everyone.

We don't want adding yet another per device stuff for ipv6.
We already have too big memory footprint.

Once I am sure patch fixes the problem, I'll make an official submission
with all credits and changelog.

^ permalink raw reply

* [PATCH 4/6] tuntap: multiqueue support
From: Jason Wang @ 2012-06-25 11:59 UTC (permalink / raw)
  To: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2
  Cc: shemminger, edumazet, Jason Wang
In-Reply-To: <20120625060830.6765.27584.stgit@amd-6168-8-1.englab.nay.redhat.com>

This patch adds multiqueue support for tap device. This is done by abstracting
each queue as a file/socket and allowing multiple sockets to be attached to the
tuntap device (an array of tun_file were stored in the tun_struct). Userspace
could write and read from those files to do the parallel packet
sending/receiving.

Unlike the previous single queue implementation, the socket and device were
loosely coupled, each of them were allowed to go away first. In order to let the
tx path lockless, netif_tx_loch_bh() is replaced by RCU/NETIF_F_LLTX to
synchronize between data path and system call.

The tx queue selecting is first based on the recorded rxq index of an skb, it
there's no such one, then choosing based on rx hashing (skb_get_rxhash()).

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |  371 +++++++++++++++++++++++++++++++++--------------------
 1 files changed, 232 insertions(+), 139 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8233b0a..5c26757 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -107,6 +107,8 @@ struct tap_filter {
 	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
+#define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
+
 struct tun_file {
 	struct sock sk;
 	struct socket socket;
@@ -114,16 +116,18 @@ struct tun_file {
 	int vnet_hdr_sz;
 	struct tap_filter txflt;
 	atomic_t count;
-	struct tun_struct *tun;
+	struct tun_struct __rcu *tun;
 	struct net *net;
 	struct fasync_struct *fasync;
 	unsigned int flags;
+	u16 queue_index;
 };
 
 struct tun_sock;
 
 struct tun_struct {
-	struct tun_file		*tfile;
+	struct tun_file		*tfiles[MAX_TAP_QUEUES];
+	unsigned int            numqueues;
 	unsigned int 		flags;
 	uid_t			owner;
 	gid_t			group;
@@ -138,80 +142,159 @@ struct tun_struct {
 #endif
 };
 
-static int tun_attach(struct tun_struct *tun, struct file *file)
+static DEFINE_SPINLOCK(tun_lock);
+
+/*
+ * tun_get_queue(): calculate the queue index
+ *     - if skbs comes from mq nics, we can just borrow
+ *     - if not, calculate from the hash
+ */
+static struct tun_file *tun_get_queue(struct net_device *dev,
+				      struct sk_buff *skb)
 {
-	struct tun_file *tfile = file->private_data;
-	int err;
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile = NULL;
+	int numqueues = tun->numqueues;
+	__u32 rxq;
 
-	ASSERT_RTNL();
+	BUG_ON(!rcu_read_lock_held());
 
-	netif_tx_lock_bh(tun->dev);
+	if (!numqueues)
+		goto out;
 
-	err = -EINVAL;
-	if (tfile->tun)
+	if (numqueues == 1) {
+		tfile = rcu_dereference(tun->tfiles[0]);
 		goto out;
+	}
 
-	err = -EBUSY;
-	if (tun->tfile)
+	if (likely(skb_rx_queue_recorded(skb))) {
+		rxq = skb_get_rx_queue(skb);
+
+		while (unlikely(rxq >= numqueues))
+			rxq -= numqueues;
+
+		tfile = rcu_dereference(tun->tfiles[rxq]);
 		goto out;
+	}
 
-	err = 0;
-	tfile->tun = tun;
-	tun->tfile = tfile;
-	netif_carrier_on(tun->dev);
-	dev_hold(tun->dev);
-	sock_hold(&tfile->sk);
-	atomic_inc(&tfile->count);
+	/* Check if we can use flow to select a queue */
+	rxq = skb_get_rxhash(skb);
+	if (rxq) {
+		u32 idx = ((u64)rxq * numqueues) >> 32;
+		tfile = rcu_dereference(tun->tfiles[idx]);
+		goto out;
+	}
 
+	tfile = rcu_dereference(tun->tfiles[0]);
 out:
-	netif_tx_unlock_bh(tun->dev);
-	return err;
+	return tfile;
 }
 
-static void __tun_detach(struct tun_struct *tun)
+static int tun_detach(struct tun_file *tfile, bool clean)
 {
-	struct tun_file *tfile = tun->tfile;
-	/* Detach from net device */
-	netif_tx_lock_bh(tun->dev);
-	netif_carrier_off(tun->dev);
-	tun->tfile = NULL;
-	netif_tx_unlock_bh(tun->dev);
-
-	/* Drop read queue */
-	skb_queue_purge(&tfile->socket.sk->sk_receive_queue);
-
-	/* Drop the extra count on the net device */
-	dev_put(tun->dev);
-}
+	struct tun_struct *tun;
+	struct net_device *dev = NULL;
+	bool destroy = false;
 
-static void tun_detach(struct tun_struct *tun)
-{
-	rtnl_lock();
-	__tun_detach(tun);
-	rtnl_unlock();
-}
+	spin_lock(&tun_lock);
 
-static struct tun_struct *__tun_get(struct tun_file *tfile)
-{
-	struct tun_struct *tun = NULL;
+	tun = rcu_dereference_protected(tfile->tun,
+					lockdep_is_held(&tun_lock));
+	if (tun) {
+		u16 index = tfile->queue_index;
+		BUG_ON(index >= tun->numqueues);
+		dev = tun->dev;
+
+		rcu_assign_pointer(tun->tfiles[index],
+				   tun->tfiles[tun->numqueues - 1]);
+		tun->tfiles[index]->queue_index = index;
+		rcu_assign_pointer(tfile->tun, NULL);
+		--tun->numqueues;
+		sock_put(&tfile->sk);
 
-	if (atomic_inc_not_zero(&tfile->count))
-		tun = tfile->tun;
+		if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST))
+			destroy = true;
+	}
 
-	return tun;
+	spin_unlock(&tun_lock);
+
+	synchronize_rcu();
+	if (clean)
+		sock_put(&tfile->sk);
+
+	if (destroy) {
+		rtnl_lock();
+		if (dev->reg_state == NETREG_REGISTERED)
+			unregister_netdevice(dev);
+		rtnl_unlock();
+	}
+
+	return 0;
 }
 
-static struct tun_struct *tun_get(struct file *file)
+static void tun_detach_all(struct net_device *dev)
 {
-	return __tun_get(file->private_data);
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile, *tfile_list[MAX_TAP_QUEUES];
+	int i, j = 0;
+
+	spin_lock(&tun_lock);
+
+	for (i = 0; i < MAX_TAP_QUEUES && tun->numqueues; i++) {
+		tfile = rcu_dereference_protected(tun->tfiles[i],
+						lockdep_is_held(&tun_lock));
+		BUG_ON(!tfile);
+		wake_up_all(&tfile->wq.wait);
+		tfile_list[j++] = tfile;
+		rcu_assign_pointer(tfile->tun, NULL);
+		--tun->numqueues;
+	}
+	BUG_ON(tun->numqueues != 0);
+	/* guarantee that any future tun_attach will fail */
+	tun->numqueues = MAX_TAP_QUEUES;
+	spin_unlock(&tun_lock);
+
+	synchronize_rcu();
+	for (--j; j >= 0; j--)
+		sock_put(&tfile_list[j]->sk);
 }
 
-static void tun_put(struct tun_struct *tun)
+static int tun_attach(struct tun_struct *tun, struct file *file)
 {
-	struct tun_file *tfile = tun->tfile;
+	struct tun_file *tfile = file->private_data;
+	int err;
+
+	ASSERT_RTNL();
+
+	spin_lock(&tun_lock);
 
-	if (atomic_dec_and_test(&tfile->count))
-		tun_detach(tfile->tun);
+	err = -EINVAL;
+	if (rcu_dereference_protected(tfile->tun, lockdep_is_held(&tun_lock)))
+		goto out;
+
+	err = -EBUSY;
+	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
+		goto out;
+
+	if (tun->numqueues == MAX_TAP_QUEUES)
+		goto out;
+
+	err = 0;
+	tfile->queue_index = tun->numqueues;
+	rcu_assign_pointer(tfile->tun, tun);
+	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
+	sock_hold(&tfile->sk);
+	tun->numqueues++;
+
+	if (tun->numqueues == 1)
+		netif_carrier_on(tun->dev);
+
+	/* device is allowed to go away first, so no need to hold extra
+	 * refcnt. */
+
+out:
+	spin_unlock(&tun_lock);
+	return err;
 }
 
 /* TAP filtering */
@@ -331,16 +414,7 @@ static const struct ethtool_ops tun_ethtool_ops;
 /* Net device detach from fd. */
 static void tun_net_uninit(struct net_device *dev)
 {
-	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_file *tfile = tun->tfile;
-
-	/* Inform the methods they need to stop using the dev.
-	 */
-	if (tfile) {
-		wake_up_all(&tfile->wq.wait);
-		if (atomic_dec_and_test(&tfile->count))
-			__tun_detach(tun);
-	}
+	tun_detach_all(dev);
 }
 
 /* Net device open. */
@@ -360,10 +434,10 @@ static int tun_net_close(struct net_device *dev)
 /* Net device start xmit */
 static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct tun_struct *tun = netdev_priv(dev);
-	struct tun_file *tfile = tun->tfile;
+	struct tun_file *tfile = NULL;
 
-	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
+	rcu_read_lock();
+	tfile = tun_get_queue(dev, skb);
 
 	/* Drop packet if interface is not attached */
 	if (!tfile)
@@ -381,7 +455,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (skb_queue_len(&tfile->socket.sk->sk_receive_queue)
 	    >= dev->tx_queue_len) {
-		if (!(tun->flags & TUN_ONE_QUEUE)) {
+		if (!(tfile->flags & TUN_ONE_QUEUE) &&
+		    !(tfile->flags & TUN_TAP_MQ)) {
 			/* Normal queueing mode. */
 			/* Packet scheduler handles dropping of further packets. */
 			netif_stop_queue(dev);
@@ -390,7 +465,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 			 * error is more appropriate. */
 			dev->stats.tx_fifo_errors++;
 		} else {
-			/* Single queue mode.
+			/* Single queue mode or multi queue mode.
 			 * Driver handles dropping of all packets itself. */
 			goto drop;
 		}
@@ -408,9 +483,11 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
 	wake_up_interruptible_poll(&tfile->wq.wait, POLLIN |
 				   POLLRDNORM | POLLRDBAND);
+	rcu_read_unlock();
 	return NETDEV_TX_OK;
 
 drop:
+	rcu_read_unlock();
 	dev->stats.tx_dropped++;
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -527,16 +604,22 @@ static void tun_net_init(struct net_device *dev)
 static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 {
 	struct tun_file *tfile = file->private_data;
-	struct tun_struct *tun = __tun_get(tfile);
+	struct tun_struct *tun = NULL;
 	struct sock *sk;
 	unsigned int mask = 0;
 
-	if (!tun)
+	if (!tfile)
 		return POLLERR;
 
-	sk = tfile->socket.sk;
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (!tun) {
+		rcu_read_unlock();
+		return POLLERR;
+	}
+	rcu_read_unlock();
 
-	tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
+	sk = &tfile->sk;
 
 	poll_wait(file, &tfile->wq.wait, wait);
 
@@ -548,10 +631,12 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	     sock_writeable(sk)))
 		mask |= POLLOUT | POLLWRNORM;
 
-	if (tun->dev->reg_state != NETREG_REGISTERED)
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (!tun || tun->dev->reg_state != NETREG_REGISTERED)
 		mask = POLLERR;
+	rcu_read_unlock();
 
-	tun_put(tun);
 	return mask;
 }
 
@@ -708,9 +793,12 @@ static ssize_t tun_get_user(struct tun_file *tfile,
 		skb_shinfo(skb)->gso_segs = 0;
 	}
 
-	tun = __tun_get(tfile);
-	if (!tun)
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (!tun) {
+		rcu_read_unlock();
 		return -EBADFD;
+	}
 
 	switch (tfile->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
@@ -720,26 +808,30 @@ static ssize_t tun_get_user(struct tun_file *tfile,
 		skb->protocol = eth_type_trans(skb, tun->dev);
 		break;
 	}
-
-	netif_rx_ni(skb);
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
-	tun_put(tun);
+	rcu_read_unlock();
+
+	netif_rx_ni(skb);
+
 	return count;
 
 err_free:
 	count = -EINVAL;
 	kfree_skb(skb);
 err:
-	tun = __tun_get(tfile);
-	if (!tun)
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (!tun) {
+		rcu_read_unlock();
 		return -EBADFD;
+	}
 
 	if (drop)
 		tun->dev->stats.rx_dropped++;
 	if (error)
 		tun->dev->stats.rx_frame_errors++;
-	tun_put(tun);
+	rcu_read_unlock();
 	return count;
 }
 
@@ -833,12 +925,13 @@ static ssize_t tun_put_user(struct tun_file *tfile,
 	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
 	total += skb->len;
 
-	tun = __tun_get(tfile);
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
 	if (tun) {
 		tun->dev->stats.tx_packets++;
 		tun->dev->stats.tx_bytes += len;
-		tun_put(tun);
 	}
+	rcu_read_unlock();
 
 	return total;
 }
@@ -869,28 +962,31 @@ static ssize_t tun_do_read(struct tun_file *tfile,
 				break;
 			}
 
-			tun = __tun_get(tfile);
+			rcu_read_lock();
+			tun = rcu_dereference(tfile->tun);
 			if (!tun) {
-				ret = -EIO;
+				ret = -EBADFD;
+				rcu_read_unlock();
 				break;
 			}
 			if (tun->dev->reg_state != NETREG_REGISTERED) {
 				ret = -EIO;
-				tun_put(tun);
+				rcu_read_unlock();
 				break;
 			}
-			tun_put(tun);
+			rcu_read_unlock();
 
 			/* Nothing to read, let's sleep */
 			schedule();
 			continue;
 		}
 
-		tun = __tun_get(tfile);
+		rcu_read_lock();
+		tun = rcu_dereference(tfile->tun);
 		if (tun) {
 			netif_wake_queue(tun->dev);
-			tun_put(tun);
 		}
+		rcu_read_unlock();
 
 		ret = tun_put_user(tfile, skb, iv, len);
 		kfree_skb(skb);
@@ -1038,6 +1134,9 @@ static int tun_flags(struct tun_struct *tun)
 	if (tun->flags & TUN_VNET_HDR)
 		flags |= IFF_VNET_HDR;
 
+	if (tun->flags & TUN_TAP_MQ)
+		flags |= IFF_MULTI_QUEUE;
+
 	return flags;
 }
 
@@ -1097,8 +1196,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		err = tun_attach(tun, file);
 		if (err < 0)
 			return err;
-	}
-	else {
+	} else {
 		char *name;
 		unsigned long flags = 0;
 
@@ -1142,6 +1240,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			TUN_USER_FEATURES;
 		dev->features = dev->hw_features;
+		if (ifr->ifr_flags & IFF_MULTI_QUEUE)
+			dev->features |= NETIF_F_LLTX;
 
 		err = register_netdevice(tun->dev);
 		if (err < 0)
@@ -1154,7 +1254,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 		err = tun_attach(tun, file);
 		if (err < 0)
-			goto failed;
+			goto err_free_dev;
 	}
 
 	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
@@ -1174,6 +1274,11 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	else
 		tun->flags &= ~TUN_VNET_HDR;
 
+	if (ifr->ifr_flags & IFF_MULTI_QUEUE)
+		tun->flags |= TUN_TAP_MQ;
+	else
+		tun->flags &= ~TUN_TAP_MQ;
+
 	/* Cache flags from tun device */
 	tfile->flags = tun->flags;
 	/* Make sure persistent devices do not get stuck in
@@ -1187,7 +1292,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 err_free_dev:
 	free_netdev(dev);
-failed:
 	return err;
 }
 
@@ -1264,38 +1368,40 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 				(unsigned int __user*)argp);
 	}
 
-	rtnl_lock();
-
-	tun = __tun_get(tfile);
-	if (cmd == TUNSETIFF && !tun) {
+	ret = 0;
+	if (cmd == TUNSETIFF) {
+		rtnl_lock();
 		ifr.ifr_name[IFNAMSIZ-1] = '\0';
-
 		ret = tun_set_iff(tfile->net, file, &ifr);
-
+		rtnl_unlock();
 		if (ret)
-			goto unlock;
-
+			return ret;
 		if (copy_to_user(argp, &ifr, ifreq_len))
-			ret = -EFAULT;
-		goto unlock;
+			return -EFAULT;
+		return ret;
 	}
 
+	rtnl_lock();
+
+	rcu_read_lock();
+
 	ret = -EBADFD;
+	tun = rcu_dereference(tfile->tun);
 	if (!tun)
 		goto unlock;
+	else
+		ret = 0;
 
-	tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd);
-
-	ret = 0;
 	switch (cmd) {
 	case TUNGETIFF:
 		ret = tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
+		rcu_read_unlock();
 		if (ret)
-			break;
+			goto out;
 
 		if (copy_to_user(argp, &ifr, ifreq_len))
 			ret = -EFAULT;
-		break;
+		goto out;
 
 	case TUNSETNOCSUM:
 		/* Disable/Enable checksum */
@@ -1357,9 +1463,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		/* Get hw address */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
 		ifr.ifr_hwaddr.sa_family = tun->dev->type;
+		rcu_read_unlock();
 		if (copy_to_user(argp, &ifr, ifreq_len))
 			ret = -EFAULT;
-		break;
+		goto out;
 
 	case SIOCSIFHWADDR:
 		/* Set hw address */
@@ -1375,9 +1482,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	}
 
 unlock:
+	rcu_read_unlock();
+out:
 	rtnl_unlock();
-	if (tun)
-		tun_put(tun);
 	return ret;
 }
 
@@ -1517,6 +1624,11 @@ out:
 	return ret;
 }
 
+static void tun_sock_destruct(struct sock *sk)
+{
+	skb_queue_purge(&sk->sk_receive_queue);
+}
+
 static int tun_chr_open(struct inode *inode, struct file * file)
 {
 	struct net *net = current->nsproxy->net_ns;
@@ -1540,6 +1652,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	sock_init_data(&tfile->socket, &tfile->sk);
 
 	tfile->sk.sk_write_space = tun_sock_write_space;
+	tfile->sk.sk_destruct = tun_sock_destruct;
 	tfile->sk.sk_sndbuf = INT_MAX;
 	file->private_data = tfile;
 
@@ -1549,31 +1662,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 static int tun_chr_close(struct inode *inode, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
-	struct tun_struct *tun;
-
-	tun = __tun_get(tfile);
-	if (tun) {
-		struct net_device *dev = tun->dev;
-
-		tun_debug(KERN_INFO, tun, "tun_chr_close\n");
-
-		__tun_detach(tun);
-
-		/* If desirable, unregister the netdevice. */
-		if (!(tun->flags & TUN_PERSIST)) {
-			rtnl_lock();
-			if (dev->reg_state == NETREG_REGISTERED)
-				unregister_netdevice(dev);
-			rtnl_unlock();
-		}
 
-		/* drop the reference that netdevice holds */
-		sock_put(&tfile->sk);
-
-	}
-
-	/* drop the reference that file holds */
-	sock_put(&tfile->sk);
+	tun_detach(tfile, true);
 
 	return 0;
 }
@@ -1700,14 +1790,17 @@ static void tun_cleanup(void)
  * holding a reference to the file for as long as the socket is in use. */
 struct socket *tun_get_socket(struct file *file)
 {
-	struct tun_struct *tun;
+	struct tun_struct *tun = NULL;
 	struct tun_file *tfile = file->private_data;
 	if (file->f_op != &tun_fops)
 		return ERR_PTR(-EINVAL);
-	tun = tun_get(file);
-	if (!tun)
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (!tun) {
+		rcu_read_unlock();
 		return ERR_PTR(-EBADFD);
-	tun_put(tun);
+	}
+	rcu_read_unlock();
 	return &tfile->socket;
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH net] net: qmi_wwan: fix Oops while disconnecting
From: Oliver Neukum @ 2012-06-25 12:10 UTC (permalink / raw)
  To: Ming Lei
  Cc: Bjørn Mork, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, Marius Bjørnstad Kotsbak
In-Reply-To: <CACVXFVNUv6w5OjSZdQDbcLEPpDU5vO_Nmarm+fAfh0RkkL_hdg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Am Montag, 25. Juni 2012, 09:15:21 schrieb Ming Lei:
> On Mon, Jun 25, 2012 at 2:15 PM, Oliver Neukum <oliver-GvhC2dPhHPQdnm+yROfE0A@public.gmane.org> wrote:
> > Am Montag, 25. Juni 2012, 05:37:20 schrieb Ming Lei:
> 
> >> The current problem is caused by the set to NULL without any
> >> protection or sync mechanism on it, and it is really a bug.
> >
> > Minidrivers can test for NULL.
> > That may not be enough and locking may be needed.
> 
> Any locking isn't needed if the set to NULL is put after
> driver_info->unbind,  since ->unbind will call subdriver->disconnect,
> which will hold the open/disconnect lock of wdm_mutex.

True for cdc_wdm. But usbnet needs to work well for everything.

> > We can move to after unregister_netdev()
> > I am unhappy with it going after unbind.
> >
> 
> Could you let us know the reason? I think it may let the
> patch not necessary.

Very well. This is the code:

 void usbnet_disconnect (struct usb_interface *intf)
{
        struct usbnet           *dev;
        struct usb_device       *xdev;
        struct net_device       *net;

        dev = usb_get_intfdata(intf);
        usb_set_intfdata(intf, NULL);
        if (!dev)
                return;

This code needs to check for NULL (cdc_ether and similar drivers)
It is cleaner that if we need to check for NULL we also set to NULL.
But that is no good reason to keep it if there's real trouble 

        xdev = interface_to_usbdev (intf);

        netif_info(dev, probe, dev->net, "unregister '%s' usb-%s-%s, %s\n",
                   intf->dev.driver->name,
                   xdev->bus->bus_name, xdev->devpath,
                   dev->driver_info->description);

        net = dev->net;
        unregister_netdev (net);

Here intfdata is NULL.

        cancel_work_sync(&dev->kevent);

        if (dev->driver_info->unbind)
                dev->driver_info->unbind (dev, intf);

At this point a minidriver must not follow the intfdata pointer,
because the interface may again be probed. So if here a minidriver
still uses intfdata, locking will be needed. We want to catch those
casees.

        usb_kill_urb(dev->interrupt);
        usb_free_urb(dev->interrupt);

        free_netdev(net);
        usb_put_dev (xdev);
}

> > Sure, it is a debugging aid. It has the drawback that minidrivers have
> > to be able to deal with intfdata being NULL. That is not hard to do.
> 
> The check isn't needed if the set to NULL is put after  driver_info->unbind
> in usbnet_disconnect.

True, but we don't catch bugs. 

	Regards
		Oliver
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 5/6] tuntap: per queue 64 bit stats
From: Eric Dumazet @ 2012-06-25 12:52 UTC (permalink / raw)
  To: Jason Wang
  Cc: mst, akong, habanero, tahm, haixiao, jwhan, ernesto.martin,
	mashirle, davem, netdev, linux-kernel, krkumar2, shemminger,
	edumazet
In-Reply-To: <1340625563-9300-6-git-send-email-jasowang@redhat.com>

On Mon, 2012-06-25 at 19:59 +0800, Jason Wang wrote:
> As we've added multiqueue support for tun/tap, this patch convert the statistics
> to use per-queue 64 bit statistics.

LLTX means you can have several cpus calling TX path in parallel.

So tx stats are wrong (even before this patch), and racy after this
patch (if several cpu access same queue, it seems to be possible)

       u64_stats_update_begin(&tfile->stats.tx_syncp);
       tfile->stats.tx_packets++;
       tfile->stats.tx_bytes += total;
       u64_stats_update_end(&tfile->stats.tx_syncp);
 
This can break horribly if several cpus run this code using same 'tfile'
pointer.

I suggest this patch comes before 'tuntap: multiqueue support' in the
serie.

^ permalink raw reply

* RE: [PATCH 4/4] net/mlx4_en: Use atomic counter to decide when queue is full
From: Yevgeny Petrilin @ 2012-06-25 13:06 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org
In-Reply-To: <20120622.172353.1092394608024377078.davem@davemloft.net>

> > The Transmit and transmit completion flows execute from different
> > contexts, which are not synchronized. Hence naive reading the of
> > consumer index might give wrong value by the time it is being used, That could lead to a state of transmit timeout.
> > Fix that by using atomic variable to maintain that index.
> >
> > Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
> 
> I'm not convinced.  There is only one place that actually changes the counter.
> 
> So it seems more like you have a missing memory barrier somewhere.
> 
> Other drivers do not need to use something as expansive as an atomic
> variable for this and neither should you.
> 
> I'm not applying this patch series, you'll need to resubmit it in it's
> entirety once you fix this patch.

Thanks,
I'll resubmit the other 3 and continue to work on this one.

Thanks,
Yevgeny

^ permalink raw reply

* Re: [PATCH] netfilter: ipvs: fix dst leak in __ip_vs_addr_is_local_v6
From: Pablo Neira Ayuso @ 2012-06-25 13:11 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netfilter-devel, netdev, Wensong Zhang, Simon Horman,
	Julian Anastasov, David Miller
In-Reply-To: <1340611103.10893.14.camel@edumazet-glaptop>

On Mon, Jun 25, 2012 at 09:58:23AM +0200, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> After call to ip6_route_output() we must release dst or we leak it.
> 
> Also should test dst->error, as ip6_route_output() never returns NULL.
> 
> Use boolean while we are at it.

Applied, thanks Eric.

^ permalink raw reply

* [PATCH V1 1/3] net/mlx4_en: Set correct port parameters during device initialization
From: Yevgeny Petrilin @ 2012-06-25 10:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340619853-29804-1-git-send-email-yevgenyp@mellanox.co.il>

Set valid port parameters: MTU and flow control configuration when
configuring the port during HW device initialization,
prior to the net device open() being called.
Using  invalid parameters (such as all zeros)
could lead to bad firmware behavior.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 926d8aa..a80280e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1204,9 +1204,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
 
 	/* Configure port */
+	mlx4_en_calc_rx_buf(dev);
 	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
-				    MLX4_EN_MIN_MTU,
-				    0, 0, 0, 0);
+				    priv->rx_skb_size + ETH_FCS_LEN,
+				    prof->tx_pause, prof->tx_ppp,
+				    prof->rx_pause, prof->rx_ppp);
 	if (err) {
 		en_err(priv, "Failed setting port general configurations "
 		       "for port %d, with error %d\n", priv->port, err);
-- 
1.7.1

^ permalink raw reply related

* [PATCH V1 2/3] net/mlx4: Use single completion vector after NOP failure
From: Yevgeny Petrilin @ 2012-06-25 10:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340619853-29804-1-git-send-email-yevgenyp@mellanox.co.il>

Fix a crash at the error flow of NOP command which caused the driver to try and use
a completion vector which wasn't allocated.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/main.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 18c8deb..c91a2b8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1978,6 +1978,8 @@ slave_start:
 	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
 	    !mlx4_is_mfunc(dev)) {
 		dev->flags &= ~MLX4_FLAG_MSI_X;
+		dev->caps.num_comp_vectors = 1;
+		dev->caps.comp_pool	   = 0;
 		pci_disable_msix(pdev);
 		err = mlx4_setup_hca(dev);
 	}
-- 
1.7.1

^ permalink raw reply related

* [PATCH V1 3/3] net/mlx4_en: Release QP range in free_resources
From: Yevgeny Petrilin @ 2012-06-25 10:24 UTC (permalink / raw)
  To: davem; +Cc: netdev, Yevgeny Petrilin
In-Reply-To: <1340619853-29804-1-git-send-email-yevgenyp@mellanox.co.il>

Add a missing resource release in ring cleanup.
Not doing this leaves a range of QPs that are being reserved,
and no one can use them.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |   12 ++++++++----
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a80280e..073b85b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -929,15 +929,20 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 		if (priv->rx_cq[i].buf)
 			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
 	}
+
+	if (priv->base_tx_qpn) {
+		mlx4_qp_release_range(priv->mdev->dev, priv->base_tx_qpn, priv->tx_ring_num);
+		priv->base_tx_qpn = 0;
+	}
 }
 
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_port_profile *prof = priv->prof;
 	int i;
-	int base_tx_qpn, err;
+	int err;
 
-	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &base_tx_qpn);
+	err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn);
 	if (err) {
 		en_err(priv, "failed reserving range for TX rings\n");
 		return err;
@@ -949,7 +954,7 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 				      prof->tx_ring_size, i, TX))
 			goto err;
 
-		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], base_tx_qpn + i,
+		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
 					   prof->tx_ring_size, TXBB_SIZE))
 			goto err;
 	}
@@ -969,7 +974,6 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 
 err:
 	en_err(priv, "Failed to allocate NIC resources\n");
-	mlx4_qp_release_range(priv->mdev->dev, base_tx_qpn, priv->tx_ring_num);
 	return -ENOMEM;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 6ae3509..225c20d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -495,6 +495,7 @@ struct mlx4_en_priv {
 	int vids[128];
 	bool wol;
 	struct device *ddev;
+	int base_tx_qpn;
 
 #ifdef CONFIG_MLX4_EN_DCB
 	struct ieee_ets ets;
-- 
1.7.1

^ permalink raw reply related

* [PATCH net V1 0/3] net/mlx4: Bug fixes for the mlx4_en driver
From: Yevgeny Petrilin @ 2012-06-25 10:24 UTC (permalink / raw)
  To: davem; +Cc: netdev

This is a set of 3 bug fixes generated agains net tree:

---
Diff from V1:
Removed atomic counter patch for rework.

Yevgeny Petrilin (3):
	net/mlx4_en: Set correct port parameters during device initialization
	net/mlx4: Use single completion vector after NOP failure
	net/mlx4_en: Release QP range in free_resources

 en_netdev.c |   18 ++++++++++++------
 main.c      |    2 ++
 mlx4_en.h   |    1 +
 3 files changed, 15 insertions(+), 6 deletions(-)

Thanks,
Yevgeny

^ permalink raw reply

* pneigh/NLF_PROXY: should NLM_F_EXCL be honored?
From: Christian Franke @ 2012-06-25 13:23 UTC (permalink / raw)
  To: netdev

Hi,

by sending RTM_NEWNEIGH with NLF_PROXY in the ndmsg, a new proxy entry
can be added.

The code path taken ignores NLM_F_EXCL, returning success, even if the
proxy entry does already exist. Is this intended behaviour?

Best Regards,
Christian Franke

^ permalink raw reply

* [PATCH fix] mac802154: add missed braces
From: Alexander Smirnov @ 2012-06-25 13:30 UTC (permalink / raw)
  To: davem; +Cc: netdev, Alexander Smirnov

Add missed braces after 'if' operator.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/tx.c |    3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 8781d8f9..434b687 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -83,9 +83,10 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
 {
 	struct xmit_work *work;
 
-	if (!(priv->phy->channels_supported[page] & (1 << chan)))
+	if (!(priv->phy->channels_supported[page] & (1 << chan))) {
 		WARN_ON(1);
 		return NETDEV_TX_OK;
+	}
 
 	if (!(priv->hw.flags & IEEE802154_HW_OMIT_CKSUM)) {
 		u16 crc = crc_ccitt(0, skb->data, skb->len);
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH 0/7] mac802154: basic wpan class-device support
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov

Hello David,

this patch-set binds the IEEE 802.15.4 Linux stack to the radio transceivers.
A new device-class called "wpan" is added. It represents an interface for the
radio device drivers to communicate the information with Linux networking
stack.

This is the basic support, only data-packets transmission is supported
(no MAC-layer specific commands support included here). But this set represents
a necessary minimum to implement IEEE 802.15.4 standard features and test them
on real hardware.

To test/try the wpan device-class the Atmel RF230 transceiver driver included
in this patch set. The how-to instructions are available in wiki at the
linux-wsn project site: http://code.google.com/p/linux-wsn/

Thank you for your time!

With best regards,
Alex

8<--

Alexander Smirnov (7):
  mac802154: add wpan device-class support
  mac802154: set and get PAN id
  mac802154: short address setter
  mac802154: page and channel setter
  mac802154: mlme start request
  drivers/ieee802154: add support for the at86rf230/231 transceivers
  mac802154: add monitor listener to TX datapath

 drivers/ieee802154/Kconfig     |    6 +
 drivers/ieee802154/Makefile    |    1 +
 drivers/ieee802154/at86rf230.c |  966 ++++++++++++++++++++++++++++++++++++++++
 include/linux/nl802154.h       |   14 +-
 include/linux/spi/at86rf230.h  |   32 ++
 include/net/mac802154.h        |    8 +
 net/mac802154/Makefile         |    2 +-
 net/mac802154/ieee802154_dev.c |    4 +
 net/mac802154/mac802154.h      |    8 +
 net/mac802154/mac_cmd.c        |   29 ++
 net/mac802154/mib.c            |   93 ++++
 net/mac802154/rx.c             |    1 +
 net/mac802154/tx.c             |    2 +
 net/mac802154/wpan.c           |  560 +++++++++++++++++++++++
 14 files changed, 1713 insertions(+), 13 deletions(-)
 create mode 100644 drivers/ieee802154/at86rf230.c
 create mode 100644 include/linux/spi/at86rf230.h
 create mode 100644 net/mac802154/wpan.c

--
1.7.2.3

^ permalink raw reply

* [PATCH 1/7] mac802154: add wpan device-class support
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340631197-27691-1-git-send-email-alex.bluesman.smirnov@gmail.com>

Every real 802.15.4 transceiver, which works with software MAC layer,
can be classified as a wpan device in this stack. So the wpan device
implementation provides missing link in datapath between the device
drivers and the Linux network queue.

According to the IEEE 802.15.4 standard each packet can be one of the
following types:
 - beacon
 - MAC layer command
 - ACK
 - data

This patch adds support for the data packet-type only, but this is
enough to perform data transmission and receiving over radio.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 include/linux/nl802154.h       |   14 +-
 include/net/mac802154.h        |    8 +
 net/mac802154/Makefile         |    2 +-
 net/mac802154/ieee802154_dev.c |    4 +
 net/mac802154/mac802154.h      |    4 +
 net/mac802154/mac_cmd.c        |    4 +
 net/mac802154/rx.c             |    1 +
 net/mac802154/wpan.c           |  560 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 584 insertions(+), 13 deletions(-)
 create mode 100644 net/mac802154/wpan.c

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
index 5a3db3a..fd4f2d1 100644
--- a/include/linux/nl802154.h
+++ b/include/linux/nl802154.h
@@ -130,18 +130,8 @@ enum {
 enum {
 	__IEEE802154_DEV_INVALID = -1,
 
-	 /* TODO:
-	 * Nowadays three device types supported by this stack at linux-zigbee
-	 * project: WPAN = 0, MONITOR = 1 and SMAC = 2.
-	 *
-	 * Since this stack implementation exists many years, it's definitely
-	 * bad idea to change the assigned values due to they are already used
-	 * by third-party userspace software like: iz-tools, wireshark...
-	 *
-	 * Currently only monitor device is added and initialized by '1' for
-	 * compatibility.
-	 */
-	IEEE802154_DEV_MONITOR = 1,
+	IEEE802154_DEV_WPAN,
+	IEEE802154_DEV_MONITOR,
 
 	__IEEE802154_DEV_MAX,
 };
diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index c9f8ab5..d0d11df 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -21,6 +21,14 @@
 
 #include <net/af_ieee802154.h>
 
+/* General MAC frame format:
+ *  2 bytes: Frame Control
+ *  1 byte:  Sequence Number
+ * 20 bytes: Addressing fields
+ * 14 bytes: Auxiliary Security Header
+ */
+#define MAC802154_FRAME_HARD_HEADER_LEN		(2 + 1 + 20 + 14)
+
 /* The following flags are used to indicate changed address settings from
  * the stack to the hardware.
  */
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index ec1bd3f..57cf5d1 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_MAC802154)	+= mac802154.o
-mac802154-objs		:= ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o
+mac802154-objs		:= ieee802154_dev.o rx.o tx.o mac_cmd.o mib.o monitor.o wpan.o
diff --git a/net/mac802154/ieee802154_dev.c b/net/mac802154/ieee802154_dev.c
index e3edfb0..e748aed 100644
--- a/net/mac802154/ieee802154_dev.c
+++ b/net/mac802154/ieee802154_dev.c
@@ -140,6 +140,10 @@ mac802154_add_iface(struct wpan_phy *phy, const char *name, int type)
 		dev = alloc_netdev(sizeof(struct mac802154_sub_if_data),
 				   name, mac802154_monitor_setup);
 		break;
+	case IEEE802154_DEV_WPAN:
+		dev = alloc_netdev(sizeof(struct mac802154_sub_if_data),
+				   name, mac802154_wpan_setup);
+		break;
 	default:
 		dev = NULL;
 		err = -EINVAL;
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index 789d9c9..c0efcf1 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -93,6 +93,7 @@ struct mac802154_sub_if_data {
 #define MAC802154_CHAN_NONE		(~(u8)0) /* No channel is assigned */
 
 extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced;
+extern struct ieee802154_mlme_ops mac802154_mlme_wpan;
 
 int mac802154_slave_open(struct net_device *dev);
 int mac802154_slave_close(struct net_device *dev);
@@ -100,6 +101,9 @@ int mac802154_slave_close(struct net_device *dev);
 void mac802154_monitors_rx(struct mac802154_priv *priv, struct sk_buff *skb);
 void mac802154_monitor_setup(struct net_device *dev);
 
+void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb);
+void mac802154_wpan_setup(struct net_device *dev);
+
 netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
 			 u8 page, u8 chan);
 
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index 7a5d0e0..db83419 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -43,3 +43,7 @@ struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
 struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = {
 	.get_phy = mac802154_get_phy,
 };
+
+struct ieee802154_mlme_ops mac802154_mlme_wpan = {
+	.get_phy = mac802154_get_phy,
+};
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 4a7d76d..38548ec 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -77,6 +77,7 @@ mac802154_subif_rx(struct ieee802154_dev *hw, struct sk_buff *skb, u8 lqi)
 	}
 
 	mac802154_monitors_rx(priv, skb);
+	mac802154_wpans_rx(priv, skb);
 out:
 	dev_kfree_skb(skb);
 	return;
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
new file mode 100644
index 0000000..caab599
--- /dev/null
+++ b/net/mac802154/wpan.c
@@ -0,0 +1,560 @@
+/*
+ * Copyright 2007-2012 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ * Sergey Lapin <slapin@ossfans.org>
+ * Maxim Gorbachyov <maxim.gorbachev@siemens.com>
+ * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+
+#include <net/rtnetlink.h>
+#include <linux/nl802154.h>
+#include <net/af_ieee802154.h>
+#include <net/mac802154.h>
+#include <net/ieee802154_netdev.h>
+#include <net/ieee802154.h>
+#include <net/wpan-phy.h>
+
+#include "mac802154.h"
+
+static inline int mac802154_fetch_skb_u8(struct sk_buff *skb, u8 *val)
+{
+	if (unlikely(!pskb_may_pull(skb, 1)))
+		return -EINVAL;
+
+	*val = skb->data[0];
+	 skb_pull(skb, 1);
+
+	return 0;
+}
+
+static inline int mac802154_fetch_skb_u16(struct sk_buff *skb, u16 *val)
+{
+	if (unlikely(!pskb_may_pull(skb, 2)))
+		return -EINVAL;
+
+	*val = skb->data[0] | (skb->data[1] << 8);
+	skb_pull(skb, 2);
+
+	return 0;
+}
+
+static inline void mac802154_haddr_copy_swap(u8 *dest, const u8 *src)
+{
+	int i;
+	for (i = 0; i < IEEE802154_ADDR_LEN; i++)
+		dest[IEEE802154_ADDR_LEN - i - 1] = src[i];
+}
+
+static int
+mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	struct sockaddr_ieee802154 *sa =
+		(struct sockaddr_ieee802154 *)&ifr->ifr_addr;
+	int err = -ENOIOCTLCMD;
+
+	spin_lock_bh(&priv->mib_lock);
+
+	switch (cmd) {
+	case SIOCGIFADDR:
+		if (priv->pan_id == IEEE802154_PANID_BROADCAST ||
+		    priv->short_addr == IEEE802154_ADDR_BROADCAST) {
+			err = -EADDRNOTAVAIL;
+			break;
+		}
+
+		sa->family = AF_IEEE802154;
+		sa->addr.addr_type = IEEE802154_ADDR_SHORT;
+		sa->addr.pan_id = priv->pan_id;
+		sa->addr.short_addr = priv->short_addr;
+
+		err = 0;
+		break;
+	case SIOCSIFADDR:
+		dev_warn(&dev->dev,
+			 "Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n");
+		if (sa->family != AF_IEEE802154 ||
+		    sa->addr.addr_type != IEEE802154_ADDR_SHORT ||
+		    sa->addr.pan_id == IEEE802154_PANID_BROADCAST ||
+		    sa->addr.short_addr == IEEE802154_ADDR_BROADCAST ||
+		    sa->addr.short_addr == IEEE802154_ADDR_UNDEF) {
+			err = -EINVAL;
+			break;
+		}
+
+		priv->pan_id = sa->addr.pan_id;
+		priv->short_addr = sa->addr.short_addr;
+
+		err = 0;
+		break;
+	}
+
+	spin_unlock_bh(&priv->mib_lock);
+	return err;
+}
+
+static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (netif_running(dev))
+		return -EBUSY;
+
+	/* FIXME: validate addr */
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	mac802154_dev_set_ieee_addr(dev);
+	return 0;
+}
+
+static int mac802154_header_create(struct sk_buff *skb,
+				   struct net_device *dev,
+				   unsigned short type,
+				   const void *_daddr,
+				   const void *_saddr,
+				   unsigned len)
+{
+	const struct ieee802154_addr *saddr = _saddr;
+	const struct ieee802154_addr *daddr = _daddr;
+	struct ieee802154_addr dev_addr;
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	int pos = 2;
+	u8 *head;
+	u16 fc;
+
+	if (!daddr)
+		return -EINVAL;
+
+	head = kzalloc(MAC802154_FRAME_HARD_HEADER_LEN, GFP_KERNEL);
+	if (head == NULL)
+		return -ENOMEM;
+
+	head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
+	fc = mac_cb_type(skb);
+
+	if (!saddr) {
+		spin_lock_bh(&priv->mib_lock);
+
+		if (priv->short_addr == IEEE802154_ADDR_BROADCAST ||
+		    priv->short_addr == IEEE802154_ADDR_UNDEF ||
+		    priv->pan_id == IEEE802154_PANID_BROADCAST) {
+			dev_addr.addr_type = IEEE802154_ADDR_LONG;
+			memcpy(dev_addr.hwaddr, dev->dev_addr,
+			       IEEE802154_ADDR_LEN);
+		} else {
+			dev_addr.addr_type = IEEE802154_ADDR_SHORT;
+			dev_addr.short_addr = priv->short_addr;
+		}
+
+		dev_addr.pan_id = priv->pan_id;
+		saddr = &dev_addr;
+
+		spin_unlock_bh(&priv->mib_lock);
+	}
+
+	if (daddr->addr_type != IEEE802154_ADDR_NONE) {
+		fc |= (daddr->addr_type << IEEE802154_FC_DAMODE_SHIFT);
+
+		head[pos++] = daddr->pan_id & 0xff;
+		head[pos++] = daddr->pan_id >> 8;
+
+		if (daddr->addr_type == IEEE802154_ADDR_SHORT) {
+			head[pos++] = daddr->short_addr & 0xff;
+			head[pos++] = daddr->short_addr >> 8;
+		} else {
+			mac802154_haddr_copy_swap(head + pos, daddr->hwaddr);
+			pos += IEEE802154_ADDR_LEN;
+		}
+	}
+
+	if (saddr->addr_type != IEEE802154_ADDR_NONE) {
+		fc |= (saddr->addr_type << IEEE802154_FC_SAMODE_SHIFT);
+
+		if ((saddr->pan_id == daddr->pan_id) &&
+		    (saddr->pan_id != IEEE802154_PANID_BROADCAST))
+			/* PANID compression/ intra PAN */
+			fc |= IEEE802154_FC_INTRA_PAN;
+		else {
+			head[pos++] = saddr->pan_id & 0xff;
+			head[pos++] = saddr->pan_id >> 8;
+		}
+
+		if (saddr->addr_type == IEEE802154_ADDR_SHORT) {
+			head[pos++] = saddr->short_addr & 0xff;
+			head[pos++] = saddr->short_addr >> 8;
+		} else {
+			mac802154_haddr_copy_swap(head + pos, saddr->hwaddr);
+			pos += IEEE802154_ADDR_LEN;
+		}
+	}
+
+	head[0] = fc;
+	head[1] = fc >> 8;
+
+	memcpy(skb_push(skb, pos), head, pos);
+	kfree(head);
+
+	return pos;
+}
+
+static int
+mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+	const u8 *hdr = skb_mac_header(skb);
+	const u8 *tail = skb_tail_pointer(skb);
+	struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr;
+	u16 fc;
+	int da_type;
+
+	if (hdr + 3 > tail)
+		goto malformed;
+
+	fc = hdr[0] | (hdr[1] << 8);
+
+	hdr += 3;
+
+	da_type = IEEE802154_FC_DAMODE(fc);
+	addr->addr_type = IEEE802154_FC_SAMODE(fc);
+
+	switch (da_type) {
+	case IEEE802154_ADDR_NONE:
+		if (fc & IEEE802154_FC_INTRA_PAN)
+			goto malformed;
+		break;
+	case IEEE802154_ADDR_LONG:
+		if (fc & IEEE802154_FC_INTRA_PAN) {
+			if (hdr + 2 > tail)
+				goto malformed;
+			addr->pan_id = hdr[0] | (hdr[1] << 8);
+			hdr += 2;
+		}
+
+		if (hdr + IEEE802154_ADDR_LEN > tail)
+			goto malformed;
+
+		hdr += IEEE802154_ADDR_LEN;
+		break;
+	case IEEE802154_ADDR_SHORT:
+		if (fc & IEEE802154_FC_INTRA_PAN) {
+			if (hdr + 2 > tail)
+				goto malformed;
+			addr->pan_id = hdr[0] | (hdr[1] << 8);
+			hdr += 2;
+		}
+
+		if (hdr + 2 > tail)
+			goto malformed;
+
+		hdr += 2;
+		break;
+	default:
+		goto malformed;
+
+	}
+
+	switch (addr->addr_type) {
+	case IEEE802154_ADDR_NONE:
+		break;
+	case IEEE802154_ADDR_LONG:
+		if (!(fc & IEEE802154_FC_INTRA_PAN)) {
+			if (hdr + 2 > tail)
+				goto malformed;
+			addr->pan_id = hdr[0] | (hdr[1] << 8);
+			hdr += 2;
+		}
+
+		if (hdr + IEEE802154_ADDR_LEN > tail)
+			goto malformed;
+
+		mac802154_haddr_copy_swap(addr->hwaddr, hdr);
+		hdr += IEEE802154_ADDR_LEN;
+		break;
+	case IEEE802154_ADDR_SHORT:
+		if (!(fc & IEEE802154_FC_INTRA_PAN)) {
+			if (hdr + 2 > tail)
+				goto malformed;
+			addr->pan_id = hdr[0] | (hdr[1] << 8);
+			hdr += 2;
+		}
+
+		if (hdr + 2 > tail)
+			goto malformed;
+
+		addr->short_addr = hdr[0] | (hdr[1] << 8);
+		hdr += 2;
+		break;
+	default:
+		goto malformed;
+	}
+
+	return sizeof(struct ieee802154_addr);
+
+malformed:
+	pr_debug("malformed packet\n");
+	return 0;
+}
+
+static netdev_tx_t
+mac802154_wpan_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv;
+	u8 chan, page;
+
+	priv = netdev_priv(dev);
+
+	spin_lock_bh(&priv->mib_lock);
+	chan = priv->chan;
+	page = priv->page;
+	spin_unlock_bh(&priv->mib_lock);
+
+	if (chan == MAC802154_CHAN_NONE ||
+	    page >= WPAN_NUM_PAGES ||
+	    chan >= WPAN_NUM_CHANNELS)
+		return NETDEV_TX_OK;
+
+	skb->skb_iif = dev->ifindex;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	return mac802154_tx(priv->hw, skb, page, chan);
+}
+
+static struct header_ops mac802154_header_ops = {
+	.create		= mac802154_header_create,
+	.parse		= mac802154_header_parse,
+};
+
+static const struct net_device_ops mac802154_wpan_ops = {
+	.ndo_open		= mac802154_slave_open,
+	.ndo_stop		= mac802154_slave_close,
+	.ndo_start_xmit		= mac802154_wpan_xmit,
+	.ndo_do_ioctl		= mac802154_wpan_ioctl,
+	.ndo_set_mac_address	= mac802154_wpan_mac_addr,
+};
+
+void mac802154_wpan_setup(struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv;
+
+	dev->addr_len		= IEEE802154_ADDR_LEN;
+	memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
+
+	dev->hard_header_len	= MAC802154_FRAME_HARD_HEADER_LEN;
+	dev->header_ops		= &mac802154_header_ops;
+	dev->needed_tailroom	= 2; /* FCS */
+	dev->mtu		= IEEE802154_MTU;
+	dev->tx_queue_len	= 10;
+	dev->type		= ARPHRD_IEEE802154;
+	dev->flags		= IFF_NOARP | IFF_BROADCAST;
+	dev->watchdog_timeo	= 0;
+
+	dev->destructor		= free_netdev;
+	dev->netdev_ops		= &mac802154_wpan_ops;
+	dev->ml_priv		= &mac802154_mlme_wpan;
+
+	priv = netdev_priv(dev);
+	priv->type = IEEE802154_DEV_WPAN;
+
+	priv->chan = MAC802154_CHAN_NONE;
+	priv->page = 0;
+
+	spin_lock_init(&priv->mib_lock);
+
+	get_random_bytes(&priv->bsn, 1);
+	get_random_bytes(&priv->dsn, 1);
+
+	priv->pan_id = IEEE802154_PANID_BROADCAST;
+	priv->short_addr = IEEE802154_ADDR_BROADCAST;
+}
+
+static int mac802154_process_data(struct net_device *dev, struct sk_buff *skb)
+{
+	return netif_rx(skb);
+}
+
+static int
+mac802154_subif_frame(struct mac802154_sub_if_data *sdata, struct sk_buff *skb)
+{
+	pr_debug("getting packet via slave interface %s\n", sdata->dev->name);
+
+	spin_lock_bh(&sdata->mib_lock);
+
+	switch (mac_cb(skb)->da.addr_type) {
+	case IEEE802154_ADDR_NONE:
+		if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE)
+			/* FIXME: check if we are PAN coordinator */
+			skb->pkt_type = PACKET_OTHERHOST;
+		else
+			/* ACK comes with both addresses empty */
+			skb->pkt_type = PACKET_HOST;
+		break;
+	case IEEE802154_ADDR_LONG:
+		if (mac_cb(skb)->da.pan_id != sdata->pan_id &&
+		    mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST)
+			skb->pkt_type = PACKET_OTHERHOST;
+		else if (!memcmp(mac_cb(skb)->da.hwaddr, sdata->dev->dev_addr,
+				 IEEE802154_ADDR_LEN))
+			skb->pkt_type = PACKET_HOST;
+		else
+			skb->pkt_type = PACKET_OTHERHOST;
+		break;
+	case IEEE802154_ADDR_SHORT:
+		if (mac_cb(skb)->da.pan_id != sdata->pan_id &&
+		    mac_cb(skb)->da.pan_id != IEEE802154_PANID_BROADCAST)
+			skb->pkt_type = PACKET_OTHERHOST;
+		else if (mac_cb(skb)->da.short_addr == sdata->short_addr)
+			skb->pkt_type = PACKET_HOST;
+		else if (mac_cb(skb)->da.short_addr ==
+					IEEE802154_ADDR_BROADCAST)
+			skb->pkt_type = PACKET_BROADCAST;
+		else
+			skb->pkt_type = PACKET_OTHERHOST;
+		break;
+	default:
+		break;
+	}
+
+	spin_unlock_bh(&sdata->mib_lock);
+
+	skb->dev = sdata->dev;
+
+	sdata->dev->stats.rx_packets++;
+	sdata->dev->stats.rx_bytes += skb->len;
+
+	switch (mac_cb_type(skb)) {
+	case IEEE802154_FC_TYPE_DATA:
+		return mac802154_process_data(sdata->dev, skb);
+	default:
+		pr_warning("ieee802154: bad frame received (type = %d)\n",
+			   mac_cb_type(skb));
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+}
+
+static int mac802154_parse_frame_start(struct sk_buff *skb)
+{
+	u8 *head = skb->data;
+	u16 fc;
+
+	if (mac802154_fetch_skb_u16(skb, &fc) ||
+	    mac802154_fetch_skb_u8(skb, &(mac_cb(skb)->seq)))
+		goto err;
+
+	pr_debug("fc: %04x dsn: %02x\n", fc, head[2]);
+
+	mac_cb(skb)->flags = IEEE802154_FC_TYPE(fc);
+	mac_cb(skb)->sa.addr_type = IEEE802154_FC_SAMODE(fc);
+	mac_cb(skb)->da.addr_type = IEEE802154_FC_DAMODE(fc);
+
+	if (fc & IEEE802154_FC_INTRA_PAN)
+		mac_cb(skb)->flags |= MAC_CB_FLAG_INTRAPAN;
+
+	if (mac_cb(skb)->da.addr_type != IEEE802154_ADDR_NONE) {
+		if (mac802154_fetch_skb_u16(skb, &(mac_cb(skb)->da.pan_id)))
+			goto err;
+
+		/* source PAN id compression */
+		if (mac_cb_is_intrapan(skb))
+			mac_cb(skb)->sa.pan_id = mac_cb(skb)->da.pan_id;
+
+		pr_debug("dest PAN addr: %04x\n", mac_cb(skb)->da.pan_id);
+
+		if (mac_cb(skb)->da.addr_type == IEEE802154_ADDR_SHORT) {
+			u16 *da = &(mac_cb(skb)->da.short_addr);
+
+			if (mac802154_fetch_skb_u16(skb, da))
+				goto err;
+
+			pr_debug("destination address is short: %04x\n",
+				 mac_cb(skb)->da.short_addr);
+		} else {
+			if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN))
+				goto err;
+
+			mac802154_haddr_copy_swap(mac_cb(skb)->da.hwaddr,
+						  skb->data);
+			skb_pull(skb, IEEE802154_ADDR_LEN);
+
+			pr_debug("destination address is hardware\n");
+		}
+	}
+
+	if (mac_cb(skb)->sa.addr_type != IEEE802154_ADDR_NONE) {
+		/* non PAN-compression, fetch source address id */
+		if (!(mac_cb_is_intrapan(skb))) {
+			u16 *sa_pan = &(mac_cb(skb)->sa.pan_id);
+
+			if (mac802154_fetch_skb_u16(skb, sa_pan))
+				goto err;
+		}
+
+		pr_debug("source PAN addr: %04x\n", mac_cb(skb)->da.pan_id);
+
+		if (mac_cb(skb)->sa.addr_type == IEEE802154_ADDR_SHORT) {
+			u16 *sa = &(mac_cb(skb)->sa.short_addr);
+
+			if (mac802154_fetch_skb_u16(skb, sa))
+				goto err;
+
+			pr_debug("source address is short: %04x\n",
+				 mac_cb(skb)->sa.short_addr);
+		} else {
+			if (!pskb_may_pull(skb, IEEE802154_ADDR_LEN))
+				goto err;
+
+			mac802154_haddr_copy_swap(mac_cb(skb)->sa.hwaddr,
+						  skb->data);
+			skb_pull(skb, IEEE802154_ADDR_LEN);
+
+			pr_debug("source address is hardware\n");
+		}
+	}
+
+	return 0;
+err:
+	return -EINVAL;
+}
+
+void mac802154_wpans_rx(struct mac802154_priv *priv, struct sk_buff *skb)
+{
+	int ret;
+	struct sk_buff *sskb;
+	struct mac802154_sub_if_data *sdata;
+
+	ret = mac802154_parse_frame_start(skb);
+	if (ret) {
+		pr_debug("got invalid frame\n");
+		return;
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &priv->slaves, list)
+	{
+		if (sdata->type != IEEE802154_DEV_WPAN)
+			continue;
+
+		sskb = skb_clone(skb, GFP_ATOMIC);
+		if (sskb)
+			mac802154_subif_frame(sdata, sskb);
+	}
+	rcu_read_unlock();
+}
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH 3/7] mac802154: short address setter
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340631197-27691-1-git-send-email-alex.bluesman.smirnov@gmail.com>

A method to assign the IEEE802.15.4 short address was added to the
MIB implementation.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/mac802154.h |    1 +
 net/mac802154/mib.c       |   17 +++++++++++++++++
 2 files changed, 18 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index 5cb7dc2..9951072 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -108,6 +108,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
 			 u8 page, u8 chan);
 
 /* MIB callbacks */
+void mac802154_dev_set_short_addr(struct net_device *dev, u16 val);
 void mac802154_dev_set_ieee_addr(struct net_device *dev);
 u16 mac802154_dev_get_pan_id(const struct net_device *dev);
 void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 8e772ed..d74503b 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -78,6 +78,23 @@ static void set_hw_addr_filt(struct net_device *dev, unsigned long changed)
 	return;
 }
 
+void mac802154_dev_set_short_addr(struct net_device *dev, u16 val)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	spin_lock_bh(&priv->mib_lock);
+	priv->short_addr = val;
+	spin_unlock_bh(&priv->mib_lock);
+
+	if ((priv->hw->ops->set_hw_addr_filt) &&
+	    (priv->hw->hw.hw_filt.short_addr != priv->short_addr)) {
+		priv->hw->hw.hw_filt.short_addr = priv->short_addr;
+		set_hw_addr_filt(dev, IEEE802515_AFILT_SADDR_CHANGED);
+	}
+}
+
 void mac802154_dev_set_ieee_addr(struct net_device *dev)
 {
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH 2/7] mac802154: set and get PAN id
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340631197-27691-1-git-send-email-alex.bluesman.smirnov@gmail.com>

Two methods intended to get and set the Private Area Network identifier
were added to the MIB implementation.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/mac802154.h |    2 ++
 net/mac802154/mib.c       |   31 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index c0efcf1..5cb7dc2 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -109,5 +109,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
 
 /* MIB callbacks */
 void mac802154_dev_set_ieee_addr(struct net_device *dev);
+u16 mac802154_dev_get_pan_id(const struct net_device *dev);
+void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
 
 #endif /* MAC802154_H */
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index ab59821..8e772ed 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -91,3 +91,34 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev)
 		set_hw_addr_filt(dev, IEEE802515_AFILT_IEEEADDR_CHANGED);
 	}
 }
+
+u16 mac802154_dev_get_pan_id(const struct net_device *dev)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	u16 ret;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	spin_lock_bh(&priv->mib_lock);
+	ret = priv->pan_id;
+	spin_unlock_bh(&priv->mib_lock);
+
+	return ret;
+}
+
+void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	spin_lock_bh(&priv->mib_lock);
+	priv->pan_id = val;
+	spin_unlock_bh(&priv->mib_lock);
+
+	if ((priv->hw->ops->set_hw_addr_filt) &&
+	    (priv->hw->hw.hw_filt.pan_id != priv->pan_id)) {
+		priv->hw->hw.hw_filt.pan_id = priv->pan_id;
+		set_hw_addr_filt(dev, IEEE802515_AFILT_PANID_CHANGED);
+	}
+}
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH 4/7] mac802154: page and channel setter
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340631197-27691-1-git-send-email-alex.bluesman.smirnov@gmail.com>

A new method to set page and channel values for a transceiver
was added to the MIB.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/mac802154.h |    1 +
 net/mac802154/mib.c       |   45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index 9951072..6967864 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -112,5 +112,6 @@ void mac802154_dev_set_short_addr(struct net_device *dev, u16 val);
 void mac802154_dev_set_ieee_addr(struct net_device *dev);
 u16 mac802154_dev_get_pan_id(const struct net_device *dev);
 void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
+void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
 
 #endif /* MAC802154_H */
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index d74503b..4bba4e1 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -28,6 +28,11 @@
 
 #include "mac802154.h"
 
+struct phy_chan_notify_work {
+	struct work_struct work;
+	struct net_device *dev;
+};
+
 struct hw_addr_filt_notify_work {
 	struct work_struct work;
 	struct net_device *dev;
@@ -139,3 +144,43 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
 		set_hw_addr_filt(dev, IEEE802515_AFILT_PANID_CHANGED);
 	}
 }
+
+static void phy_chan_notify(struct work_struct *work)
+{
+	struct phy_chan_notify_work *nw = container_of(work,
+					  struct phy_chan_notify_work, work);
+	struct mac802154_priv *hw = mac802154_slave_get_priv(nw->dev);
+	struct mac802154_sub_if_data *priv = netdev_priv(nw->dev);
+	int res;
+
+	res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);
+	if (res)
+		pr_debug("set_channel failed\n");
+
+	kfree(nw);
+}
+
+void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
+{
+	struct mac802154_sub_if_data *priv = netdev_priv(dev);
+	struct phy_chan_notify_work *work;
+
+	BUG_ON(dev->type != ARPHRD_IEEE802154);
+
+	spin_lock_bh(&priv->mib_lock);
+	priv->page = page;
+	priv->chan = chan;
+	spin_unlock_bh(&priv->mib_lock);
+
+	if (priv->hw->phy->current_channel != priv->chan ||
+	    priv->hw->phy->current_page != priv->page) {
+		work = kzalloc(sizeof(*work), GFP_ATOMIC);
+		if (!work)
+			return;
+
+		INIT_WORK(&work->work, phy_chan_notify);
+		work->dev = dev;
+		queue_work(priv->hw->dev_workqueue, &work->work);
+	}
+}
+
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH 5/7] mac802154: mlme start request
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340631197-27691-1-git-send-email-alex.bluesman.smirnov@gmail.com>

Basic preparations to start the interface.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 net/mac802154/mac_cmd.c |   25 +++++++++++++++++++++++++
 1 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index db83419..7f5403e 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -25,12 +25,36 @@
 #include <linux/skbuff.h>
 #include <linux/if_arp.h>
 
+#include <net/ieee802154.h>
 #include <net/ieee802154_netdev.h>
 #include <net/wpan-phy.h>
 #include <net/mac802154.h>
+#include <net/nl802154.h>
 
 #include "mac802154.h"
 
+static int mac802154_mlme_start_req(struct net_device *dev,
+				    struct ieee802154_addr *addr,
+				    u8 channel, u8 page,
+				    u8 bcn_ord, u8 sf_ord,
+				    u8 pan_coord, u8 blx,
+				    u8 coord_realign)
+{
+	BUG_ON(addr->addr_type != IEEE802154_ADDR_SHORT);
+
+	mac802154_dev_set_pan_id(dev, addr->pan_id);
+	mac802154_dev_set_short_addr(dev, addr->short_addr);
+	mac802154_dev_set_ieee_addr(dev);
+	mac802154_dev_set_page_channel(dev, page, channel);
+
+	/* FIXME: add validation for unused parameters to be sane
+	 * for SoftMAC
+	 */
+	ieee802154_nl_start_confirm(dev, IEEE802154_SUCCESS);
+
+	return 0;
+}
+
 struct wpan_phy *mac802154_get_phy(const struct net_device *dev)
 {
 	struct mac802154_sub_if_data *priv = netdev_priv(dev);
@@ -46,4 +70,5 @@ struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced = {
 
 struct ieee802154_mlme_ops mac802154_mlme_wpan = {
 	.get_phy = mac802154_get_phy,
+	.start_req = mac802154_mlme_start_req,
 };
-- 
1.7.2.3

^ permalink raw reply related

* [PATCH 6/7] drivers/ieee802154: add support for the at86rf230/231 transceivers
From: Alexander Smirnov @ 2012-06-25 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, dbaryshkov, Alexander Smirnov
In-Reply-To: <1340631197-27691-1-git-send-email-alex.bluesman.smirnov@gmail.com>

The AT86RF231 is a feature rich, low-power 2.4 GHz radio transceiver
designed for industrial and consumer ZigBee/IEEE 802.15.4, 6LoWPAN,
RF4CE and high data rate 2.4 GHz ISM band applications.

This patch adds support for the Atmel RF230/231 radio transceivers.

Signed-off-by: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
---
 drivers/ieee802154/Kconfig     |    6 +
 drivers/ieee802154/Makefile    |    1 +
 drivers/ieee802154/at86rf230.c |  966 ++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/at86rf230.h  |   32 ++
 4 files changed, 1005 insertions(+), 0 deletions(-)
 create mode 100644 drivers/ieee802154/at86rf230.c
 create mode 100644 include/linux/spi/at86rf230.h

diff --git a/drivers/ieee802154/Kconfig b/drivers/ieee802154/Kconfig
index 15c0640..1fc4eef 100644
--- a/drivers/ieee802154/Kconfig
+++ b/drivers/ieee802154/Kconfig
@@ -19,6 +19,7 @@ config IEEE802154_FAKEHARD
 
           This driver can also be built as a module. To do so say M here.
 	  The module will be called 'fakehard'.
+
 config IEEE802154_FAKELB
 	depends on IEEE802154_DRIVERS && MAC802154
 	tristate "IEEE 802.15.4 loopback driver"
@@ -28,3 +29,8 @@ config IEEE802154_FAKELB
 
 	  This driver can also be built as a module. To do so say M here.
 	  The module will be called 'fakelb'.
+
+config IEEE802154_AT86RF230
+        depends on IEEE802154_DRIVERS && MAC802154
+        tristate "AT86RF230/231 transceiver driver"
+        depends on SPI
diff --git a/drivers/ieee802154/Makefile b/drivers/ieee802154/Makefile
index ea784ea..4f4371d 100644
--- a/drivers/ieee802154/Makefile
+++ b/drivers/ieee802154/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_IEEE802154_FAKEHARD) += fakehard.o
 obj-$(CONFIG_IEEE802154_FAKELB) += fakelb.o
+obj-$(CONFIG_IEEE802154_AT86RF230) += at86rf230.o
diff --git a/drivers/ieee802154/at86rf230.c b/drivers/ieee802154/at86rf230.c
new file mode 100644
index 0000000..2b52374
--- /dev/null
+++ b/drivers/ieee802154/at86rf230.c
@@ -0,0 +1,966 @@
+/*
+ * AT86RF230/RF231 driver
+ *
+ * Copyright (C) 2009-2012 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+ * Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/at86rf230.h>
+#include <linux/skbuff.h>
+
+#include <net/mac802154.h>
+#include <net/wpan-phy.h>
+
+struct at86rf230_local {
+	struct spi_device *spi;
+	int rstn, slp_tr, dig2;
+
+	u8 part;
+	u8 vers;
+
+	u8 buf[2];
+	struct mutex bmux;
+
+	struct work_struct irqwork;
+	struct completion tx_complete;
+
+	struct ieee802154_dev *dev;
+
+	spinlock_t lock;
+	bool irq_disabled;
+	bool is_tx;
+};
+
+#define	RG_TRX_STATUS	(0x01)
+#define	SR_TRX_STATUS		0x01, 0x1f, 0
+#define	SR_RESERVED_01_3	0x01, 0x20, 5
+#define	SR_CCA_STATUS		0x01, 0x40, 6
+#define	SR_CCA_DONE		0x01, 0x80, 7
+#define	RG_TRX_STATE	(0x02)
+#define	SR_TRX_CMD		0x02, 0x1f, 0
+#define	SR_TRAC_STATUS		0x02, 0xe0, 5
+#define	RG_TRX_CTRL_0	(0x03)
+#define	SR_CLKM_CTRL		0x03, 0x07, 0
+#define	SR_CLKM_SHA_SEL		0x03, 0x08, 3
+#define	SR_PAD_IO_CLKM		0x03, 0x30, 4
+#define	SR_PAD_IO		0x03, 0xc0, 6
+#define	RG_TRX_CTRL_1	(0x04)
+#define	SR_IRQ_POLARITY		0x04, 0x01, 0
+#define	SR_IRQ_MASK_MODE	0x04, 0x02, 1
+#define	SR_SPI_CMD_MODE		0x04, 0x0c, 2
+#define	SR_RX_BL_CTRL		0x04, 0x10, 4
+#define	SR_TX_AUTO_CRC_ON	0x04, 0x20, 5
+#define	SR_IRQ_2_EXT_EN		0x04, 0x40, 6
+#define	SR_PA_EXT_EN		0x04, 0x80, 7
+#define	RG_PHY_TX_PWR	(0x05)
+#define	SR_TX_PWR		0x05, 0x0f, 0
+#define	SR_PA_LT		0x05, 0x30, 4
+#define	SR_PA_BUF_LT		0x05, 0xc0, 6
+#define	RG_PHY_RSSI	(0x06)
+#define	SR_RSSI			0x06, 0x1f, 0
+#define	SR_RND_VALUE		0x06, 0x60, 5
+#define	SR_RX_CRC_VALID		0x06, 0x80, 7
+#define	RG_PHY_ED_LEVEL	(0x07)
+#define	SR_ED_LEVEL		0x07, 0xff, 0
+#define	RG_PHY_CC_CCA	(0x08)
+#define	SR_CHANNEL		0x08, 0x1f, 0
+#define	SR_CCA_MODE		0x08, 0x60, 5
+#define	SR_CCA_REQUEST		0x08, 0x80, 7
+#define	RG_CCA_THRES	(0x09)
+#define	SR_CCA_ED_THRES		0x09, 0x0f, 0
+#define	SR_RESERVED_09_1	0x09, 0xf0, 4
+#define	RG_RX_CTRL	(0x0a)
+#define	SR_PDT_THRES		0x0a, 0x0f, 0
+#define	SR_RESERVED_0a_1	0x0a, 0xf0, 4
+#define	RG_SFD_VALUE	(0x0b)
+#define	SR_SFD_VALUE		0x0b, 0xff, 0
+#define	RG_TRX_CTRL_2	(0x0c)
+#define	SR_OQPSK_DATA_RATE	0x0c, 0x03, 0
+#define	SR_RESERVED_0c_2	0x0c, 0x7c, 2
+#define	SR_RX_SAFE_MODE		0x0c, 0x80, 7
+#define	RG_ANT_DIV	(0x0d)
+#define	SR_ANT_CTRL		0x0d, 0x03, 0
+#define	SR_ANT_EXT_SW_EN	0x0d, 0x04, 2
+#define	SR_ANT_DIV_EN		0x0d, 0x08, 3
+#define	SR_RESERVED_0d_2	0x0d, 0x70, 4
+#define	SR_ANT_SEL		0x0d, 0x80, 7
+#define	RG_IRQ_MASK	(0x0e)
+#define	SR_IRQ_MASK		0x0e, 0xff, 0
+#define	RG_IRQ_STATUS	(0x0f)
+#define	SR_IRQ_0_PLL_LOCK	0x0f, 0x01, 0
+#define	SR_IRQ_1_PLL_UNLOCK	0x0f, 0x02, 1
+#define	SR_IRQ_2_RX_START	0x0f, 0x04, 2
+#define	SR_IRQ_3_TRX_END	0x0f, 0x08, 3
+#define	SR_IRQ_4_CCA_ED_DONE	0x0f, 0x10, 4
+#define	SR_IRQ_5_AMI		0x0f, 0x20, 5
+#define	SR_IRQ_6_TRX_UR		0x0f, 0x40, 6
+#define	SR_IRQ_7_BAT_LOW	0x0f, 0x80, 7
+#define	RG_VREG_CTRL	(0x10)
+#define	SR_RESERVED_10_6	0x10, 0x03, 0
+#define	SR_DVDD_OK		0x10, 0x04, 2
+#define	SR_DVREG_EXT		0x10, 0x08, 3
+#define	SR_RESERVED_10_3	0x10, 0x30, 4
+#define	SR_AVDD_OK		0x10, 0x40, 6
+#define	SR_AVREG_EXT		0x10, 0x80, 7
+#define	RG_BATMON	(0x11)
+#define	SR_BATMON_VTH		0x11, 0x0f, 0
+#define	SR_BATMON_HR		0x11, 0x10, 4
+#define	SR_BATMON_OK		0x11, 0x20, 5
+#define	SR_RESERVED_11_1	0x11, 0xc0, 6
+#define	RG_XOSC_CTRL	(0x12)
+#define	SR_XTAL_TRIM		0x12, 0x0f, 0
+#define	SR_XTAL_MODE		0x12, 0xf0, 4
+#define	RG_RX_SYN	(0x15)
+#define	SR_RX_PDT_LEVEL		0x15, 0x0f, 0
+#define	SR_RESERVED_15_2	0x15, 0x70, 4
+#define	SR_RX_PDT_DIS		0x15, 0x80, 7
+#define	RG_XAH_CTRL_1	(0x17)
+#define	SR_RESERVED_17_8	0x17, 0x01, 0
+#define	SR_AACK_PROM_MODE	0x17, 0x02, 1
+#define	SR_AACK_ACK_TIME	0x17, 0x04, 2
+#define	SR_RESERVED_17_5	0x17, 0x08, 3
+#define	SR_AACK_UPLD_RES_FT	0x17, 0x10, 4
+#define	SR_AACK_FLTR_RES_FT	0x17, 0x20, 5
+#define	SR_RESERVED_17_2	0x17, 0x40, 6
+#define	SR_RESERVED_17_1	0x17, 0x80, 7
+#define	RG_FTN_CTRL	(0x18)
+#define	SR_RESERVED_18_2	0x18, 0x7f, 0
+#define	SR_FTN_START		0x18, 0x80, 7
+#define	RG_PLL_CF	(0x1a)
+#define	SR_RESERVED_1a_2	0x1a, 0x7f, 0
+#define	SR_PLL_CF_START		0x1a, 0x80, 7
+#define	RG_PLL_DCU	(0x1b)
+#define	SR_RESERVED_1b_3	0x1b, 0x3f, 0
+#define	SR_RESERVED_1b_2	0x1b, 0x40, 6
+#define	SR_PLL_DCU_START	0x1b, 0x80, 7
+#define	RG_PART_NUM	(0x1c)
+#define	SR_PART_NUM		0x1c, 0xff, 0
+#define	RG_VERSION_NUM	(0x1d)
+#define	SR_VERSION_NUM		0x1d, 0xff, 0
+#define	RG_MAN_ID_0	(0x1e)
+#define	SR_MAN_ID_0		0x1e, 0xff, 0
+#define	RG_MAN_ID_1	(0x1f)
+#define	SR_MAN_ID_1		0x1f, 0xff, 0
+#define	RG_SHORT_ADDR_0	(0x20)
+#define	SR_SHORT_ADDR_0		0x20, 0xff, 0
+#define	RG_SHORT_ADDR_1	(0x21)
+#define	SR_SHORT_ADDR_1		0x21, 0xff, 0
+#define	RG_PAN_ID_0	(0x22)
+#define	SR_PAN_ID_0		0x22, 0xff, 0
+#define	RG_PAN_ID_1	(0x23)
+#define	SR_PAN_ID_1		0x23, 0xff, 0
+#define	RG_IEEE_ADDR_0	(0x24)
+#define	SR_IEEE_ADDR_0		0x24, 0xff, 0
+#define	RG_IEEE_ADDR_1	(0x25)
+#define	SR_IEEE_ADDR_1		0x25, 0xff, 0
+#define	RG_IEEE_ADDR_2	(0x26)
+#define	SR_IEEE_ADDR_2		0x26, 0xff, 0
+#define	RG_IEEE_ADDR_3	(0x27)
+#define	SR_IEEE_ADDR_3		0x27, 0xff, 0
+#define	RG_IEEE_ADDR_4	(0x28)
+#define	SR_IEEE_ADDR_4		0x28, 0xff, 0
+#define	RG_IEEE_ADDR_5	(0x29)
+#define	SR_IEEE_ADDR_5		0x29, 0xff, 0
+#define	RG_IEEE_ADDR_6	(0x2a)
+#define	SR_IEEE_ADDR_6		0x2a, 0xff, 0
+#define	RG_IEEE_ADDR_7	(0x2b)
+#define	SR_IEEE_ADDR_7		0x2b, 0xff, 0
+#define	RG_XAH_CTRL_0	(0x2c)
+#define	SR_SLOTTED_OPERATION	0x2c, 0x01, 0
+#define	SR_MAX_CSMA_RETRIES	0x2c, 0x0e, 1
+#define	SR_MAX_FRAME_RETRIES	0x2c, 0xf0, 4
+#define	RG_CSMA_SEED_0	(0x2d)
+#define	SR_CSMA_SEED_0		0x2d, 0xff, 0
+#define	RG_CSMA_SEED_1	(0x2e)
+#define	SR_CSMA_SEED_1		0x2e, 0x07, 0
+#define	SR_AACK_I_AM_COORD	0x2e, 0x08, 3
+#define	SR_AACK_DIS_ACK		0x2e, 0x10, 4
+#define	SR_AACK_SET_PD		0x2e, 0x20, 5
+#define	SR_AACK_FVN_MODE	0x2e, 0xc0, 6
+#define	RG_CSMA_BE	(0x2f)
+#define	SR_MIN_BE		0x2f, 0x0f, 0
+#define	SR_MAX_BE		0x2f, 0xf0, 4
+
+#define CMD_REG		0x80
+#define CMD_REG_MASK	0x3f
+#define CMD_WRITE	0x40
+#define CMD_FB		0x20
+
+#define IRQ_BAT_LOW	(1 << 7)
+#define IRQ_TRX_UR	(1 << 6)
+#define IRQ_AMI		(1 << 5)
+#define IRQ_CCA_ED	(1 << 4)
+#define IRQ_TRX_END	(1 << 3)
+#define IRQ_RX_START	(1 << 2)
+#define IRQ_PLL_UNL	(1 << 1)
+#define IRQ_PLL_LOCK	(1 << 0)
+
+#define STATE_P_ON		0x00	/* BUSY */
+#define STATE_BUSY_RX		0x01
+#define STATE_BUSY_TX		0x02
+#define STATE_FORCE_TRX_OFF	0x03
+#define STATE_FORCE_TX_ON	0x04	/* IDLE */
+/* 0x05 */				/* INVALID_PARAMETER */
+#define STATE_RX_ON		0x06
+/* 0x07 */				/* SUCCESS */
+#define STATE_TRX_OFF		0x08
+#define STATE_TX_ON		0x09
+/* 0x0a - 0x0e */			/* 0x0a - UNSUPPORTED_ATTRIBUTE */
+#define STATE_SLEEP		0x0F
+#define STATE_BUSY_RX_AACK	0x11
+#define STATE_BUSY_TX_ARET	0x12
+#define STATE_BUSY_RX_AACK_ON	0x16
+#define STATE_BUSY_TX_ARET_ON	0x19
+#define STATE_RX_ON_NOCLK	0x1C
+#define STATE_RX_AACK_ON_NOCLK	0x1D
+#define STATE_BUSY_RX_AACK_NOCLK 0x1E
+#define STATE_TRANSITION_IN_PROGRESS 0x1F
+
+static int
+__at86rf230_write(struct at86rf230_local *lp, u8 addr, u8 data)
+{
+	u8 *buf = lp->buf;
+	int status;
+	struct spi_message msg;
+	struct spi_transfer xfer = {
+		.len	= 2,
+		.tx_buf	= buf,
+	};
+
+	buf[0] = (addr & CMD_REG_MASK) | CMD_REG | CMD_WRITE;
+	buf[1] = data;
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	return status;
+}
+
+static int
+__at86rf230_read_subreg(struct at86rf230_local *lp,
+			u8 addr, u8 mask, int shift, u8 *data)
+{
+	u8 *buf = lp->buf;
+	int status;
+	struct spi_message msg;
+	struct spi_transfer xfer = {
+		.len	= 2,
+		.tx_buf	= buf,
+		.rx_buf	= buf,
+	};
+
+	buf[0] = (addr & CMD_REG_MASK) | CMD_REG;
+	buf[1] = 0xff;
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	if (status == 0)
+		*data = buf[1];
+
+	return status;
+}
+
+static int
+at86rf230_read_subreg(struct at86rf230_local *lp,
+		      u8 addr, u8 mask, int shift, u8 *data)
+{
+	int status;
+
+	mutex_lock(&lp->bmux);
+	status = __at86rf230_read_subreg(lp, addr, mask, shift, data);
+	mutex_unlock(&lp->bmux);
+
+	return status;
+}
+
+static int
+at86rf230_write_subreg(struct at86rf230_local *lp,
+		       u8 addr, u8 mask, int shift, u8 data)
+{
+	int status;
+	u8 val;
+
+	mutex_lock(&lp->bmux);
+	status = __at86rf230_read_subreg(lp, addr, 0xff, 0, &val);
+	if (status)
+		goto out;
+
+	val &= ~mask;
+	val |= (data << shift) & mask;
+
+	status = __at86rf230_write(lp, addr, val);
+out:
+	mutex_unlock(&lp->bmux);
+
+	return status;
+}
+
+static int
+at86rf230_write_fbuf(struct at86rf230_local *lp, u8 *data, u8 len)
+{
+	u8 *buf = lp->buf;
+	int status;
+	struct spi_message msg;
+	struct spi_transfer xfer_head = {
+		.len		= 2,
+		.tx_buf		= buf,
+
+	};
+	struct spi_transfer xfer_buf = {
+		.len		= len,
+		.tx_buf		= data,
+	};
+
+	mutex_lock(&lp->bmux);
+	buf[0] = CMD_WRITE | CMD_FB;
+	buf[1] = len + 2; /* 2 bytes for CRC that isn't written */
+
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_head, &msg);
+	spi_message_add_tail(&xfer_buf, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	mutex_unlock(&lp->bmux);
+	return status;
+}
+
+static int
+at86rf230_read_fbuf(struct at86rf230_local *lp, u8 *data, u8 *len, u8 *lqi)
+{
+	u8 *buf = lp->buf;
+	int status;
+	struct spi_message msg;
+	struct spi_transfer xfer_head = {
+		.len		= 2,
+		.tx_buf		= buf,
+		.rx_buf		= buf,
+	};
+	struct spi_transfer xfer_head1 = {
+		.len		= 2,
+		.tx_buf		= buf,
+		.rx_buf		= buf,
+	};
+	struct spi_transfer xfer_buf = {
+		.len		= 0,
+		.rx_buf		= data,
+	};
+
+	mutex_lock(&lp->bmux);
+
+	buf[0] = CMD_FB;
+	buf[1] = 0x00;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_head, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+
+	xfer_buf.len = *(buf + 1) + 1;
+	*len = buf[1];
+
+	buf[0] = CMD_FB;
+	buf[1] = 0x00;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer_head1, &msg);
+	spi_message_add_tail(&xfer_buf, &msg);
+
+	status = spi_sync(lp->spi, &msg);
+
+	if (msg.status)
+		status = msg.status;
+
+	dev_vdbg(&lp->spi->dev, "status = %d\n", status);
+	dev_vdbg(&lp->spi->dev, "buf[0] = %02x\n", buf[0]);
+	dev_vdbg(&lp->spi->dev, "buf[1] = %02x\n", buf[1]);
+
+	if (status) {
+		if (lqi && (*len > lp->buf[1]))
+			*lqi = data[lp->buf[1]];
+	}
+	mutex_unlock(&lp->bmux);
+
+	return status;
+}
+
+static int
+at86rf230_ed(struct ieee802154_dev *dev, u8 *level)
+{
+	might_sleep();
+	BUG_ON(!level);
+	*level = 0xbe;
+	return 0;
+}
+
+static int
+at86rf230_state(struct ieee802154_dev *dev, int state)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;
+	u8 val;
+	u8 desired_status;
+
+	might_sleep();
+
+	if (state == STATE_FORCE_TX_ON)
+		desired_status = STATE_TX_ON;
+	else if (state == STATE_FORCE_TRX_OFF)
+		desired_status = STATE_TRX_OFF;
+	else
+		desired_status = state;
+
+	do {
+		rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &val);
+		if (rc)
+			goto err;
+	} while (val == STATE_TRANSITION_IN_PROGRESS);
+
+	if (val == desired_status)
+		return 0;
+
+	/* state is equal to phy states */
+	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, state);
+	if (rc)
+		goto err;
+
+	do {
+		rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &val);
+		if (rc)
+			goto err;
+	} while (val == STATE_TRANSITION_IN_PROGRESS);
+
+
+	if (val == desired_status)
+		return 0;
+
+	pr_err("unexpected state change: %d, asked for %d\n", val, state);
+	return -EBUSY;
+
+err:
+	pr_err("error: %d\n", rc);
+	return rc;
+}
+
+static int
+at86rf230_start(struct ieee802154_dev *dev)
+{
+	struct at86rf230_local *lp = dev->priv;
+	u8 rc;
+
+	rc = at86rf230_write_subreg(lp, SR_RX_SAFE_MODE, 1);
+	if (rc)
+		return rc;
+
+	return at86rf230_state(dev, STATE_RX_ON);
+}
+
+static void
+at86rf230_stop(struct ieee802154_dev *dev)
+{
+	at86rf230_state(dev, STATE_FORCE_TRX_OFF);
+}
+
+static int
+at86rf230_channel(struct ieee802154_dev *dev, int page, int channel)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;
+
+	might_sleep();
+
+	if (page != 0 || channel < 11 || channel > 26) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	rc = at86rf230_write_subreg(lp, SR_CHANNEL, channel);
+	msleep(1); /* Wait for PLL */
+	dev->phy->current_channel = channel;
+
+	return 0;
+}
+
+static int
+at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb)
+{
+	struct at86rf230_local *lp = dev->priv;
+	int rc;
+	unsigned long flags;
+
+	might_sleep();
+
+	rc = at86rf230_state(dev, STATE_FORCE_TX_ON);
+	if (rc)
+		goto err;
+
+	spin_lock_irqsave(&lp->lock, flags);
+	lp->is_tx = 1;
+	INIT_COMPLETION(lp->tx_complete);
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	rc = at86rf230_write_fbuf(lp, skb->data, skb->len);
+	if (rc)
+		goto err_rx;
+
+	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_BUSY_TX);
+	if (rc)
+		goto err_rx;
+
+	rc = wait_for_completion_interruptible(&lp->tx_complete);
+	if (rc < 0)
+		goto err_rx;
+
+	rc = at86rf230_start(dev);
+
+	return rc;
+
+err_rx:
+	at86rf230_start(dev);
+err:
+	pr_err("error: %d\n", rc);
+
+	spin_lock_irqsave(&lp->lock, flags);
+	lp->is_tx = 0;
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return rc;
+}
+
+static int at86rf230_rx(struct at86rf230_local *lp)
+{
+	u8 len = 128, lqi = 0;
+	int rc;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(len, GFP_KERNEL);
+
+	if (!skb)
+		return -ENOMEM;
+
+	if (at86rf230_write_subreg(lp, SR_RX_PDT_DIS, 1) ||
+	    at86rf230_read_fbuf(lp, skb_put(skb, len), &len, &lqi) ||
+	    at86rf230_write_subreg(lp, SR_RX_SAFE_MODE, 1) ||
+	    at86rf230_write_subreg(lp, SR_RX_PDT_DIS, 0)) {
+		goto err;
+	}
+
+	if (len < 2)
+		goto err;
+
+	skb_trim(skb, len - 2); /* We do not put CRC into the frame */
+
+	ieee802154_rx_irqsafe(lp->dev, skb, lqi);
+
+	dev_dbg(&lp->spi->dev, "READ_FBUF: %d %d %x\n", rc, len, lqi);
+
+	return 0;
+err:
+	pr_debug("received frame is too small\n");
+
+	kfree_skb(skb);
+	return -EINVAL;
+}
+
+static struct ieee802154_ops at86rf230_ops = {
+	.owner = THIS_MODULE,
+	.xmit = at86rf230_xmit,
+	.ed = at86rf230_ed,
+	.set_channel = at86rf230_channel,
+	.start = at86rf230_start,
+	.stop = at86rf230_stop,
+};
+
+static void at86rf230_irqwork(struct work_struct *work)
+{
+	struct at86rf230_local *lp =
+		container_of(work, struct at86rf230_local, irqwork);
+	u8 status = 0, val;
+	int rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lp->lock, flags);
+	rc = at86rf230_read_subreg(lp, RG_IRQ_STATUS, 0xff, 0, &val);
+	status |= val;
+
+	status &= ~IRQ_PLL_LOCK; /* ignore */
+	status &= ~IRQ_RX_START; /* ignore */
+	status &= ~IRQ_AMI; /* ignore */
+	status &= ~IRQ_TRX_UR; /* FIXME: possibly handle ???*/
+
+	if (status & IRQ_TRX_END) {
+		status &= ~IRQ_TRX_END;
+		if (lp->is_tx) {
+			lp->is_tx = 0;
+			complete(&lp->tx_complete);
+		} else {
+			at86rf230_rx(lp);
+		}
+	}
+
+	if (lp->irq_disabled) {
+		lp->irq_disabled = 0;
+		enable_irq(lp->spi->irq);
+	}
+	spin_unlock_irqrestore(&lp->lock, flags);
+}
+
+static irqreturn_t at86rf230_isr(int irq, void *data)
+{
+	struct at86rf230_local *lp = data;
+
+	spin_lock(&lp->lock);
+	if (!lp->irq_disabled) {
+		disable_irq_nosync(irq);
+		lp->irq_disabled = 1;
+	}
+	spin_unlock(&lp->lock);
+
+	schedule_work(&lp->irqwork);
+
+	return IRQ_HANDLED;
+}
+
+
+static int at86rf230_hw_init(struct at86rf230_local *lp)
+{
+	u8 status;
+	int rc;
+
+	rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
+	if (rc)
+		return rc;
+
+	dev_info(&lp->spi->dev, "Status: %02x\n", status);
+	if (status == STATE_P_ON) {
+		rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_TRX_OFF);
+		if (rc)
+			return rc;
+		msleep(1);
+		rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
+		if (rc)
+			return rc;
+		dev_info(&lp->spi->dev, "Status: %02x\n", status);
+	}
+
+	rc = at86rf230_write_subreg(lp, SR_IRQ_MASK, 0xff); /* IRQ_TRX_UR |
+							     * IRQ_CCA_ED |
+							     * IRQ_TRX_END |
+							     * IRQ_PLL_UNL |
+							     * IRQ_PLL_LOCK
+							     */
+	if (rc)
+		return rc;
+
+	/* CLKM changes are applied immediately */
+	rc = at86rf230_write_subreg(lp, SR_CLKM_SHA_SEL, 0x00);
+	if (rc)
+		return rc;
+
+	/* Turn CLKM Off */
+	rc = at86rf230_write_subreg(lp, SR_CLKM_CTRL, 0x00);
+	if (rc)
+		return rc;
+	/* Wait the next SLEEP cycle */
+	msleep(100);
+
+	rc = at86rf230_write_subreg(lp, SR_TRX_CMD, STATE_TX_ON);
+	if (rc)
+		return rc;
+	msleep(1);
+
+	rc = at86rf230_read_subreg(lp, SR_TRX_STATUS, &status);
+	if (rc)
+		return rc;
+	dev_info(&lp->spi->dev, "Status: %02x\n", status);
+
+	rc = at86rf230_read_subreg(lp, SR_DVDD_OK, &status);
+	if (rc)
+		return rc;
+	if (!status) {
+		dev_err(&lp->spi->dev, "DVDD error\n");
+		return -EINVAL;
+	}
+
+	rc = at86rf230_read_subreg(lp, SR_AVDD_OK, &status);
+	if (rc)
+		return rc;
+	if (!status) {
+		dev_err(&lp->spi->dev, "AVDD error\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int at86rf230_suspend(struct spi_device *spi, pm_message_t message)
+{
+	return 0;
+}
+
+static int at86rf230_resume(struct spi_device *spi)
+{
+	return 0;
+}
+
+static int at86rf230_fill_data(struct spi_device *spi)
+{
+	struct at86rf230_local *lp = spi_get_drvdata(spi);
+	struct at86rf230_platform_data *pdata = spi->dev.platform_data;
+
+	if (!pdata) {
+		dev_err(&spi->dev, "no platform_data\n");
+		return -EINVAL;
+	}
+
+	lp->rstn = pdata->rstn;
+	lp->slp_tr = pdata->slp_tr;
+	lp->dig2 = pdata->dig2;
+
+	return 0;
+}
+
+static int __devinit at86rf230_probe(struct spi_device *spi)
+{
+	struct ieee802154_dev *dev;
+	struct at86rf230_local *lp;
+	u8 man_id_0, man_id_1;
+	int rc;
+	const char *chip;
+	int supported = 0;
+
+	if (!spi->irq) {
+		dev_err(&spi->dev, "no IRQ specified\n");
+		return -EINVAL;
+	}
+
+	dev = ieee802154_alloc_device(sizeof(*lp), &at86rf230_ops);
+	if (!dev)
+		return -ENOMEM;
+
+	lp = dev->priv;
+	lp->dev = dev;
+
+	lp->spi = spi;
+
+	dev->priv = lp;
+	dev->parent = &spi->dev;
+	dev->extra_tx_headroom = 0;
+	/* We do support only 2.4 Ghz */
+	dev->phy->channels_supported[0] = 0x7FFF800;
+	dev->flags = IEEE802154_HW_OMIT_CKSUM;
+
+	mutex_init(&lp->bmux);
+	INIT_WORK(&lp->irqwork, at86rf230_irqwork);
+	spin_lock_init(&lp->lock);
+	init_completion(&lp->tx_complete);
+
+	spi_set_drvdata(spi, lp);
+
+	rc = at86rf230_fill_data(spi);
+	if (rc)
+		goto err_fill;
+
+	rc = gpio_request(lp->rstn, "rstn");
+	if (rc)
+		goto err_rstn;
+
+	if (gpio_is_valid(lp->slp_tr)) {
+		rc = gpio_request(lp->slp_tr, "slp_tr");
+		if (rc)
+			goto err_slp_tr;
+	}
+
+	rc = gpio_direction_output(lp->rstn, 1);
+	if (rc)
+		goto err_gpio_dir;
+
+	if (gpio_is_valid(lp->slp_tr)) {
+		rc = gpio_direction_output(lp->slp_tr, 0);
+		if (rc)
+			goto err_gpio_dir;
+	}
+
+	/* Reset */
+	msleep(1);
+	gpio_set_value(lp->rstn, 0);
+	msleep(1);
+	gpio_set_value(lp->rstn, 1);
+	msleep(1);
+
+	rc = at86rf230_read_subreg(lp, SR_MAN_ID_0, &man_id_0);
+	if (rc)
+		goto err_gpio_dir;
+	rc = at86rf230_read_subreg(lp, SR_MAN_ID_1, &man_id_1);
+	if (rc)
+		goto err_gpio_dir;
+
+	if (man_id_1 != 0x00 || man_id_0 != 0x1f) {
+		dev_err(&spi->dev, "Non-Atmel dev found (MAN_ID %02x %02x)\n",
+			man_id_1, man_id_0);
+		rc = -EINVAL;
+		goto err_gpio_dir;
+	}
+
+	rc = at86rf230_read_subreg(lp, SR_PART_NUM, &lp->part);
+	if (rc)
+		goto err_gpio_dir;
+
+	rc = at86rf230_read_subreg(lp, SR_VERSION_NUM, &lp->vers);
+	if (rc)
+		goto err_gpio_dir;
+
+	switch (lp->part) {
+	case 2:
+		chip = "at86rf230";
+		/* supported = 1;  FIXME: should be easy to support; */
+		break;
+	case 3:
+		chip = "at86rf231";
+		supported = 1;
+		break;
+	default:
+		chip = "UNKNOWN";
+		break;
+	}
+
+	dev_info(&spi->dev, "Detected %s chip version %d\n", chip, lp->vers);
+	if (!supported) {
+		rc = -ENOTSUPP;
+		goto err_gpio_dir;
+	}
+
+	rc = at86rf230_hw_init(lp);
+	if (rc)
+		goto err_gpio_dir;
+
+	rc = request_irq(spi->irq, at86rf230_isr, IRQF_SHARED,
+			 dev_name(&spi->dev), lp);
+	if (rc)
+		goto err_gpio_dir;
+
+	rc = ieee802154_register_device(lp->dev);
+	if (rc)
+		goto err_irq;
+
+	return rc;
+
+	ieee802154_unregister_device(lp->dev);
+err_irq:
+	free_irq(spi->irq, lp);
+	flush_work(&lp->irqwork);
+err_gpio_dir:
+	if (gpio_is_valid(lp->slp_tr))
+		gpio_free(lp->slp_tr);
+err_slp_tr:
+	gpio_free(lp->rstn);
+err_rstn:
+err_fill:
+	spi_set_drvdata(spi, NULL);
+	mutex_destroy(&lp->bmux);
+	ieee802154_free_device(lp->dev);
+	return rc;
+}
+
+static int __devexit at86rf230_remove(struct spi_device *spi)
+{
+	struct at86rf230_local *lp = spi_get_drvdata(spi);
+
+	ieee802154_unregister_device(lp->dev);
+
+	free_irq(spi->irq, lp);
+	flush_work(&lp->irqwork);
+
+	if (gpio_is_valid(lp->slp_tr))
+		gpio_free(lp->slp_tr);
+	gpio_free(lp->rstn);
+
+	spi_set_drvdata(spi, NULL);
+	mutex_destroy(&lp->bmux);
+	ieee802154_free_device(lp->dev);
+
+	dev_dbg(&spi->dev, "unregistered at86rf230\n");
+	return 0;
+}
+
+static struct spi_driver at86rf230_driver = {
+	.driver = {
+		.name	= "at86rf230",
+		.owner	= THIS_MODULE,
+	},
+	.probe      = at86rf230_probe,
+	.remove     = __devexit_p(at86rf230_remove),
+	.suspend    = at86rf230_suspend,
+	.resume     = at86rf230_resume,
+};
+
+static int __init at86rf230_init(void)
+{
+	return spi_register_driver(&at86rf230_driver);
+}
+module_init(at86rf230_init);
+
+static void __exit at86rf230_exit(void)
+{
+	spi_unregister_driver(&at86rf230_driver);
+}
+module_exit(at86rf230_exit);
+
+MODULE_DESCRIPTION("AT86RF230 Transceiver Driver");
+MODULE_LICENSE("GPL v2");
+
diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h
new file mode 100644
index 0000000..958f88d
--- /dev/null
+++ b/include/linux/spi/at86rf230.h
@@ -0,0 +1,32 @@
+/*
+ * AT86RF230/RF231 driver
+ *
+ * Copyright (C) 2009-2012 Siemens AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Written by:
+ * Dmitry Eremin-Solenikov <dmitry.baryshkov@siemens.com>
+ */
+#ifndef AT86RF230_H
+#define AT86RF230_H
+
+struct at86rf230_platform_data {
+	int rstn;
+	int slp_tr;
+	int dig2;
+};
+
+#endif
+
-- 
1.7.2.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox