Netdev List
 help / color / mirror / Atom feed
* [PATCH] Generalize socket rx gap / receive queue overflow cmsg
From: Neil Horman @ 2009-10-07 18:08 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, davem, socketcan, nhorman

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch.  It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted.

Neil

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 538991c..7cde78e 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -63,4 +63,5 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL		40
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/net.h b/include/linux/net.h
index 529a093..b7dafdd 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -69,6 +69,7 @@ struct net;
 #define SOCK_NOSPACE		2
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
+#define SOCK_RXQ_OVFL		5
 
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df7b23a..8c866b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -389,8 +389,10 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 7626b6a..8bd366f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -306,6 +306,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	skb_len = skb->len;
 
 	skb_queue_tail(&sk->sk_receive_queue, skb);
+	skb->dropcount = atomic_read(&sk->sk_drops);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
@@ -702,6 +703,12 @@ set_rcvbuf:
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			set_bit(SOCK_RXQ_OVFL, &sock->flags);
+		else
+			clear_bit(SOCK_RXQ_OVFL, &sock->flags);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -901,6 +908,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = test_bit(SOCK_RXQ_OVFL, &sock->flags);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d7ecca0..920ae1e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -617,6 +617,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
+	skb->dropcount = atomic_read(&sk->sk_drops);
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
 	skb_dst_drop(skb);
@@ -634,6 +635,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 drop_n_acct:
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_drops++;
+	atomic_inc(&sk->sk_drops);
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
diff --git a/net/socket.c b/net/socket.c
index 7565536..ad157a3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -673,6 +673,12 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 {
 	int err;
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
+	struct sk_buff *skb;
+	int rc;
+	struct sock *sk = sock->sk;
+	unsigned long cpu_flags;
+	__u32 gap = 0;
+	int check_drops = test_bit(SOCK_RXQ_OVFL, &sock->flags);
 
 	si->sock = sock;
 	si->scm = NULL;
@@ -684,7 +690,21 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		return err;
 
-	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+	if (check_drops) {
+		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
+				flags & MSG_DONTWAIT, &err);
+		if (skb) {
+			gap = skb->dropcount;
+			consume_skb(skb);
+		}
+	}
+
+	rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
+
+	if (check_drops && (rc > 0))
+		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
+
+	return rc;
 }
 
 int sock_recvmsg(struct socket *sock, struct msghdr *msg,

^ permalink raw reply related

* Re: [PATCH 2/2] [RFC] Add c/r support for connected INET sockets
From: Dan Smith @ 2009-10-07 17:22 UTC (permalink / raw)
  To: Serge E. Hallyn
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, John Dykstra,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20091007171907.GA20572-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

>> +	CKPT_COPY(op, hh->daddr, sk->daddr);
>> +	CKPT_COPY(op, hh->rcv_saddr, sk->rcv_saddr);
>> +	CKPT_COPY(op, hh->dport, sk->dport);
>> +	CKPT_COPY(op, hh->num, sk->num);
>> +	CKPT_COPY(op, hh->saddr, sk->saddr);
>> +	CKPT_COPY(op, hh->sport, sk->sport);

SH> This becomes an easy way around CAP_NET_BIND_SERVICE right?  Or
SH> will that be caught by something already done in your listen patch
SH> after this step?

Actually, yeah, you're right.  I was going to say that we'd catch it
because we also do a bind(), but there's no guarantee that the
sockaddr_in we use for bind() is the same as this :D

-- 
Dan Smith
IBM Linux Technology Center
email: danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org

^ permalink raw reply

* Re: [PATCH 2/2] [RFC] Add c/r support for connected INET sockets
From: Serge E. Hallyn @ 2009-10-07 17:19 UTC (permalink / raw)
  To: Dan Smith
  Cc: containers-qjLDD68F18O7TbgM5vRIOg, John Dykstra,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1254932945-12578-3-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

Quoting Dan Smith (danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org):
> This patch adds basic support for C/R of open INET sockets.  I think that
> all the important bits of the TCP and ICSK socket structures are saved,
> but I think there is still some additional IPv6 stuff that needs to be
> handled.
> 
> With this patch applied, the following script can be used to demonstrate
> the functionality:
> 
>   https://lists.linux-foundation.org/pipermail/containers/2009-October/021239.html
> 
> It shows that this enables migration of a sendmail process with open
> connections from one machine to another without dropping.

neato

> Now that listening socket support is in the c/r tree, I think it is
> a good time to start fielding comments and suggestions on the
> connected part, as I think lots of folks have input on how to make it
> better, safer, etc.

One thing:

> +static int sock_inet_cptrst(struct ckpt_ctx *ctx,
> +			    struct sock *sock,
> +			    struct ckpt_hdr_socket_inet *hh,
> +			    int op)
> +{
> +	struct inet_sock *sk = inet_sk(sock);
> +	struct inet_connection_sock *icsk = inet_csk(sock);
> +	int ret;
> +
> +	CKPT_COPY(op, hh->daddr, sk->daddr);
> +	CKPT_COPY(op, hh->rcv_saddr, sk->rcv_saddr);
> +	CKPT_COPY(op, hh->dport, sk->dport);
> +	CKPT_COPY(op, hh->num, sk->num);
> +	CKPT_COPY(op, hh->saddr, sk->saddr);
> +	CKPT_COPY(op, hh->sport, sk->sport);

This becomes an easy way around CAP_NET_BIND_SERVICE right?  Or
will that be caught by something already done in your listen
patch after this step?

thanks,
-serge

^ permalink raw reply

* Re: [BUG net-2.6] bluetooth/rfcomm : sleeping function called from invalid context at mm/slub.c:1719
From: Oliver Hartkopp @ 2009-10-07 17:16 UTC (permalink / raw)
  To: Dave Young
  Cc: Marcel Holtmann, Linux Netdev List,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <4AC6247E.7050308-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

Oliver Hartkopp wrote:
> Dave Young wrote:
>> On Fri, Oct 2, 2009 at 2:28 PM, Oliver Hartkopp <oliver-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org> wrote:
>>> Hello Marcel,
>>>
>>> with current net-2.6 tree ...
>>>
>>> While starting my PPP Bluetooth dialup networking, i got this:
>> Hi, oliver
>>
>> please try following patch:
>> http://patchwork.kernel.org/patch/51326/
> 
> Hi Dave,
> 
> that fixed it at ppp startup!
> 
> Tested-by: Oliver Hartkopp <oliver-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

Hi Dave,

what's the state of this patch?

Has it gone upstream?

Regards,
Oliver

> 
> Btw. when shutting down the ppp connection i still get this:
> 
> [  361.996887] INFO: trying to register non-static key.
> [  361.996897] the code is fine but needs lockdep annotation.
> [  361.996902] turning off the locking correctness validator.
> [  361.996912] Pid: 0, comm: swapper Not tainted 2.6.31-08939-gdb8abec-dirty #22
> [  361.996919] Call Trace:
> [  361.996933]  [<c12e4fb2>] ? printk+0xf/0x11
> [  361.996947]  [<c1042214>] register_lock_class+0x5a/0x295
> [  361.996957]  [<c1043af2>] __lock_acquire+0x9b/0xc03
> [  361.996967]  [<c104464b>] ? __lock_acquire+0xbf4/0xc03
> [  361.996985]  [<fa59a168>] ? l2cap_get_chan_by_scid+0x35/0x43 [l2cap]
> [  361.996995]  [<c104491f>] ? lock_release_non_nested+0x17b/0x1db
> [  361.997008]  [<fa59a168>] ? l2cap_get_chan_by_scid+0x35/0x43 [l2cap]
> [  361.997018]  [<c10426fd>] ? trace_hardirqs_off+0xb/0xd
> [  361.997028]  [<c10446b6>] lock_acquire+0x5c/0x73
> [  361.997039]  [<c124cd14>] ? skb_dequeue+0x12/0x4c
> [  361.997049]  [<c12e6e23>] _spin_lock_irqsave+0x24/0x34
> [  361.997058]  [<c124cd14>] ? skb_dequeue+0x12/0x4c
> [  361.997066]  [<c124cd14>] skb_dequeue+0x12/0x4c
> [  361.997075]  [<c124d579>] skb_queue_purge+0x14/0x1b
> [  361.997088]  [<fa59ce3f>] l2cap_recv_frame+0xe9e/0x129a [l2cap]
> [  361.997099]  [<c10421d1>] ? register_lock_class+0x17/0x295
> [  361.997110]  [<c104464b>] ? __lock_acquire+0xbf4/0xc03
> [  361.997128]  [<c104464b>] ? __lock_acquire+0xbf4/0xc03
> [  361.997139]  [<c120de74>] ? uhci_giveback_urb+0xf2/0x162
> [  361.997163]  [<f8bb4c45>] ? hci_rx_task+0xfe/0x1f8 [bluetooth]
> [  361.997177]  [<fa59d2e4>] l2cap_recv_acldata+0xa9/0x1be [l2cap]
> [  361.997190]  [<fa59d23b>] ? l2cap_recv_acldata+0x0/0x1be [l2cap]
> [  361.997208]  [<f8bb4c77>] hci_rx_task+0x130/0x1f8 [bluetooth]
> [  361.997219]  [<c102a098>] tasklet_action+0x6b/0xb2
> [  361.997228]  [<c102a46b>] __do_softirq+0x82/0x101
> [  361.997237]  [<c102a515>] do_softirq+0x2b/0x43
> [  361.997246]  [<c102a619>] irq_exit+0x35/0x68
> [  361.997256]  [<c1004513>] do_IRQ+0x80/0x96
> [  361.997265]  [<c10030ae>] common_interrupt+0x2e/0x34
> [  361.997275]  [<c104007b>] ? tick_device_uses_broadcast+0x71/0x7c
> [  361.997286]  [<c11747a8>] ? acpi_idle_enter_simple+0x103/0x12e
> [  361.997296]  [<c1174515>] acpi_idle_enter_bm+0xc3/0x253
> [  361.997306]  [<c1238b6f>] cpuidle_idle_call+0x60/0x91
> [  361.997315]  [<c1001d44>] cpu_idle+0x49/0x65
> [  361.997324]  [<c12e2f0e>] start_secondary+0x190/0x195
> 
> 
> Thanks,
> Oliver
> 
> 

^ permalink raw reply

* [PATCH 2/2] [RFC] Add c/r support for connected INET sockets
From: Dan Smith @ 2009-10-07 16:29 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg
  Cc: John Dykstra, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1254932945-12578-1-git-send-email-danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

This patch adds basic support for C/R of open INET sockets.  I think that
all the important bits of the TCP and ICSK socket structures are saved,
but I think there is still some additional IPv6 stuff that needs to be
handled.

With this patch applied, the following script can be used to demonstrate
the functionality:

  https://lists.linux-foundation.org/pipermail/containers/2009-October/021239.html

It shows that this enables migration of a sendmail process with open
connections from one machine to another without dropping.

Now that listening socket support is in the c/r tree, I think it is
a good time to start fielding comments and suggestions on the
connected part, as I think lots of folks have input on how to make it
better, safer, etc.

Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: Oren Laadan <orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
Cc: John Dykstra <jdykstra72-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Dan Smith <danms-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint/sys.c                 |    4 +
 include/linux/checkpoint_hdr.h   |   97 +++++++++++++++++++
 include/linux/checkpoint_types.h |    2 +
 net/checkpoint.c                 |   25 ++----
 net/ipv4/checkpoint.c            |  192 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 303 insertions(+), 17 deletions(-)

diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 260a1ee..4ec4dd9 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -221,6 +221,8 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
 
 	kfree(ctx->pids_arr);
 
+	sock_list_free(&ctx->listen_sockets);
+
 	kfree(ctx);
 }
 
@@ -249,6 +251,8 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
 	spin_lock_init(&ctx->lock);
 #endif
 
+	INIT_LIST_HEAD(&ctx->listen_sockets);
+
 	err = -EBADF;
 	ctx->file = fget(fd);
 	if (!ctx->file)
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index b5f958e..2693a5d 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -16,6 +16,7 @@
 #include <linux/socket.h>
 #include <linux/un.h>
 #include <linux/in.h>
+#include <linux/in6.h>
 #else
 #include <sys/socket.h>
 #include <sys/un.h>
@@ -475,6 +476,102 @@ struct ckpt_hdr_socket_unix {
 
 struct ckpt_hdr_socket_inet {
 	struct ckpt_hdr h;
+	__u32 daddr;
+	__u32 rcv_saddr;
+	__u32 saddr;
+	__u16 dport;
+	__u16 num;
+	__u16 sport;
+	__s16 uc_ttl;
+	__u16 cmsg_flags;
+
+	struct {
+		__u64 timeout;
+		__u32 ato;
+		__u32 lrcvtime;
+		__u16 last_seg_size;
+		__u16 rcv_mss;
+		__u8 pending;
+		__u8 quick;
+		__u8 pingpong;
+		__u8 blocked;
+	} icsk_ack __attribute__ ((aligned(8)));
+
+	/* FIXME: Skipped opt, tos, multicast, cork settings */
+
+	struct {
+		__u64 last_synq_overflow;
+
+		__u32 rcv_nxt;
+		__u32 copied_seq;
+		__u32 rcv_wup;
+		__u32 snd_nxt;
+		__u32 snd_una;
+		__u32 snd_sml;
+		__u32 rcv_tstamp;
+		__u32 lsndtime;
+
+		__u32 snd_wl1;
+		__u32 snd_wnd;
+		__u32 max_window;
+		__u32 mss_cache;
+		__u32 window_clamp;
+		__u32 rcv_ssthresh;
+		__u32 frto_highmark;
+
+		__u32 srtt;
+		__u32 mdev;
+		__u32 mdev_max;
+		__u32 rttvar;
+		__u32 rtt_seq;
+
+		__u32 packets_out;
+		__u32 retrans_out;
+
+		__u32 snd_up;
+		__u32 rcv_wnd;
+		__u32 write_seq;
+		__u32 pushed_seq;
+		__u32 lost_out;
+		__u32 sacked_out;
+		__u32 fackets_out;
+		__u32 tso_deferred;
+		__u32 bytes_acked;
+
+		__s32 lost_cnt_hint;
+		__u32 retransmit_high;
+
+		__u32 lost_retrans_low;
+
+		__u32 prior_ssthresh;
+		__u32 high_seq;
+
+		__u32 retrans_stamp;
+		__u32 undo_marker;
+		__s32 undo_retrans;
+		__u32 total_retrans;
+
+		__u32 urg_seq;
+		__u32 keepalive_time;
+		__u32 keepalive_intvl;
+
+		__u16 urg_data;
+		__u16 advmss;
+		__u8 frto_counter;
+		__u8 nonagle;
+
+		__u8 ecn_flags;
+		__u8 reordering;
+
+		__u8 keepalive_probes;
+	} tcp __attribute__ ((aligned(8)));
+
+	struct {
+		struct in6_addr saddr;
+		struct in6_addr rcv_saddr;
+		struct in6_addr daddr;
+	} inet6 __attribute__ ((aligned(8)));
+
 	__u32 laddr_len;
 	__u32 raddr_len;
 	struct sockaddr_in laddr;
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index fa57cdc..91c141b 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -65,6 +65,8 @@ struct ckpt_ctx {
 	struct list_head pgarr_list;	/* page array to dump VMA contents */
 	struct list_head pgarr_pool;	/* pool of empty page arrays chain */
 
+	struct list_head listen_sockets;/* listening parent sockets */
+
 	/* [multi-process checkpoint] */
 	struct task_struct **tasks_arr; /* array of all tasks [checkpoint] */
 	int nr_tasks;                   /* size of tasks array */
diff --git a/net/checkpoint.c b/net/checkpoint.c
index e7e8e75..3d6da68 100644
--- a/net/checkpoint.c
+++ b/net/checkpoint.c
@@ -90,6 +90,7 @@ static int sock_copy_buffers(struct sk_buff_head *from,
 
 static int __sock_write_buffers(struct ckpt_ctx *ctx,
 				struct sk_buff_head *queue,
+				uint16_t family,
 				int dst_objref)
 {
 	struct sk_buff *skb;
@@ -98,11 +99,7 @@ static int __sock_write_buffers(struct ckpt_ctx *ctx,
 		struct ckpt_hdr_socket_buffer *h;
 		int ret = 0;
 
-		/* FIXME: This could be a false positive for non-unix
-		 *        buffers, so add a type check here in the
-		 *        future
-		 */
-		if (UNIXCB(skb).fp) {
+		if ((family == AF_UNIX) && UNIXCB(skb).fp) {
 			ckpt_write_err(ctx, "TE", "af_unix: pass fd", -EBUSY);
 			return -EBUSY;
 		}
@@ -141,6 +138,7 @@ static int __sock_write_buffers(struct ckpt_ctx *ctx,
 
 static int sock_write_buffers(struct ckpt_ctx *ctx,
 			      struct sk_buff_head *queue,
+			      uint16_t family,
 			      int dst_objref)
 {
 	struct ckpt_hdr_socket_queue *h;
@@ -160,7 +158,7 @@ static int sock_write_buffers(struct ckpt_ctx *ctx,
 	h->skb_count = ret;
 	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h);
 	if (!ret)
-		ret = __sock_write_buffers(ctx, &tmpq, dst_objref);
+		ret = __sock_write_buffers(ctx, &tmpq, family, dst_objref);
 
  out:
 	ckpt_hdr_put(ctx, h);
@@ -182,12 +180,14 @@ int sock_deferred_write_buffers(void *data)
 		return dst_objref;
 	}
 
-	ret = sock_write_buffers(ctx, &dq->sk->sk_receive_queue, dst_objref);
+	ret = sock_write_buffers(ctx, &dq->sk->sk_receive_queue,
+				 dq->sk->sk_family, dst_objref);
 	ckpt_debug("write recv buffers: %i\n", ret);
 	if (ret < 0)
 		return ret;
 
-	ret = sock_write_buffers(ctx, &dq->sk->sk_write_queue, dst_objref);
+	ret = sock_write_buffers(ctx, &dq->sk->sk_write_queue,
+				 dq->sk->sk_family, dst_objref);
 	ckpt_debug("write send buffers: %i\n", ret);
 
 	return ret;
@@ -710,15 +710,6 @@ struct sock *do_sock_restore(struct ckpt_ctx *ctx)
 	if (ret < 0)
 		goto err;
 
-	if ((h->sock_common.family == AF_INET) &&
-	    (h->sock.state != TCP_LISTEN)) {
-		/* Temporary hack to enable restore of TCP_LISTEN sockets
-		 * while forcing anything else to a closed state
-		 */
-		sock->sk->sk_state = TCP_CLOSE;
-		sock->state = SS_UNCONNECTED;
-	}
-
 	ckpt_hdr_put(ctx, h);
 	return sock->sk;
  err:
diff --git a/net/ipv4/checkpoint.c b/net/ipv4/checkpoint.c
index 9cbbf5e..0edfa3e 100644
--- a/net/ipv4/checkpoint.c
+++ b/net/ipv4/checkpoint.c
@@ -17,6 +17,7 @@
 #include <linux/deferqueue.h>
 #include <net/tcp_states.h>
 #include <net/tcp.h>
+#include <net/ipv6.h>
 
 struct dq_sock {
 	struct ckpt_ctx *ctx;
@@ -28,6 +29,176 @@ struct dq_buffers {
 	struct sock *sk;
 };
 
+static int sock_is_parent(struct sock *sk, struct sock *parent)
+{
+	return inet_sk(sk)->sport == inet_sk(parent)->sport;
+}
+
+static struct sock *sock_get_parent(struct ckpt_ctx *ctx, struct sock *sk)
+{
+	return sock_list_find(&ctx->listen_sockets, sk, sock_is_parent);
+}
+
+static int sock_hash_parent(void *data)
+{
+	struct dq_sock *dq = (struct dq_sock *)data;
+	struct sock *parent;
+
+	printk("Doing post-restart hash\n");
+
+	dq->sk->sk_prot->hash(dq->sk);
+
+	parent = sock_get_parent(dq->ctx, dq->sk);
+	if (parent) {
+		inet_sk(dq->sk)->num = ntohs(inet_sk(dq->sk)->sport);
+		local_bh_disable();
+		__inet_inherit_port(parent, dq->sk);
+		local_bh_enable();
+	} else {
+		inet_sk(dq->sk)->num = 0;
+		inet_hash_connect(&tcp_death_row, dq->sk);
+		inet_sk(dq->sk)->num = ntohs(inet_sk(dq->sk)->sport);
+	}
+
+	return 0;
+}
+
+static int sock_defer_hash(struct ckpt_ctx *ctx, struct sock *sock)
+{
+	struct dq_sock dq;
+
+	dq.sk = sock;
+	dq.ctx = ctx;
+
+	return deferqueue_add(ctx->deferqueue, &dq, sizeof(dq),
+			      sock_hash_parent, NULL);
+}
+
+static int sock_inet_tcp_cptrst(struct ckpt_ctx *ctx,
+				struct tcp_sock *sk,
+				struct ckpt_hdr_socket_inet *hh,
+				int op)
+{
+	CKPT_COPY(op, hh->tcp.rcv_nxt, sk->rcv_nxt);
+	CKPT_COPY(op, hh->tcp.copied_seq, sk->copied_seq);
+	CKPT_COPY(op, hh->tcp.rcv_wup, sk->rcv_wup);
+	CKPT_COPY(op, hh->tcp.snd_nxt, sk->snd_nxt);
+	CKPT_COPY(op, hh->tcp.snd_una, sk->snd_una);
+	CKPT_COPY(op, hh->tcp.snd_sml, sk->snd_sml);
+	CKPT_COPY(op, hh->tcp.rcv_tstamp, sk->rcv_tstamp);
+	CKPT_COPY(op, hh->tcp.lsndtime, sk->lsndtime);
+
+	CKPT_COPY(op, hh->tcp.snd_wl1, sk->snd_wl1);
+	CKPT_COPY(op, hh->tcp.snd_wnd, sk->snd_wnd);
+	CKPT_COPY(op, hh->tcp.max_window, sk->max_window);
+	CKPT_COPY(op, hh->tcp.mss_cache, sk->mss_cache);
+	CKPT_COPY(op, hh->tcp.window_clamp, sk->window_clamp);
+	CKPT_COPY(op, hh->tcp.rcv_ssthresh, sk->rcv_ssthresh);
+	CKPT_COPY(op, hh->tcp.frto_highmark, sk->frto_highmark);
+	CKPT_COPY(op, hh->tcp.advmss, sk->advmss);
+	CKPT_COPY(op, hh->tcp.frto_counter, sk->frto_counter);
+	CKPT_COPY(op, hh->tcp.nonagle, sk->nonagle);
+
+	CKPT_COPY(op, hh->tcp.srtt, sk->srtt);
+	CKPT_COPY(op, hh->tcp.mdev, sk->mdev);
+	CKPT_COPY(op, hh->tcp.mdev_max, sk->mdev_max);
+	CKPT_COPY(op, hh->tcp.rttvar, sk->rttvar);
+	CKPT_COPY(op, hh->tcp.rtt_seq, sk->rtt_seq);
+
+	CKPT_COPY(op, hh->tcp.packets_out, sk->packets_out);
+	CKPT_COPY(op, hh->tcp.retrans_out, sk->retrans_out);
+
+	CKPT_COPY(op, hh->tcp.urg_data, sk->urg_data);
+	CKPT_COPY(op, hh->tcp.ecn_flags, sk->ecn_flags);
+	CKPT_COPY(op, hh->tcp.reordering, sk->reordering);
+	CKPT_COPY(op, hh->tcp.snd_up, sk->snd_up);
+
+	CKPT_COPY(op, hh->tcp.keepalive_probes, sk->keepalive_probes);
+
+	CKPT_COPY(op, hh->tcp.rcv_wnd, sk->rcv_wnd);
+	CKPT_COPY(op, hh->tcp.write_seq, sk->write_seq);
+	CKPT_COPY(op, hh->tcp.pushed_seq, sk->pushed_seq);
+	CKPT_COPY(op, hh->tcp.lost_out, sk->lost_out);
+	CKPT_COPY(op, hh->tcp.sacked_out, sk->sacked_out);
+	CKPT_COPY(op, hh->tcp.fackets_out, sk->fackets_out);
+	CKPT_COPY(op, hh->tcp.tso_deferred, sk->tso_deferred);
+	CKPT_COPY(op, hh->tcp.bytes_acked, sk->bytes_acked);
+
+	CKPT_COPY(op, hh->tcp.lost_cnt_hint, sk->lost_cnt_hint);
+	CKPT_COPY(op, hh->tcp.retransmit_high, sk->retransmit_high);
+
+	CKPT_COPY(op, hh->tcp.lost_retrans_low, sk->lost_retrans_low);
+
+	CKPT_COPY(op, hh->tcp.prior_ssthresh, sk->prior_ssthresh);
+	CKPT_COPY(op, hh->tcp.high_seq, sk->high_seq);
+
+	CKPT_COPY(op, hh->tcp.retrans_stamp, sk->retrans_stamp);
+	CKPT_COPY(op, hh->tcp.undo_marker, sk->undo_marker);
+	CKPT_COPY(op, hh->tcp.undo_retrans, sk->undo_retrans);
+	CKPT_COPY(op, hh->tcp.total_retrans, sk->total_retrans);
+
+	CKPT_COPY(op, hh->tcp.urg_seq, sk->urg_seq);
+	CKPT_COPY(op, hh->tcp.keepalive_time, sk->keepalive_time);
+	CKPT_COPY(op, hh->tcp.keepalive_intvl, sk->keepalive_intvl);
+
+	return 0;
+}
+
+static int sock_inet_cptrst(struct ckpt_ctx *ctx,
+			    struct sock *sock,
+			    struct ckpt_hdr_socket_inet *hh,
+			    int op)
+{
+	struct inet_sock *sk = inet_sk(sock);
+	struct inet_connection_sock *icsk = inet_csk(sock);
+	int ret;
+
+	CKPT_COPY(op, hh->daddr, sk->daddr);
+	CKPT_COPY(op, hh->rcv_saddr, sk->rcv_saddr);
+	CKPT_COPY(op, hh->dport, sk->dport);
+	CKPT_COPY(op, hh->num, sk->num);
+	CKPT_COPY(op, hh->saddr, sk->saddr);
+	CKPT_COPY(op, hh->sport, sk->sport);
+	CKPT_COPY(op, hh->uc_ttl, sk->uc_ttl);
+	CKPT_COPY(op, hh->cmsg_flags, sk->cmsg_flags);
+
+	CKPT_COPY(op, hh->icsk_ack.pending, icsk->icsk_ack.pending);
+	CKPT_COPY(op, hh->icsk_ack.quick, icsk->icsk_ack.quick);
+	CKPT_COPY(op, hh->icsk_ack.pingpong, icsk->icsk_ack.pingpong);
+	CKPT_COPY(op, hh->icsk_ack.blocked, icsk->icsk_ack.blocked);
+	CKPT_COPY(op, hh->icsk_ack.ato, icsk->icsk_ack.ato);
+	CKPT_COPY(op, hh->icsk_ack.timeout, icsk->icsk_ack.timeout);
+	CKPT_COPY(op, hh->icsk_ack.lrcvtime, icsk->icsk_ack.lrcvtime);
+	CKPT_COPY(op,
+		  hh->icsk_ack.last_seg_size, icsk->icsk_ack.last_seg_size);
+	CKPT_COPY(op, hh->icsk_ack.rcv_mss, icsk->icsk_ack.rcv_mss);
+
+	if (sock->sk_protocol == IPPROTO_TCP)
+		ret = sock_inet_tcp_cptrst(ctx, tcp_sk(sock), hh, op);
+	else if (sock->sk_protocol == IPPROTO_UDP)
+		ret = 0;
+	else {
+		ckpt_write_err(ctx, "T", "unknown socket protocol %d",
+			       sock->sk_protocol);
+		ret = -EINVAL;
+	}
+
+	if (sock->sk_family == AF_INET6) {
+		struct ipv6_pinfo *inet6 = inet6_sk(sock);
+		if (op == CKPT_CPT) {
+			ipv6_addr_copy(&hh->inet6.saddr, &inet6->saddr);
+			ipv6_addr_copy(&hh->inet6.rcv_saddr, &inet6->rcv_saddr);
+			ipv6_addr_copy(&hh->inet6.daddr, &inet6->daddr);
+		} else {
+			ipv6_addr_copy(&inet6->saddr, &hh->inet6.saddr);
+			ipv6_addr_copy(&inet6->rcv_saddr, &hh->inet6.rcv_saddr);
+			ipv6_addr_copy(&inet6->daddr, &hh->inet6.daddr);
+		}
+	}
+
+	return ret;
+}
+
 int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
 {
 	struct ckpt_hdr_socket_inet *in;
@@ -43,6 +214,10 @@ int inet_checkpoint(struct ckpt_ctx *ctx, struct socket *sock)
 	if (ret)
 		goto out;
 
+	ret = sock_inet_cptrst(ctx, sock->sk, in, CKPT_CPT);
+	if (ret < 0)
+		goto out;
+
 	ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) in);
  out:
 	ckpt_hdr_put(ctx, in);
@@ -209,8 +384,25 @@ int inet_restore(struct ckpt_ctx *ctx,
 			ckpt_debug("inet listen: %i\n", ret);
 			if (ret < 0)
 				goto out;
+
+			ret = sock_list_add(&ctx->listen_sockets, sock->sk);
+			if (ret < 0)
+				goto out;
 		}
 	} else {
+		ret = sock_inet_cptrst(ctx, sock->sk, in, CKPT_RST);
+		printk("sock_inet_cptrst: %i\n", ret);
+		if (ret)
+			goto out;
+
+		if ((h->sock.state == TCP_ESTABLISHED) &&
+		    (h->sock.protocol == IPPROTO_TCP)) {
+			/* Delay hashing this sock until the end so we can
+			 * hook it up with its parent (if appropriate)
+			 */
+			ret = sock_defer_hash(ctx, sock->sk);
+		}
+
 		if (!sock_flag(sock->sk, SOCK_DEAD))
 			ret = inet_defer_restore_buffers(ctx, sock->sk);
 	}
-- 
1.6.2.5

^ permalink raw reply related

* Re: Nested function in drivers/of/of_mdio.c
From: vb @ 2009-10-07 16:23 UTC (permalink / raw)
  To: Grant Likely; +Cc: netdev, linuxppc, Andy Fleming, David S. Miller
In-Reply-To: <fa686aa40910070911x769d7d41u908fbcf6b0980962@mail.gmail.com>

Guys, are there other instances of nested C functions in the codebase
or was this the first attempt?


On Wed, Oct 7, 2009 at 9:11 AM, Grant Likely <grant.likely@secretlab.ca> wrote:
> On Wed, Oct 7, 2009 at 9:15 AM, Jérôme Pouiller <jezz@sysmic.org> wrote:
>> Dear,
>>
>> I have a problem with commit 8bc487d150b939e69830c39322df4ee486efe381
>> in file drivers/of/of_mdio.c in function of_phy_find_device.
>>
>> As you see, this function define match() as a nested function. My
>> compiler (powerpc-e500-linux-gnu-gcc-3.4.1) raise an error during link
>> due to this nested definition:
>>  drivers/built-in.o(.text+0x5e2a4): In function `of_phy_find_device': /home/jezz/linux-next/drivers/of/of_mdio.c:107:
>> undefined reference to `__trampoline_setup'
>>
>> I am sure I could solve problem by rebuilding my toolchain.
>> Nevertheless, I think nested function definition is not perfectly
>> supported by all compilers. Also, I suggest to place function match()
>> outside of scope of of_phy_find_device as in following patch.
>
> I'm okay with that, but if you're moving code out of the file scope,
> then please rename the function to of_phy_match() to avoid global
> namespace conflicts.
>
> g.
>
>>
>> diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
>> index bacaa53..c7b2e26 100644
>> --- a/drivers/of/of_mdio.c
>> +++ b/drivers/of/of_mdio.c
>> @@ -97,6 +97,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
>>  }
>>  EXPORT_SYMBOL(of_mdiobus_register);
>>
>> +static int match(struct device *dev, void *phy_np)
>> +{
>> +       return dev_archdata_get_node(&dev->archdata) == phy_np;
>> +}
>>  /**
>>  * of_phy_find_device - Give a PHY node, find the phy_device
>>  * @phy_np: Pointer to the phy's device tree node
>> @@ -106,11 +110,6 @@ EXPORT_SYMBOL(of_mdiobus_register);
>>  struct phy_device *of_phy_find_device(struct device_node *phy_np)
>>  {
>>        struct device *d;
>> -       int match(struct device *dev, void *phy_np)
>> -       {
>> -               return dev_archdata_get_node(&dev->archdata) == phy_np;
>> -       }
>> -
>>        if (!phy_np)
>>                return NULL;
>>
>>
>> What do you think about it?
>>
>> Best regards,
>>
>> --
>> Jérôme Pouiller (jezz AT sysmic DOT org)
>>
>
>
>
> --
> Grant Likely, B.Sc., P.Eng.
> Secret Lab Technologies Ltd.
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>

^ permalink raw reply

* Re: skb_shinfo(skb)->nr_frags > 0 while skb_is_gso(skb) == 0?
From: Stephen Hemminger @ 2009-10-07 16:16 UTC (permalink / raw)
  To: John Wright; +Cc: netdev, Michael Chan, Bob Montgomery
In-Reply-To: <20091007090420.GA1977@supernova.localdomain>

On Wed, 7 Oct 2009 03:04:20 -0600
John Wright <john.wright@hp.com> wrote:

> Hi Stephen,
> 
> On Tue, Oct 06, 2009 at 06:21:31PM -0700, Stephen Hemminger wrote:
> > On Tue, 6 Oct 2009 19:03:15 -0600
> > John Wright <john.wright@hp.com> wrote:
> > > Bob Montgomery and I are debugging an OOPS in the bnx2 driver.  The
> > > driver OOPSes in bnx2_tx_int(), getting a NULL pointer dereference when
> > > checking if the skb is GSO.  (This is on 2.6.29, before is_gso was
> > > cached in the tx_buf (commit d62fda08), but bear with me - while kernels
> > > with that commit might not crash in the same place, I think we have
> > > discovered a bug that would manifest itself another way.)
> > > 
> > > So, first, a question for someone who knows more about sk_buff's than I:
> > > is it reasonable/legal for an skb for which skb_is_gso(skb) == 0 to also
> > > have skb_shinfo(skb)->nr_frags > 0?
> > 
> > Yes, if driver support Scatter/Gather and Checksum offload,
> > TCP (especially splice) will hand fragmented frames to device.
> 
> Is there a good way to generate lots of these types of packets?  Is
> disabling tso and gso with ethtool and sendmsg()ing big chunks of data
> enough?
> 

netperf should be able to do this easily.  Also try NFS as well.

-- 

^ permalink raw reply

* Re: [PATCH 3/4] ethoc: align received packet to make IP header at word boundary
From: Stephen Hemminger @ 2009-10-07 16:13 UTC (permalink / raw)
  To: Thomas Chou; +Cc: netdev
In-Reply-To: <1254735200-2718-3-git-send-email-thomas@wytron.com.tw>

On Mon,  5 Oct 2009 17:33:19 +0800
Thomas Chou <thomas@wytron.com.tw> wrote:

> The packet buffer is allocated at a 4-byte boundary, but the IP header
> length and version bits are located at byte 14. These bit fields are
> accessed as a 32-bit word and cause an exception on processors that do
> not support unaligned access.
> 
> The patch adds 2 bytes offset to make the bit fields word aligned.
> 
> Signed-off-by: Thomas Chou <thomas@wytron.com.tw>
> ---
>  drivers/net/ethoc.c |    4 ++++
>  1 files changed, 4 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
> index f92747f..0c6c7f4 100644
> --- a/drivers/net/ethoc.c
> +++ b/drivers/net/ethoc.c
> @@ -399,6 +399,10 @@ static int ethoc_rx(struct net_device *dev, int limit)
>  		if (ethoc_update_rx_stats(priv, &bd) == 0) {
>  			int size = bd.stat >> 16;
>  			struct sk_buff *skb = netdev_alloc_skb(dev, size);
> +
> +			size -= 4; /* strip the CRC */
> +			skb_reserve(skb, 2); /* align TCP/IP header */

Please use NET_IP_ALIGN rather than hard coding 2 so that the value
can be changed on a per-cpu architecture basis if desired.

-- 

^ permalink raw reply

* Re: Nested function in drivers/of/of_mdio.c
From: Grant Likely @ 2009-10-07 16:11 UTC (permalink / raw)
  To: Jérôme Pouiller; +Cc: netdev, linuxppc, Andy Fleming, David S. Miller
In-Reply-To: <200910071715.57249.jezz@sysmic.org>

On Wed, Oct 7, 2009 at 9:15 AM, Jérôme Pouiller <jezz@sysmic.org> wrote:
> Dear,
>
> I have a problem with commit 8bc487d150b939e69830c39322df4ee486efe381
> in file drivers/of/of_mdio.c in function of_phy_find_device.
>
> As you see, this function define match() as a nested function. My
> compiler (powerpc-e500-linux-gnu-gcc-3.4.1) raise an error during link
> due to this nested definition:
>  drivers/built-in.o(.text+0x5e2a4): In function `of_phy_find_device': /home/jezz/linux-next/drivers/of/of_mdio.c:107:
> undefined reference to `__trampoline_setup'
>
> I am sure I could solve problem by rebuilding my toolchain.
> Nevertheless, I think nested function definition is not perfectly
> supported by all compilers. Also, I suggest to place function match()
> outside of scope of of_phy_find_device as in following patch.

I'm okay with that, but if you're moving code out to file scope,
then please rename the function to of_phy_match() to avoid global
namespace conflicts.

g.

>
> diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
> index bacaa53..c7b2e26 100644
> --- a/drivers/of/of_mdio.c
> +++ b/drivers/of/of_mdio.c
> @@ -97,6 +97,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
>  }
>  EXPORT_SYMBOL(of_mdiobus_register);
>
> +static int match(struct device *dev, void *phy_np)
> +{
> +       return dev_archdata_get_node(&dev->archdata) == phy_np;
> +}
>  /**
>  * of_phy_find_device - Give a PHY node, find the phy_device
>  * @phy_np: Pointer to the phy's device tree node
> @@ -106,11 +110,6 @@ EXPORT_SYMBOL(of_mdiobus_register);
>  struct phy_device *of_phy_find_device(struct device_node *phy_np)
>  {
>        struct device *d;
> -       int match(struct device *dev, void *phy_np)
> -       {
> -               return dev_archdata_get_node(&dev->archdata) == phy_np;
> -       }
> -
>        if (!phy_np)
>                return NULL;
>
>
> What do you think about it?
>
> Best regards,
>
> --
> Jérôme Pouiller (jezz AT sysmic DOT org)
>



-- 
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

^ permalink raw reply

* Re: [RFC net-next-2.6] net: speedup sk_wake_async()
From: Rick Jones @ 2009-10-07 15:53 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, Linux Netdev List
In-Reply-To: <4ACC0CDE.1020907@gmail.com>

Eric Dumazet wrote:
> Rick Jones a écrit :
> 
>>How about 64-bit?
> 
> 
> No data yet, but larger footprint unfortunately :-(

True - nothing comes for free.  I'm not "in touch" with the embedded side, where 
I presume 32 bit will be if not already is now the primary bitness, but over in 
the server side of the world, at least the part I see, 64 bit is de rigueur, 
hence my curiosity.

>>Got any netperf service demand changes?
> 
> 
> I was going to setup a bench lab, with a typical RTP mediaserver, with say
> 4000 UDP sockets, 2000 sockets exchanging 50 G.711 Alaw/ulaw
> messages per second tx and rx. (Total : 100.000 packets per second each way)
> 
> Is netperf able to simulate this workload ?

Touche :)

It would be, well, cumbersome with netperf2, but possible.  One would 
./configure --enable-intervals and then run some variation of:

netperf -t UDP_STREAM -l <time> -H <remote> -b <burst size> -w <burst interval> 
-- -m <message size>

a large number of times.  Given the lack of test synchronization in netperf2 I 
probably would not try to aggregate the results of N thousand simultaneous 
netperf2 instances and would rely instead on external (relative to netperf) 
packet rate reports.

Still, if the cache miss removed is a non-trivial fraction of the overhead I 
would think that something like:

netperf -t UDP_RR -l <time> -I 99,0.5 -i 30,3 -c -C -H remote -- -r 4

run with and without the change would show a difference in the service demand, 
and if you hit the confidence intervals you would be able, per the above be 
confident in the "reality" of a CPU utilization difference of +/- 0.25% . 
Getting that test to that level of confidence probably means pinning the NIC 
interrupts to a specific CPU and then binding netperf/netserver on either side 
using the global -T option.

Barring getting suitable confidence intervals, somewhere in the middle of all 
that would be ./configure --enable-burst and then, still with pinning and 
binding for "stability" something like:

netperf -t UDP_RR -l <time> -I 99,0.5 -i 30,3 -H <remote> -- -r 4 -b <burst>

to put multiple transactions in flight across that flow - choosing <burst> to 
take the CPU on which either netperf, netserver, or the interrupts are running 
to 100% saturation.  Here I left-off the CPU utilization since that is often the 
thing that cannot hit the confidence intervals, and leave the aggregate 
throughput as the proxy for efficiency change - which is why <burst> needs to 
take something to saturation in each case.

happy benchmarking,

rick jones

^ permalink raw reply

* [PATCH] IPv6: use ipv6_addr_set_v4mapped()
From: Brian Haley @ 2009-10-07 15:47 UTC (permalink / raw)
  To: david Miller; +Cc: netdev@vger.kernel.org

Might as well use the ipv6_addr_set_v4mapped() inline we created last
year.

Signed-off-by: Brian Haley <brian.haley@hp.com>
---

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index e48ca5d..a2afb55 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -510,11 +510,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->daddr);
+		ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
 
-		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->saddr);
+		ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
@@ -971,10 +969,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			sk->sk_backlog_rcv = dccp_v6_do_rcv;
 			goto failure;
 		} else {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->saddr);
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
 		}
 
 		return err;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e2bdc6d..6ebf0ff 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -98,17 +98,14 @@ ipv4_connected:
 		if (err)
 			goto out;
 
-		ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
+		ipv6_addr_set_v4mapped(inet->daddr, &np->daddr);
 
-		if (ipv6_addr_any(&np->saddr)) {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
-				      inet->saddr);
-		}
+		if (ipv6_addr_any(&np->saddr))
+			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+
+		if (ipv6_addr_any(&np->rcv_saddr))
+			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
 
-		if (ipv6_addr_any(&np->rcv_saddr)) {
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
-				      inet->rcv_saddr);
-		}
 		goto out;
 	}
 
@@ -329,9 +326,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
-			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      *(__be32 *)(nh + serr->addr_offset));
+			ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
+					       &sin->sin6_addr);
 		}
 	}
 
@@ -351,8 +347,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		} else {
 			struct inet_sock *inet = inet_sk(sk);
 
-			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff), ip_hdr(skb)->saddr);
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+					       &sin->sin6_addr);
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 21d100b..93dad9a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -226,10 +226,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 #endif
 			goto failure;
 		} else {
-			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->saddr);
-			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
-				      inet->rcv_saddr);
+			ipv6_addr_set_v4mapped(inet->saddr, &np->saddr);
+			ipv6_addr_set_v4mapped(inet->rcv_saddr, &np->rcv_saddr);
 		}
 
 		return err;
@@ -1290,11 +1288,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->daddr);
+		ipv6_addr_set_v4mapped(newinet->daddr, &newnp->daddr);
 
-		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
-			      newinet->saddr);
+		ipv6_addr_set_v4mapped(newinet->saddr, &newnp->saddr);
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3a60f12..0dae3f7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -265,8 +265,8 @@ try_again:
 		sin6->sin6_scope_id = 0;
 
 		if (is_udp4)
-			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
-				      htonl(0xffff), ip_hdr(skb)->saddr);
+			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
+					       &sin6->sin6_addr);
 		else {
 			ipv6_addr_copy(&sin6->sin6_addr,
 				       &ipv6_hdr(skb)->saddr);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 117f68a..f4c7ff3 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -686,8 +686,7 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
 	case AF_INET:
 		sin = svc_addr_in(rqstp);
 		sin6 = &sin6_storage;
-		ipv6_addr_set(&sin6->sin6_addr, 0, 0,
-				htonl(0x0000FFFF), sin->sin_addr.s_addr);
+		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &sin6->sin6_addr);
 		break;
 	case AF_INET6:
 		sin6 = svc_addr_in6(rqstp);

^ permalink raw reply related

* [PATCH] IPv6: use ipv6_addr_copy() in ip6_route_redirect()
From: Brian Haley @ 2009-10-07 15:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org

Change ip6_route_redirect() to use ipv6_addr_copy().

Signed-off-by: Brian Haley <brian.haley@hp.com>
---

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d6fe764..df9432a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1471,9 +1471,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
 				},
 			},
 		},
-		.gateway = *gateway,
 	};
 
+	ipv6_addr_copy(&rdfl.gateway, gateway);
+
 	if (rt6_need_strict(dest))
 		flags |= RT6_LOOKUP_F_IFACE;
 

^ permalink raw reply related

* Re: [Bug #14301] WARNING: at net/ipv4/af_inet.c:154
From: Eric Dumazet @ 2009-10-07 15:41 UTC (permalink / raw)
  To: Herbert Xu, David S. Miller
  Cc: Rafael J. Wysocki, Ralf Hildebrandt, Linux Kernel Mailing List,
	Kernel Testers List, Linux Netdev List, Wei Yongjun,
	Takahiro Yasui, Hideo Aoki
In-Reply-To: <4AC78F7C.40908-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Eric Dumazet a écrit :
> Eric Dumazet a écrit :
>> Eric Dumazet a écrit :
>>> Rafael J. Wysocki a écrit :
>>>> This message has been generated automatically as a part of a report
>>>> of regressions introduced between 2.6.30 and 2.6.31.
>>>>
>>>> The following bug entry is on the current list of known regressions
>>>> introduced between 2.6.30 and 2.6.31.  Please verify if it still should
>>>> be listed and let me know (either way).
>>>>
>>>>
>>>> Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14301
>>>> Subject		: WARNING: at net/ipv4/af_inet.c:154
>>>> Submitter	: Ralf Hildebrandt <Ralf.Hildebrandt-jq1tPX9l7E6ELgA04lAiVw@public.gmane.org>
>>>> Date		: 2009-09-30 12:24 (2 days old)
>>>> References	: http://marc.info/?l=linux-kernel&m=125431350218137&w=4
>>>>
> 
> Investigation still needed...
> 

OK, my last (buggy ???) feeling is about commit 95766fff6b9a78d1

[UDP]: Add memory accounting.

(It's a two-year-old patch, oh well...)

Problem is the udp_poll() :

We check that the first frame to be dequeued from sk_receive_queue has a good checksum.

If it doesn't, we drop the frame ( calling kfree_skb(skb); )

Problem is now we perform memory accounting on UDP, this kfree_skb()
should be done with socket locked, but we are allowed to
call lock_sock() from this udp_poll() context

unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
        unsigned int mask = datagram_poll(file, sock, wait);
        struct sock *sk = sock->sk;
        int     is_lite = IS_UDPLITE(sk);

        /* Check for false positives due to checksum errors */
        if ((mask & POLLRDNORM) &&
            !(file->f_flags & O_NONBLOCK) &&
            !(sk->sk_shutdown & RCV_SHUTDOWN)) {
                struct sk_buff_head *rcvq = &sk->sk_receive_queue;
                struct sk_buff *skb;

                spin_lock_bh(&rcvq->lock);
                while ((skb = skb_peek(rcvq)) != NULL &&
                       udp_lib_checksum_complete(skb)) {
                        UDP_INC_STATS_BH(sock_net(sk),
                                        UDP_MIB_INERRORS, is_lite);
                        __skb_unlink(skb, rcvq);
<<HERE>>                kfree_skb(skb);
                }
                spin_unlock_bh(&rcvq->lock);



David, Herbert, any idea how to solve this problem ?

1) Allow false positives

Or

2) Maybe we should finally convert sk_forward_alloc to an atomic_t after all...
   This would make things easier, and speedup UDP (no more need to lock_sock())

Or 

3) ???

^ permalink raw reply

* Nested function in drivers/of/of_mdio.c
From: Jérôme Pouiller @ 2009-10-07 15:15 UTC (permalink / raw)
  To: Grant Likely; +Cc: netdev, linuxppc, Andy Fleming, David S. Miller

Dear,

I have a problem with commit 8bc487d150b939e69830c39322df4ee486efe381 
in file drivers/of/of_mdio.c in function of_phy_find_device.

As you see, this function define match() as a nested function. My 
compiler (powerpc-e500-linux-gnu-gcc-3.4.1) raise an error during link 
due to this nested definition:
  drivers/built-in.o(.text+0x5e2a4): In function `of_phy_find_device': /home/jezz/linux-next/drivers/of/of_mdio.c:107: 
undefined reference to `__trampoline_setup'

I am sure I could solve problem by rebuilding my toolchain. 
Nevertheless, I think nested function definition is not perfectly 
supported by all compilers. Also, I suggest to place function match() 
outside of scope of of_phy_find_device as in following patch.

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index bacaa53..c7b2e26 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -97,6 +97,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 }
 EXPORT_SYMBOL(of_mdiobus_register);

+static int match(struct device *dev, void *phy_np)
+{
+       return dev_archdata_get_node(&dev->archdata) == phy_np;
+}
 /**
  * of_phy_find_device - Give a PHY node, find the phy_device
  * @phy_np: Pointer to the phy's device tree node
@@ -106,11 +110,6 @@ EXPORT_SYMBOL(of_mdiobus_register);
 struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
        struct device *d;
-       int match(struct device *dev, void *phy_np)
-       {
-               return dev_archdata_get_node(&dev->archdata) == phy_np;
-       }
-
        if (!phy_np)
                return NULL;


What do you think about it?

Best regards,

-- 
Jérôme Pouiller (jezz AT sysmic DOT org)

^ permalink raw reply related

* [PATCH] IPv6: Fix 6RD build error
From: Brian Haley @ 2009-10-07 15:01 UTC (permalink / raw)
  To: David Miller, YOSHIFUJI Hideaki; +Cc: netdev@vger.kernel.org

Fix build error introduced in commit fa857afcf - ipv6 sit: 6rd
(IPv6 Rapid Deployment) Support.  Struct in6_addr is the issue.
I'm only seeing this on x86_64 systems, not on 32-bit with same
IPv6 config options, so it could be there's a missing forward
declaration somewhere, but including the correct header file
fixes the problem too.

  CC [M]  net/ipv6/ip6_tunnel.o
In file included from net/ipv6/ip6_tunnel.c:31:
include/linux/if_tunnel.h:59: error: field ‘prefix’ has incomplete type
make[2]: *** [net/ipv6/ip6_tunnel.o] Error 1
make[1]: *** [net/ipv6] Error 2

Signed-off-by: Brian Haley <brian.haley@hp.com>
---

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index c53c8e0..8d76cb4 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -5,6 +5,7 @@
 
 #ifdef __KERNEL__
 #include <linux/ip.h>
+#include <linux/in6.h>
 #endif
 
 #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)

^ permalink raw reply related

* Re: Nested function in drivers/of/of_mdio.c
From: Grant Likely @ 2009-10-07 14:55 UTC (permalink / raw)
  To: Jérôme Pouiller; +Cc: David S. Miller, Andy Fleming, netdev
In-Reply-To: <200910071629.15804.jezz@sysmic.org>

Please resend your question with both linuxppc-dev@lists.ozlabs.org
and netdev@vger.kernel.org added to the cc: list.

g.

On Wed, Oct 7, 2009 at 8:29 AM, Jérôme Pouiller <jezz@sysmic.org> wrote:
> Dear,
>
> I have a problem with commit 8bc487d150b939e69830c39322df4ee486efe381
> in file drivers/of/of_mdio.c in function of_phy_find_device.
>
> As you see, this function define match() as a nested function. My
> compiler (powerpc-e500-linux-gnu-gcc-3.4.1) raise an error during link
> due to this nested definition:
>  drivers/built-in.o(.text+0x5e2a4): In function `of_phy_find_device': /home/jezz/linux-next/drivers/of/of_mdio.c:107:
> undefined reference to `__trampoline_setup'
>
> I am sure I could solve problem by rebuilding my toolchain.
> Nevertheless, I think nested function definition is not perfectly
> supported by all compilers. Also, I suggest to place function match()
> outside of scope of of_phy_find_device as in following patch.
>
> diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
> index bacaa53..c7b2e26 100644
> --- a/drivers/of/of_mdio.c
> +++ b/drivers/of/of_mdio.c
> @@ -97,6 +97,10 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
>  }
>  EXPORT_SYMBOL(of_mdiobus_register);
>
> +static int match(struct device *dev, void *phy_np)
> +{
> +       return dev_archdata_get_node(&dev->archdata) == phy_np;
> +}
>  /**
>  * of_phy_find_device - Give a PHY node, find the phy_device
>  * @phy_np: Pointer to the phy's device tree node
> @@ -106,11 +110,6 @@ EXPORT_SYMBOL(of_mdiobus_register);
>  struct phy_device *of_phy_find_device(struct device_node *phy_np)
>  {
>        struct device *d;
> -       int match(struct device *dev, void *phy_np)
> -       {
> -               return dev_archdata_get_node(&dev->archdata) == phy_np;
> -       }
> -
>        if (!phy_np)
>                return NULL;
>
>
> What do you think about it?
>
> Best regards,
>
> --
> Jérôme Pouiller (jezz AT sysmic DOT org)
>



-- 
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

^ permalink raw reply

* Re: IP header identification field is zero, why?
From: Eric Dumazet @ 2009-10-07 14:55 UTC (permalink / raw)
  To: thomas yang; +Cc: netdev
In-Reply-To: <f4f837ab0910070704k7a193be9i285573b423b96ba2@mail.gmail.com>

thomas yang a écrit :
> This is captured on my PC (Fedora 11, Linux)
> 
> [root@localhost ~]# tcpdump -i eth1 icmp -n -x
> tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
> listening on eth1, link-type EN10MB (Ethernet), capture size 96 bytes
> 17:02:39.025882 IP 192.168.1.64 > 192.168.1.1: ICMP echo request, id
> 25096, seq 1, length 64
> 	0x0000:  4500 0054 0000 4000 4001 b717 c0a8 0140
> ......
> 17:02:39.027866 IP 192.168.1.64 > 192.168.1.1: ICMP echo request, id
> 25096, seq 2, length 64
> 	0x0000:  4500 0054 0000 4000
> 
> The  IP header 'identification' field is zero, why?
> 
> I wrote a simple UDP server and UDP client programs, and captured some
> packets, the IP identification is also zero.
> 
> Should the host increase this field for each packet it sends?
> 
> 
> I captured some TCP packets, all of the IP identification are
> different, not zero.
> 

Very good questions, this bothered me too.


ping sends "echo request" datagrams with DF set (Don't Fragment),
and ID=0, this is a user program building a packet from scratch.

When linux replies with an "echo reply", DF is not set and an ID is included
in the answer, increasing at each packet.

About your UDP tests, DF is automatically set, and
I believe ID on DF frames is generated only for connected sockets.

cf include/net/ip.h

static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, struct sock *sk)
{
        if (iph->frag_off & htons(IP_DF)) {
                /* This is only to work around buggy Windows95/2000
                 * VJ compression implementations.  If the ID field
                 * does not change, they drop every other packet in
                 * a TCP stream using header compression.
                 */
                iph->id = (sk && inet_sk(sk)->daddr) ?
                                        htons(inet_sk(sk)->id++) : 0;
        } else
                __ip_select_ident(iph, dst, 0);
}


^ permalink raw reply

* Re: [PATCH][RESEND 3] IPv6: 6rd tunnel mode
From: Steffen Klassert @ 2009-10-07 14:18 UTC (permalink / raw)
  To: David Miller; +Cc: yoshfuji, acassen, netdev
In-Reply-To: <20091007.012436.74941374.davem@davemloft.net>

On Wed, Oct 07, 2009 at 01:24:36AM -0700, David Miller wrote:
> From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
> Date: Wed, 23 Sep 2009 18:43:14 +0900
> 
> > Subject: [PATCH] ipv6 sit: 6rd (IPv6 Rapid Deployment) Support.
> > 

I'm getting the following compile error after applying this patch:

  CC      net/ipv6/ip6_tunnel.o
 In file included from
/home/klassert/git/linux-sinafe-2.6/net/ipv6/ip6_tunnel.c:30:
 /home/klassert/git/linux-sinafe-2.6/include/linux/if_tunnel.h:59:
error: field 'prefix' has incomplete type
 make[3]: *** [net/ipv6/ip6_tunnel.o] Error 1

We are probably missing include/linux/in6.h in include/linux/if_tunnel.h.
The patch below adds the missing include.

---  

Subject: [PATCH] if_tunnel.h: Add missing include

This patch fixes the following compile error:

  CC      net/ipv6/ip6_tunnel.o
 In file included from /home/klassert/git/linux-sinafe-2.6/net/ipv6/ip6_tunnel.c:30:
 /home/klassert/git/linux-sinafe-2.6/include/linux/if_tunnel.h:59: error: field 'prefix' has incomplete type
 make[3]: *** [net/ipv6/ip6_tunnel.o] Error 1

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/linux/if_tunnel.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index c53c8e0..8d76cb4 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -5,6 +5,7 @@
 
 #ifdef __KERNEL__
 #include <linux/ip.h>
+#include <linux/in6.h>
 #endif
 
 #define SIOCGETTUNNEL   (SIOCDEVPRIVATE + 0)
-- 
1.5.4.2


^ permalink raw reply related

* [BUG] znet.c sleeping function called from invalid context
From: Alexander Strakh @ 2009-10-07 18:47 UTC (permalink / raw)
  To: David S. Miller, netdev, Linux Kernel Mailing List

	KERNEL_VERSION: 2.6.31
	DESCRIBE:
Driver drivers/net/znet.c might sleep in atomic context, because it calls 
free_dma under claim_dma_lock:

.drivers/net/znet.c:
 168 static int znet_request_resources (struct net_device *dev)
...
 189        flags = claim_dma_lock();
 190        free_dma (znet->tx_dma);
 191        release_dma_lock (flags);
...

Path to might_sleep macro from znet_request_resources:
1. znet_request_resources calls free_dma at 
arch/blackfin/kernel/bfin_dma_5xx.c:181
2. free_dma calls arch/blackfin/kernel/bfin_dma_5xx.c:195

Found by: Linux Driver Verification


^ permalink raw reply

* IP header identification field is zero, why?
From: thomas yang @ 2009-10-07 14:04 UTC (permalink / raw)
  To: netdev

This is captured on my PC (Fedora 11, Linux)

[root@localhost ~]# tcpdump -i eth1 icmp -n -x
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth1, link-type EN10MB (Ethernet), capture size 96 bytes
17:02:39.025882 IP 192.168.1.64 > 192.168.1.1: ICMP echo request, id
25096, seq 1, length 64
	0x0000:  4500 0054 0000 4000 4001 b717 c0a8 0140
......
17:02:39.027866 IP 192.168.1.64 > 192.168.1.1: ICMP echo request, id
25096, seq 2, length 64
	0x0000:  4500 0054 0000 4000

The  IP header 'identification' field is zero, why?

I wrote a simple UDP server and UDP client programs, and captured some
packets, the IP identification is also zero.

Should the host increase this field for each packet it sends?


I captured some TCP packets, all of the IP identification are
different, not zero.


--
thomas

^ permalink raw reply

* Re: [PATCH 0/4][RFC]: coding convention for CCID-struct prefixes
From: Arnaldo Carvalho de Melo @ 2009-10-07 13:31 UTC (permalink / raw)
  To: Gerrit Renker; +Cc: davem, dccp, netdev
In-Reply-To: <1254739993-5051-1-git-send-email-gerrit@erg.abdn.ac.uk>

Em Mon, Oct 05, 2009 at 12:53:09PM +0200, Gerrit Renker escreveu:
> Arnaldo, Dave,
> 
> this codifies new naming scheme discussed at netconf. Can you please have a
> look and indicate whether you are ok with the set?
> 
> They are divided into 4 for readability, but can be combined into
> one large patch if agreed.
> 
> It is all just regexp substitutions:
> 
> Patch #1: Introduces the naming scheme on CCID-2:
> 	    s#hc\(tx\|rx\)->ccid2hc\1_#hc\1->\1_#g;
>             s#ccid2hc\(tx\|rx\)_#\1_#g;
> 
> Patch #2: Extends the changes of patch#1 to CCID-3.
> 
> Patch #3: Removes the second, repetitive redundancy in this scheme:
> 	    s#hc\(tx\|rx\)->\1_#hc->\1_#g;
> 
> Patch #4: Extends patch#3 from CCID-2 to CCID-3.

For the 4 patches:

Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Best Regards,

- Arnaldo

^ permalink raw reply

* [PATCH RESEND] include/netdevice.h: fix nanodoc mismatch
From: Wolfram Sang @ 2009-10-07 13:05 UTC (permalink / raw)
  To: netdev; +Cc: Wolfram Sang, David S. Miller
In-Reply-To: <20091007.031047.252761731.davem@davemloft.net>

nanodoc was missing an ndo_-prefix.

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Cc: "David S. Miller" <davem@davemloft.net>
---
 include/linux/netdevice.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 94958c1..812a5f3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -557,7 +557,7 @@ struct netdev_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct net_device_stats* (*get_stats)(struct net_device *dev);
+ * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. If not defined, the counters in dev->stats will
  *	be used.
-- 
1.6.3.3


^ permalink raw reply related

* Re: [PATCH] Add sk_mark route lookup support for IPv4 listening sockets, and for IPv4 multicast forwarding
From: Atis Elsts @ 2009-10-07 12:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, panther, eric.dumazet, brian.haley, zenczykowski
In-Reply-To: <20091007.031957.197632672.davem@davemloft.net>

On Wednesday 07 October 2009 13:19:57 David Miller wrote:
> From: Atis Elsts <atis@mikrotik.com>
> Date: Mon, 5 Oct 2009 16:46:34 +0300
> 
> > @@ -1238,6 +1238,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
> >  
> >  	if (vif->flags&VIFF_TUNNEL) {
> >  		struct flowi fl = { .oif = vif->link,
> > +				    .mark = skb->mark,
> >  				    .nl_u = { .ip4_u =
> >  					      { .daddr = vif->remote,
> >  						.saddr = vif->local,
> 
> I'm not so sure if this is right.
> 
> I understand what you're trying to do, inherit the socket's
> mark when it goes over a multicast tunnel.
> 
> But I'm not so sure that's what we want to do, semantically.
> 
> Could you split out these skb->mark cases into a separate
> patch?  The parts that only use sk->mark are fine and I
> would like to apply a patch from you which just does that
> while we discuss the skb->mark case.
> 

Here is the sk_mark part.
     
As for the ipmr.c code, I agree with your comment. Using mark from skb probably is wrong in case of tunnel interface (i.e. in the "if (vif->flags&VIFF_TUNNEL)" part of the patch), my mistake. I still think that the "else" part is correct, though, because using mark from skb there mirrors behaviour for unicast forwarding routing lookup in ip_route_input_slow(). The same applies to IPv6 code in ip6mr_forward2().


Add support for route lookup using sk_mark on IPv4 listening sockets.
Signed-off-by: Atis Elsts <atis@mikrotik.com>
---
 net/ipv4/inet_connection_sock.c |    1 +
 net/ipv4/syncookies.c           |    3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4351ca2..9139e8f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -358,6 +358,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct ip_options *opt = inet_rsk(req)->opt;
 	struct flowi fl = { .oif = sk->sk_bound_dev_if,
+			    .mark = sk->sk_mark,
 			    .nl_u = { .ip4_u =
 				      { .daddr = ((opt && opt->srr) ?
 						  opt->faddr :
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..5ec678a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -333,7 +333,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	 * no easy way to do this.
 	 */
 	{
-		struct flowi fl = { .nl_u = { .ip4_u =
+		struct flowi fl = { .mark = sk->sk_mark,
+				    .nl_u = { .ip4_u =
 					      { .daddr = ((opt && opt->srr) ?
 							  opt->faddr :
 							  ireq->rmt_addr),

^ permalink raw reply related

* [net-next-2.6 PATCH V3] can: add TI CAN (HECC) driver
From: Anant Gole @ 2009-10-07 12:59 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	linux-arm-kernel-xIg/pKzrS19vn6HldHNs0ANdhmdF6hFW

TI HECC (High End CAN Controller) module is found on many TI devices. It
has 32 hardware mailboxes with full implementation of CAN protocol 2.0B
with bus speeds up to 1Mbps. Specifications of the module are available
on TI web <http://www.ti.com>

Signed-off-by: Anant Gole <anantgole-l0cyMroinI0@public.gmane.org>
---
 drivers/net/can/Kconfig              |    7 +
 drivers/net/can/Makefile             |    1 +
 drivers/net/can/ti_hecc.c            | 1006 ++++++++++++++++++++++++++++++++++
 include/linux/can/platform/ti_hecc.h |   40 ++
 4 files changed, 1054 insertions(+), 0 deletions(-)
 create mode 100644 drivers/net/can/ti_hecc.c
 create mode 100644 include/linux/can/platform/ti_hecc.h

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index df32c10..26d77cc 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -95,6 +95,13 @@ config CAN_AT91
 	---help---
 	  This is a driver for the SoC CAN controller in Atmel's AT91SAM9263.
 
+config CAN_TI_HECC
+	depends on CAN_DEV && ARCH_OMAP3
+	tristate "TI High End CAN Controller"
+	---help---
+	  Driver for TI HECC (High End CAN Controller) module found on many
+	  TI devices. The device specifications are available from www.ti.com
+
 config CAN_DEBUG_DEVICES
 	bool "CAN devices debugging messages"
 	depends on CAN
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 0dea627..31f4ab5 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -11,5 +11,6 @@ obj-y				+= usb/
 
 obj-$(CONFIG_CAN_SJA1000)	+= sja1000/
 obj-$(CONFIG_CAN_AT91)		+= at91_can.o
+obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
new file mode 100644
index 0000000..814e6c5
--- /dev/null
+++ b/drivers/net/can/ti_hecc.c
@@ -0,0 +1,1006 @@
+/*
+ * TI HECC (CAN) device driver
+ *
+ * This driver supports TI's HECC (High End CAN Controller module) and the
+ * specs for the same is available at <http://www.ti.com>
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed as is WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/*
+ * Your platform definitions should specify module ram offsets and interrupt
+ * number to use as follows:
+ *
+ * static struct ti_hecc_platform_data am3517_evm_hecc_pdata = {
+ *         .scc_hecc_offset        = 0,
+ *         .scc_ram_offset         = 0x3000,
+ *         .hecc_ram_offset        = 0x3000,
+ *         .mbx_offset             = 0x2000,
+ *         .int_line               = 0,
+ *         .version                = 1,
+ * };
+ *
+ * See include/linux/can/platform/ti_hecc.h for a description of these fields
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/platform/ti_hecc.h>
+
+#define DRV_NAME "ti_hecc"
+#define HECC_MODULE_VERSION     "0.7"
+MODULE_VERSION(HECC_MODULE_VERSION);
+#define DRV_DESC "TI High End CAN Controller Driver " HECC_MODULE_VERSION
+
+/* TX / RX Mailbox Configuration */
+#define HECC_MAX_MAILBOXES	32	/* hardware mailboxes - do not change */
+#define MAX_TX_PRIO		0x3F	/* hardware value - do not change */
+
+/*
+ * Important Note: TX mailbox configuration
+ * TX mailboxes should be restricted to the number of SKB buffers to avoid
+ * maintaining SKB buffers separately. TX mailboxes should be a power of 2
+ * for the mailbox logic to work.  Top mailbox numbers are reserved for RX
+ * and lower mailboxes for TX.
+ *
+ * HECC_MAX_TX_MBOX	HECC_MB_TX_SHIFT
+ * 4 (default)		2
+ * 8			3
+ * 16			4
+ */
+#define HECC_MB_TX_SHIFT	2 /* as per table above */
+#define HECC_MAX_TX_MBOX	BIT(HECC_MB_TX_SHIFT)
+
+#if (HECC_MAX_TX_MBOX > CAN_ECHO_SKB_MAX)
+#error "HECC: MAX TX mailboxes should be equal or less than CAN_ECHO_SKB_MAX"
+#endif
+
+#define HECC_TX_PRIO_SHIFT	(HECC_MB_TX_SHIFT)
+#define HECC_TX_PRIO_MASK	(MAX_TX_PRIO << HECC_MB_TX_SHIFT)
+#define HECC_TX_MB_MASK		(HECC_MAX_TX_MBOX - 1)
+#define HECC_TX_MASK		((HECC_MAX_TX_MBOX - 1) | HECC_TX_PRIO_MASK)
+#define HECC_TX_MBOX_MASK	(~(BIT(HECC_MAX_TX_MBOX) - 1))
+#define HECC_DEF_NAPI_WEIGHT	HECC_MAX_RX_MBOX
+
+/*
+ * Important Note: RX mailbox configuration
+ * RX mailboxes are further logically split into two - main and buffer
+ * mailboxes. The goal is to get all packets into main mailboxes as
+ * driven by mailbox number and receive priority (higher to lower) and
+ * buffer mailboxes are used to receive pkts while main mailboxes are being
+ * processed. This ensures in-order packet reception.
+ *
+ * Here are the recommended values for buffer mailbox. Note that RX mailboxes
+ * start after TX mailboxes:
+ *
+ * HECC_MAX_RX_MBOX		HECC_RX_BUFFER_MBOX	No of buffer mailboxes
+ * 28				12			8
+ * 16				20			4
+ */
+
+#define HECC_MAX_RX_MBOX	(HECC_MAX_MAILBOXES - HECC_MAX_TX_MBOX)
+#define HECC_RX_BUFFER_MBOX	12 /* as per table above */
+#define HECC_RX_FIRST_MBOX	(HECC_MAX_MAILBOXES - 1)
+#define HECC_RX_HIGH_MBOX_MASK	(~(BIT(HECC_RX_BUFFER_MBOX) - 1))
+
+/* TI HECC module registers */
+#define HECC_CANME		0x0	/* Mailbox enable */
+#define HECC_CANMD		0x4	/* Mailbox direction */
+#define HECC_CANTRS		0x8	/* Transmit request set */
+#define HECC_CANTRR		0xC	/* Transmit request reset */
+#define HECC_CANTA		0x10	/* Transmission acknowledge */
+#define HECC_CANAA		0x14	/* Abort acknowledge */
+#define HECC_CANRMP		0x18	/* Receive message pending */
+#define HECC_CANRML		0x1C	/* Receive message lost */
+#define HECC_CANRFP		0x20	/* Remote frame pending */
+#define HECC_CANGAM		0x24	/* SCC only: Global acceptance mask */
+#define HECC_CANMC		0x28	/* Master control */
+#define HECC_CANBTC		0x2C	/* Bit timing configuration */
+#define HECC_CANES		0x30	/* Error and status */
+#define HECC_CANTEC		0x34	/* Transmit error counter */
+#define HECC_CANREC		0x38	/* Receive error counter */
+#define HECC_CANGIF0		0x3C	/* Global interrupt flag 0 */
+#define HECC_CANGIM		0x40	/* Global interrupt mask */
+#define HECC_CANGIF1		0x44	/* Global interrupt flag 1 */
+#define HECC_CANMIM		0x48	/* Mailbox interrupt mask */
+#define HECC_CANMIL		0x4C	/* Mailbox interrupt level */
+#define HECC_CANOPC		0x50	/* Overwrite protection control */
+#define HECC_CANTIOC		0x54	/* Transmit I/O control */
+#define HECC_CANRIOC		0x58	/* Receive I/O control */
+#define HECC_CANLNT		0x5C	/* HECC only: Local network time */
+#define HECC_CANTOC		0x60	/* HECC only: Time-out control */
+#define HECC_CANTOS		0x64	/* HECC only: Time-out status */
+#define HECC_CANTIOCE		0x68	/* SCC only:Enhanced TX I/O control */
+#define HECC_CANRIOCE		0x6C	/* SCC only:Enhanced RX I/O control */
+
+/* Mailbox registers */
+#define HECC_CANMID		0x0
+#define HECC_CANMCF		0x4
+#define HECC_CANMDL		0x8
+#define HECC_CANMDH		0xC
+
+#define HECC_SET_REG		0xFFFFFFFF
+#define HECC_CANID_MASK		0x3FF	/* 18 bits mask for extended id's */
+#define HECC_CCE_WAIT_COUNT     100	/* CCE polls, 10us apart (~1 ms) */
+
+#define HECC_CANMC_SCM		BIT(13)	/* SCC compat mode */
+#define HECC_CANMC_CCR		BIT(12)	/* Change config request */
+#define HECC_CANMC_PDR		BIT(11)	/* Local Power down - for sleep mode */
+#define HECC_CANMC_ABO		BIT(7)	/* Auto Bus On */
+#define HECC_CANMC_STM		BIT(6)	/* Self test mode - loopback */
+#define HECC_CANMC_SRES		BIT(5)	/* Software reset */
+
+#define HECC_CANTIOC_EN		BIT(3)	/* Enable CAN TX I/O pin */
+#define HECC_CANRIOC_EN		BIT(3)	/* Enable CAN RX I/O pin */
+
+#define HECC_CANMID_IDE		BIT(31)	/* Extended frame format */
+#define HECC_CANMID_AME		BIT(30)	/* Acceptance mask enable */
+#define HECC_CANMID_AAM		BIT(29)	/* Auto answer mode */
+
+#define HECC_CANES_FE		BIT(24)	/* form error */
+#define HECC_CANES_BE		BIT(23)	/* bit error */
+#define HECC_CANES_SA1		BIT(22)	/* stuck at dominant error */
+#define HECC_CANES_CRCE		BIT(21)	/* CRC error */
+#define HECC_CANES_SE		BIT(20)	/* stuff bit error */
+#define HECC_CANES_ACKE		BIT(19)	/* ack error */
+#define HECC_CANES_BO		BIT(18)	/* Bus off status */
+#define HECC_CANES_EP		BIT(17)	/* Error passive status */
+#define HECC_CANES_EW		BIT(16)	/* Error warning status */
+#define HECC_CANES_SMA		BIT(5)	/* suspend mode ack */
+#define HECC_CANES_CCE		BIT(4)	/* Change config enabled */
+#define HECC_CANES_PDA		BIT(3)	/* Power down mode ack */
+
+#define HECC_CANBTC_SAM		BIT(7)	/* sample points */
+
+#define HECC_BUS_ERROR		(HECC_CANES_FE | HECC_CANES_BE |\
+				HECC_CANES_CRCE | HECC_CANES_SE |\
+				HECC_CANES_ACKE)
+
+#define HECC_CANMCF_RTR		BIT(4)	/* Remote transmit request */
+
+#define HECC_CANGIF_MAIF	BIT(17)	/* Message alarm interrupt */
+#define HECC_CANGIF_TCOIF	BIT(16) /* Timer counter overflow int */
+#define HECC_CANGIF_GMIF	BIT(15)	/* Global mailbox interrupt */
+#define HECC_CANGIF_AAIF	BIT(14)	/* Abort ack interrupt */
+#define HECC_CANGIF_WDIF	BIT(13)	/* Write denied interrupt */
+#define HECC_CANGIF_WUIF	BIT(12)	/* Wake up interrupt */
+#define HECC_CANGIF_RMLIF	BIT(11)	/* Receive message lost interrupt */
+#define HECC_CANGIF_BOIF	BIT(10)	/* Bus off interrupt */
+#define HECC_CANGIF_EPIF	BIT(9)	/* Error passive interrupt */
+#define HECC_CANGIF_WLIF	BIT(8)	/* Warning level interrupt */
+#define HECC_CANGIF_MBOX_MASK	0x1F	/* Mailbox number mask */
+#define HECC_CANGIM_I1EN	BIT(1)	/* Int line 1 enable */
+#define HECC_CANGIM_I0EN	BIT(0)	/* Int line 0 enable */
+#define HECC_CANGIM_DEF_MASK	0x700	/* only busoff/warning/passive */
+#define HECC_CANGIM_SIL		BIT(2)	/* system interrupts to int line 1 */
+
+/* CAN Bittiming constants as per HECC specs */
+static struct can_bittiming_const ti_hecc_bittiming_const = {
+	.name = DRV_NAME,
+	.tseg1_min = 1,
+	.tseg1_max = 16,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 256,
+	.brp_inc = 1,
+};
+
+struct ti_hecc_priv {
+	struct can_priv can;	/* MUST be first member/field */
+	struct napi_struct napi;
+	struct net_device *ndev;
+	struct clk *clk;
+	void __iomem *base;
+	u32 scc_ram_offset;
+	u32 hecc_ram_offset;
+	u32 mbx_offset;
+	u32 int_line;
+	spinlock_t mbx_lock; /* CANME register needs protection */
+	u32 tx_head;
+	u32 tx_tail;
+	u32 rx_next;
+};
+
+static inline int get_tx_head_mb(struct ti_hecc_priv *priv)
+{
+	return priv->tx_head & HECC_TX_MB_MASK;
+}
+
+static inline int get_tx_tail_mb(struct ti_hecc_priv *priv)
+{
+	return priv->tx_tail & HECC_TX_MB_MASK;
+}
+
+static inline int get_tx_head_prio(struct ti_hecc_priv *priv)
+{
+	return (priv->tx_head >> HECC_TX_PRIO_SHIFT) & MAX_TX_PRIO;
+}
+
+static inline void hecc_write_lam(struct ti_hecc_priv *priv, u32 mbxno, u32 val)
+{
+	__raw_writel(val, priv->base + priv->hecc_ram_offset + mbxno * 4);
+}
+
+static inline void hecc_write_mbx(struct ti_hecc_priv *priv, u32 mbxno,
+	u32 reg, u32 val)
+{
+	__raw_writel(val, priv->base + priv->mbx_offset + mbxno * 0x10 +
+			reg);
+}
+
+static inline u32 hecc_read_mbx(struct ti_hecc_priv *priv, u32 mbxno, u32 reg)
+{
+	return __raw_readl(priv->base + priv->mbx_offset + mbxno * 0x10 +
+			reg);
+}
+
+static inline void hecc_write(struct ti_hecc_priv *priv, u32 reg, u32 val)
+{
+	__raw_writel(val, priv->base + reg);
+}
+
+static inline u32 hecc_read(struct ti_hecc_priv *priv, int reg)
+{
+	return __raw_readl(priv->base + reg);
+}
+
+static inline void hecc_set_bit(struct ti_hecc_priv *priv, int reg,
+	u32 bit_mask)
+{
+	hecc_write(priv, reg, hecc_read(priv, reg) | bit_mask);
+}
+
+static inline void hecc_clear_bit(struct ti_hecc_priv *priv, int reg,
+	u32 bit_mask)
+{
+	hecc_write(priv, reg, hecc_read(priv, reg) & ~bit_mask);
+}
+
+static inline u32 hecc_get_bit(struct ti_hecc_priv *priv, int reg, u32 bit_mask)
+{
+	return (hecc_read(priv, reg) & bit_mask) ? 1 : 0;
+}
+
+static int ti_hecc_get_state(const struct net_device *ndev,
+	enum can_state *state)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	*state = priv->can.state;
+	return 0;
+}
+
+/* Program CANBTC from priv->can.bittiming; always returns 0 */
+static int ti_hecc_set_btc(struct ti_hecc_priv *priv)
+{
+	struct can_bittiming *bit_timing = &priv->can.bittiming;
+	u32 can_btc;
+
+	can_btc = (bit_timing->phase_seg2 - 1) & 0x7;
+	can_btc |= ((bit_timing->phase_seg1 + bit_timing->prop_seg - 1)
+			& 0xF) << 3;
+	if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) {
+		if (bit_timing->brp > 4)
+			can_btc |= HECC_CANBTC_SAM;
+		else
+			dev_warn(priv->ndev->dev.parent, "WARN: Triple "
+				"sampling not set due to h/w limitations\n");
+	}
+	can_btc |= ((bit_timing->sjw - 1) & 0x3) << 8;
+	can_btc |= ((bit_timing->brp - 1) & 0xFF) << 16;
+
+	/* ERM being set to 0 by default meaning resync at falling edge */
+	hecc_write(priv, HECC_CANBTC, can_btc);
+	dev_info(priv->ndev->dev.parent, "setting CANBTC=%#x\n", can_btc);
+
+	return 0;
+}
+
+static void ti_hecc_reset(struct net_device *ndev)
+{
+	u32 cnt;
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	dev_dbg(ndev->dev.parent, "resetting hecc ...\n");
+	hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_SRES);
+
+	/* Set change control request and wait till enabled */
+	hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+
+	/*
+	 * INFO: It has been observed that at times CCE bit may not be
+	 * set and hw seems to be ok even if this bit is not set so
+	 * timing out with a timing of 1ms to respect the specs
+	 */
+	cnt = HECC_CCE_WAIT_COUNT;
+	while (!hecc_get_bit(priv, HECC_CANES, HECC_CANES_CCE) && cnt != 0) {
+		--cnt;
+		udelay(10);
+	}
+
+	/*
+	 * Note: On HECC, BTC can be programmed only in initialization mode, so
+	 * it is expected that the can bittiming parameters are set via ip
+	 * utility before the device is opened
+	 */
+	ti_hecc_set_btc(priv);
+
+	/* Clear CCR (and CANMC register) and wait for CCE = 0 enable */
+	hecc_write(priv, HECC_CANMC, 0);
+
+	/*
+	 * INFO: CAN net stack handles bus off and hence disabling auto-bus-on
+	 * hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_ABO);
+	 */
+
+	/*
+	 * INFO: It has been observed that at times CCE bit may not be set and
+	 * hw seems to be ok even so, hence only poll a bounded number of times
+	 */
+	cnt = HECC_CCE_WAIT_COUNT;
+	while (hecc_get_bit(priv, HECC_CANES, HECC_CANES_CCE) && cnt != 0) {
+		--cnt;
+		udelay(10);
+	}
+
+	/* Enable TX and RX I/O Control pins */
+	hecc_write(priv, HECC_CANTIOC, HECC_CANTIOC_EN);
+	hecc_write(priv, HECC_CANRIOC, HECC_CANRIOC_EN);
+
+	/* Clear registers for clean operation */
+	hecc_write(priv, HECC_CANTA, HECC_SET_REG);
+	hecc_write(priv, HECC_CANRMP, HECC_SET_REG);
+	hecc_write(priv, HECC_CANGIF0, HECC_SET_REG);
+	hecc_write(priv, HECC_CANGIF1, HECC_SET_REG);
+	hecc_write(priv, HECC_CANME, 0);
+	hecc_write(priv, HECC_CANMD, 0);
+
+	/* SCC compat mode NOT supported (and not needed too) */
+	hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_SCM);
+}
+
+static void ti_hecc_start(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	u32 cnt, mbxno, mbx_mask;
+
+	/* put HECC in initialization mode and set btc */
+	ti_hecc_reset(ndev);
+
+	priv->tx_head = priv->tx_tail = HECC_TX_MASK;
+	priv->rx_next = HECC_RX_FIRST_MBOX;
+
+	/* Enable local and global acceptance mask registers */
+	hecc_write(priv, HECC_CANGAM, HECC_SET_REG);
+
+	/* Prepare configured mailboxes to receive messages */
+	for (cnt = 0; cnt < HECC_MAX_RX_MBOX; cnt++) {
+		mbxno = HECC_MAX_MAILBOXES - 1 - cnt;
+		mbx_mask = BIT(mbxno);
+		hecc_clear_bit(priv, HECC_CANME, mbx_mask);
+		hecc_write_mbx(priv, mbxno, HECC_CANMID, HECC_CANMID_AME);
+		hecc_write_lam(priv, mbxno, HECC_SET_REG);
+		hecc_set_bit(priv, HECC_CANMD, mbx_mask);
+		hecc_set_bit(priv, HECC_CANME, mbx_mask);
+		hecc_set_bit(priv, HECC_CANMIM, mbx_mask);
+	}
+
+	/* Prevent message over-write & Enable interrupts */
+	hecc_write(priv, HECC_CANOPC, HECC_SET_REG);
+	if (priv->int_line) {
+		hecc_write(priv, HECC_CANMIL, HECC_SET_REG);
+		hecc_write(priv, HECC_CANGIM, HECC_CANGIM_DEF_MASK |
+			HECC_CANGIM_I1EN | HECC_CANGIM_SIL);
+	} else {
+		hecc_write(priv, HECC_CANMIL, 0);
+		hecc_write(priv, HECC_CANGIM,
+			HECC_CANGIM_DEF_MASK | HECC_CANGIM_I0EN);
+	}
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+}
+
+static void ti_hecc_stop(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	/* Disable interrupts and disable mailboxes */
+	hecc_write(priv, HECC_CANGIM, 0);
+	hecc_write(priv, HECC_CANMIM, 0);
+	hecc_write(priv, HECC_CANME, 0);
+	priv->can.state = CAN_STATE_STOPPED;
+}
+
+static int ti_hecc_do_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	int ret = 0;
+
+	switch (mode) {
+	case CAN_MODE_START:
+		ti_hecc_start(ndev);
+		netif_wake_queue(ndev);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * ti_hecc_xmit: HECC Transmit
+ *
+ * The transmit mailboxes are 0 to HECC_MAX_TX_MBOX - 1. In HECC the
+ * priority of the mailbox for transmission is dependent upon priority setting
+ * field in mailbox registers. The mailbox with highest value in priority field
+ * is transmitted first. Only when two mailboxes have the same value in
+ * priority field the highest numbered mailbox is transmitted first.
+ *
+ * To utilize the HECC priority feature as described above we start with the
+ * highest numbered mailbox with highest priority level and move on to the next
+ * mailbox with the same priority level and so on. Once we loop through all the
+ * transmit mailboxes we choose the next priority level (lower) and so on
+ * until we reach the lowest priority level on the lowest numbered mailbox
+ * when we stop transmission until all mailboxes are transmitted and then
+ * restart at highest numbered mailbox with highest priority.
+ *
+ * Two counters (head and tail) are used to track the next mailbox to transmit
+ * and to track the echo buffer for already transmitted mailbox. The queue
+ * is stopped when all the mailboxes are busy or when a priority value
+ * roll-over happens.
+ */
+static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	u32 mbxno, mbx_mask, data;
+	unsigned long flags;
+
+	mbxno = get_tx_head_mb(priv);
+	mbx_mask = BIT(mbxno);
+	spin_lock_irqsave(&priv->mbx_lock, flags);
+	if (unlikely(hecc_read(priv, HECC_CANME) & mbx_mask)) {
+		spin_unlock_irqrestore(&priv->mbx_lock, flags);
+		netif_stop_queue(ndev);
+		dev_err(priv->ndev->dev.parent,
+			"BUG: TX mbx not ready tx_head=%08X, tx_tail=%08X\n",
+			priv->tx_head, priv->tx_tail);
+		return NETDEV_TX_BUSY;
+	}
+	spin_unlock_irqrestore(&priv->mbx_lock, flags);
+
+	/* Prepare mailbox for transmission */
+	data = min_t(u8, cf->can_dlc, 8);
+	if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */
+		data |= HECC_CANMCF_RTR;
+	data |= get_tx_head_prio(priv) << 8;
+	hecc_write_mbx(priv, mbxno, HECC_CANMCF, data);
+
+	if (cf->can_id & CAN_EFF_FLAG) /* Extended frame format */
+		data = (cf->can_id & CAN_EFF_MASK) | HECC_CANMID_IDE;
+	else /* Standard frame format */
+		data = (cf->can_id & CAN_SFF_MASK) << 18;
+	hecc_write_mbx(priv, mbxno, HECC_CANMID, data);
+	hecc_write_mbx(priv, mbxno, HECC_CANMDL,
+		be32_to_cpu(*(u32 *)(cf->data)));
+	if (cf->can_dlc > 4)
+		hecc_write_mbx(priv, mbxno, HECC_CANMDH,
+			be32_to_cpu(*(u32 *)(cf->data + 4)));
+	else
+		*(u32 *)(cf->data + 4) = 0;
+	can_put_echo_skb(skb, ndev, mbxno);
+
+	spin_lock_irqsave(&priv->mbx_lock, flags);
+	--priv->tx_head;
+	if ((hecc_read(priv, HECC_CANME) & BIT(get_tx_head_mb(priv))) ||
+		(priv->tx_head & HECC_TX_MASK) == HECC_TX_MASK) {
+		netif_stop_queue(ndev);
+	}
+	hecc_set_bit(priv, HECC_CANME, mbx_mask);
+	spin_unlock_irqrestore(&priv->mbx_lock, flags);
+
+	hecc_clear_bit(priv, HECC_CANMD, mbx_mask);
+	hecc_set_bit(priv, HECC_CANMIM, mbx_mask);
+	hecc_write(priv, HECC_CANTRS, mbx_mask);
+
+	return NETDEV_TX_OK;
+}
+
+static int ti_hecc_rx_pkt(struct ti_hecc_priv *priv, int mbxno)
+{
+	struct net_device_stats *stats = &priv->ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u32 data, mbx_mask;
+	unsigned long flags;
+
+	skb = netdev_alloc_skb(priv->ndev, sizeof(struct can_frame));
+	if (!skb) {
+		if (printk_ratelimit())
+			dev_err(priv->ndev->dev.parent,
+				"ti_hecc_rx_pkt: netdev_alloc_skb() failed\n");
+		return -ENOMEM;
+	}
+	skb->protocol = __constant_htons(ETH_P_CAN);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	mbx_mask = BIT(mbxno);
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	data = hecc_read_mbx(priv, mbxno, HECC_CANMID);
+	if (data & HECC_CANMID_IDE)
+		cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
+	else
+		cf->can_id = (data >> 18) & CAN_SFF_MASK;
+	data = hecc_read_mbx(priv, mbxno, HECC_CANMCF);
+	if (data & HECC_CANMCF_RTR)
+		cf->can_id |= CAN_RTR_FLAG;
+	cf->can_dlc = data & 0xF;
+	data = hecc_read_mbx(priv, mbxno, HECC_CANMDL);
+	*(u32 *)(cf->data) = cpu_to_be32(data);
+	if (cf->can_dlc > 4) {
+		data = hecc_read_mbx(priv, mbxno, HECC_CANMDH);
+		*(u32 *)(cf->data + 4) = cpu_to_be32(data);
+	} else {
+		*(u32 *)(cf->data + 4) = 0;
+	}
+	spin_lock_irqsave(&priv->mbx_lock, flags);
+	hecc_clear_bit(priv, HECC_CANME, mbx_mask);
+	hecc_write(priv, HECC_CANRMP, mbx_mask);
+	/* enable mailbox only if it is part of rx buffer mailboxes */
+	if (priv->rx_next < HECC_RX_BUFFER_MBOX)
+		hecc_set_bit(priv, HECC_CANME, mbx_mask);
+	spin_unlock_irqrestore(&priv->mbx_lock, flags);
+
+	stats->rx_bytes += cf->can_dlc;
+	netif_receive_skb(skb);
+	stats->rx_packets++;
+
+	return 0;
+}
+
+/*
+ * ti_hecc_rx_poll - HECC receive pkts
+ *
+ * The receive mailboxes start from highest numbered mailbox till last xmit
+ * mailbox. On CAN frame reception the hardware places the data into highest
+ * numbered mailbox that matches the CAN ID filter. Since all receive mailboxes
+ * have same filtering (ALL CAN frames) packets will arrive in the highest
+ * available RX mailbox and we need to ensure in-order packet reception.
+ *
+ * To ensure the packets are received in the right order we logically divide
+ * the RX mailboxes into main and buffer mailboxes. Packets are received as per
+ * mailbox priority (higher to lower) in the main bank and once it is full we
+ * disable further reception into main mailboxes. While the main mailboxes are
+ * processed in NAPI, further packets are received in buffer mailboxes.
+ *
+ * We maintain a RX next mailbox counter to process packets and once all main
+ * mailbox packets are passed to the upper stack we enable all of them but
+ * continue to process packets received in buffer mailboxes. With each packet
+ * received from buffer mailbox we enable it immediately so as to handle the
+ * overflow from higher mailboxes.
+ */
+static int ti_hecc_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct net_device *ndev = napi->dev;
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	u32 num_pkts = 0;
+	u32 mbx_mask;
+	unsigned long pending_pkts, flags;
+
+	if (!netif_running(ndev))
+		return 0;
+
+	while ((pending_pkts = hecc_read(priv, HECC_CANRMP)) &&
+		num_pkts < quota) {
+		mbx_mask = BIT(priv->rx_next); /* next rx mailbox to process */
+		if (mbx_mask & pending_pkts) {
+			if (ti_hecc_rx_pkt(priv, priv->rx_next) < 0)
+				return num_pkts;
+			++num_pkts;
+		} else if (priv->rx_next > HECC_RX_BUFFER_MBOX) {
+			break; /* pkt not received yet */
+		}
+		--priv->rx_next;
+		if (priv->rx_next == HECC_RX_BUFFER_MBOX) {
+			/* enable high bank mailboxes */
+			spin_lock_irqsave(&priv->mbx_lock, flags);
+			mbx_mask = hecc_read(priv, HECC_CANME);
+			mbx_mask |= HECC_RX_HIGH_MBOX_MASK;
+			hecc_write(priv, HECC_CANME, mbx_mask);
+			spin_unlock_irqrestore(&priv->mbx_lock, flags);
+		} else if (priv->rx_next == HECC_MAX_TX_MBOX - 1) {
+			priv->rx_next = HECC_RX_FIRST_MBOX;
+			break;
+		}
+	}
+
+	/* Enable packet interrupt if all pkts are handled */
+	if (hecc_read(priv, HECC_CANRMP) == 0) {
+		napi_complete(napi);
+		/* Re-enable RX mailbox interrupts */
+		mbx_mask = hecc_read(priv, HECC_CANMIM);
+		mbx_mask |= HECC_TX_MBOX_MASK;
+		hecc_write(priv, HECC_CANMIM, mbx_mask);
+	}
+
+	return num_pkts;
+}
+
+static int ti_hecc_error(struct net_device *ndev, int int_status,
+	int err_status)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+
+	/* propogate the error condition to the can stack */
+	skb = netdev_alloc_skb(ndev, sizeof(struct can_frame));
+	if (!skb) {
+		if (printk_ratelimit())
+			dev_err(priv->ndev->dev.parent,
+				"ti_hecc_error: netdev_alloc_skb() failed\n");
+		return -ENOMEM;
+	}
+	skb->protocol = __constant_htons(ETH_P_CAN);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
+	memset(cf, 0, sizeof(struct can_frame));
+	cf->can_id = CAN_ERR_FLAG;
+	cf->can_dlc = CAN_ERR_DLC;
+
+	if (int_status & HECC_CANGIF_WLIF) { /* warning level int */
+		if ((int_status & HECC_CANGIF_BOIF) == 0) {
+			priv->can.state = CAN_STATE_ERROR_WARNING;
+			++priv->can.can_stats.error_warning;
+			cf->can_id |= CAN_ERR_CRTL;
+			if (hecc_read(priv, HECC_CANTEC) > 96)
+				cf->data[1] |= CAN_ERR_CRTL_TX_WARNING;
+			if (hecc_read(priv, HECC_CANREC) > 96)
+				cf->data[1] |= CAN_ERR_CRTL_RX_WARNING;
+		}
+		hecc_set_bit(priv, HECC_CANES, HECC_CANES_EW);
+		dev_dbg(priv->ndev->dev.parent, "Error Warning interrupt\n");
+		hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+	}
+
+	if (int_status & HECC_CANGIF_EPIF) { /* error passive int */
+		if ((int_status & HECC_CANGIF_BOIF) == 0) {
+			priv->can.state = CAN_STATE_ERROR_PASSIVE;
+			++priv->can.can_stats.error_passive;
+			cf->can_id |= CAN_ERR_CRTL;
+			if (hecc_read(priv, HECC_CANTEC) > 127)
+				cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE;
+			if (hecc_read(priv, HECC_CANREC) > 127)
+				cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE;
+		}
+		hecc_set_bit(priv, HECC_CANES, HECC_CANES_EP);
+		dev_dbg(priv->ndev->dev.parent, "Error passive interrupt\n");
+		hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+	}
+
+	/*
+	 * Need to check busoff condition in error status register too to
+	 * ensure warning interrupts don't hog the system
+	 */
+	if ((int_status & HECC_CANGIF_BOIF) || (err_status & HECC_CANES_BO)) {
+		priv->can.state = CAN_STATE_BUS_OFF;
+		cf->can_id |= CAN_ERR_BUSOFF;
+		hecc_set_bit(priv, HECC_CANES, HECC_CANES_BO);
+		hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+		/* Disable all interrupts in bus-off to avoid int hog */
+		hecc_write(priv, HECC_CANGIM, 0);
+		can_bus_off(ndev);
+	}
+
+	if (err_status & HECC_BUS_ERROR) {
+		++priv->can.can_stats.bus_error;
+		cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+		cf->data[2] |= CAN_ERR_PROT_UNSPEC;
+		if (err_status & HECC_CANES_FE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE);
+			cf->data[2] |= CAN_ERR_PROT_FORM;
+		}
+		if (err_status & HECC_CANES_BE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_BE);
+			cf->data[2] |= CAN_ERR_PROT_BIT;
+		}
+		if (err_status & HECC_CANES_SE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_SE);
+			cf->data[2] |= CAN_ERR_PROT_STUFF;
+		}
+		if (err_status & HECC_CANES_CRCE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE);
+			cf->data[2] |= CAN_ERR_PROT_LOC_CRC_SEQ |
+					CAN_ERR_PROT_LOC_CRC_DEL;
+		}
+		if (err_status & HECC_CANES_ACKE) {
+			hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE);
+			cf->data[2] |= CAN_ERR_PROT_LOC_ACK |
+					CAN_ERR_PROT_LOC_ACK_DEL;
+		}
+	}
+
+	netif_receive_skb(skb);
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+	return 0;
+}
+
+static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *)dev_id;
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	u32 mbxno, mbx_mask, int_status, err_status;
+	unsigned long ack, flags;
+
+	int_status = hecc_read(priv,
+		(priv->int_line) ? HECC_CANGIF1 : HECC_CANGIF0);
+
+	if (!int_status)
+		return IRQ_NONE;
+
+	err_status = hecc_read(priv, HECC_CANES);
+	if (err_status & (HECC_BUS_ERROR | HECC_CANES_BO |
+		HECC_CANES_EP | HECC_CANES_EW))
+			ti_hecc_error(ndev, int_status, err_status);
+
+	if (int_status & HECC_CANGIF_GMIF) {
+		while (priv->tx_tail - priv->tx_head > 0) {
+			mbxno = get_tx_tail_mb(priv);
+			mbx_mask = BIT(mbxno);
+			if (!(mbx_mask & hecc_read(priv, HECC_CANTA)))
+				break;
+			hecc_clear_bit(priv, HECC_CANMIM, mbx_mask);
+			hecc_write(priv, HECC_CANTA, mbx_mask);
+			spin_lock_irqsave(&priv->mbx_lock, flags);
+			hecc_clear_bit(priv, HECC_CANME, mbx_mask);
+			spin_unlock_irqrestore(&priv->mbx_lock, flags);
+			stats->tx_bytes += hecc_read_mbx(priv, mbxno,
+						HECC_CANMCF) & 0xF;
+			stats->tx_packets++;
+			can_get_echo_skb(ndev, mbxno);
+			--priv->tx_tail;
+		}
+
+		/* restart queue if wrap-up or if queue stalled on last pkt */
+		if (((priv->tx_head == priv->tx_tail) &&
+		((priv->tx_head & HECC_TX_MASK) != HECC_TX_MASK)) ||
+		(((priv->tx_tail & HECC_TX_MASK) == HECC_TX_MASK) &&
+		((priv->tx_head & HECC_TX_MASK) == HECC_TX_MASK)))
+			netif_wake_queue(ndev);
+
+		/* Disable RX mailbox interrupts and let NAPI reenable them */
+		if (hecc_read(priv, HECC_CANRMP)) {
+			ack = hecc_read(priv, HECC_CANMIM);
+			ack &= BIT(HECC_MAX_TX_MBOX) - 1;
+			hecc_write(priv, HECC_CANMIM, ack);
+			napi_schedule(&priv->napi);
+		}
+	}
+
+	/* clear all interrupt conditions - read back to avoid spurious ints */
+	if (priv->int_line) {
+		hecc_write(priv, HECC_CANGIF1, HECC_SET_REG);
+		int_status = hecc_read(priv, HECC_CANGIF1);
+	} else {
+		hecc_write(priv, HECC_CANGIF0, HECC_SET_REG);
+		int_status = hecc_read(priv, HECC_CANGIF0);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int ti_hecc_open(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+	int err;
+
+	err = request_irq(ndev->irq, ti_hecc_interrupt, IRQF_SHARED,
+			ndev->name, ndev);
+	if (err) {
+		dev_err(ndev->dev.parent, "error requesting interrupt\n");
+		return err;
+	}
+
+	/* Open common can device */
+	err = open_candev(ndev);
+	if (err) {
+		dev_err(ndev->dev.parent, "open_candev() failed %d\n", err);
+		free_irq(ndev->irq, ndev);
+		return err;
+	}
+
+	clk_enable(priv->clk);
+	ti_hecc_start(ndev);
+	napi_enable(&priv->napi);
+	netif_start_queue(ndev);
+
+	return 0;
+}
+
+static int ti_hecc_close(struct net_device *ndev)
+{
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+	napi_disable(&priv->napi);
+	ti_hecc_stop(ndev);
+	free_irq(ndev->irq, ndev);
+	clk_disable(priv->clk);
+	close_candev(ndev);
+
+	return 0;
+}
+
+static const struct net_device_ops ti_hecc_netdev_ops = {
+	.ndo_open		= ti_hecc_open,
+	.ndo_stop		= ti_hecc_close,
+	.ndo_start_xmit		= ti_hecc_xmit,
+};
+
+static int ti_hecc_probe(struct platform_device *pdev)
+{
+	struct net_device *ndev = (struct net_device *)0;
+	struct ti_hecc_priv *priv;
+	struct ti_hecc_platform_data *pdata;
+	struct resource *mem, *irq;
+	void __iomem *addr;
+	int err = -ENODEV;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data\n");
+		goto probe_exit;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(&pdev->dev, "No mem resources\n");
+		goto probe_exit;
+	}
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq) {
+		dev_err(&pdev->dev, "No irq resource\n");
+		goto probe_exit;
+	}
+	if (!request_mem_region(mem->start, resource_size(mem), pdev->name)) {
+		dev_err(&pdev->dev, "HECC region already claimed\n");
+		err = -EBUSY;
+		goto probe_exit;
+	}
+	addr = ioremap(mem->start, resource_size(mem));
+	if (!addr) {
+		dev_err(&pdev->dev, "ioremap failed\n");
+		err = -ENOMEM;
+		goto probe_exit_free_region;
+	}
+
+	ndev = alloc_candev(sizeof(struct ti_hecc_priv));
+	if (!ndev) {
+		dev_err(&pdev->dev, "alloc_candev failed\n");
+		err = -ENOMEM;
+		goto probe_exit_iounmap;
+	}
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	priv->base = addr;
+	priv->scc_ram_offset = pdata->scc_ram_offset;
+	priv->hecc_ram_offset = pdata->hecc_ram_offset;
+	priv->mbx_offset = pdata->mbx_offset;
+	priv->int_line = pdata->int_line;
+
+	priv->can.bittiming_const = &ti_hecc_bittiming_const;
+	priv->can.do_set_mode = ti_hecc_do_set_mode;
+	priv->can.do_get_state = ti_hecc_get_state;
+
+	ndev->irq = irq->start;
+	ndev->flags |= IFF_ECHO;
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+	ndev->netdev_ops = &ti_hecc_netdev_ops;
+
+	priv->clk = clk_get(&pdev->dev, "hecc_ck");
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "No clock available\n");
+		err = PTR_ERR(priv->clk);
+		priv->clk = NULL;
+		goto probe_exit_candev;
+	}
+	priv->can.clock.freq = clk_get_rate(priv->clk);
+	netif_napi_add(ndev, &priv->napi, ti_hecc_rx_poll,
+		HECC_DEF_NAPI_WEIGHT);
+
+	err = register_candev(ndev);
+	if (err) {
+		dev_err(&pdev->dev, "register_candev() failed\n");
+		goto probe_exit_clk;
+	}
+	dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n",
+		priv->base, (u32) ndev->irq);
+
+	return 0;
+
+probe_exit_clk:
+	clk_put(priv->clk);
+probe_exit_candev:
+	free_candev(ndev);
+probe_exit_iounmap:
+	iounmap(addr);
+probe_exit_free_region:
+	release_mem_region(mem->start, resource_size(mem));
+probe_exit:
+	return err;
+}
+
+static int __devexit ti_hecc_remove(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct ti_hecc_priv *priv = netdev_priv(ndev);
+
+	/* Unregister first: it may still run ti_hecc_close() on the hw */
+	unregister_candev(ndev);
+	clk_put(priv->clk);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iounmap(priv->base);
+	release_mem_region(res->start, resource_size(res));
+	free_candev(ndev);
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+/* TI HECC netdevice driver: platform driver structure */
+static struct platform_driver ti_hecc_driver = {
+	.driver = {
+		.name    = DRV_NAME,
+		.owner   = THIS_MODULE,
+	},
+	.probe = ti_hecc_probe,
+	.remove = __devexit_p(ti_hecc_remove),
+};
+
+static int __init ti_hecc_init_driver(void)
+{
+	printk(KERN_INFO DRV_DESC "\n");
+	return platform_driver_register(&ti_hecc_driver);
+}
+module_init(ti_hecc_init_driver);
+
+static void __exit ti_hecc_exit_driver(void)
+{
+	printk(KERN_INFO DRV_DESC " unloaded\n");
+	platform_driver_unregister(&ti_hecc_driver);
+}
+module_exit(ti_hecc_exit_driver);
+
+MODULE_AUTHOR("Anant Gole <anantgole-l0cyMroinI0@public.gmane.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION(DRV_DESC);
diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h
new file mode 100644
index 0000000..4688c7b
--- /dev/null
+++ b/include/linux/can/platform/ti_hecc.h
@@ -0,0 +1,40 @@
+/*
+ * TI HECC (High End CAN Controller) driver platform header
+ *
+ * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed as is WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/**
+ * struct ti_hecc_platform_data - HECC Platform Data
+ *
+ * @scc_hecc_offset:	mostly 0 - should really never change
+ * @scc_ram_offset:	SCC RAM offset
+ * @hecc_ram_offset:	HECC RAM offset
+ * @mbx_offset:		Mailbox RAM offset
+ * @int_line:		Interrupt line to use - 0 or 1
+ * @version:		version for future use
+ *
+ * Platform data structure to get all platform specific settings.
+ * This structure also accounts for the fact that the IP may have different
+ * RAM and mailbox offsets on different SoCs.
+ */
+struct ti_hecc_platform_data {
+	u32 scc_hecc_offset;
+	u32 scc_ram_offset;
+	u32 hecc_ram_offset;
+	u32 mbx_offset;
+	u32 int_line;
+	u32 version;
+};
+
+
-- 
1.6.2.4

^ permalink raw reply related

* [PATCH net-next-2.6] ixgb: Use the instance of net_device_stats from net_device.
From: Ajit Khaparde @ 2009-10-07 12:46 UTC (permalink / raw)
  To: davem, netdev

Since net_device already has an instance of net_device_stats,
the duplicate instance in the private adapter structure can be removed.

Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>
---
 drivers/net/ixgb/ixgb.h         |    1 -
 drivers/net/ixgb/ixgb_ethtool.c |   44 ++++++++++++++++++++------------------
 drivers/net/ixgb/ixgb_main.c    |   44 ++++++++++++++++++--------------------
 3 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ixgb/ixgb.h b/drivers/net/ixgb/ixgb.h
index d85717e..e95d9b6 100644
--- a/drivers/net/ixgb/ixgb.h
+++ b/drivers/net/ixgb/ixgb.h
@@ -183,7 +183,6 @@ struct ixgb_adapter {
 	struct napi_struct napi;
 	struct net_device *netdev;
 	struct pci_dev *pdev;
-	struct net_device_stats net_stats;
 
 	/* structs defined in ixgb_hw.h */
 	struct ixgb_hw hw;
diff --git a/drivers/net/ixgb/ixgb_ethtool.c b/drivers/net/ixgb/ixgb_ethtool.c
index 288ee1d..deeb25d 100644
--- a/drivers/net/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ixgb/ixgb_ethtool.c
@@ -42,30 +42,32 @@ struct ixgb_stats {
 
 #define IXGB_STAT(m) FIELD_SIZEOF(struct ixgb_adapter, m), \
 		      offsetof(struct ixgb_adapter, m)
+#define IXGB_NETDEV_STAT(m) FIELD_SIZEOF(struct net_device, m), \
+		      offsetof(struct net_device, m)
 static struct ixgb_stats ixgb_gstrings_stats[] = {
-	{"rx_packets", IXGB_STAT(net_stats.rx_packets)},
-	{"tx_packets", IXGB_STAT(net_stats.tx_packets)},
-	{"rx_bytes", IXGB_STAT(net_stats.rx_bytes)},
-	{"tx_bytes", IXGB_STAT(net_stats.tx_bytes)},
-	{"rx_errors", IXGB_STAT(net_stats.rx_errors)},
-	{"tx_errors", IXGB_STAT(net_stats.tx_errors)},
-	{"rx_dropped", IXGB_STAT(net_stats.rx_dropped)},
-	{"tx_dropped", IXGB_STAT(net_stats.tx_dropped)},
-	{"multicast", IXGB_STAT(net_stats.multicast)},
-	{"collisions", IXGB_STAT(net_stats.collisions)},
-
-/*	{ "rx_length_errors", IXGB_STAT(net_stats.rx_length_errors) },	*/
-	{"rx_over_errors", IXGB_STAT(net_stats.rx_over_errors)},
-	{"rx_crc_errors", IXGB_STAT(net_stats.rx_crc_errors)},
-	{"rx_frame_errors", IXGB_STAT(net_stats.rx_frame_errors)},
+	{"rx_packets", IXGB_NETDEV_STAT(stats.rx_packets)},
+	{"tx_packets", IXGB_NETDEV_STAT(stats.tx_packets)},
+	{"rx_bytes", IXGB_NETDEV_STAT(stats.rx_bytes)},
+	{"tx_bytes", IXGB_NETDEV_STAT(stats.tx_bytes)},
+	{"rx_errors", IXGB_NETDEV_STAT(stats.rx_errors)},
+	{"tx_errors", IXGB_NETDEV_STAT(stats.tx_errors)},
+	{"rx_dropped", IXGB_NETDEV_STAT(stats.rx_dropped)},
+	{"tx_dropped", IXGB_NETDEV_STAT(stats.tx_dropped)},
+	{"multicast", IXGB_NETDEV_STAT(stats.multicast)},
+	{"collisions", IXGB_NETDEV_STAT(stats.collisions)},
+
+/*	{ "rx_length_errors", IXGB_NETDEV_STAT(stats.rx_length_errors) },	*/
+	{"rx_over_errors", IXGB_NETDEV_STAT(stats.rx_over_errors)},
+	{"rx_crc_errors", IXGB_NETDEV_STAT(stats.rx_crc_errors)},
+	{"rx_frame_errors", IXGB_NETDEV_STAT(stats.rx_frame_errors)},
 	{"rx_no_buffer_count", IXGB_STAT(stats.rnbc)},
-	{"rx_fifo_errors", IXGB_STAT(net_stats.rx_fifo_errors)},
-	{"rx_missed_errors", IXGB_STAT(net_stats.rx_missed_errors)},
-	{"tx_aborted_errors", IXGB_STAT(net_stats.tx_aborted_errors)},
-	{"tx_carrier_errors", IXGB_STAT(net_stats.tx_carrier_errors)},
-	{"tx_fifo_errors", IXGB_STAT(net_stats.tx_fifo_errors)},
-	{"tx_heartbeat_errors", IXGB_STAT(net_stats.tx_heartbeat_errors)},
-	{"tx_window_errors", IXGB_STAT(net_stats.tx_window_errors)},
+	{"rx_fifo_errors", IXGB_NETDEV_STAT(stats.rx_fifo_errors)},
+	{"rx_missed_errors", IXGB_NETDEV_STAT(stats.rx_missed_errors)},
+	{"tx_aborted_errors", IXGB_NETDEV_STAT(stats.tx_aborted_errors)},
+	{"tx_carrier_errors", IXGB_NETDEV_STAT(stats.tx_carrier_errors)},
+	{"tx_fifo_errors", IXGB_NETDEV_STAT(stats.tx_fifo_errors)},
+	{"tx_heartbeat_errors", IXGB_NETDEV_STAT(stats.tx_heartbeat_errors)},
+	{"tx_window_errors", IXGB_NETDEV_STAT(stats.tx_window_errors)},
 	{"tx_deferred_ok", IXGB_STAT(stats.dc)},
 	{"tx_timeout_count", IXGB_STAT(tx_timeout_count) },
 	{"tx_restart_queue", IXGB_STAT(restart_queue) },
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 8aa44dc..f9f633c 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1537,9 +1537,7 @@ ixgb_tx_timeout_task(struct work_struct *work)
 static struct net_device_stats *
 ixgb_get_stats(struct net_device *netdev)
 {
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	return &adapter->net_stats;
+	return &netdev->stats;
 }
 
 /**
@@ -1676,16 +1674,16 @@ ixgb_update_stats(struct ixgb_adapter *adapter)
 
 	/* Fill out the OS statistics structure */
 
-	adapter->net_stats.rx_packets = adapter->stats.gprcl;
-	adapter->net_stats.tx_packets = adapter->stats.gptcl;
-	adapter->net_stats.rx_bytes = adapter->stats.gorcl;
-	adapter->net_stats.tx_bytes = adapter->stats.gotcl;
-	adapter->net_stats.multicast = adapter->stats.mprcl;
-	adapter->net_stats.collisions = 0;
+	netdev->stats.rx_packets = adapter->stats.gprcl;
+	netdev->stats.tx_packets = adapter->stats.gptcl;
+	netdev->stats.rx_bytes = adapter->stats.gorcl;
+	netdev->stats.tx_bytes = adapter->stats.gotcl;
+	netdev->stats.multicast = adapter->stats.mprcl;
+	netdev->stats.collisions = 0;
 
 	/* ignore RLEC as it reports errors for padded (<64bytes) frames
 	 * with a length in the type/len field */
-	adapter->net_stats.rx_errors =
+	netdev->stats.rx_errors =
 	    /* adapter->stats.rnbc + */ adapter->stats.crcerrs +
 	    adapter->stats.ruc +
 	    adapter->stats.roc /*+ adapter->stats.rlec */  +
@@ -1693,21 +1691,21 @@ ixgb_update_stats(struct ixgb_adapter *adapter)
 	    adapter->stats.ecbc + adapter->stats.mpc;
 
 	/* see above
-	 * adapter->net_stats.rx_length_errors = adapter->stats.rlec;
+	 * netdev->stats.rx_length_errors = adapter->stats.rlec;
 	 */
 
-	adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
-	adapter->net_stats.rx_fifo_errors = adapter->stats.mpc;
-	adapter->net_stats.rx_missed_errors = adapter->stats.mpc;
-	adapter->net_stats.rx_over_errors = adapter->stats.mpc;
-
-	adapter->net_stats.tx_errors = 0;
-	adapter->net_stats.rx_frame_errors = 0;
-	adapter->net_stats.tx_aborted_errors = 0;
-	adapter->net_stats.tx_carrier_errors = 0;
-	adapter->net_stats.tx_fifo_errors = 0;
-	adapter->net_stats.tx_heartbeat_errors = 0;
-	adapter->net_stats.tx_window_errors = 0;
+	netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
+	netdev->stats.rx_fifo_errors = adapter->stats.mpc;
+	netdev->stats.rx_missed_errors = adapter->stats.mpc;
+	netdev->stats.rx_over_errors = adapter->stats.mpc;
+
+	netdev->stats.tx_errors = 0;
+	netdev->stats.rx_frame_errors = 0;
+	netdev->stats.tx_aborted_errors = 0;
+	netdev->stats.tx_carrier_errors = 0;
+	netdev->stats.tx_fifo_errors = 0;
+	netdev->stats.tx_heartbeat_errors = 0;
+	netdev->stats.tx_window_errors = 0;
 }
 
 #define IXGB_MAX_INTR 10
-- 
1.6.0.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox