Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next] tun: Fixed unsigned/signed comparison
From: Alex Gartrell @ 2014-12-26  7:05 UTC (permalink / raw)
  To: davem, herbert; +Cc: netdev, linux-kernel, kernel-team, Alex Gartrell

Validated that this was actually using the unsigned comparison with gdb.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
---
 drivers/net/tun.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a5cbf67..6c63e21 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1499,7 +1499,7 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT);
-	if (ret > total_len) {
+	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
 	}
-- 
Alex Gartrell <agartrell@fb.com>

^ permalink raw reply related

* [PATCH net-next 2/2] tun: enable socket system calls
From: Alex Gartrell @ 2014-12-26  6:50 UTC (permalink / raw)
  To: davem, herbert; +Cc: netdev, linux-kernel, kernel-team, Alex Gartrell
In-Reply-To: <1419576624-8999-1-git-send-email-agartrell@fb.com>

By setting private_data to a socket and private_data_is_socket to true, we
can use the socket syscalls.  We also can't just blindly use private_data
anymore, so there's a __tun_file_get function that returns the container_of
private_data appropriately.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
---
 drivers/net/tun.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a5cbf67..b16ddc5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -547,9 +547,18 @@ static void tun_detach_all(struct net_device *dev)
 		module_put(THIS_MODULE);
 }
 
+static struct tun_file *tun_file_from_file(struct file *file)
+{
+	struct socket *s = (struct socket *)file->private_data;
+
+	if (!s)
+		return NULL;
+	return container_of(s, struct tun_file, socket);
+}
+
 static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
 {
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	int err;
 
 	err = security_tun_dev_attach(tfile->socket.sk, tun->security);
@@ -612,7 +621,7 @@ static struct tun_struct *__tun_get(struct tun_file *tfile)
 
 static struct tun_struct *tun_get(struct file *file)
 {
-	return __tun_get(file->private_data);
+	return __tun_get(tun_file_from_file(file));
 }
 
 static void tun_put(struct tun_struct *tun)
@@ -973,7 +982,7 @@ static void tun_net_init(struct net_device *dev)
 /* Poll */
 static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 {
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	struct tun_struct *tun = __tun_get(tfile);
 	struct sock *sk;
 	unsigned int mask = 0;
@@ -1235,7 +1244,7 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct tun_struct *tun = tun_get(file);
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	ssize_t result;
 
 	if (!tun)
@@ -1392,7 +1401,7 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	struct tun_struct *tun = __tun_get(tfile);
 	ssize_t len = iov_iter_count(to), ret;
 
@@ -1567,7 +1576,7 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
 	struct tun_struct *tun;
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	struct net_device *dev;
 	int err;
 
@@ -1801,7 +1810,7 @@ static void tun_set_sndbuf(struct tun_struct *tun)
 
 static int tun_set_queue(struct file *file, struct ifreq *ifr)
 {
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	struct tun_struct *tun;
 	int ret = 0;
 
@@ -1834,7 +1843,7 @@ unlock:
 static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg, int ifreq_len)
 {
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
@@ -2122,7 +2131,7 @@ static long tun_chr_compat_ioctl(struct file *file,
 
 static int tun_chr_fasync(int fd, struct file *file, int on)
 {
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	int ret;
 
 	if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
@@ -2165,7 +2174,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	tfile->sk.sk_write_space = tun_sock_write_space;
 	tfile->sk.sk_sndbuf = INT_MAX;
 
-	file->private_data = tfile;
+	file->private_data = &tfile->socket;
+	file->private_data_is_socket = true;
 	set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags);
 	INIT_LIST_HEAD(&tfile->next);
 
@@ -2176,7 +2186,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 
 static int tun_chr_close(struct inode *inode, struct file *file)
 {
-	struct tun_file *tfile = file->private_data;
+	struct tun_file *tfile = tun_file_from_file(file);
 	struct net *net = tfile->net;
 
 	tun_detach(tfile, true);
@@ -2335,7 +2345,7 @@ struct socket *tun_get_socket(struct file *file)
 	struct tun_file *tfile;
 	if (file->f_op != &tun_fops)
 		return ERR_PTR(-EINVAL);
-	tfile = file->private_data;
+	tfile = tun_file_from_file(file);
 	if (!tfile)
 		return ERR_PTR(-EBADFD);
 	return &tfile->socket;
-- 
Alex Gartrell <agartrell@fb.com>

^ permalink raw reply related

* [PATCH net-next 1/2] socket: Allow external sockets to use socket syscalls
From: Alex Gartrell @ 2014-12-26  6:50 UTC (permalink / raw)
  To: davem, herbert; +Cc: netdev, linux-kernel, kernel-team, Alex Gartrell
In-Reply-To: <1419576624-8999-1-git-send-email-agartrell@fb.com>

Currently the "is-socket" test for a file compares the ops table pointer,
which is static and local to the socket.c.  Instead, this adds a flag for
private_data_is_socket.  This is an exceptionally long commit message for a
two-line patch.

Signed-off-by: Alex Gartrell <agartrell@fb.com>
---
 include/linux/fs.h | 2 +-
 net/socket.c       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bb29b02..d162476 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -809,7 +809,7 @@ struct file {
 #endif
 	/* needed for tty driver, and maybe others */
 	void			*private_data;
-
+	bool			private_data_is_socket : 1;
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
 	struct list_head	f_ep_links;
diff --git a/net/socket.c b/net/socket.c
index 8809afc..cd853be 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -388,6 +388,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	sock->file = file;
 	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->private_data = sock;
+	file->private_data_is_socket = true;
 	return file;
 }
 EXPORT_SYMBOL(sock_alloc_file);
@@ -411,7 +412,7 @@ static int sock_map_fd(struct socket *sock, int flags)
 
 struct socket *sock_from_file(struct file *file, int *err)
 {
-	if (file->f_op == &socket_file_ops)
+	if (file->private_data_is_socket)
 		return file->private_data;	/* set in sock_map_fd */
 
 	*err = -ENOTSOCK;
-- 
Alex Gartrell <agartrell@fb.com>

^ permalink raw reply related

* [PATCH net-next 0/2] tun: support socket system calls
From: Alex Gartrell @ 2014-12-26  6:50 UTC (permalink / raw)
  To: davem, herbert; +Cc: netdev, linux-kernel, kernel-team, Alex Gartrell

There is an underlying socket object in struct tun that isn't accessible,
so if you try to do a recvmmsg on a tun file descriptor you'll get an
error. This small patchset allows external file objects to claim socket
status and enables it for tun_files.

Alex Gartrell (2):
  socket: Allow external sockets to use socket syscalls
  tun: enable socket system calls

 drivers/net/tun.c  | 34 ++++++++++++++++++++++------------
 include/linux/fs.h |  2 +-
 net/socket.c       |  3 ++-
 3 files changed, 25 insertions(+), 14 deletions(-)

-- 
Alex Gartrell <agartrell@fb.com>

^ permalink raw reply

* [PATCH iproute2] man ip-link: Small example of 'ip link show master'
From: Vadim Kochan @ 2014-12-26  2:46 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 man/man8/ip-link.8.in | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index 5134e28..1209b55 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -739,6 +739,11 @@ ip link show type vlan
 Shows the vlan devices.
 .RE
 .PP
+ip link show master br0
+.RS 4
+Shows devices enslaved by br0
+.RE
+.PP
 ip link set dev ppp0 mtu 1400
 .RS 4
 Change the MTU the ppp0 device.
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2] ss: Use rtnl_dump_filter for inet_show_netlink
From: Vadim Kochan @ 2014-12-26  2:26 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

Just another refactoring for ss to use rtnl API from lib

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
It would be good to make better testing with opened different
kind of sockets to test if ss behaves correctly and does not
shows any errors.

 include/libnetlink.h |   1 +
 lib/libnetlink.c     |  17 +++++--
 misc/ss.c            | 139 ++++++++++++++-------------------------------------
 3 files changed, 50 insertions(+), 107 deletions(-)

diff --git a/include/libnetlink.h b/include/libnetlink.h
index 3794ef1..db04969 100644
--- a/include/libnetlink.h
+++ b/include/libnetlink.h
@@ -19,6 +19,7 @@ struct rtnl_handle
 	__u32			seq;
 	__u32			dump;
 	int			proto;
+	FILE		       *dump_fp;
 };
 
 extern int rcvbuf;
diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index e3b7862..45ff90a 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -220,12 +220,15 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
 			return -1;
 		}
 
+		if (rth->dump_fp)
+			fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp);
+
 		for (a = arg; a->filter; a++) {
 			struct nlmsghdr *h = (struct nlmsghdr*)buf;
 			msglen = status;
 
 			while (NLMSG_OK(h, msglen)) {
-				int err;
+				int err = 0;
 
 				if (nladdr.nl_pid != 0 ||
 				    h->nlmsg_pid != rth->local.nl_pid ||
@@ -247,16 +250,20 @@ int rtnl_dump_filter_l(struct rtnl_handle *rth,
 					} else {
 						errno = -err->error;
 						if (rth->proto == NETLINK_SOCK_DIAG &&
-						    errno == ENOENT)
+						    (errno == ENOENT ||
+						     errno == EOPNOTSUPP))
 							return -1;
 
 						perror("RTNETLINK answers");
 					}
 					return -1;
 				}
-				err = a->filter(&nladdr, h, a->arg1);
-				if (err < 0)
-					return err;
+
+				if (!rth->dump_fp) {
+					err = a->filter(&nladdr, h, a->arg1);
+					if (err < 0)
+						return err;
+				}
 
 skip_it:
 				h = NLMSG_NEXT(h, msglen);
diff --git a/misc/ss.c b/misc/ss.c
index 706b5ba..f0c7b34 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -1871,122 +1871,57 @@ static int sockdiag_send(int family, int fd, int protocol, struct filter *f)
 	return 0;
 }
 
-static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
-{
-	int fd, family;
-	struct sockaddr_nl nladdr;
-	struct msghdr msg;
-	char	buf[16384];
-	struct iovec iov[3];
-
-	if ((fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_INET_DIAG)) < 0)
-		return -1;
-
-	family = PF_INET;
-again:
-	if (sockdiag_send(family, fd, protocol, f))
-		return -1;
-
-	memset(&nladdr, 0, sizeof(nladdr));
-	nladdr.nl_family = AF_NETLINK;
-
-	iov[0] = (struct iovec){
-		.iov_base = buf,
-		.iov_len = sizeof(buf)
-	};
-
-	while (1) {
-		int status;
-		struct nlmsghdr *h;
-
-		msg = (struct msghdr) {
-			(void*)&nladdr, sizeof(nladdr),
-			iov,	1,
-			NULL,	0,
-			0
-		};
-
-		status = recvmsg(fd, &msg, 0);
-
-		if (status < 0) {
-			if (errno == EINTR)
-				continue;
-			perror("OVERRUN");
-			continue;
-		}
-		if (status == 0) {
-			fprintf(stderr, "EOF on netlink\n");
-			close(fd);
-			return 0;
-		}
-
-		if (dump_fp)
-			fwrite(buf, 1, NLMSG_ALIGN(status), dump_fp);
+struct inet_diag_arg {
+	struct filter *f;
+	int protocol;
+};
 
-		h = (struct nlmsghdr*)buf;
-		while (NLMSG_OK(h, status)) {
-			int err;
-			struct inet_diag_msg *r = NLMSG_DATA(h);
+static int show_one_inet_sock(const struct sockaddr_nl *addr,
+		struct nlmsghdr *h, void *arg)
+{
+	int err;
+	struct inet_diag_arg *diag_arg = arg;
+	struct inet_diag_msg *r = NLMSG_DATA(h);
 
-			if (/*h->nlmsg_pid != rth->local.nl_pid ||*/
-			    h->nlmsg_seq != MAGIC_SEQ)
-				goto skip_it;
+	if (!(diag_arg->f->families & (1 << r->idiag_family)))
+		return 0;
+	if ((err = inet_show_sock(h, NULL, diag_arg->protocol)) < 0)
+		return err;
 
-			if (h->nlmsg_type == NLMSG_DONE)
-				goto done;
+	return 0;
+}
 
-			if (h->nlmsg_type == NLMSG_ERROR) {
-				struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
-				if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
-					fprintf(stderr, "ERROR truncated\n");
-				} else {
-					if (family != PF_UNSPEC) {
-						family = PF_UNSPEC;
-						goto again;
-					}
+static int inet_show_netlink(struct filter *f, FILE *dump_fp, int protocol)
+{
+	int err = 0;
+	struct rtnl_handle rth;
+	int family = PF_INET;
+	struct inet_diag_arg arg = { .f = f, .protocol = protocol };
 
-					errno = -err->error;
-					if (errno == EOPNOTSUPP) {
-						close(fd);
-						return -1;
-					}
-					perror("TCPDIAG answers");
-				}
+	if (rtnl_open_byproto(&rth, 0, NETLINK_SOCK_DIAG))
+		return -1;
+	rth.dump = MAGIC_SEQ;
+	rth.dump_fp = dump_fp;
 
-				goto done;
-			}
-			if (!dump_fp) {
-				if (!(f->families & (1<<r->idiag_family))) {
-					h = NLMSG_NEXT(h, status);
-					continue;
-				}
-				err = inet_show_sock(h, NULL, protocol);
-				if (err < 0) {
-					close(fd);
-					return err;
-				}
-			}
+again:
+	if ((err = sockdiag_send(family, rth.fd, protocol, f)))
+		goto Exit;
 
-skip_it:
-			h = NLMSG_NEXT(h, status);
-		}
-		if (msg.msg_flags & MSG_TRUNC) {
-			fprintf(stderr, "Message truncated\n");
-			continue;
-		}
-		if (status) {
-			fprintf(stderr, "!!!Remnant of size %d\n", status);
-			exit(1);
+	if ((err = rtnl_dump_filter(&rth, show_one_inet_sock, &arg))) {
+		if (family != PF_UNSPEC) {
+			family = PF_UNSPEC;
+			goto again;
 		}
+		goto Exit;
 	}
-done:
 	if (family == PF_INET) {
 		family = PF_INET6;
 		goto again;
 	}
 
-	close(fd);
-	return 0;
+Exit:
+	rtnl_close(&rth);
+	return err;
 }
 
 static int tcp_show_netlink_file(struct filter *f)
-- 
2.1.3

^ permalink raw reply related

* Re: [PATCH 1/1 net-next] tipc: replace 0 by NULL for pointers
From: Ying Xue @ 2014-12-26  1:02 UTC (permalink / raw)
  To: Fabian Frederick, linux-kernel
  Cc: Jon Maloy, Allan Stephens, David S. Miller, netdev,
	tipc-discussion
In-Reply-To: <1419505550-17336-1-git-send-email-fabf@skynet.be>

On 12/25/2014 07:05 PM, Fabian Frederick wrote:
> Fix sparse warning:
> net/tipc/link.c:1924:40: warning: Using plain integer as NULL pointer
> 
> Signed-off-by: Fabian Frederick <fabf@skynet.be>
> ---

Acked-by: Ying Xue <ying.xue@windriver.com>

>  net/tipc/link.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/net/tipc/link.c b/net/tipc/link.c
> index 23bcc11..082c3b5 100644
> --- a/net/tipc/link.c
> +++ b/net/tipc/link.c
> @@ -1921,7 +1921,7 @@ static struct tipc_node *tipc_link_find_owner(const char *link_name,
>  {
>  	struct tipc_link *l_ptr;
>  	struct tipc_node *n_ptr;
> -	struct tipc_node *found_node = 0;
> +	struct tipc_node *found_node = NULL;
>  	int i;
>  
>  	*bearer_id = 0;
> 

^ permalink raw reply

* [PATCH iproute2] tc class: Show classes as ASCII graph
From: Vadim Kochan @ 2014-12-26  0:10 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

Added new '-g[raph]' option which shows classes in the graph view.

Meanwhile only generic stats info output is supported.

e.g.:

$ tc/tc -g class show dev tap0
+---(1:2) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    +---(1:40) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
|    +---(1:50) htb rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    |    +---(1:51) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|    |
|    +---(1:60) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|
+---(1:1) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
     +---(1:10) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
     +---(1:20) htb prio 0 rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
     +---(1:30) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b

$ tc/tc -g -s class show dev tap0
+---(1:2) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    |    Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |    rate 0bit 0pps backlog 0b 0p requeues 0
|    |
|    +---(1:40) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
|    |          Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |          rate 0bit 0pps backlog 0b 0p requeues 0
|    |
|    +---(1:50) htb rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
|    |    |     Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |    |     rate 0bit 0pps backlog 0b 0p requeues 0
|    |    |
|    |    +---(1:51) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|    |               Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|    |               rate 0bit 0pps backlog 0b 0p requeues 0
|    |
|    +---(1:60) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
|               Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
|               rate 0bit 0pps backlog 0b 0p requeues 0
|
+---(1:1) htb rate 6Mbit ceil 6Mbit burst 15Kb cburst 1599b
     |    Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
     |    rate 0bit 0pps backlog 0b 0p requeues 0
     |
     +---(1:10) htb prio 0 rate 5Mbit ceil 5Mbit burst 15Kb cburst 1600b
     |          Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
     |          rate 0bit 0pps backlog 0b 0p requeues 0
     |
     +---(1:20) htb prio 0 rate 3Mbit ceil 6Mbit burst 15Kb cburst 1599b
     |          Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
     |          rate 0bit 0pps backlog 0b 0p requeues 0
     |
     +---(1:30) htb prio 0 rate 1Kbit ceil 6Mbit burst 15Kb cburst 1599b
                Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
                rate 0bit 0pps backlog 0b 0p requeues 0

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 tc/tc.c        |   5 +-
 tc/tc_class.c  | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 tc/tc_common.h |   2 +
 3 files changed, 171 insertions(+), 3 deletions(-)

diff --git a/tc/tc.c b/tc/tc.c
index 9b50e74..25a1c68 100644
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -34,8 +34,9 @@ int show_stats = 0;
 int show_details = 0;
 int show_raw = 0;
 int show_pretty = 0;
-int batch_mode = 0;
+int show_graph = 0;
 
+int batch_mode = 0;
 int resolve_hosts = 0;
 int use_iec = 0;
 int force = 0;
@@ -278,6 +279,8 @@ int main(int argc, char **argv)
 			++show_raw;
 		} else if (matches(argv[1], "-pretty") == 0) {
 			++show_pretty;
+		} else if (matches(argv[1], "-graph") == 0) {
+			show_graph = 1;
 		} else if (matches(argv[1], "-Version") == 0) {
 			printf("tc utility, iproute2-ss%s\n", SNAPSHOT);
 			return 0;
diff --git a/tc/tc_class.c b/tc/tc_class.c
index ba7869b..877048a 100644
--- a/tc/tc_class.c
+++ b/tc/tc_class.c
@@ -24,6 +24,21 @@
 #include "utils.h"
 #include "tc_util.h"
 #include "tc_common.h"
+#include "hlist.h"
+
+struct graph_node {
+	struct hlist_node hlist;
+	__u32 id;
+	__u32 parent_id;
+	struct graph_node *parent_node;
+	struct graph_node *right_node;
+	void *data;
+	int data_len;
+	int nodes_count;
+};
+
+static struct hlist_head cls_list = {};
+static struct hlist_head root_cls_list = {};
 
 static void usage(void);
 
@@ -148,13 +163,152 @@ int filter_ifindex;
 __u32 filter_qdisc;
 __u32 filter_classid;
 
+static void graph_node_add(__u32 parent_id, __u32 id, void *data,
+		int len)
+{
+	struct graph_node *node = malloc(sizeof(struct graph_node));
+
+	memset(node, 0, sizeof(*node));
+	node->id         = id;
+	node->parent_id  = parent_id;
+
+	if (data && len) {
+		node->data       = malloc(len);
+		node->data_len   = len;
+		memcpy(node->data, data, len);
+	}
+
+	if (parent_id == TC_H_ROOT)
+		hlist_add_head(&node->hlist, &root_cls_list);
+	else
+		hlist_add_head(&node->hlist, &cls_list);
+}
+
+static void graph_indent(char *buf, struct graph_node *node, int is_newline,
+		int add_spaces)
+{
+	char spaces[100] = {0};
+
+	while (node && node->parent_node) {
+		node->parent_node->right_node = node;
+		node = node->parent_node;
+	}
+	while (node && node->right_node) {
+		if (node->hlist.next)
+			strcat(buf, "|    ");
+		else
+			strcat(buf, "     ");
+
+		node = node->right_node;
+	}
+
+	if (is_newline) {
+		if (node->hlist.next && node->nodes_count)
+			strcat(buf, "|    |");
+		else if (node->hlist.next)
+			strcat(buf, "|     ");
+		else if (node->nodes_count)
+			strcat(buf, "     |");
+		else if (!node->hlist.next)
+			strcat(buf, "      ");
+	}
+	if (add_spaces > 0) {
+		sprintf(spaces, "%-*s", add_spaces, "");
+		strcat(buf, spaces);
+	}
+}
+
+static void graph_cls_show(FILE *fp, char *buf, struct hlist_head *root_list,
+		int level)
+{
+	struct hlist_node *n, *tmp_cls;
+	char cls_id_str[256] = {};
+	struct rtattr *tb[TCA_MAX + 1] = {};
+	struct qdisc_util *q;
+	char str[100] = {};
+
+	hlist_for_each_safe(n, tmp_cls, root_list) {
+		struct hlist_node *c, *tmp_chld;
+		struct hlist_head children = {};
+		struct graph_node *cls = container_of(n, struct graph_node,
+				hlist);
+
+		hlist_for_each_safe(c, tmp_chld, &cls_list) {
+			struct graph_node *child = container_of(c,
+					struct graph_node, hlist);
+
+			if (cls->id == child->parent_id) {
+				hlist_del(c);
+				hlist_add_head(c, &children);
+				cls->nodes_count++;
+				child->parent_node = cls;
+			}
+		}
+
+		graph_indent(buf, cls, 0, 0);
+
+		print_tc_classid(cls_id_str, sizeof(cls_id_str), cls->id);
+		sprintf(str, "+---(%s)", cls_id_str);
+		strcat(buf, str);
+
+		parse_rtattr(tb, TCA_MAX, (struct rtattr *)cls->data,
+				cls->data_len);
+
+		if (tb[TCA_KIND] == NULL) {
+			strcat(buf, " [unknown qdisc kind] ");
+		} else {
+			const char *kind = rta_getattr_str(tb[TCA_KIND]);
+
+			sprintf(str, " %s ", kind);
+			strcat(buf, str);
+			fprintf(fp, "%s", buf);
+			buf[0] = '\0';
+
+			q = get_qdisc_kind(kind);
+			if (q && q->print_copt) {
+				q->print_copt(q, fp, tb[TCA_OPTIONS]);
+			}
+			if (q && show_stats) {
+				int cls_indent = strlen(q->id) - 2 +
+					strlen(cls_id_str);
+				struct rtattr *stats = NULL;
+
+				graph_indent(buf, cls, 1, cls_indent);
+
+				if (tb[TCA_STATS] || tb[TCA_STATS2]) {
+					fprintf(fp, "\n");
+					print_tcstats_attr(fp, tb, buf, &stats);
+					buf[0] = '\0';
+				}
+				if (cls->hlist.next || cls->nodes_count) {
+					strcat(buf, "\n");
+					graph_indent(buf, cls, 1, 0);
+				}
+			}
+		}
+		free(cls->data);
+		fprintf(fp, "%s\n", buf);
+		buf[0] = '\0';
+
+		graph_cls_show(fp, buf, &children, level + 1);
+		if (!cls->hlist.next) {
+			graph_indent(buf, cls, 0, 0);
+			strcat(buf, "\n");
+		}
+
+		fprintf(fp, "%s", buf);
+		buf[0] = '\0';
+		free(cls);
+	}
+}
+
 int print_class(const struct sockaddr_nl *who,
 		       struct nlmsghdr *n, void *arg)
 {
 	FILE *fp = (FILE*)arg;
 	struct tcmsg *t = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
-	struct rtattr * tb[TCA_MAX+1];
+	struct rtattr *tb[TCA_MAX + 1] = {};
 	struct qdisc_util *q;
 	char abuf[256];
 
@@ -167,13 +321,18 @@ int print_class(const struct sockaddr_nl *who,
 		fprintf(stderr, "Wrong len %d\n", len);
 		return -1;
 	}
+
+	if (show_graph) {
+		graph_node_add(t->tcm_parent, t->tcm_handle, TCA_RTA(t), len);
+		return 0;
+	}
+
 	if (filter_qdisc && TC_H_MAJ(t->tcm_handle^filter_qdisc))
 		return 0;
 
 	if (filter_classid && t->tcm_handle != filter_classid)
 		return 0;
 
-	memset(tb, 0, sizeof(tb));
 	parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
 
 	if (tb[TCA_KIND] == NULL) {
@@ -236,6 +395,7 @@ static int tc_class_list(int argc, char **argv)
 {
 	struct tcmsg t;
 	char d[16];
+	char buf[1024] = {0};
 
 	memset(&t, 0, sizeof(t));
 	t.tcm_family = AF_UNSPEC;
@@ -306,6 +466,9 @@ static int tc_class_list(int argc, char **argv)
 		return 1;
 	}
 
+	if (show_graph)
+		graph_cls_show(stdout, &buf[0], &root_cls_list, 0);
+
 	return 0;
 }
 
diff --git a/tc/tc_common.h b/tc/tc_common.h
index 4f88856..ea16f7f 100644
--- a/tc/tc_common.h
+++ b/tc/tc_common.h
@@ -19,3 +19,5 @@ extern int parse_estimator(int *p_argc, char ***p_argv, struct tc_estimator *est
 struct tc_sizespec;
 extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec *s);
 extern int check_size_table_opts(struct tc_sizespec *s);
+
+extern int show_graph;
-- 
2.1.3

^ permalink raw reply related

* Re: [PATCH v3] ne2k-pci: Add pci_disable_device in error handling
From: David Miller @ 2014-12-25 23:30 UTC (permalink / raw)
  To: baijiaju1990; +Cc: bhelgaas, benoit.taine, netdev
In-Reply-To: <1419305343-29032-1-git-send-email-baijiaju1990@163.com>

From: Jia-Ju Bai <baijiaju1990@163.com>
Date: Tue, 23 Dec 2014 11:29:03 +0800

> For linux-3.18.0
> The driver lacks pci_disable_device in error handling code of
> ne2k_pci_init_one, so the device enabled by pci_enable_device is not
> disabled when errors occur.
> This patch fixes this problem.
> 
> Signed-off-by: Jia-Ju Bai <baijiaju1990@163.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH] bonding: change error message to debug message in __bond_release_one()
From: David Miller @ 2014-12-25 23:29 UTC (permalink / raw)
  To: wen.gang.wang; +Cc: netdev, gospo, dingtianhong
In-Reply-To: <1419297876-1412-1-git-send-email-wen.gang.wang@oracle.com>

From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Tue, 23 Dec 2014 09:24:36 +0800

> In __bond_release_one(), when the interface is not a slave or not a slave of
> "this" master, it log error message.
> 
> The message actually should be a debug message matching what bond_enslave()
> does.
> 
> Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH 0/5] netlink/genetlink cleanups & multicast improvements
From: David Miller @ 2014-12-25 23:26 UTC (permalink / raw)
  To: johannes; +Cc: netdev
In-Reply-To: <1419270999-22165-1-git-send-email-johannes@sipsolutions.net>

From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 22 Dec 2014 18:56:34 +0100

> I'm looking at using the multicast group functionality in a way that would
> benefit from knowing when there are subscribers to avoid collecting the
> required data when there aren't any. During this I noticed that the unbind
> for multicast groups doesn't actually work - it's never called when sockets
> are closed. Luckily, nobody actually uses the functionality.
> 
> While looking at the code trying to find why it's not called and where the
> multicast listeners are actually removed, I found the potential cleanup in
> patch 3. Patch 2 also has a cleanup for a generic netlink API in this area.

Series applied, using v2 of patch #5.

^ permalink raw reply

* [PATCH] ipnetns: fix exec for netns not in NETNS_RUN_DIR
From: Shahar Lev @ 2014-12-25 22:15 UTC (permalink / raw)
  To: netdev; +Cc: Shahar Lev

Enabling "ip netns exec" to be run with a net namespace
specified by a file path rather than a filename under /var/run/nets.

Signed-off-by: Shahar Lev <shahar@stratoscale.com>
---
 ip/ipnetns.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 1c8aa02..5310d0c 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -66,7 +66,7 @@ static int usage(void)
 	exit(-1);
 }
 
-int get_netns_fd(const char *name)
+static int get_netns_fd_flags(const char *name, int flags)
 {
 	char pathbuf[MAXPATHLEN];
 	const char *path, *ptr;
@@ -78,7 +78,12 @@ int get_netns_fd(const char *name)
 			NETNS_RUN_DIR, name );
 		path = pathbuf;
 	}
-	return open(path, O_RDONLY);
+	return open(path, flags);
+}
+
+int get_netns_fd(const char *name)
+{
+	return get_netns_fd_flags(name, O_RDONLY);
 }
 
 static int netns_list(int argc, char **argv)
@@ -135,7 +140,6 @@ static int netns_exec(int argc, char **argv)
 	 * aware, and execute a program in that environment.
 	 */
 	const char *name, *cmd;
-	char net_path[MAXPATHLEN];
 	int netns;
 
 	if (argc < 1) {
@@ -149,8 +153,7 @@ static int netns_exec(int argc, char **argv)
 
 	name = argv[0];
 	cmd = argv[1];
-	snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name);
-	netns = open(net_path, O_RDONLY | O_CLOEXEC);
+	netns = get_netns_fd_flags(name, O_RDONLY | O_CLOEXEC);
 	if (netns < 0) {
 		fprintf(stderr, "Cannot open network namespace \"%s\": %s\n",
 			name, strerror(errno));
-- 
1.8.3.1

^ permalink raw reply related

* Re: Marvell Kirkwood - MV643XX: near 100% UDP RX packet loss
From: Rick Jones @ 2014-12-25 21:54 UTC (permalink / raw)
  To: Bruno Prémont, Sebastian Hesselbarth; +Cc: netdev
In-Reply-To: <20141224001844.15e13db8@neptune.home>


> Why are so many packets being discarded?

You should also check the netstat statistics, particularly UDP on the 
receiving side.  Look before and after the test and see how they change, 
if at all.

rick jones

^ permalink raw reply

* [PATCH v2 2/2] vhost: relax used address alignment
From: Michael S. Tsirkin @ 2014-12-25 15:05 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, kvm, virtualization
In-Reply-To: <1419517332-12291-1-git-send-email-mst@redhat.com>

virtio 1.0 only requires used address to be 4 byte aligned,
vhost required 8 bytes (size of vring_used_elem).
Fix up vhost to match that.

Additionally, while vhost correctly requires 8 byte
alignment for log, it's unconnected to used ring:
it's a consequence that log has u64 entries.
Tweak code to make that clearer.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/vhost.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ed71b53..cb807d0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -713,9 +713,13 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 			r = -EFAULT;
 			break;
 		}
-		if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
-		    (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
-		    (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
+
+		/* Make sure it's safe to cast pointers to vring types. */
+		BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
+		BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
+		if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
+		    (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
+		    (a.log_guest_addr & (sizeof(u64) - 1))) {
 			r = -EINVAL;
 			break;
 		}
-- 
MST

^ permalink raw reply related

* Re: [Nios2-dev] [PATCH] Altera TSE: Add missing phydev
From: Tobias Klauser @ 2014-12-25 14:55 UTC (permalink / raw)
  To: Kostya Belezko; +Cc: Nios2-dev, Vince Bridgers, netdev
In-Reply-To: <20141225125239.GP16916@distanz.ch>

On 2014-12-25 at 13:52:39 +0100, Tobias Klauser <tklauser@distanz.ch> wrote:
> On 2014-12-22 at 23:37:19 +0100, Kostya Belezko <bkostya@hotmail.com> wrote:
> > Altera network device doesn't come up after
> > 
> > ifconfig eth0 down
> > ifconfig eth0 up
> > 
> > The reason behind is clearing priv->phydev during tse_shutdown().
> > The phydev is not restored back at tse_open().
> > 
> > Signed-off-by: Kostya Belezko <bkostya@hotmail.com>
> 
> Please Cc: netdev@vger.kernel.org for network driver patches.
> 
> > ---
> >  drivers/net/ethernet/altera/altera_tse_main.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
> > index 4efc435..361bf35 100644
> > --- a/drivers/net/ethernet/altera/altera_tse_main.c
> > +++ b/drivers/net/ethernet/altera/altera_tse_main.c
> > @@ -1150,6 +1150,9 @@ static int tse_open(struct net_device *dev)
> >  
> >  	spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags);
> >  
> > +	if (!priv->phydev)
> > +		init_phy(dev);
> 
> That way the PHY will be initialized twice on first start.

It actually won't. Sorry, I didn't read careful enough.

Still, the solution suggested below is preferable IMO and this is also
what other ethernet drivers are doing (at least those I checked).

> IMO the proper solution would be to disconnect and NULL the phydev in
> altera_tse_remove, not in tse_shutdown. There, only phy_stop should be
> called.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* NULL pointer dereference at skb_queue_tail()
From: Tetsuo Handa @ 2014-12-25 13:22 UTC (permalink / raw)
  To: netdev

Hello.

I can reproduce below oops when testing Linux 3.18 with memory allocation
failure injection module at https://lkml.org/lkml/2014/12/25/64 .

Looks similar to http://oops.kernel.org/oops/bug-unable-to-handle-kernel-null-pointer-dereference-at-skb_queue_tail/ .
Where should I check?

----------
[  273.709905] BUG: unable to handle kernel NULL pointer dereference at           (null)
[  273.713845] IP: [<ffffffff81535e27>] skb_queue_tail+0x37/0x60
[  273.716720] PGD 7887d067 PUD 7bc5b067 PMD 0 
[  273.718647] Oops: 0002 [#1] SMP 
[  273.719508] Modules linked in: fault_injection(OE) ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_mangle ip6table_raw ip6table_filter ip6_tables iptable_mangle iptable_raw iptable_filter ip_tables coretemp crct10dif_pclmul crc32_pclmul dm_mirror crc32c_intel dm_region_hash ghash_clmulni_intel dm_log aesni_intel glue_helper dm_mod lrw gf128mul ablk_helper cryptd vmw_balloon ppdev microcode parport_pc serio_raw pcspkr vmw_vmci parport i2c_piix4 shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc uinput sd_mod ata_generic pata_acpi vmwgfx drm_kms_helper ttm drm mptspi e1000 scsi_transport_spi mptscsih mptba
 se ata_piix libata i2c_core floppy [last unloaded: fault_injection]
[  273.739290] CPU: 2 PID: 2866 Comm: Xorg Tainted: G        W  OE  3.18.0+ #337
[  273.741001] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013
[  273.743534] task: ffff880079f18000 ti: ffff88007a894000 task.ti: ffff88007a894000
[  273.745288] RIP: 0010:[<ffffffff81535e27>]  [<ffffffff81535e27>] skb_queue_tail+0x37/0x60
[  273.747275] RSP: 0018:ffff88007a897c18  EFLAGS: 00010046
[  273.748535] RAX: 0000000000000296 RBX: ffff8800360c0b10 RCX: 0000000000000000
[  273.750216] RDX: 0000000000000000 RSI: 0000000000000296 RDI: ffff8800360c0b24
[  273.751921] RBP: ffff88007a897c38 R08: 0000000000000296 R09: 0000000000000300
[  273.753624] R10: ffff88007f803600 R11: ffff88007a9dbd00 R12: ffff8800360c0b10
[  273.755336] R13: ffff8800360c0b24 R14: 0000000000000000 R15: 0000000000000000
[  273.757046] FS:  00007f512a6b0980(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000
[  273.758940] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  273.760295] CR2: 0000000000000000 CR3: 000000007bf4f000 CR4: 00000000000407e0
[  273.762047] Stack:
[  273.762541]  0000000000000020 ffff8800360c0b10 0000000000000020 ffff8800360c0a80
[  273.764392]  ffff88007a897cf8 ffffffff815e9b11 ffff880048861498 ffff8800360c0b10
[  273.766256]  0000002000000296 ffff88007a897d08 0000000000000020 ffff8800360c0d78
[  273.768099] Call Trace:
[  273.768695]  [<ffffffff815e9b11>] unix_stream_sendmsg+0x1d1/0x420
[  273.770157]  [<ffffffff8152cf2a>] sock_aio_write+0xca/0xe0
[  273.771472]  [<ffffffff811af8bc>] do_sync_readv_writev+0x4c/0x80
[  273.772910]  [<ffffffff811b1255>] do_readv_writev+0x1e5/0x280
[  273.774277]  [<ffffffffa01656d5>] ? vmw_unlocked_ioctl+0x15/0x20 [vmwgfx]
[  273.775899]  [<ffffffff811c2f40>] ? do_vfs_ioctl+0x2e0/0x4c0
[  273.777254]  [<ffffffff811ccfa5>] ? __fget_light+0x25/0x70
[  273.778557]  [<ffffffff81100e84>] ? __audit_syscall_entry+0xb4/0x110
[  273.780056]  [<ffffffff811b1379>] vfs_writev+0x39/0x50
[  273.781492]  [<ffffffff811b14aa>] SyS_writev+0x4a/0xd0
[  273.782741]  [<ffffffff81647729>] system_call_fastpath+0x12/0x17
[  273.784192] Code: 8d 6f 14 41 54 49 89 f4 53 48 89 fb 4c 89 ef 48 83 ec 08 e8 ec 13 11 00 48 8b 53 08 49 89 1c 24 4c 89 ef 48 89 c6 49 89 54 24 08 <4c> 89 22 83 43 10 01 4c 89 63 08 e8 19 10 11 00 48 83 c4 08 5b 
[  273.790477] RIP  [<ffffffff81535e27>] skb_queue_tail+0x37/0x60
[  273.791954]  RSP <ffff88007a897c18>
[  273.792798] CR2: 0000000000000000
----------

----------
crash> bt -l
PID: 2866   TASK: ffff880079f18000  CPU: 2   COMMAND: "Xorg"
 #0 [ffff88007a8977f0] machine_kexec at ffffffff8104d092
    /root/linux/arch/x86/kernel/machine_kexec_64.c: 319
 #1 [ffff88007a897840] crash_kexec at ffffffff810ea6d3
    /root/linux/kernel/kexec.c: 1482
 #2 [ffff88007a897910] oops_end at ffffffff81016678
    /root/linux/arch/x86/kernel/dumpstack.c: 231
 #3 [ffff88007a897940] no_context at ffffffff8163bbc2
    /root/linux/arch/x86/mm/fault.c: 724
 #4 [ffff88007a8979a0] __bad_area_nosemaphore at ffffffff8163bc99
    /root/linux/arch/x86/mm/fault.c: 804
 #5 [ffff88007a8979f0] bad_area at ffffffff8163be4b
    /root/linux/arch/x86/mm/fault.c: 833
 #6 [ffff88007a897a20] __do_page_fault at ffffffff81057933
    /root/linux/arch/x86/mm/fault.c: 1220
 #7 [ffff88007a897b30] do_page_fault at ffffffff810579f1
    /root/linux/arch/x86/mm/fault.c: 1299
 #8 [ffff88007a897b60] page_fault at ffffffff816495e8
    /root/linux/arch/x86/kernel/entry_64.S: 1255
    [exception RIP: skb_queue_tail+55]
    RIP: ffffffff81535e27  RSP: ffff88007a897c18  RFLAGS: 00010046
    RAX: 0000000000000296  RBX: ffff8800360c0b10  RCX: 0000000000000000
    RDX: 0000000000000000  RSI: 0000000000000296  RDI: ffff8800360c0b24
    RBP: ffff88007a897c38   R8: 0000000000000296   R9: 0000000000000300
    R10: ffff88007f803600  R11: ffff88007a9dbd00  R12: ffff8800360c0b10
    R13: ffff8800360c0b24  R14: 0000000000000000  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff88007a897c40] unix_stream_sendmsg at ffffffff815e9b11
    /root/linux/net/unix/af_unix.c: 1712
#10 [ffff88007a897d00] sock_aio_write at ffffffff8152cf2a
    /root/linux/net/socket.c: 980
#11 [ffff88007a897d90] do_sync_readv_writev at ffffffff811af8bc
    /root/linux/fs/read_write.c: 685
#12 [ffff88007a897e20] do_readv_writev at ffffffff811b1255
    /root/linux/fs/read_write.c: 839
#13 [ffff88007a897f20] vfs_writev at ffffffff811b1379
    /root/linux/fs/read_write.c: 881
#14 [ffff88007a897f30] sys_writev at ffffffff811b14aa
    /root/linux/fs/read_write.c: 914
#15 [ffff88007a897f80] system_call_fastpath at ffffffff81647729
    /root/linux/arch/x86/kernel/entry_64.S: 423
    RIP: 00007f51285923c0  RSP: 00007fff324a0e00  RFLAGS: 00013202
    RAX: ffffffffffffffda  RBX: ffffffff81647729  RCX: 00000000004c66f0
    RDX: 0000000000000001  RSI: 00007fff324a0840  RDI: 0000000000000013
    RBP: 000000000210e4b0   R8: 0000000000000000   R9: 0000000000400000
    R10: 0000000000000000  R11: 0000000000003293  R12: 00007f512a6b06a0
    R13: 0000000000000001  R14: 00007fff324a0840  R15: 0000000000000000
    ORIG_RAX: 0000000000000014  CS: 0033  SS: 002b
----------

void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
{
        unsigned long flags;

        spin_lock_irqsave(&list->lock, flags);
        __skb_queue_tail(list, newsk);
        spin_unlock_irqrestore(&list->lock, flags);
}

static inline void __skb_queue_tail(struct sk_buff_head *list,
                                   struct sk_buff *newsk)
{
        __skb_queue_before(list, (struct sk_buff *)list, newsk);
}

static inline void __skb_queue_before(struct sk_buff_head *list,
                                      struct sk_buff *next,
                                      struct sk_buff *newsk)
{
        __skb_insert(newsk, next->prev, next, list);
}

static inline void __skb_insert(struct sk_buff *newsk,
                                struct sk_buff *prev, struct sk_buff *next,
                                struct sk_buff_head *list)
{
        newsk->next = next;
        newsk->prev = prev;
        next->prev  = prev->next = newsk; // <= ffffffff81535e27 is here.
        list->qlen++;
}

^ permalink raw reply

* Re: [Nios2-dev] [PATCH] Altera TSE: Add missing phydev
From: Tobias Klauser @ 2014-12-25 12:52 UTC (permalink / raw)
  To: Kostya Belezko; +Cc: Nios2-dev, Vince Bridgers, netdev
In-Reply-To: <BLU436-SMTP58313D55680097A2C4AAA0B6560@phx.gbl>

On 2014-12-22 at 23:37:19 +0100, Kostya Belezko <bkostya@hotmail.com> wrote:
> Altera network device doesn't come up after
> 
> ifconfig eth0 down
> ifconfig eth0 up
> 
> The reason behind is clearing priv->phydev during tse_shutdown().
> The phydev is not restored back at tse_open().
> 
> Signed-off-by: Kostya Belezko <bkostya@hotmail.com>

Please Cc: netdev@vger.kernel.org for network driver patches.

> ---
>  drivers/net/ethernet/altera/altera_tse_main.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
> index 4efc435..361bf35 100644
> --- a/drivers/net/ethernet/altera/altera_tse_main.c
> +++ b/drivers/net/ethernet/altera/altera_tse_main.c
> @@ -1150,6 +1150,9 @@ static int tse_open(struct net_device *dev)
>  
>  	spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags);
>  
> +	if (!priv->phydev)
> +		init_phy(dev);

That way the PHY will be initialized twice on first start.

IMO the proper solution would be to disconnect and NULL the phydev in
altera_tse_remove, not in tse_shutdown. There, only phy_stop should be
called.

^ permalink raw reply

* [PATCH 1/1 net-next] tipc: replace 0 by NULL for pointers
From: Fabian Frederick @ 2014-12-25 11:05 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, Jon Maloy, Allan Stephens, David S. Miller,
	netdev, tipc-discussion

Fix sparse warning:
net/tipc/link.c:1924:40: warning: Using plain integer as NULL pointer

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/tipc/link.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 23bcc11..082c3b5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1921,7 +1921,7 @@ static struct tipc_node *tipc_link_find_owner(const char *link_name,
 {
 	struct tipc_link *l_ptr;
 	struct tipc_node *n_ptr;
-	struct tipc_node *found_node = 0;
+	struct tipc_node *found_node = NULL;
 	int i;
 
 	*bearer_id = 0;
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH 00/10] Split UFO into v4 and v6 versions.
From: Jason Wang @ 2014-12-25  9:50 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, Vladislav Yasevich, Ben Hutchings, stefanha,
	virtualization
In-Reply-To: <20141225071435.GA6687@redhat.com>



On Thu, Dec 25, 2014 at 3:14 PM, Michael S. Tsirkin <mst@redhat.com> 
wrote:
> On Thu, Dec 25, 2014 at 03:10:01AM +0008, Jason Wang wrote:
>>  
>>  
>>  On Thu, Dec 25, 2014 at 2:59 AM, Michael S. Tsirkin 
>> <mst@redhat.com> wrote:
>>  >On Wed, Dec 24, 2014 at 07:11:20PM +0100, Ben Hutchings wrote:
>>  >> On Thu, 2014-12-18 at 00:28 -0500, Jason Wang wrote:
>>  >> >  > ----- Original Message -----
>>  >> > > UFO support in the kernel applies to both IPv4 and IPv6 
>> protocols
>>  >> > > with the same device feature.  However some devices may not 
>> be able
>>  >> > > to support one of the offloads.  For this we split the UFO 
>> offload
>>  >> > > feature into 2 pieces.  NETIF_F_UFO now controlls the IPv4 
>> part and
>>  >> > > this series introduces NETIF_F_UFO6.
>>  >> > >  > > As a result of this work, we can now re-enable 
>> NETIF_F_UFO on
>>  >> > > virtio_net devices and restore UDP over IPv4 performance for
>>  >>guests.
>>  >> > > We also continue to support legacy guests that assume that 
>> UFO6
>>  >> > > support included into UFO(4).
>>  >> > >  > > Without this work, migrating a guest to a 3.18 kernel 
>> fails.
>>  >> > >  >  > This series eliminate the ambiguous NETIF_F_UFO.
>>  >> >  > But VIRTIO_NET_F_{HOST|GUEST}_UFO and 
>> VIRTIO_NET_F_HDR_GSO_UDP is
>>  >>still
>>  >> > ambigious. I know it was used to keep compatibility for legacy 
>> guest.
>>  >>But
>>  >> > what's the future plan? Differentiate UFOv4 and UFOv6 in virtio
>>  >>features and
>>  >> > gso type in vnet header looks sufficient?
>>  >> [...]
>>  >>   The IPv6 fragmentation ID needs to be added to the vnet 
>> header, to do
>>  >> UFOv6 properly.  If it wasn't for that lack, we wouldn't have to 
>> split
>>  >> the feature flag.
>>  >> Ben.
>>  >
>>  >
>>  >Right.
>>  >
>>  >I think a good plan is as follows:
>>  >
>>  >1. add code generating IDs on rx path for virtio,
>>  >   and re-enable UFO (4+6).
>>  >
>>  >2. Ben's patch doing similar things for tun/macvtap
>>  
>>  I wonder whether or not we can do this lazily.
>>  
>>  E.g during UFO6 software segment for dodgy packets?
>>  
>>  And this can eliminate the effort of duplicating it in all untrusted
>>  sources?
> 
> But then we'll need to then change it again in step 5,
> probably by setting some flag?

The idea is if no segmentation (either software or hardware), no need 
for an id.
 
So looks like dodgy is sufficient since we are sure kernel should set 
id in all other cases. And we can add the id only when we see it was 
not set for the ufo packet(legacy guest). This seems not conflict with 
step 5.
> 
> Might as well do it directly.
> 
>>  >
>>  >3. similarly for packet sockets
>>  >
>>  >
>>  >above seem appropriate for stable
>>  >
>>  >4. 4+6 split, to make it easier for drivers
>>  >   to identify when fragment ID is needed
>>  >
>>  >5. extend vnet header, and add ways to enable it,
>>  >   when enabled, use that to pass
>>  >   fragment ID for tun/virtio/vhost/macvtap/packet socket
>>  >
>>  >4 is what this patchset does.
>>  >
>>  >
>>  >> --  Ben Hutchings
>>  >> If more than one person is responsible for a bug, no one is at 
>> fault.
>>  >
>>  >
>>  >--
>>  >To unsubscribe from this list: send the line "unsubscribe netdev" 
>> in
>>  >the body of a message to majordomo@vger.kernel.org
>>  >More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/4] can: kvaser_usb: Don't free packets when tight on URBs
From: Ahmed S. Darwish @ 2014-12-25  9:38 UTC (permalink / raw)
  To: Greg KH
  Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
	Marc Kleine-Budde, David S. Miller, Paul Gortmaker, Linux-CAN,
	netdev, Linux-stable, LKML
In-Reply-To: <20141225025011.GA10491@kroah.com>

On Wed, Dec 24, 2014 at 06:50:11PM -0800, Greg KH wrote:
> On Thu, Dec 25, 2014 at 01:56:44AM +0200, Ahmed S. Darwish wrote:
> > From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> > 
  > > Flooding the Kvaser CAN to USB dongle with multiple reads and
> > writes in high frequency caused seemingly-random panics in the
> > kernel.
> > 
> > On further inspection, it seems the driver erroneously freed the
> > to-be-transmitted packet upon getting tight on URBs and returning
> > NETDEV_TX_BUSY, leading to invalid memory writes and double frees
> > at a later point in time.
> > 
> > Note:
> > 
> > Finding no more URBs/transmit-contexts and returning NETDEV_TX_BUSY
> > is a driver bug in and out of itself: it means that our start/stop
> > queue flow control is broken.
> > 
> > This patch only fixes the (buggy) error handling code; the root
> > cause shall be fixed in a later commit.
> > 
> > Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> > ---
> >  drivers/net/can/usb/kvaser_usb.c | 12 ++++++------
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> >  (Marc, Greg, I believe this should also be added to -stable?)
> 
> 
> <formletter>
> 
> This is not the correct way to submit patches for inclusion in the
> stable kernel tree.  Please read Documentation/stable_kernel_rules.txt
> for how to do this properly.
> 
> </formletter>

<msg-response>

Note taken. Sorry about that ;-)

</msg-response>

^ permalink raw reply

* Re: [PATCH 00/10] Split UFO into v4 and v6 versions.
From: Michael S. Tsirkin @ 2014-12-25  7:14 UTC (permalink / raw)
  To: Jason Wang
  Cc: netdev, Vladislav Yasevich, Ben Hutchings, stefanha,
	virtualization
In-Reply-To: <1419476521.27992.1@smtp.corp.redhat.com>

On Thu, Dec 25, 2014 at 03:10:01AM +0008, Jason Wang wrote:
> 
> 
> On Thu, Dec 25, 2014 at 2:59 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> >On Wed, Dec 24, 2014 at 07:11:20PM +0100, Ben Hutchings wrote:
> >> On Thu, 2014-12-18 at 00:28 -0500, Jason Wang wrote:
> >> >  > ----- Original Message -----
> >> > > UFO support in the kernel applies to both IPv4 and IPv6 protocols
> >> > > with the same device feature.  However some devices may not be able
> >> > > to support one of the offloads.  For this we split the UFO offload
> >> > > feature into 2 pieces.  NETIF_F_UFO now controlls the IPv4 part and
> >> > > this series introduces NETIF_F_UFO6.
> >> > >  > > As a result of this work, we can now re-enable NETIF_F_UFO on
> >> > > virtio_net devices and restore UDP over IPv4 performance for
> >>guests.
> >> > > We also continue to support legacy guests that assume that UFO6
> >> > > support included into UFO(4).
> >> > >  > > Without this work, migrating a guest to a 3.18 kernel fails.
> >> > >  >  > This series eliminate the ambiguous NETIF_F_UFO.
> >> >  > But VIRTIO_NET_F_{HOST|GUEST}_UFO and VIRTIO_NET_F_HDR_GSO_UDP is
> >>still
> >> > ambigious. I know it was used to keep compatibility for legacy guest.
> >>But
> >> > what's the future plan? Differentiate UFOv4 and UFOv6 in virtio
> >>features and
> >> > gso type in vnet header looks sufficient?
> >> [...]
> >>   The IPv6 fragmentation ID needs to be added to the vnet header, to do
> >> UFOv6 properly.  If it wasn't for that lack, we wouldn't have to split
> >> the feature flag.
> >> Ben.
> >
> >
> >Right.
> >
> >I think a good plan is as follows:
> >
> >1. add code generating IDs on rx path for virtio,
> >   and re-enable UFO (4+6).
> >
> >2. Ben's patch doing similar things for tun/macvtap
> 
> I wonder whether or not we can do this lazily.
> 
> E.g during UFO6 software segment for dodgy packets?
> 
> And this can eliminate the effort of duplicating it in all untrusted
> sources?

But then we'll need to then change it again in step 5,
probably by setting some flag?
Might as well do it directly.

> >
> >3. similarly for packet sockets
> >
> >
> >above seem appropriate for stable
> >
> >4. 4+6 split, to make it easier for drivers
> >   to identify when fragment ID is needed
> >
> >5. extend vnet header, and add ways to enable it,
> >   when enabled, use that to pass
> >   fragment ID for tun/virtio/vhost/macvtap/packet socket
> >
> >4 is what this patchset does.
> >
> >
> >> --  Ben Hutchings
> >> If more than one person is responsible for a bug, no one is at fault.
> >
> >
> >--
> >To unsubscribe from this list: send the line "unsubscribe netdev" in
> >the body of a message to majordomo@vger.kernel.org
> >More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [PATCH net v2] net: Generalize ndo_gso_check tondo_features_check
From: Hayes Wang @ 2014-12-25  3:29 UTC (permalink / raw)
  To: Jesse Gross, David Miller
  Cc: netdev@vger.kernel.org, Tom Herbert, Joe Stringer, Eric Dumazet
In-Reply-To: <1419403046-43165-1-git-send-email-jesse@nicira.com>

 Jesse Gross [mailto:jesse@nicira.com] 
> Sent: Wednesday, December 24, 2014 2:37 PM
[...]
> GSO isn't the only offload feature with restrictions that
> potentially can't be expressed with the current features mechanism.
> Checksum is another although it's a general issue that could in
> theory apply to anything. Even if it may be possible to
> implement these restrictions in other ways, it can result in
> duplicate code or inefficient per-packet behavior.
> 
> This generalizes ndo_gso_check so that drivers can remove any
> features that don't make sense for a given packet, similar to
> netif_skb_features(). It also converts existing driver
> restrictions to the new format, completing the work that was
> done to support tunnel protocols since the issues apply to
> checksums as well.
> 
> By actually removing features from the set that are used to do
> offloading, it solves another problem with the existing
> interface. In these cases, GSO would run with the original set
> of features and not do anything because it appears that
> segmentation is not required.
> 
> CC: Tom Herbert <therbert@google.com>
> CC: Joe Stringer <joestringer@nicira.com>
> CC: Eric Dumazet <edumazet@google.com>
> CC: Hayes Wang <hayeswang@realtek.com>
> Signed-off-by: Jesse Gross <jesse@nicira.com>
> Acked-by:  Tom Herbert <therbert@google.com>
> Fixes: 04ffcb255f22 ("net: Add ndo_gso_check")
> ---
> v2: Rebase
>     Drop overzealous use of netdev_intersect_features()

Tested-by: Hayes Wang <hayeswang@realtek.com>

^ permalink raw reply

* Re: [PATCH 00/10] Split UFO into v4 and v6 versions.
From: Jason Wang @ 2014-12-25  3:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev, Vladislav Yasevich, Ben Hutchings, stefanha,
	virtualization
In-Reply-To: <20141224185922.GA30934@redhat.com>



On Thu, Dec 25, 2014 at 2:59 AM, Michael S. Tsirkin <mst@redhat.com> 
wrote:
> On Wed, Dec 24, 2014 at 07:11:20PM +0100, Ben Hutchings wrote:
>>  On Thu, 2014-12-18 at 00:28 -0500, Jason Wang wrote:
>>  > 
>>  > ----- Original Message -----
>>  > > UFO support in the kernel applies to both IPv4 and IPv6 
>> protocols
>>  > > with the same device feature.  However some devices may not be 
>> able
>>  > > to support one of the offloads.  For this we split the UFO 
>> offload
>>  > > feature into 2 pieces.  NETIF_F_UFO now controlls the IPv4 part 
>> and
>>  > > this series introduces NETIF_F_UFO6.
>>  > > 
>>  > > As a result of this work, we can now re-enable NETIF_F_UFO on
>>  > > virtio_net devices and restore UDP over IPv4 performance for 
>> guests.
>>  > > We also continue to support legacy guests that assume that UFO6
>>  > > support included into UFO(4).
>>  > > 
>>  > > Without this work, migrating a guest to a 3.18 kernel fails.
>>  > > 
>>  > 
>>  > This series eliminate the ambiguous NETIF_F_UFO.
>>  > 
>>  > But VIRTIO_NET_F_{HOST|GUEST}_UFO and VIRTIO_NET_F_HDR_GSO_UDP is 
>> still
>>  > ambigious. I know it was used to keep compatibility for legacy 
>> guest. But
>>  > what's the future plan? Differentiate UFOv4 and UFOv6 in virtio 
>> features and
>>  > gso type in vnet header looks sufficient?
>>  [...]
>>  
>>  The IPv6 fragmentation ID needs to be added to the vnet header, to 
>> do
>>  UFOv6 properly.  If it wasn't for that lack, we wouldn't have to 
>> split
>>  the feature flag.
>>  
>>  Ben.
> 
> 
> Right.
> 
> I think a good plan is as follows:
> 
> 1. add code generating IDs on rx path for virtio,
>    and re-enable UFO (4+6).
> 
> 2. Ben's patch doing similar things for tun/macvtap

I wonder whether or not we can do this lazily.

E.g during UFO6 software segment for dodgy packets?

And this can eliminate the effort of duplicating it in all untrusted 
sources?
> 
> 3. similarly for packet sockets
> 
> 
> above seem appropriate for stable
> 
> 4. 4+6 split, to make it easier for drivers
>    to identify when fragment ID is needed
> 
> 5. extend vnet header, and add ways to enable it,
>    when enabled, use that to pass
>    fragment ID for tun/virtio/vhost/macvtap/packet socket
> 
> 4 is what this patchset does.
> 
> 
>>  -- 
>>  Ben Hutchings
>>  If more than one person is responsible for a bug, no one is at 
>> fault.
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v2 1/4] can: kvaser_usb: Don't free packets when tight on URBs
From: Greg KH @ 2014-12-25  2:50 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
	Marc Kleine-Budde, David S. Miller, Paul Gortmaker, Linux-CAN,
	netdev, Linux-stable, LKML
In-Reply-To: <20141224235644.GA3778@vivalin-002>

On Thu, Dec 25, 2014 at 01:56:44AM +0200, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> 
> Flooding the Kvaser CAN to USB dongle with multiple reads and
> writes in high frequency caused seemingly-random panics in the
> kernel.
> 
> On further inspection, it seems the driver erroneously freed the
> to-be-transmitted packet upon getting tight on URBs and returning
> NETDEV_TX_BUSY, leading to invalid memory writes and double frees
> at a later point in time.
> 
> Note:
> 
> Finding no more URBs/transmit-contexts and returning NETDEV_TX_BUSY
> is a driver bug in and out of itself: it means that our start/stop
> queue flow control is broken.
> 
> This patch only fixes the (buggy) error handling code; the root
> cause shall be fixed in a later commit.
> 
> Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> ---
>  drivers/net/can/usb/kvaser_usb.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
>  (Marc, Greg, I believe this should also be added to -stable?)


<formletter>

This is not the correct way to submit patches for inclusion in the
stable kernel tree.  Please read Documentation/stable_kernel_rules.txt
for how to do this properly.

</formletter>

^ permalink raw reply

* Re: [PATCH v2 1/6] GMAC: add driver for Rockchip RK3288 SoCs integrated GMAC
From: Chen-Yu Tsai @ 2014-12-25  2:32 UTC (permalink / raw)
  To: Roger
  Cc: Heiko Stübner, Giuseppe Cavallaro, netdev, linux-kernel,
	linux-rockchip, kever.yang, eddie.cai
In-Reply-To: <547EC24E.6050905@rock-chips.com>

Hi,

On Wed, Dec 3, 2014 at 3:57 PM, Roger <roger.chen@rock-chips.com> wrote:
> Hi! Heiko
>
> about regulator, power gpio,  reset gpio and irq gpio
> please refer to my comment inline, tks.
>
>
> On 2014/12/2 7:44, Heiko Stübner wrote:
>>
>> Hi Roger,
>>
>> the comments inline are a rough first review. I hope to get a clearer
>> picture
>> for the stuff I'm not sure about in v3 once the big issues are fixed.
>>
>>
>> Am Donnerstag, 27. November 2014, 10:52:08 schrieb Roger Chen:
>>>
>>> This driver is based on stmmac driver.
>>>
>>> modification based on Giuseppe CAVALLARO's suggestion:
>>> 1. use BIT()
>>>
>>>         > +/*RK3288_GRF_SOC_CON3*/
>>>         > +#define GMAC_TXCLK_DLY_ENABLE   ((0x4000 << 16) | (0x4000))
>>>         > +#define GMAC_TXCLK_DLY_DISABLE  ((0x4000 << 16) | (0x0000))
>>>
>>>         ...
>>>
>>>         why do not use BIT and BIT_MASK where possible?
>>>
>>>         ===>after modification:
>>>
>>>         #define GRF_BIT(nr)     (BIT(nr) | BIT(nr+16))
>>>         #define GRF_CLR_BIT(nr) (BIT(nr+16))
>>>         #define GMAC_TXCLK_DLY_ENABLE   GRF_BIT(14)
>>>         #define GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(14)
>>>         ...
>>> 2.
>>>
>>>         > +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>>         > +             GMAC_PHY_INTF_SEL_RGMII);
>>>         > +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>>         > +             GMAC_RMII_MODE_CLR);
>>>
>>>         maybe you could perform just one write unless there is some HW
>>>         constraint.
>>>
>>>         ===>after modification:
>>>
>>>         regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>>                          GMAC_PHY_INTF_SEL_RGMII | GMAC_RMII_MODE_CLR);
>>>
>>> 3. use macros
>>>
>>>         > +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO3D_E,
>>> 0xFFFFFFFF);
>>>         > +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4B_E,
>>>         > +             0x3<<2<<16 | 0x3<<2);
>>>
>>>         pls use macros, these shift sequence is really help to decode
>>>
>>>         ===>after modification:
>>>
>>>         regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4A_E, GPIO4A_12MA);
>>>         regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4B_E, GPIO4B_2_12MA);
>>>
>>> 4. remove grf fail check in rk_gmac_setup()
>>>
>>>         > +    if (IS_ERR(bsp_priv->grf))
>>>         > +        dev_err(&pdev->dev, "Missing rockchip,grf
>>> property\n");
>>>
>>>         I wonder if you can fail on here and save all the check in
>>>         set_rgmii_speed etc.
>>>         Maybe this can be considered a mandatory property for the
>>> glue-logic.
>>>
>>> 5. remove .tx_coe=1
>>>
>>>         > +const struct stmmac_of_data rk_gmac_data = {
>>>         > +    .has_gmac = 1,
>>>         > +    .tx_coe = 1,
>>>
>>>         FYI, on new gmac there is the HW capability register to
>>> dinamically
>>>         provide you if coe is supported.
>>>
>>>         IMO you should add the OF "compatible" string and in case of mac
>>>         newer than the 3.50a you can remove coe.
>>
>> changelogs like these, should be compact and also not be in the commit
>> message
>> itself, but in the "comment"-section below the "---" and before the
>> diffstat.
>>
>>
>>> Signed-off-by: Roger Chen <roger.chen@rock-chips.com>
>>> ---
>>
>> changelog here ... the commonly used pattern is something like
>>
>> changes since v2:
>> - ...
>> - ...
>>
>> changes since v1:
>> - ...
>>
>>>   drivers/net/ethernet/stmicro/stmmac/Makefile       |    2 +-
>>>   drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c     |  636
>>> ++++++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |
>>>   3 +
>>>   .../net/ethernet/stmicro/stmmac/stmmac_platform.h  |    1 +
>>>   4 files changed, 641 insertions(+), 1 deletion(-)
>>>   create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
>>>
>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile
>>> b/drivers/net/ethernet/stmicro/stmmac/Makefile index ac4d562..73c2715
>>> 100644
>>> --- a/drivers/net/ethernet/stmicro/stmmac/Makefile
>>> +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
>>> @@ -6,7 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o
>>> stmmac_mdio.o
>>> ring_mode.o     \
>>>
>>>   obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
>>>   stmmac-platform-objs:= stmmac_platform.o dwmac-meson.o dwmac-sunxi.o  \
>>> -                      dwmac-sti.o dwmac-socfpga.o
>>> +                      dwmac-sti.o dwmac-socfpga.o dwmac-rk.o
>>>
>>>   obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
>>>   stmmac-pci-objs:= stmmac_pci.o
>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
>>> b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c new file mode 100644
>>> index 0000000..870563f
>>> --- /dev/null
>>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
>>> @@ -0,0 +1,636 @@
>>> +/**
>>> + * dwmac-rk.c - Rockchip RK3288 DWMAC specific glue layer
>>> + *
>>> + * Copyright (C) 2014 Chen-Zhi (Roger Chen)
>>> + *
>>> + * Chen-Zhi (Roger Chen)  <roger.chen@rock-chips.com>
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License as published by
>>> + * the Free Software Foundation; either version 2 of the License, or
>>> + * (at your option) any later version.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU General Public License for more details.
>>> + */
>>> +
>>> +#include <linux/stmmac.h>
>>> +#include <linux/bitops.h>
>>> +#include <linux/clk.h>
>>> +#include <linux/phy.h>
>>> +#include <linux/of_net.h>
>>> +#include <linux/gpio.h>
>>> +#include <linux/of_gpio.h>
>>> +#include <linux/of_device.h>
>>> +#include <linux/regulator/consumer.h>
>>> +#include <linux/delay.h>
>>> +#include <linux/mfd/syscon.h>
>>> +#include <linux/regmap.h>
>>> +
>>> +struct rk_priv_data {
>>> +       struct platform_device *pdev;
>>> +       int phy_iface;
>>> +       bool power_ctrl_by_pmu;
>>> +       char pmu_regulator[32];
>>> +       int pmu_enable_level;
>>> +
>>> +       int power_io;
>>> +       int power_io_level;
>>> +       int reset_io;
>>> +       int reset_io_level;
>>> +       int phyirq_io;
>>> +       int phyirq_io_level;
>>> +
>>> +       bool clk_enabled;
>>> +       bool clock_input;
>>> +
>>> +       struct clk *clk_mac;
>>> +       struct clk *clk_mac_pll;
>>> +       struct clk *gmac_clkin;
>>> +       struct clk *mac_clk_rx;
>>> +       struct clk *mac_clk_tx;
>>> +       struct clk *clk_mac_ref;
>>> +       struct clk *clk_mac_refout;
>>> +       struct clk *aclk_mac;
>>> +       struct clk *pclk_mac;
>>> +
>>> +       int tx_delay;
>>> +       int rx_delay;
>>> +
>>> +       struct regmap *grf;
>>> +};
>>> +
>>> +#define RK3288_GRF_SOC_CON1 0x0248
>>> +#define RK3288_GRF_SOC_CON3 0x0250
>>> +#define RK3288_GRF_GPIO3D_E 0x01ec
>>> +#define RK3288_GRF_GPIO4A_E 0x01f0
>>> +#define RK3288_GRF_GPIO4B_E 0x01f4
>>
>> here you're using a space instead of a tab, please select one pattern
>> either
>> tabs or space but do not mix them.
>>
>>
>>> +#define GPIO3D_2MA     0xFFFF0000
>>> +#define GPIO3D_4MA     0xFFFF5555
>>> +#define GPIO3D_8MA     0xFFFFAAAA
>>> +#define GPIO3D_12MA    0xFFFFFFFF
>>> +
>>> +#define GPIO4A_2MA     0xFFFF0000
>>> +#define GPIO4A_4MA     0xFFFF5555
>>> +#define GPIO4A_8MA     0xFFFFAAAA
>>> +#define GPIO4A_12MA    0xFFFFFFFF
>>
>> see comment about pin settings below
>>
>>
>>> +
>>> +#define GRF_BIT(nr)    (BIT(nr) | BIT(nr+16))
>>> +#define GRF_CLR_BIT(nr)        (BIT(nr+16))
>>> +
>>> +#define GPIO4B_2_2MA   (GRF_CLR_BIT(2) | GRF_CLR_BIT(3))
>>> +#define GPIO4B_2_4MA   (GRF_BIT(2) | GRF_CLR_BIT(3))
>>> +#define GPIO4B_2_8MA   (GRF_CLR_BIT(2) | GRF_BIT(3))
>>> +#define GPIO4B_2_12MA  (GRF_BIT(2) | GRF_BIT(3))
>>> +
>>> +/*RK3288_GRF_SOC_CON1*/
>>> +#define GMAC_PHY_INTF_SEL_RGMII        (GRF_BIT(6) | GRF_CLR_BIT(7) |
>>> GRF_CLR_BIT(8))
>>> +#define GMAC_PHY_INTF_SEL_RMII  (GRF_CLR_BIT(6) |
>>> GRF_CLR_BIT(7) | GRF_BIT(8))
>>> +#define GMAC_FLOW_CTRL          GRF_BIT(9)
>>> +#define GMAC_FLOW_CTRL_CLR      GRF_CLR_BIT(9)
>>> +#define GMAC_SPEED_10M          GRF_CLR_BIT(10)
>>> +#define GMAC_SPEED_100M         GRF_BIT(10)
>>> +#define GMAC_RMII_CLK_25M       GRF_BIT(11)
>>> +#define GMAC_RMII_CLK_2_5M      GRF_CLR_BIT(11)
>>> +#define GMAC_CLK_125M           (GRF_CLR_BIT(12) | GRF_CLR_BIT(13))
>>> +#define GMAC_CLK_25M            (GRF_BIT(12) | GRF_BIT(13))
>>> +#define GMAC_CLK_2_5M           (GRF_CLR_BIT(12) | GRF_BIT(13))
>>> +#define GMAC_RMII_MODE          GRF_BIT(14)
>>> +#define GMAC_RMII_MODE_CLR      GRF_CLR_BIT(14)
>>> +
>>> +/*RK3288_GRF_SOC_CON3*/
>>> +#define GMAC_TXCLK_DLY_ENABLE   GRF_BIT(14)
>>> +#define GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(14)
>>> +#define GMAC_RXCLK_DLY_ENABLE   GRF_BIT(15)
>>> +#define GMAC_RXCLK_DLY_DISABLE  GRF_CLR_BIT(15)
>>> +#define GMAC_CLK_RX_DL_CFG(val) ((0x7F<<7<<16) | (val<<7))
>>> +#define GMAC_CLK_TX_DL_CFG(val) ((0x7F<<16) | (val))
>>
>> again mixed tabs and spaces as delimiters.
>>
>> Also the _CFG macros are not well abstracted. You could take a look at the
>> HIWORD_UPDATE macro in drivers/clk/rockchip/clk.h:
>>
>> #define GMAC_CLK_DL_MASK        0x7f
>> #define GMAC_CLK_RX_DL_CFG(val)  HIWORD_UPDATE(val, GMAC_CLK_DL_MASK, 7)
>> #define GMAC_CLK_TX_DL_CFG(val)  HIWORD_UPDATE(val, GMAC_CLK_DL_MASK, 0)
>>
>>
>>> +
>>> +static void set_to_rgmii(struct rk_priv_data *bsp_priv,
>>> +                        int tx_delay, int rx_delay)
>>> +{
>>> +       if (IS_ERR(bsp_priv->grf)) {
>>> +               pr_err("%s: Missing rockchip,grf property\n", __func__);
>>> +               return;
>>> +       }
>>> +
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                    GMAC_PHY_INTF_SEL_RGMII | GMAC_RMII_MODE_CLR);
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON3,
>>> +                    GMAC_RXCLK_DLY_ENABLE | GMAC_TXCLK_DLY_ENABLE |
>>> +                    GMAC_CLK_RX_DL_CFG(rx_delay) |
>>> +                    GMAC_CLK_TX_DL_CFG(tx_delay));
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_GPIO3D_E, GPIO3D_12MA);
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4A_E, GPIO4A_12MA);
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4B_E, GPIO4B_2_12MA);
>>
>> please don't write to parts controlled by other drivers - here the drive
>> strength settings of pins is controlled by the pinctrl driver. Instead you
>> can
>> just set the drive-strength in the pinctrl settings.
>>
>>
>>> +
>>> +       pr_debug("%s: tx delay=0x%x; rx delay=0x%x;\n",
>>> +                __func__, tx_delay, rx_delay);
>>> +}
>>> +
>>> +static void set_to_rmii(struct rk_priv_data *bsp_priv)
>>> +{
>>> +       if (IS_ERR(bsp_priv->grf)) {
>>> +               pr_err("%s: Missing rockchip,grf property\n", __func__);
>>
>> you have a device-reference in rk_priv_data, so you could use dev_err
>> here.
>> Same for all other pr_err/pr_debug/pr_* calls in this file.
>>
>>
>>> +               return;
>>> +       }
>>> +
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                    GMAC_PHY_INTF_SEL_RMII);
>>> +       regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                    GMAC_RMII_MODE);
>>
>> these two could be combined?
>>
>>
>>> +}
>>> +
>>> +static void set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
>>> +{
>>> +       if (IS_ERR(bsp_priv->grf)) {
>>> +               pr_err("%s: Missing rockchip,grf property\n", __func__);
>>> +               return;
>>> +       }
>>> +
>>> +       if (speed == 10)
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> GMAC_CLK_2_5M);
>>> +       else if (speed == 100)
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> GMAC_CLK_25M);
>>> +       else if (speed == 1000)
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> GMAC_CLK_125M);
>>> +       else
>>> +               pr_err("unknown speed value for RGMII! speed=%d", speed);
>>> +}
>>> +
>>> +static void set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
>>> +{
>>> +       if (IS_ERR(bsp_priv->grf)) {
>>> +               pr_err("%s: Missing rockchip,grf property\n", __func__);
>>> +               return;
>>> +       }
>>> +
>>> +       if (speed == 10) {
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                            GMAC_RMII_CLK_2_5M);
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                            GMAC_SPEED_10M);
>>
>> combine into one write?
>>
>>
>>> +       } else if (speed == 100) {
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                            GMAC_RMII_CLK_25M);
>>> +               regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
>>> +                            GMAC_SPEED_100M);
>>
>> combine into one write?
>>
>>
>>> +       } else {
>>> +               pr_err("unknown speed value for RMII! speed=%d", speed);
>>> +       }
>>> +}
>>> +
>>> +#define MAC_CLK_RX     "mac_clk_rx"
>>> +#define MAC_CLK_TX     "mac_clk_tx"
>>> +#define CLK_MAC_REF    "clk_mac_ref"
>>> +#define CLK_MAC_REF_OUT        "clk_mac_refout"
>>> +#define CLK_MAC_PLL    "clk_mac_pll"
>>> +#define ACLK_MAC       "aclk_mac"
>>> +#define PCLK_MAC       "pclk_mac"
>>> +#define MAC_CLKIN      "ext_gmac"
>>> +#define CLK_MAC                "stmmaceth"
>>
>> why the need to extra constants for the clock names and not use the real
>> names
>> directly like most other drivers do?
>>
>>
>>> +
>>> +static int gmac_clk_init(struct rk_priv_data *bsp_priv)
>>> +{
>>> +       struct device *dev = &bsp_priv->pdev->dev;
>>> +
>>> +       bsp_priv->clk_enabled = false;
>>> +
>>> +       bsp_priv->mac_clk_rx = clk_get(dev, MAC_CLK_RX);
>>> +       if (IS_ERR(bsp_priv->mac_clk_rx))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, MAC_CLK_RX);
>>> +
>>> +       bsp_priv->mac_clk_tx = clk_get(dev, MAC_CLK_TX);
>>> +       if (IS_ERR(bsp_priv->mac_clk_tx))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, MAC_CLK_TX);
>>> +
>>> +       bsp_priv->clk_mac_ref = clk_get(dev, CLK_MAC_REF);
>>> +       if (IS_ERR(bsp_priv->clk_mac_ref))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, CLK_MAC_REF);
>>> +
>>> +       bsp_priv->clk_mac_refout = clk_get(dev, CLK_MAC_REF_OUT);
>>> +       if (IS_ERR(bsp_priv->clk_mac_refout))
>>> +               pr_warn("%s: warning:cannot get clock %s\n",
>>> +                       __func__, CLK_MAC_REF_OUT);
>>> +
>>> +       bsp_priv->aclk_mac = clk_get(dev, ACLK_MAC);
>>> +       if (IS_ERR(bsp_priv->aclk_mac))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, ACLK_MAC);
>>> +
>>> +       bsp_priv->pclk_mac = clk_get(dev, PCLK_MAC);
>>> +       if (IS_ERR(bsp_priv->pclk_mac))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, PCLK_MAC);
>>> +
>>> +       bsp_priv->clk_mac_pll = clk_get(dev, CLK_MAC_PLL);
>>> +       if (IS_ERR(bsp_priv->clk_mac_pll))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, CLK_MAC_PLL);
>>> +
>>> +       bsp_priv->gmac_clkin = clk_get(dev, MAC_CLKIN);
>>> +       if (IS_ERR(bsp_priv->gmac_clkin))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, MAC_CLKIN);
>>> +
>>> +       bsp_priv->clk_mac = clk_get(dev, CLK_MAC);
>>> +       if (IS_ERR(bsp_priv->clk_mac))
>>> +               pr_warn("%s: warning: cannot get clock %s\n",
>>> +                       __func__, CLK_MAC);
>>
>> there is not clk_put in the _remove case ... maybe you could simply use
>> devm_clk_get here so that all clocks are put on device removal.
>>
>> Also you're warning on every missing clock. Below it looks like you need a
>> different set of them for rgmii and rmii, so maybe you should simply error
>> out
>> when core clocks for the selected phy-mode are missing.
>>
>>
>>> +
>>> +       if (bsp_priv->clock_input) {
>>> +               pr_info("%s: clock input from PHY\n", __func__);
>>> +       } else {
>>> +               if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
>>> +                       clk_set_rate(bsp_priv->clk_mac_pll, 50000000);
>>> +
>>> +               clk_set_parent(bsp_priv->clk_mac, bsp_priv->clk_mac_pll);
>>
>> why the explicit reparenting. The common clock-framework is intelligent
>> enough
>> to select the best suitable parent.
>>
>> In general I'm thinking the clock-handling inside this driver should be
>> simplyfied, as the common-clock framework can handle most cases itself.
>> I.e. if
>> a 125MHz external clock is present and so on. But haven't looked to deep
>> yet.
>>
>>
>>
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
>>> +{
>>> +       int phy_iface = phy_iface = bsp_priv->phy_iface;
>>> +
>>> +       if (enable) {
>>> +               if (!bsp_priv->clk_enabled) {
>>> +                       if (phy_iface == PHY_INTERFACE_MODE_RMII) {
>>> +                               if (!IS_ERR(bsp_priv->mac_clk_rx))
>>> +                                       clk_prepare_enable(
>>> +                                               bsp_priv->mac_clk_rx);
>>> +
>>> +                               if (!IS_ERR(bsp_priv->clk_mac_ref))
>>> +                                       clk_prepare_enable(
>>> +                                               bsp_priv->clk_mac_ref);
>>> +
>>> +                               if (!IS_ERR(bsp_priv->clk_mac_refout))
>>> +                                       clk_prepare_enable(
>>> +
>>> bsp_priv->clk_mac_refout);
>>> +                       }
>>> +
>>> +                       if (!IS_ERR(bsp_priv->aclk_mac))
>>> +                               clk_prepare_enable(bsp_priv->aclk_mac);
>>> +
>>> +                       if (!IS_ERR(bsp_priv->pclk_mac))
>>> +                               clk_prepare_enable(bsp_priv->pclk_mac);
>>> +
>>> +                       if (!IS_ERR(bsp_priv->mac_clk_tx))
>>> +                               clk_prepare_enable(bsp_priv->mac_clk_tx);
>>> +
>>> +                       /**
>>> +                        * if (!IS_ERR(bsp_priv->clk_mac))
>>> +                        *      clk_prepare_enable(bsp_priv->clk_mac);
>>> +                        */
>>> +                       mdelay(5);
>>> +                       bsp_priv->clk_enabled = true;
>>> +               }
>>> +       } else {
>>> +               if (bsp_priv->clk_enabled) {
>>> +                       if (phy_iface == PHY_INTERFACE_MODE_RMII) {
>>> +                               if (!IS_ERR(bsp_priv->mac_clk_rx))
>>> +                                       clk_disable_unprepare(
>>> +                                               bsp_priv->mac_clk_rx);
>>> +
>>> +                               if (!IS_ERR(bsp_priv->clk_mac_ref))
>>> +                                       clk_disable_unprepare(
>>> +                                               bsp_priv->clk_mac_ref);
>>> +
>>> +                               if (!IS_ERR(bsp_priv->clk_mac_refout))
>>> +                                       clk_disable_unprepare(
>>> +
>>> bsp_priv->clk_mac_refout);
>>> +                       }
>>> +
>>> +                       if (!IS_ERR(bsp_priv->aclk_mac))
>>> +
>>> clk_disable_unprepare(bsp_priv->aclk_mac);
>>> +
>>> +                       if (!IS_ERR(bsp_priv->pclk_mac))
>>> +
>>> clk_disable_unprepare(bsp_priv->pclk_mac);
>>> +
>>> +                       if (!IS_ERR(bsp_priv->mac_clk_tx))
>>> +
>>> clk_disable_unprepare(bsp_priv->mac_clk_tx);
>>> +                       /**
>>> +                        * if (!IS_ERR(bsp_priv->clk_mac))
>>> +                        *      clk_disable_unprepare(bsp_priv->clk_mac);
>>> +                        */
>>> +                       bsp_priv->clk_enabled = false;
>>> +               }
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int power_on_by_pmu(struct rk_priv_data *bsp_priv, bool enable)
>>> +{
>>> +       struct regulator *ldo;
>>> +       char *ldostr = bsp_priv->pmu_regulator;
>>> +       int ret;
>>> +
>>> +       if (!ldostr) {
>>> +               pr_err("%s: no ldo found\n", __func__);
>>> +               return -1;
>>> +       }
>>> +
>>> +       ldo = regulator_get(NULL, ldostr);
>>> +       if (!ldo) {
>>> +               pr_err("\n%s get ldo %s failed\n", __func__, ldostr);
>>> +       } else {
>>> +               if (enable) {
>>> +                       if (!regulator_is_enabled(ldo)) {
>>> +                               regulator_set_voltage(ldo, 3300000,
>>> 3300000);
>>> +                               ret = regulator_enable(ldo);
>>> +                               if (ret != 0)
>>> +                                       pr_err("%s: fail to enable %s\n",
>>> +                                              __func__, ldostr);
>>> +                               else
>>> +                                       pr_info("turn on ldo done.\n");
>>> +                       } else {
>>> +                               pr_warn("%s is enabled before enable",
>>> ldostr);
>>> +                       }
>>> +               } else {
>>> +                       if (regulator_is_enabled(ldo)) {
>>> +                               ret = regulator_disable(ldo);
>>> +                               if (ret != 0)
>>> +                                       pr_err("%s: fail to disable
>>> %s\n",
>>> +                                              __func__, ldostr);
>>> +                               else
>>> +                                       pr_info("turn off ldo done.\n");
>>> +                       } else {
>>> +                               pr_warn("%s is disabled before disable",
>>> +                                       ldostr);
>>> +                       }
>>> +               }
>>> +               regulator_put(ldo);
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int power_on_by_gpio(struct rk_priv_data *bsp_priv, bool enable)
>>> +{
>>> +       if (enable) {
>>> +               /*power on*/
>>> +               if (gpio_is_valid(bsp_priv->power_io))
>>> +                       gpio_direction_output(bsp_priv->power_io,
>>> +                                             bsp_priv->power_io_level);
>>> +       } else {
>>> +               /*power off*/
>>> +               if (gpio_is_valid(bsp_priv->power_io))
>>> +                       gpio_direction_output(bsp_priv->power_io,
>>> +                                             !bsp_priv->power_io_level);
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
>>> +{
>>> +       int ret = -1;
>>> +
>>> +       pr_info("Ethernet PHY power %s\n", enable == 1 ? "on" : "off");
>>> +
>>> +       if (bsp_priv->power_ctrl_by_pmu)
>>> +               ret = power_on_by_pmu(bsp_priv, enable);
>>> +       else
>>> +               ret =  power_on_by_gpio(bsp_priv, enable);
>>
>> this looks wrong. This should always be a regulator. Even a regulator +
>> switch
>> controlled by a gpio can still be modelled as regulator, so that you don't
>> need this switch and assorted special handling - so just use the regulator
>> API
>>
> In some case, it would be a switching circuit to control the power for PHY.
> All I need to do is to control a GPIO to make switch on/off.  So...

A regulator would probably be a better choice. You can use fixed-regulator.
The regulator framework should also take care of enabling any upstream
regulators, such as an LDO output from the PMIC that powers some external
pull-ups or what not.

The sunxi glue driver has this support. Maybe you could move that to the
stmmac platform core.

>>>
>>> +
>>> +       if (enable) {
>>> +               /*reset*/
>>> +               if (gpio_is_valid(bsp_priv->reset_io)) {
>>> +                       gpio_direction_output(bsp_priv->reset_io,
>>> +                                             bsp_priv->reset_io_level);
>>> +                       mdelay(5);
>>> +                       gpio_direction_output(bsp_priv->reset_io,
>>> +                                             !bsp_priv->reset_io_level);
>>> +               }
>>> +               mdelay(30);
>>> +
>>> +       } else {
>>> +               /*pull down reset*/
>>> +               if (gpio_is_valid(bsp_priv->reset_io)) {
>>> +                       gpio_direction_output(bsp_priv->reset_io,
>>> +                                             bsp_priv->reset_io_level);
>>> +               }
>>> +       }
>>
>> I'm not sure yet if it would be better to use the reset framework for
>> this.
>> While it says it is also meant for reset-gpios, there does not seem a
>> driver
>> for this to exist yet.
>>
> What should I do?

The stmmac driver has support for handling phy resets using gpios.
Please use it. See Documentation/devicetree/bindings/net/stmmac.txt
for details. I think this was discussed some time ago, though
probably in a different context. If it's just a gpio that needs to
be poked, it should stay a gpio, at least for external resets.

>
>>
>>> +
>>> +       return ret;
>>> +}
>>> +
>>> +#define GPIO_PHY_POWER "gmac_phy_power"
>>> +#define GPIO_PHY_RESET "gmac_phy_reset"
>>> +#define GPIO_PHY_IRQ   "gmac_phy_irq"
>>
>> again I don't understand why these constants are necessary
>>
>>> +
>>> +static void *rk_gmac_setup(struct platform_device *pdev)
>>> +{
>>> +       struct rk_priv_data *bsp_priv;
>>> +       struct device *dev = &pdev->dev;
>>> +       enum of_gpio_flags flags;
>>> +       int ret;
>>> +       const char *strings = NULL;
>>> +       int value;
>>> +       int irq;
>>> +
>>> +       bsp_priv = devm_kzalloc(dev, sizeof(*bsp_priv), GFP_KERNEL);
>>> +       if (!bsp_priv)
>>> +               return ERR_PTR(-ENOMEM);
>>> +
>>> +       bsp_priv->phy_iface = of_get_phy_mode(dev->of_node);
>>> +
>>> +       ret = of_property_read_string(dev->of_node, "pmu_regulator",
>>> &strings);
>>> +       if (ret) {
>>> +               pr_err("%s: Can not read property: pmu_regulator.\n",
>>> __func__);
>>> +               bsp_priv->power_ctrl_by_pmu = false;
>>> +       } else {
>>> +               pr_info("%s: ethernet phy power controlled by
>>> pmu(%s).\n",
>>> +                       __func__, strings);
>>> +               bsp_priv->power_ctrl_by_pmu = true;
>>> +               strcpy(bsp_priv->pmu_regulator, strings);
>>> +       }
>>
>> There is a generic regulator-dt-binding for regulator-consumers available
>> which you should of course use.
>>
> The same explanation as above
>
>>> +
>>> +       ret = of_property_read_u32(dev->of_node, "pmu_enable_level",
>>> &value);
>>> +       if (ret) {
>>> +               pr_err("%s: Can not read property: pmu_enable_level.\n",
>>> +                      __func__);
>>> +               bsp_priv->power_ctrl_by_pmu = false;
>>> +       } else {
>>> +               pr_info("%s: PHY power controlled by pmu(level = %s).\n",
>>> +                       __func__, (value == 1) ? "HIGH" : "LOW");
>>> +               bsp_priv->power_ctrl_by_pmu = true;
>>> +               bsp_priv->pmu_enable_level = value;
>>> +       }
>>
>> What is this used for? Enabling should of course be done via
>> regulator_enable
>> and disabling using regulator_disable.

Second. Even if it's handled by the PMU, you should also model the
PMU as a set of regulators. That will get rid of all the ifs in
this section.

>>
>>
>>> +
>>> +       ret = of_property_read_string(dev->of_node, "clock_in_out",
>>> &strings);
>>> +       if (ret) {
>>> +               pr_err("%s: Can not read property: clock_in_out.\n",
>>> __func__);
>>> +               bsp_priv->clock_input = true;
>>> +       } else {
>>> +               pr_info("%s: clock input or output? (%s).\n",
>>> +                       __func__, strings);
>>> +               if (!strcmp(strings, "input"))
>>> +                       bsp_priv->clock_input = true;
>>> +               else
>>> +                       bsp_priv->clock_input = false;
>>> +       }
>>> +
>>> +       ret = of_property_read_u32(dev->of_node, "tx_delay", &value);
>>> +       if (ret) {
>>> +               bsp_priv->tx_delay = 0x30;
>>> +               pr_err("%s: Can not read property: tx_delay.", __func__);
>>> +               pr_err("%s: set tx_delay to 0x%x\n",
>>> +                      __func__, bsp_priv->tx_delay);
>>> +       } else {
>>> +               pr_info("%s: TX delay(0x%x).\n", __func__, value);
>>> +               bsp_priv->tx_delay = value;
>>> +       }
>>> +
>>> +       ret = of_property_read_u32(dev->of_node, "rx_delay", &value);
>>> +       if (ret) {
>>> +               bsp_priv->rx_delay = 0x10;
>>> +               pr_err("%s: Can not read property: rx_delay.", __func__);
>>> +               pr_err("%s: set rx_delay to 0x%x\n",
>>> +                      __func__, bsp_priv->rx_delay);
>>> +       } else {
>>> +               pr_info("%s: RX delay(0x%x).\n", __func__, value);
>>> +               bsp_priv->rx_delay = value;
>>> +       }
>>> +
>>> +       bsp_priv->grf = syscon_regmap_lookup_by_phandle(dev->of_node,
>>> +                                                       "rockchip,grf");
>>> +       bsp_priv->phyirq_io =
>>> +               of_get_named_gpio_flags(dev->of_node,
>>> +                                       "phyirq-gpio", 0, &flags);
>>> +       bsp_priv->phyirq_io_level = (flags & OF_GPIO_ACTIVE_LOW) ? 0 : 1;
>>> +
>>> +       bsp_priv->reset_io =
>>> +               of_get_named_gpio_flags(dev->of_node,
>>> +                                       "reset-gpio", 0, &flags);
>>> +       bsp_priv->reset_io_level = (flags & OF_GPIO_ACTIVE_LOW) ? 0 : 1;
>>> +
>>> +       bsp_priv->power_io =
>>> +               of_get_named_gpio_flags(dev->of_node, "power-gpio", 0,
>>> &flags);
>>> +       bsp_priv->power_io_level = (flags & OF_GPIO_ACTIVE_LOW) ? 0 : 1;
>>> +
>>> +       /*power*/
>>> +       if (!gpio_is_valid(bsp_priv->power_io)) {
>>> +               pr_err("%s: Failed to get GPIO %s.\n",
>>> +                      __func__, "power-gpio");
>>> +       } else {
>>> +               ret = gpio_request(bsp_priv->power_io, GPIO_PHY_POWER);
>>> +               if (ret)
>>> +                       pr_err("%s: ERROR: Failed to request GPIO %s.\n",
>>> +                              __func__, GPIO_PHY_POWER);
>>> +       }
>>
>> When everything power-related is handled using the regulator api, you
>> don't
>> need this
>
> The same explanation as above
>>
>>
>>> +
>>> +       if (!gpio_is_valid(bsp_priv->reset_io)) {
>>> +               pr_err("%s: ERROR: Get reset-gpio failed.\n", __func__);
>>> +       } else {
>>> +               ret = gpio_request(bsp_priv->reset_io, GPIO_PHY_RESET);
>>> +               if (ret)
>>> +                       pr_err("%s: ERROR: Failed to request GPIO %s.\n",
>>> +                              __func__, GPIO_PHY_RESET);
>>> +       }
>>> +
>>> +       if (bsp_priv->phyirq_io > 0) {
>>
>> This is more for my understanding: why does the mac driver need to handle
>> the
>> phy interrupt - but I might be overlooking something.

stmmac does not have a separate mdio bus driver. They are combined.

>>
> phy interrupt is not mandatory.  In most of the time,  in order to find
> something happen in PHY, for example,
> link is up or down,  we just use polling method to read the phy's register
> in a timer.
> Buf if phy interrupt is in use, when link is up or down,  phy interrupt pin
> will be assert to inform the CPU.
> I just implement the driver for phy interrupt gpio,  not enable it as
> default.

You should probably do it in the stmmac platform/core driver.

>
>
>>> +               struct plat_stmmacenet_data *plat_dat;
>>> +
>>> +               pr_info("PHY irq in use\n");
>>> +               ret = gpio_request(bsp_priv->phyirq_io, GPIO_PHY_IRQ);
>>> +               if (ret < 0) {
>>> +                       pr_warn("%s: Failed to request GPIO %s\n",
>>> +                               __func__, GPIO_PHY_IRQ);
>>> +                       goto goon;
>>> +               }
>>> +
>>> +               ret = gpio_direction_input(bsp_priv->phyirq_io);
>>> +               if (ret < 0) {
>>> +                       pr_err("%s, Failed to set input for GPIO %s\n",
>>> +                              __func__, GPIO_PHY_IRQ);
>>> +                       gpio_free(bsp_priv->phyirq_io);
>>> +                       goto goon;
>>> +               }
>>> +
>>> +               irq = gpio_to_irq(bsp_priv->phyirq_io);
>>> +               if (irq < 0) {
>>> +                       ret = irq;
>>> +                       pr_err("Failed to set irq for %s\n",
>>> +                              GPIO_PHY_IRQ);
>>> +                       gpio_free(bsp_priv->phyirq_io);
>>> +                       goto goon;
>>> +               }
>>> +
>>> +               plat_dat = dev_get_platdata(&pdev->dev);
>>> +               if (plat_dat)
>>> +                       plat_dat->mdio_bus_data->probed_phy_irq = irq;
>>> +               else
>>> +                       pr_err("%s: plat_data is NULL\n", __func__);

The glue layer should never ever touch the platform data. If you need
to add interrupts for the phy, please do so in the stmmac driver proper,
and add the proper DT bindings/

>>> +       }
>>> +
>>> +goon:
>>> +       /*rmii or rgmii*/
>>> +       if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
>>> +               pr_info("%s: init for RGMII\n", __func__);
>>> +               set_to_rgmii(bsp_priv, bsp_priv->tx_delay,
>>> bsp_priv->rx_delay);
>>> +       } else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) {
>>> +               pr_info("%s: init for RMII\n", __func__);
>>> +               set_to_rmii(bsp_priv);
>>> +       } else {
>>> +               pr_err("%s: ERROR: NO interface defined!\n", __func__);
>>> +       }
>>> +
>>> +       bsp_priv->pdev = pdev;
>>> +
>>> +       gmac_clk_init(bsp_priv);
>>> +
>>> +       return bsp_priv;
>>> +}
>>> +
>>> +static int rk_gmac_init(struct platform_device *pdev, void *priv)
>>> +{
>>> +       struct rk_priv_data *bsp_priv = priv;
>>> +       int ret;
>>> +
>>> +       ret = phy_power_on(bsp_priv, true);
>>> +       if (ret)
>>> +               return ret;
>>> +
>>> +       ret = gmac_clk_enable(bsp_priv, true);
>>> +       if (ret)
>>> +               return ret;
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static void rk_gmac_exit(struct platform_device *pdev, void *priv)
>>> +{
>>> +       struct rk_priv_data *gmac = priv;
>>> +
>>> +       phy_power_on(gmac, false);
>>> +       gmac_clk_enable(gmac, false);
>>> +}
>>> +
>>> +static void rk_fix_speed(void *priv, unsigned int speed)
>>> +{
>>> +       struct rk_priv_data *bsp_priv = priv;
>>> +
>>> +       if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII)
>>> +               set_rgmii_speed(bsp_priv, speed);
>>> +       else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
>>> +               set_rmii_speed(bsp_priv, speed);
>>> +       else
>>> +               pr_err("unsupported interface %d", bsp_priv->phy_iface);
>>> +}
>>> +
>>> +const struct stmmac_of_data rk_gmac_data = {
>>> +       .has_gmac = 1,
>>> +       .fix_mac_speed = rk_fix_speed,
>>> +       .setup = rk_gmac_setup,
>>> +       .init = rk_gmac_init,
>>> +       .exit = rk_gmac_exit,
>>> +};
>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
>>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index
>>> 15814b7..b4dee96 100644
>>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
>>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
>>> @@ -33,6 +33,7 @@
>>>
>>>   static const struct of_device_id stmmac_dt_ids[] = {
>>>         /* SoC specific glue layers should come before generic bindings
>>> */
>>> +       { .compatible = "rockchip,rk3288-gmac", .data = &rk_gmac_data},
>>
>> please name that rk3288_gmac_data [of course the other occurences too]
>> It makes it easier to see which soc it is meant for and it's also not save
>> to
>> assume that the next one will use the same register + bit positions in the
>> grf.
>>
>>
>>>         { .compatible = "amlogic,meson6-dwmac", .data =
>>> &meson6_dwmac_data},
>>>         { .compatible = "allwinner,sun7i-a20-gmac", .data =
>>> &sun7i_gmac_data},
>>>         { .compatible = "st,stih415-dwmac", .data = &stih4xx_dwmac_data},
>>> @@ -291,6 +292,8 @@ static int stmmac_pltfr_probe(struct platform_device
>>> *pdev) return  -ENOMEM;
>>>                 }
>>>
>>> +               pdev->dev.platform_data = plat_dat;
>>> +

When we re-did the DT layer for stmmac, there was consensus to _not_
use platform_data. Please do not use it.

>>>                 ret = stmmac_probe_config_dt(pdev, plat_dat, &mac);
>>>                 if (ret) {
>>>                         pr_err("%s: main dt probe failed", __func__);
>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
>>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index
>>> 25dd1f7..32a0516 100644
>>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
>>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
>>> @@ -24,5 +24,6 @@ extern const struct stmmac_of_data sun7i_gmac_data;
>>>   extern const struct stmmac_of_data stih4xx_dwmac_data;
>>>   extern const struct stmmac_of_data stid127_dwmac_data;
>>>   extern const struct stmmac_of_data socfpga_gmac_data;
>>> +extern const struct stmmac_of_data rk_gmac_data;
>>>
>>>   #endif /* __STMMAC_PLATFORM_H__ */
>>
>>
>>

I may have repeated myself a few times. Please take a look at how
the other glue layers are written. This one is larger than it needs
to be.

Regards,
ChenYu

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox