* [PATCH V3 net-next 12/15] smc: send data (through RDMA)
From: Ursula Braun @ 2016-11-24 15:06 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, utz.bacher,
ubraun
In-Reply-To: <20161124150645.90881-1-ubraun@linux.vnet.ibm.com>
copy data to kernel send buffer, and trigger RDMA write
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
net/smc/Makefile | 2 +-
net/smc/af_smc.c | 13 +-
net/smc/smc.h | 1 +
net/smc/smc_cdc.c | 7 +-
net/smc/smc_tx.c | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
net/smc/smc_tx.h | 34 +++++
6 files changed, 491 insertions(+), 4 deletions(-)
create mode 100644 net/smc/smc_tx.c
create mode 100644 net/smc/smc_tx.h
diff --git a/net/smc/Makefile b/net/smc/Makefile
index ec0fd03..fc28d79 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_SMC) += smc.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o
+smc-y += smc_cdc.o smc_tx.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 36a111c..4738f5a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -37,6 +37,7 @@
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
+#include "smc_tx.h"
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
* creation
@@ -420,6 +421,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
}
mutex_unlock(&smc_create_lgr_pending);
+ smc_tx_init(smc);
+
out_connected:
smc_copy_sock_settings_to_clc(smc);
smc->sk.sk_state = SMC_ACTIVE;
@@ -761,6 +764,8 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
+ smc_tx_init(new_smc);
+
out_connected:
sk_refcnt_debug_inc(newsmcsk);
newsmcsk->sk_state = SMC_ACTIVE;
@@ -934,7 +939,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (smc->use_fallback)
rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
else
- rc = sock_no_sendmsg(sock, msg, len);
+ rc = smc_tx_sendmsg(smc, msg, len);
out:
release_sock(sk);
return rc;
@@ -1015,6 +1020,12 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
mask |= smc_accept_poll(sk);
if (sk->sk_err)
mask |= POLLERR;
+ if (atomic_read(&smc->conn.sndbuf_space)) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
/* for now - to be enhanced in follow-on patch */
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 6cb9496..2956135 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -130,6 +130,7 @@ struct smc_connection {
atomic_t sndbuf_space; /* remaining space in sndbuf */
u16 tx_cdc_seq; /* sequence # for CDC send */
spinlock_t send_lock; /* protect wr_sends */
+ struct work_struct tx_work; /* retry of smc_cdc_msg_send */
struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
* .prod cf. TCP rcv_nxt
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index e14809e..22af635 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -14,6 +14,7 @@
#include "smc.h"
#include "smc_wr.h"
#include "smc_cdc.h"
+#include "smc_tx.h"
/********************************** send *************************************/
@@ -52,7 +53,7 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
smc_curs_read(&cdcpend->cursor, cdcpend->conn),
cdcpend->conn);
}
- /* subsequent patch: wake if send buffer space available */
+ smc_tx_sndbuf_nonfull(smc);
bh_unlock_sock(&smc->sk);
}
@@ -208,7 +209,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
}
/* piggy backed tx info */
- /* subsequent patch: wake receivers if receive buffer space available */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn))
+ smc_tx_sndbuf_nonempty(conn);
/* subsequent patch: trigger socket release if connection closed */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
new file mode 100644
index 0000000..d86bef6
--- /dev/null
+++ b/net/smc/smc_tx.c
@@ -0,0 +1,438 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Manage send buffer.
+ * Producer:
+ * Copy user space data into send buffer, if send buffer space available.
+ * Consumer:
+ * Trigger RDMA write into RMBE of peer and send CDC, if RMBE space available.
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <linux/net.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "smc.h"
+#include "smc_wr.h"
+#include "smc_cdc.h"
+#include "smc_tx.h"
+
+/***************************** sndbuf producer *******************************/
+
+/* callback implementation for sk.sk_write_space()
+ * to wakeup sndbuf producers that blocked with smc_tx_wait_memory().
+ * called under sk_socket lock.
+ */
+static void smc_tx_write_space(struct sock *sk)
+{
+ struct socket *sock = sk->sk_socket;
+ struct smc_sock *smc = smc_sk(sk);
+ struct socket_wq *wq;
+
+ /* similar to sk_stream_write_space */
+ if (atomic_read(&smc->conn.sndbuf_space) && sock) {
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_poll(&wq->wait,
+ POLLOUT | POLLWRNORM |
+ POLLWRBAND);
+ if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+ sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
+ rcu_read_unlock();
+ }
+}
+
+/* Wakeup sndbuf producers that blocked with smc_tx_wait_memory().
+ * Cf. tcp_data_snd_check()=>tcp_check_space()=>tcp_new_space().
+ */
+void smc_tx_sndbuf_nonfull(struct smc_sock *smc)
+{
+ if (smc->sk.sk_socket &&
+ test_bit(SOCK_NOSPACE, &smc->sk.sk_socket->flags))
+ smc->sk.sk_write_space(&smc->sk);
+}
+
+/* blocks sndbuf producer until at least one byte of free space available */
+static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct smc_connection *conn = &smc->conn;
+ struct sock *sk = &smc->sk;
+ bool noblock;
+ long timeo;
+ int rc = 0;
+
+ /* similar to sk_stream_wait_memory */
+ timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ noblock = timeo ? false : true;
+ add_wait_queue(sk_sleep(sk), &wait);
+ while (1) {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ if (sk->sk_err ||
+ (sk->sk_shutdown & SEND_SHUTDOWN) ||
+ conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
+ rc = -EPIPE;
+ break;
+ }
+ if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ rc = -ECONNRESET;
+ break;
+ }
+ if (!timeo) {
+ if (noblock)
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ rc = -EAGAIN;
+ break;
+ }
+ if (signal_pending(current)) {
+ rc = sock_intr_errno(timeo);
+ break;
+ }
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ if (atomic_read(&conn->sndbuf_space))
+ break; /* at least 1 byte of free space available */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_pending++;
+ sk_wait_event(sk, &timeo,
+ sk->sk_err ||
+ (sk->sk_shutdown & SEND_SHUTDOWN) ||
+ smc_cdc_rxed_any_close_or_senddone(conn) ||
+ atomic_read(&conn->sndbuf_space),
+ &wait);
+ sk->sk_write_pending--;
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return rc;
+}
+
+/* sndbuf producer: main API called by socket layer.
+ * called under sock lock.
+ */
+int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
+{
+ size_t copylen, send_done = 0, send_remaining = len;
+ size_t chunk_len, chunk_off, chunk_len_sum;
+ struct smc_connection *conn = &smc->conn;
+ union smc_host_cursor prep;
+ struct sock *sk = &smc->sk;
+ char *sndbuf_base;
+ int tx_cnt_prep;
+ int writespace;
+ int rc, chunk;
+
+ /* This should be in poll */
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
+ rc = -EPIPE;
+ goto out_err;
+ }
+
+ while (msg_data_left(msg)) {
+ if (sk->sk_state == SMC_INIT)
+ return -ENOTCONN;
+ if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ return -EPIPE;
+ if (smc_cdc_rxed_any_close(conn))
+ return send_done ?: -ECONNRESET;
+
+ if (!atomic_read(&conn->sndbuf_space)) {
+ rc = smc_tx_wait_memory(smc, msg->msg_flags);
+ if (rc) {
+ if (send_done)
+ return send_done;
+ goto out_err;
+ }
+ continue;
+ }
+
+ /* initialize variables for 1st iteration of subsequent loop */
+ /* could be just 1 byte, even after smc_tx_wait_memory above */
+ writespace = atomic_read(&conn->sndbuf_space);
+ /* not more than what user space asked for */
+ copylen = min_t(size_t, send_remaining, writespace);
+ /* determine start of sndbuf */
+ sndbuf_base = conn->sndbuf_desc->cpu_addr;
+ smc_curs_write(&prep,
+ smc_curs_read(&conn->tx_curs_prep, conn),
+ conn);
+ tx_cnt_prep = prep.count;
+ /* determine chunks where to write into sndbuf */
+ /* either unwrapped case, or 1st chunk of wrapped case */
+ chunk_len = min_t(size_t,
+ copylen, conn->sndbuf_size - tx_cnt_prep);
+ chunk_len_sum = chunk_len;
+ chunk_off = tx_cnt_prep;
+ for (chunk = 0; chunk < 2; chunk++) {
+ rc = memcpy_from_msg(sndbuf_base + chunk_off,
+ msg, chunk_len);
+ if (rc) {
+ if (send_done)
+ return send_done;
+ goto out_err;
+ }
+ send_done += chunk_len;
+ send_remaining -= chunk_len;
+
+ if (chunk_len_sum == copylen)
+ break; /* either on 1st or 2nd iteration */
+ /* prepare next (== 2nd) iteration */
+ chunk_len = copylen - chunk_len; /* remainder */
+ chunk_len_sum += chunk_len;
+ chunk_off = 0; /* modulo offset in send ring buffer */
+ }
+ /* update cursors */
+ smc_curs_add(conn->sndbuf_size, &prep, copylen);
+ smc_curs_write(&conn->tx_curs_prep,
+ smc_curs_read(&prep, conn),
+ conn);
+ /* increased in send tasklet smc_cdc_tx_handler() */
+ smp_mb__before_atomic();
+ atomic_sub(copylen, &conn->sndbuf_space);
+ /* guarantee 0 <= sndbuf_space <= sndbuf_size */
+ smp_mb__after_atomic();
+ /* since we just produced more new data into sndbuf,
+ * trigger sndbuf consumer: RDMA write into peer RMBE and CDC
+ */
+ smc_tx_sndbuf_nonempty(conn);
+ } /* while (msg_data_left(msg)) */
+
+ return send_done;
+
+out_err:
+ rc = sk_stream_error(sk, msg->msg_flags, rc);
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(rc == -EAGAIN))
+ sk->sk_write_space(sk);
+ return rc;
+}
+
+/***************************** sndbuf consumer *******************************/
+
+/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
+static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
+ int num_sges, struct ib_sge sges[])
+{
+ struct smc_link_group *lgr = conn->lgr;
+ struct ib_send_wr *failed_wr = NULL;
+ struct ib_rdma_wr rdma_wr;
+ struct smc_link *link;
+ int rc;
+
+ memset(&rdma_wr, 0, sizeof(rdma_wr));
+ link = &lgr->lnk[SMC_SINGLE_LINK];
+ rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link);
+ rdma_wr.wr.sg_list = sges;
+ rdma_wr.wr.num_sge = num_sges;
+ rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
+ rdma_wr.remote_addr =
+ lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
+ /* RMBE within RMB */
+ ((conn->peer_conn_idx - 1) * conn->peer_rmbe_size) +
+ /* offset within RMBE */
+ peer_rmbe_offset;
+ rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+ rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
+ if (rc)
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ return rc;
+}
+
+/* sndbuf consumer */
+static inline void smc_tx_advance_cursors(struct smc_connection *conn,
+ union smc_host_cursor *prod,
+ union smc_host_cursor *sent,
+ size_t len)
+{
+ smc_curs_add(conn->peer_rmbe_size, prod, len);
+ /* increased in recv tasklet smc_cdc_msg_rcv() */
+ smp_mb__before_atomic();
+ /* data in flight reduces usable snd_wnd */
+ atomic_sub(len, &conn->peer_rmbe_space);
+ /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
+ smp_mb__after_atomic();
+ smc_curs_add(conn->sndbuf_size, sent, len);
+}
+
+/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
+ * usable snd_wnd as max transmit
+ */
+static int smc_tx_rdma_writes(struct smc_connection *conn)
+{
+ size_t src_off, src_len, dst_off, dst_len; /* current chunk values */
+ size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk;
+ union smc_host_cursor sent, prep, prod, cons;
+ struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
+ struct smc_link_group *lgr = conn->lgr;
+ int to_send, rmbespace;
+ struct smc_link *link;
+ int num_sges;
+ int rc;
+
+ /* source: sndbuf */
+ smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
+ smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
+ /* cf. wmem_alloc - (snd_max - snd_una) */
+ to_send = smc_curs_diff(conn->sndbuf_size, &sent, &prep);
+ if (to_send <= 0)
+ return 0;
+
+ /* destination: RMBE */
+ /* cf. snd_wnd */
+ rmbespace = atomic_read(&conn->peer_rmbe_space);
+ if (rmbespace <= 0)
+ return 0;
+ smc_curs_write(&prod,
+ smc_curs_read(&conn->local_tx_ctrl.prod, conn),
+ conn);
+ smc_curs_write(&cons,
+ smc_curs_read(&conn->local_rx_ctrl.cons, conn),
+ conn);
+
+ /* if usable snd_wnd closes ask peer to advertise once it opens again */
+ conn->local_tx_ctrl.prod_flags.write_blocked = (to_send >= rmbespace);
+ /* cf. usable snd_wnd */
+ len = min(to_send, rmbespace);
+
+ /* initialize variables for first iteration of subsequent nested loop */
+ link = &lgr->lnk[SMC_SINGLE_LINK];
+ dst_off = prod.count;
+ if (prod.wrap == cons.wrap) {
+ /* the filled destination area is unwrapped,
+ * hence the available free destination space is wrapped
+ * and we need 2 destination chunks of sum len; start with 1st
+ * which is limited by what's available in sndbuf
+ */
+ dst_len = min_t(size_t,
+ conn->peer_rmbe_size - prod.count, len);
+ } else {
+ /* the filled destination area is wrapped,
+ * hence the available free destination space is unwrapped
+ * and we need a single destination chunk of entire len
+ */
+ dst_len = len;
+ }
+ dst_len_sum = dst_len;
+ src_off = sent.count;
+ /* dst_len determines the maximum src_len */
+ if (sent.count + dst_len <= conn->sndbuf_size) {
+ /* unwrapped src case: single chunk of entire dst_len */
+ src_len = dst_len;
+ } else {
+ /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
+ src_len = conn->sndbuf_size - sent.count;
+ }
+ src_len_sum = src_len;
+ for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+ num_sges = 0;
+ for (srcchunk = 0; srcchunk < 2; srcchunk++) {
+ sges[srcchunk].addr =
+ conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] +
+ src_off;
+ sges[srcchunk].length = src_len;
+ sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+ num_sges++;
+ src_off += src_len;
+ if (src_off >= conn->sndbuf_size)
+ src_off -= conn->sndbuf_size;
+ /* modulo in send ring */
+ if (src_len_sum == dst_len)
+ break; /* either on 1st or 2nd iteration */
+ /* prepare next (== 2nd) iteration */
+ src_len = dst_len - src_len; /* remainder */
+ src_len_sum += src_len;
+ }
+ rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+ if (rc)
+ return rc;
+ if (dst_len_sum == len)
+ break; /* either on 1st or 2nd iteration */
+ /* prepare next (== 2nd) iteration */
+ dst_off = 0; /* modulo offset in RMBE ring buffer */
+ dst_len = len - dst_len; /* remainder */
+ dst_len_sum += dst_len;
+ src_len = min_t(int,
+ dst_len, conn->sndbuf_size - sent.count);
+ src_len_sum = src_len;
+ }
+
+ smc_tx_advance_cursors(conn, &prod, &sent, len);
+ /* update connection's cursors with advanced local cursors */
+ smc_curs_write(&conn->local_tx_ctrl.prod,
+ smc_curs_read(&prod, conn),
+ conn);
+ /* dst: peer RMBE */
+ smc_curs_write(&conn->tx_curs_sent,
+ smc_curs_read(&sent, conn),
+ conn);
+ /* src: local sndbuf */
+
+ return 0;
+}
+
+/* Wakeup sndbuf consumers from any context (IRQ or process)
+ * since there is more data to transmit; usable snd_wnd as max transmit
+ */
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ struct smc_cdc_tx_pend *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ spin_lock_bh(&conn->send_lock);
+ rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
+ &pend);
+ if (rc < 0) {
+ if (rc == -EBUSY) {
+ rc = 0;
+ schedule_work(&conn->tx_work);
+ }
+ goto out_unlock;
+ }
+
+ rc = smc_tx_rdma_writes(conn);
+ if (rc) {
+ smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
+ (struct smc_wr_tx_pend_priv *)pend);
+ goto out_unlock;
+ }
+
+ rc = smc_cdc_msg_send(conn, wr_buf, pend);
+
+out_unlock:
+ spin_unlock_bh(&conn->send_lock);
+ return rc;
+}
+
+/* Wakeup sndbuf consumers from process context
+ * since there is more data to transmit
+ */
+static void smc_tx_work(struct work_struct *work)
+{
+ struct smc_connection *conn = container_of(work,
+ struct smc_connection,
+ tx_work);
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+ lock_sock(&smc->sk);
+ smc_tx_sndbuf_nonempty(conn);
+ release_sock(&smc->sk);
+}
+
+/***************************** send initialize *******************************/
+
+/* Initialize send properties on connection establishment. NB: not __init! */
+void smc_tx_init(struct smc_sock *smc)
+{
+ smc->sk.sk_write_space = smc_tx_write_space;
+ INIT_WORK(&smc->conn.tx_work, smc_tx_work);
+ spin_lock_init(&smc->conn.send_lock);
+}
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
new file mode 100644
index 0000000..931c666
--- /dev/null
+++ b/net/smc/smc_tx.h
@@ -0,0 +1,34 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Manage send buffer
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#ifndef SMC_TX_H
+#define SMC_TX_H
+
+#include <linux/socket.h>
+#include <linux/types.h>
+
+#include "smc.h"
+#include "smc_cdc.h"
+
+static inline int smc_tx_prepared_sends(struct smc_connection *conn)
+{
+ union smc_host_cursor sent, prep;
+
+ smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn);
+ smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn);
+ return smc_curs_diff(conn->sndbuf_size, &sent, &prep);
+}
+
+void smc_tx_init(struct smc_sock *smc);
+int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
+void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
+
+#endif /* SMC_TX_H */
--
2.8.4
^ permalink raw reply related
* [PATCH V3 net-next 14/15] smc: socket closing and linkgroup cleanup
From: Ursula Braun @ 2016-11-24 15:06 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, utz.bacher,
ubraun
In-Reply-To: <20161124150645.90881-1-ubraun@linux.vnet.ibm.com>
smc_shutdown() and smc_release() handling
delayed linkgroup cleanup for linkgroups without connections
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
net/smc/Makefile | 2 +-
net/smc/af_smc.c | 127 ++++++++++++---
net/smc/smc.h | 20 ++-
net/smc/smc_cdc.c | 33 ++--
net/smc/smc_cdc.h | 1 +
net/smc/smc_close.c | 441 ++++++++++++++++++++++++++++++++++++++++++++++++++++
net/smc/smc_close.h | 28 ++++
net/smc/smc_core.c | 7 +-
net/smc/smc_tx.c | 8 +
net/smc/smc_wr.c | 47 ++++--
net/smc/smc_wr.h | 2 +
11 files changed, 669 insertions(+), 47 deletions(-)
create mode 100644 net/smc/smc_close.c
create mode 100644 net/smc/smc_close.h
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 6255e29..5cf0caf 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_SMC) += smc.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5959c84..23f7d65 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -39,6 +39,7 @@
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
+#include "smc_close.h"
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
* creation
@@ -70,14 +71,29 @@ static int smc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct smc_sock *smc;
+ int rc = 0;
if (!sk)
goto out;
smc = smc_sk(sk);
- lock_sock(sk);
+ sock_hold(sk);
+ if (sk->sk_state == SMC_LISTEN)
+ /* smc_close_non_accepted() is called and acquires
+ * sock lock for child sockets again
+ */
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+ else
+ lock_sock(sk);
- sk->sk_state = SMC_CLOSED;
+ if (smc->use_fallback) {
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ } else {
+ rc = smc_close_active(smc);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+ }
if (smc->clcsock) {
sock_release(smc->clcsock);
smc->clcsock = NULL;
@@ -86,11 +102,18 @@ static int smc_release(struct socket *sock)
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
+ if (smc->use_fallback) {
+ schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
+ } else if (sk->sk_state == SMC_CLOSED) {
+ smc_conn_free(&smc->conn);
+ schedule_delayed_work(&smc->sock_put_work,
+ SMC_CLOSE_SOCK_PUT_DELAY);
+ }
release_sock(sk);
sock_put(sk);
out:
- return 0;
+ return rc;
}
static void smc_destruct(struct sock *sk)
@@ -127,6 +150,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
+ INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
return sk;
}
@@ -427,7 +451,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
out_connected:
smc_copy_sock_settings_to_clc(smc);
- smc->sk.sk_state = SMC_ACTIVE;
+ if (smc->sk.sk_state == SMC_INIT)
+ smc->sk.sk_state = SMC_ACTIVE;
return rc ? rc : local_contact;
@@ -569,8 +594,8 @@ static void smc_accept_unlink(struct sock *sk)
/* remove a sock from the accept queue to bind it to a new socket created
* for a socket accept call from user space
*/
-static struct sock *smc_accept_dequeue(struct sock *parent,
- struct socket *new_sock)
+struct sock *smc_accept_dequeue(struct sock *parent,
+ struct socket *new_sock)
{
struct smc_sock *isk, *n;
struct sock *new_sk;
@@ -591,11 +616,17 @@ static struct sock *smc_accept_dequeue(struct sock *parent,
}
/* clean up for a created but never accepted sock */
-static void smc_close_non_accepted(struct sock *sk)
+void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
sock_hold(sk);
+ lock_sock(sk);
+ if (!sk->sk_lingertime)
+ /* wait for peer closing */
+ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ if (!smc->use_fallback)
+ smc_close_active(smc);
if (smc->clcsock) {
struct socket *tcp;
@@ -603,7 +634,16 @@ static void smc_close_non_accepted(struct sock *sk)
smc->clcsock = NULL;
sock_release(tcp);
}
- /* more closing stuff to be added with socket closing patch */
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_shutdown |= SHUTDOWN_MASK;
+ if (smc->use_fallback) {
+ schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
+ } else {
+ smc_conn_free(&smc->conn);
+ schedule_delayed_work(&smc->sock_put_work,
+ SMC_CLOSE_SOCK_PUT_DELAY);
+ }
+ release_sock(sk);
sock_put(sk);
}
@@ -771,11 +811,12 @@ static void smc_listen_work(struct work_struct *work)
out_connected:
sk_refcnt_debug_inc(newsmcsk);
- newsmcsk->sk_state = SMC_ACTIVE;
+ if (newsmcsk->sk_state == SMC_INIT)
+ newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
if (local_contact == SMC_FIRST_CONTACT)
mutex_unlock(&smc_create_lgr_pending);
- lock_sock(&lsmc->sk);
+ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
smc_accept_enqueue(&lsmc->sk, newsmcsk);
} else { /* no longer listening */
@@ -801,6 +842,7 @@ static void smc_listen_work(struct work_struct *work)
out_err:
newsmcsk->sk_state = SMC_CLOSED;
+ smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
}
@@ -921,7 +963,8 @@ static int smc_getname(struct socket *sock, struct sockaddr *addr,
{
struct smc_sock *smc;
- if (peer && (sock->sk->sk_state != SMC_ACTIVE))
+ if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
+ (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
return -ENOTCONN;
smc = smc_sk(sock->sk);
@@ -937,7 +980,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
smc = smc_sk(sk);
lock_sock(sk);
- if (sk->sk_state != SMC_ACTIVE)
+ if ((sk->sk_state != SMC_ACTIVE) &&
+ (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+ (sk->sk_state != SMC_INIT))
goto out;
if (smc->use_fallback)
rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
@@ -957,13 +1002,21 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
smc = smc_sk(sk);
lock_sock(sk);
- if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
+ if ((sk->sk_state == SMC_INIT) ||
+ (sk->sk_state == SMC_LISTEN) ||
+ (sk->sk_state == SMC_CLOSED))
+ goto out;
+
+ if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
+ rc = 0;
goto out;
+ }
if (smc->use_fallback)
rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
else
rc = smc_rx_recvmsg(smc, msg, len, flags);
+
out:
release_sock(sk);
return rc;
@@ -1023,7 +1076,8 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
mask |= smc_accept_poll(sk);
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space)) {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ (sk->sk_shutdown & SEND_SHUTDOWN)) {
mask |= POLLOUT | POLLWRNORM;
} else {
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -1031,7 +1085,14 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
}
if (atomic_read(&smc->conn.bytes_to_rcv))
mask |= POLLIN | POLLRDNORM;
- /* for now - to be enhanced in follow-on patch */
+ if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
+ (sk->sk_state == SMC_CLOSED))
+ mask |= POLLHUP;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+
}
return mask;
@@ -1042,31 +1103,53 @@ static int smc_shutdown(struct socket *sock, int how)
struct sock *sk = sock->sk;
struct smc_sock *smc;
int rc = -EINVAL;
+ int rc1 = 0;
smc = smc_sk(sk);
if ((how < SHUT_RD) || (how > SHUT_RDWR))
- goto out_err;
+ return rc;
lock_sock(sk);
rc = -ENOTCONN;
- if (sk->sk_state == SMC_CLOSED)
+ if ((sk->sk_state != SMC_LISTEN) &&
+ (sk->sk_state != SMC_ACTIVE) &&
+ (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
+ (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
+ (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+ (sk->sk_state != SMC_APPCLOSEWAIT2) &&
+ (sk->sk_state != SMC_APPFINCLOSEWAIT))
goto out;
if (smc->use_fallback) {
rc = kernel_sock_shutdown(smc->clcsock, how);
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
if (sk->sk_shutdown == SHUTDOWN_MASK)
sk->sk_state = SMC_CLOSED;
- } else {
- rc = sock_no_shutdown(sock, how);
+ goto out;
+ }
+ switch (how) {
+ case SHUT_RDWR: /* shutdown in both directions */
+ rc = smc_close_active(smc);
+ break;
+ case SHUT_WR:
+ rc = smc_close_shutdown_write(smc);
+ break;
+ case SHUT_RD:
+ if (sk->sk_state == SMC_LISTEN)
+ rc = smc_close_active(smc);
+ else
+ rc = 0;
+ /* nothing more to do because peer is not involved */
+ break;
}
+ rc1 = kernel_sock_shutdown(smc->clcsock, how);
+ /* map sock_shutdown_cmd constants to sk_shutdown value range */
+ sk->sk_shutdown |= how + 1;
out:
release_sock(sk);
-
-out_err:
- return rc;
+ return rc ? rc : rc1;
}
static int smc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/smc/smc.h b/net/smc/smc.h
index e5e47fa..963b60f 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -27,6 +27,16 @@ enum smc_state { /* possible states of an SMC socket */
SMC_INIT = 2,
SMC_CLOSED = 7,
SMC_LISTEN = 10,
+ /* normal close */
+ SMC_PEERCLOSEWAIT1 = 20,
+ SMC_PEERCLOSEWAIT2 = 21,
+ SMC_APPFINCLOSEWAIT = 24,
+ SMC_APPCLOSEWAIT1 = 22,
+ SMC_APPCLOSEWAIT2 = 23,
+ SMC_PEERFINCLOSEWAIT = 25,
+ /* abnormal close */
+ SMC_PEERABORTWAIT = 26,
+ SMC_PROCESSABORT = 27,
};
struct smc_link_group;
@@ -161,8 +171,14 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
+ struct delayed_work sock_put_work; /* final socket freeing */
u8 use_fallback : 1, /* fallback to tcp */
- clc_started : 1;/* smc_connect_rdma ran */
+ clc_started : 1,/* smc_connect_rdma ran */
+ wait_close_tx_prepared : 1;
+ /* shutdown wr or close
+ * started, waiting for unsent
+ * data to be sent
+ */
};
static inline struct smc_sock *smc_sk(const struct sock *sk)
@@ -248,5 +264,7 @@ void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
struct smc_ib_device *smcibdev, u8 ibport,
struct smc_clc_msg_local *lcl, int srv_first_contact);
+struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
+void smc_close_non_accepted(struct sock *sk);
#endif /* __SMC_H */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 47d6d96..c000c23 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -16,6 +16,7 @@
#include "smc_cdc.h"
#include "smc_tx.h"
#include "smc_rx.h"
+#include "smc_close.h"
/********************************** send *************************************/
@@ -55,6 +56,9 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
cdcpend->conn);
}
smc_tx_sndbuf_nonfull(smc);
+ if (smc->sk.sk_state != SMC_ACTIVE)
+ /* wake up smc_close_wait_tx_pends() */
+ smc->sk.sk_state_change(&smc->sk);
bh_unlock_sock(&smc->sk);
}
@@ -149,6 +153,14 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
+bool smc_cdc_tx_has_pending(struct smc_connection *conn)
+{
+ struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
+ return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
+ smc_cdc_tx_filter, (unsigned long)conn);
+}
+
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -201,21 +213,20 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_data_ready(&smc->sk);
}
- if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
smc->sk.sk_err = ECONNRESET;
- if (smc_cdc_rxed_any_close_or_senddone(conn)) {
- smc->sk.sk_shutdown |= RCV_SHUTDOWN;
- sock_set_flag(&smc->sk, SOCK_DONE);
-
- /* subsequent patch: terminate connection */
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
}
+ if (smc_cdc_rxed_any_close_or_senddone(conn))
+ smc_close_passive_received(smc);
/* piggy backed tx info */
/* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn))
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
smc_tx_sndbuf_nonempty(conn);
-
- /* subsequent patch: trigger socket release if connection closed */
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
+ }
/* socket connected but not accepted */
if (!smc->sk.sk_socket)
@@ -244,10 +255,6 @@ static inline void smc_cdc_msg_recv(struct smc_cdc_msg *cdc,
return;
}
smc = container_of(connection, struct smc_sock, conn);
- if (smc->sk.sk_state == SMC_CLOSED) {
- read_unlock_bh(&lgr->conns_lock);
- return;
- }
sock_hold(&smc->sk);
read_unlock_bh(&lgr->conns_lock);
bh_lock_sock(&smc->sk);
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 8028489..dc32c26 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -212,6 +212,7 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+bool smc_cdc_tx_has_pending(struct smc_connection *conn);
int smc_cdc_init(void) __init;
#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
new file mode 100644
index 0000000..d70c05b
--- /dev/null
+++ b/net/smc/smc_close.c
@@ -0,0 +1,441 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Socket Closing - normal and abnormal
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <linux/workqueue.h>
+#include <net/sock.h>
+
+#include "smc.h"
+#include "smc_tx.h"
+#include "smc_cdc.h"
+#include "smc_close.h"
+
+#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
+
+static void smc_close_cleanup_listen(struct sock *parent)
+{
+ struct sock *sk;
+
+ /* Close non-accepted connections */
+ while ((sk = smc_accept_dequeue(parent, NULL)))
+ smc_close_non_accepted(sk);
+}
+
+static void smc_close_wait_tx_pends(struct smc_sock *smc)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct sock *sk = &smc->sk;
+ signed long timeout;
+
+ timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
+ add_wait_queue(sk_sleep(sk), &wait);
+ while (!signal_pending(current) && timeout) {
+ int rc;
+
+ rc = sk_wait_event(sk, &timeout,
+ !smc_cdc_tx_has_pending(&smc->conn),
+ &wait);
+ if (rc)
+ break;
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+}
+
+/* wait for sndbuf data being transmitted */
+static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct sock *sk = &smc->sk;
+
+ if (!timeout)
+ return;
+
+ if (!smc_tx_prepared_sends(&smc->conn))
+ return;
+
+ smc->wait_close_tx_prepared = 1;
+ add_wait_queue(sk_sleep(sk), &wait);
+ while (!signal_pending(current) && timeout) {
+ int rc;
+
+ rc = sk_wait_event(sk, &timeout,
+ !smc_tx_prepared_sends(&smc->conn) ||
+ (sk->sk_err == ECONNABORTED) ||
+ (sk->sk_err == ECONNRESET),
+ &wait);
+ if (rc)
+ break;
+ }
+ remove_wait_queue(sk_sleep(sk), &wait);
+ smc->wait_close_tx_prepared = 0;
+}
+
+void smc_close_wake_tx_prepared(struct smc_sock *smc)
+{
+ if (smc->wait_close_tx_prepared)
+ /* wake up socket closing */
+ smc->sk.sk_state_change(&smc->sk);
+}
+
+static int smc_close_wr(struct smc_connection *conn)
+{
+ conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;
+
+ return smc_cdc_get_slot_and_msg_send(conn);
+}
+
+static int smc_close_final(struct smc_connection *conn)
+{
+ if (atomic_read(&conn->bytes_to_rcv))
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ else
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
+
+ return smc_cdc_get_slot_and_msg_send(conn);
+}
+
+static int smc_close_abort(struct smc_connection *conn)
+{
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+
+ return smc_cdc_get_slot_and_msg_send(conn);
+}
+
+/* terminate smc socket abnormally - active abort
+ * RDMA communication no longer possible
+ */
+void smc_close_active_abort(struct smc_sock *smc)
+{
+ struct smc_cdc_conn_state_flags *txflags =
+ &smc->conn.local_tx_ctrl.conn_state_flags;
+
+ bh_lock_sock(&smc->sk);
+ smc->sk.sk_err = ECONNABORTED;
+ if (smc->clcsock && smc->clcsock->sk) {
+ smc->clcsock->sk->sk_err = ECONNABORTED;
+ smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
+ }
+ switch (smc->sk.sk_state) {
+ case SMC_INIT:
+ smc->sk.sk_state = SMC_PEERABORTWAIT;
+ break;
+ case SMC_APPCLOSEWAIT1:
+ case SMC_APPCLOSEWAIT2:
+ txflags->peer_conn_abort = 1;
+ sock_release(smc->clcsock);
+ if (!smc_cdc_rxed_any_close(&smc->conn))
+ smc->sk.sk_state = SMC_PEERABORTWAIT;
+ else
+ smc->sk.sk_state = SMC_CLOSED;
+ break;
+ case SMC_PEERCLOSEWAIT1:
+ case SMC_PEERCLOSEWAIT2:
+ if (!txflags->peer_conn_closed) {
+ smc->sk.sk_state = SMC_PEERABORTWAIT;
+ txflags->peer_conn_abort = 1;
+ sock_release(smc->clcsock);
+ } else {
+ smc->sk.sk_state = SMC_CLOSED;
+ }
+ break;
+ case SMC_PROCESSABORT:
+ case SMC_APPFINCLOSEWAIT:
+ if (!txflags->peer_conn_closed) {
+ txflags->peer_conn_abort = 1;
+ sock_release(smc->clcsock);
+ }
+ smc->sk.sk_state = SMC_CLOSED;
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ case SMC_PEERABORTWAIT:
+ case SMC_CLOSED:
+ break;
+ }
+
+ sock_set_flag(&smc->sk, SOCK_DEAD);
+ bh_unlock_sock(&smc->sk);
+ smc->sk.sk_state_change(&smc->sk);
+}
+
+int smc_close_active(struct smc_sock *smc)
+{
+ struct smc_cdc_conn_state_flags *txflags =
+ &smc->conn.local_tx_ctrl.conn_state_flags;
+ long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ struct smc_connection *conn = &smc->conn;
+ struct sock *sk = &smc->sk;
+ int old_state;
+ int rc = 0;
+
+ if (sock_flag(sk, SOCK_LINGER) &&
+ !(current->flags & PF_EXITING))
+ timeout = sk->sk_lingertime;
+
+again:
+ old_state = sk->sk_state;
+ switch (old_state) {
+ case SMC_INIT:
+ sk->sk_state = SMC_CLOSED;
+ if (smc->smc_listen_work.func)
+ flush_work(&smc->smc_listen_work);
+ sock_put(sk);
+ break;
+ case SMC_LISTEN:
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk); /* wake up accept */
+ if (smc->clcsock && smc->clcsock->sk) {
+ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
+ /* wake up kernel_accept of smc_tcp_listen_worker */
+ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
+ }
+ release_sock(sk);
+ smc_close_cleanup_listen(sk);
+ flush_work(&smc->tcp_listen_work);
+ lock_sock(sk);
+ break;
+ case SMC_ACTIVE:
+ smc_close_stream_wait(smc, timeout);
+ release_sock(sk);
+ cancel_work_sync(&conn->tx_work);
+ lock_sock(sk);
+ if (sk->sk_state == SMC_ACTIVE) {
+ /* send close request */
+ rc = smc_close_final(conn);
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
+ } else {
+ /* peer event has changed the state */
+ goto again;
+ }
+ break;
+ case SMC_APPFINCLOSEWAIT:
+ /* socket already shutdown wr or both (active close) */
+ if (txflags->peer_done_writing &&
+ !txflags->peer_conn_closed) {
+ /* just shutdown wr done, send close request */
+ rc = smc_close_final(conn);
+ }
+ sk->sk_state = SMC_CLOSED;
+ smc_close_wait_tx_pends(smc);
+ break;
+ case SMC_APPCLOSEWAIT1:
+ case SMC_APPCLOSEWAIT2:
+ if (!smc_cdc_rxed_any_close(conn))
+ smc_close_stream_wait(smc, timeout);
+ release_sock(sk);
+ cancel_work_sync(&conn->tx_work);
+ lock_sock(sk);
+ if (sk->sk_err != ECONNABORTED) {
+ /* confirm close from peer */
+ rc = smc_close_final(conn);
+ if (rc)
+ break;
+ }
+ if (smc_cdc_rxed_any_close(conn))
+ /* peer has closed the socket already */
+ sk->sk_state = SMC_CLOSED;
+ else
+ /* peer has just issued a shutdown write */
+ sk->sk_state = SMC_PEERFINCLOSEWAIT;
+ smc_close_wait_tx_pends(smc);
+ break;
+ case SMC_PEERCLOSEWAIT1:
+ case SMC_PEERCLOSEWAIT2:
+ case SMC_PEERFINCLOSEWAIT:
+ /* peer sending PeerConnectionClosed will cause transition */
+ break;
+ case SMC_PROCESSABORT:
+ cancel_work_sync(&conn->tx_work);
+ smc_close_abort(conn);
+ sk->sk_state = SMC_CLOSED;
+ smc_close_wait_tx_pends(smc);
+ break;
+ case SMC_PEERABORTWAIT:
+ case SMC_CLOSED:
+ /* nothing to do, add tracing in future patch */
+ break;
+ }
+
+ if (old_state != sk->sk_state)
+ sk->sk_state_change(&smc->sk);
+ return rc;
+}
+
+static void smc_close_passive_abort_received(struct smc_sock *smc)
+{
+ struct smc_cdc_conn_state_flags *txflags =
+ &smc->conn.local_tx_ctrl.conn_state_flags;
+ struct sock *sk = &smc->sk;
+
+ switch (sk->sk_state) {
+ case SMC_ACTIVE:
+ case SMC_APPFINCLOSEWAIT:
+ case SMC_APPCLOSEWAIT1:
+ case SMC_APPCLOSEWAIT2:
+ smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ break;
+ case SMC_PEERCLOSEWAIT1:
+ case SMC_PEERCLOSEWAIT2:
+ if (txflags->peer_done_writing &&
+ !txflags->peer_conn_closed) {
+ /* just shutdown, but not yet closed locally */
+ smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ } else {
+ sk->sk_state = SMC_CLOSED;
+ }
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ case SMC_PEERABORTWAIT:
+ sk->sk_state = SMC_CLOSED;
+ break;
+ case SMC_INIT:
+ case SMC_PROCESSABORT:
+ /* nothing to do, add tracing in future patch */
+ break;
+ }
+}
+
+/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
+ * or peer_done_writing.
+ * Called under tasklet context.
+ */
+void smc_close_passive_received(struct smc_sock *smc)
+{
+ struct smc_cdc_conn_state_flags *rxflags =
+ &smc->conn.local_rx_ctrl.conn_state_flags;
+ struct sock *sk = &smc->sk;
+ int old_state;
+
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+ if (smc->clcsock && smc->clcsock->sk)
+ smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
+ sock_set_flag(&smc->sk, SOCK_DONE);
+
+ old_state = sk->sk_state;
+
+ if (rxflags->peer_conn_abort) {
+ smc_close_passive_abort_received(smc);
+ goto wakeup;
+ }
+
+ switch (sk->sk_state) {
+ case SMC_INIT:
+ if (atomic_read(&smc->conn.bytes_to_rcv) ||
+ (rxflags->peer_done_writing &&
+ !rxflags->peer_conn_closed))
+ sk->sk_state = SMC_APPCLOSEWAIT1;
+ else
+ sk->sk_state = SMC_CLOSED;
+ break;
+ case SMC_ACTIVE:
+ sk->sk_state = SMC_APPCLOSEWAIT1;
+ break;
+ case SMC_PEERCLOSEWAIT1:
+ if (rxflags->peer_done_writing)
+ sk->sk_state = SMC_PEERCLOSEWAIT2;
+ /* fall through to check for closing */
+ case SMC_PEERCLOSEWAIT2:
+ case SMC_PEERFINCLOSEWAIT:
+ if (!smc_cdc_rxed_any_close(&smc->conn))
+ break;
+ if (sock_flag(sk, SOCK_DEAD) &&
+ (sk->sk_shutdown == SHUTDOWN_MASK)) {
+ /* smc_release has already been called locally */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ /* just shutdown, but not yet closed locally */
+ sk->sk_state = SMC_APPFINCLOSEWAIT;
+ }
+ break;
+ case SMC_APPCLOSEWAIT1:
+ case SMC_APPCLOSEWAIT2:
+ case SMC_APPFINCLOSEWAIT:
+ case SMC_PEERABORTWAIT:
+ case SMC_PROCESSABORT:
+ case SMC_CLOSED:
+ /* nothing to do, add tracing in future patch */
+ break;
+ }
+
+wakeup:
+ if (old_state != sk->sk_state)
+ sk->sk_state_change(sk);
+ sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
+ sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */
+
+ if ((sk->sk_state == SMC_CLOSED) &&
+ (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
+ smc_conn_free(&smc->conn);
+ schedule_delayed_work(&smc->sock_put_work,
+ SMC_CLOSE_SOCK_PUT_DELAY);
+ }
+}
+
+void smc_close_sock_put_work(struct work_struct *work)
+{
+ struct smc_sock *smc = container_of(to_delayed_work(work),
+ struct smc_sock,
+ sock_put_work);
+
+ sock_put(&smc->sk);
+}
+
+int smc_close_shutdown_write(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+ long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
+ struct sock *sk = &smc->sk;
+ int old_state;
+ int rc = 0;
+
+ if (sock_flag(sk, SOCK_LINGER))
+ timeout = sk->sk_lingertime;
+
+again:
+ old_state = sk->sk_state;
+ switch (old_state) {
+ case SMC_ACTIVE:
+ smc_close_stream_wait(smc, timeout);
+ release_sock(sk);
+ cancel_work_sync(&conn->tx_work);
+ lock_sock(sk);
+ /* send close wr request */
+ rc = smc_close_wr(conn);
+ if (sk->sk_state == SMC_ACTIVE)
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
+ else
+ goto again;
+ break;
+ case SMC_APPCLOSEWAIT1:
+ /* passive close */
+ if (!smc_cdc_rxed_any_close(conn))
+ smc_close_stream_wait(smc, timeout);
+ release_sock(sk);
+ cancel_work_sync(&conn->tx_work);
+ lock_sock(sk);
+ /* confirm close from peer */
+ rc = smc_close_wr(conn);
+ sk->sk_state = SMC_APPCLOSEWAIT2;
+ break;
+ case SMC_APPCLOSEWAIT2:
+ case SMC_PEERFINCLOSEWAIT:
+ case SMC_PEERCLOSEWAIT1:
+ case SMC_PEERCLOSEWAIT2:
+ case SMC_APPFINCLOSEWAIT:
+ case SMC_PROCESSABORT:
+ case SMC_PEERABORTWAIT:
+ /* nothing to do, add tracing in future patch */
+ break;
+ }
+
+ if (old_state != sk->sk_state)
+ sk->sk_state_change(&smc->sk);
+ return rc;
+}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
new file mode 100644
index 0000000..bc9a2df
--- /dev/null
+++ b/net/smc/smc_close.h
@@ -0,0 +1,28 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Socket Closing
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#ifndef SMC_CLOSE_H
+#define SMC_CLOSE_H
+
+#include <linux/workqueue.h>
+
+#include "smc.h"
+
+#define SMC_MAX_STREAM_WAIT_TIMEOUT (2 * HZ)
+#define SMC_CLOSE_SOCK_PUT_DELAY HZ
+
+void smc_close_wake_tx_prepared(struct smc_sock *smc);
+void smc_close_active_abort(struct smc_sock *smc);
+int smc_close_active(struct smc_sock *smc);
+void smc_close_passive_received(struct smc_sock *smc);
+void smc_close_sock_put_work(struct work_struct *work);
+int smc_close_shutdown_write(struct smc_sock *smc);
+
+#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8dbe42a..bc3c141 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -23,6 +23,7 @@
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
+#include "smc_close.h"
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY (600 * HZ)
@@ -293,6 +294,7 @@ void smc_lgr_free(struct smc_link_group *lgr)
void smc_lgr_terminate(struct smc_link_group *lgr)
{
struct smc_connection *conn;
+ struct smc_sock *smc;
struct rb_node *node;
spin_lock_bh(&smc_lgr_list.lock);
@@ -309,11 +311,14 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
node = rb_first(&lgr->conns_all);
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
+ smc = container_of(conn, struct smc_sock, conn);
+ sock_hold(&smc->sk);
__smc_lgr_unregister_conn(conn);
+ smc_close_active_abort(smc);
+ sock_put(&smc->sk);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
- schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 7e8799f..6e73b28 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -139,6 +139,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
if (sk->sk_state == SMC_INIT)
return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
+ (smc->sk.sk_err == ECONNABORTED) ||
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
return -EPIPE;
if (smc_cdc_rxed_any_close(conn))
@@ -392,6 +393,13 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
&pend);
if (rc < 0) {
if (rc == -EBUSY) {
+ struct smc_sock *smc =
+ container_of(conn, struct smc_sock, conn);
+
+ if (smc->sk.sk_err == ECONNABORTED) {
+ rc = sock_error(&smc->sk);
+ goto out_unlock;
+ }
rc = 0;
schedule_work(&conn->tx_work);
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 14f3f3f..eadf157 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -81,6 +81,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
return;
if (wc->status) {
+ struct smc_link_group *lgr;
+
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
/* clear full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[i], 0,
@@ -89,9 +91,10 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask);
}
- /* tbd in future patch: terminate connections of this link
- * group abnormally
- */
+ /* terminate connections of this link group abnormally */
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ smc_lgr_terminate(lgr);
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -176,9 +179,12 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
- /* tbd in future patch: timeout - terminate connections
- * of this link group abnormally
- */
+ /* timeout - terminate connections */
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ smc_lgr_terminate(lgr);
return -EPIPE;
}
if (rc == -ERESTARTSYS)
@@ -256,6 +262,24 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
}
}
+bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
+ smc_wr_tx_filter filter, unsigned long data)
+{
+ struct smc_wr_tx_pend_priv *tx_pend;
+ struct smc_wr_rx_hdr *wr_rx;
+ int i;
+
+ for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
+ wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
+ if (wr_rx->type != wr_rx_hdr_type)
+ continue;
+ tx_pend = &link->wr_tx_pends[i].priv;
+ if (filter(tx_pend, data))
+ return true;
+ }
+ return false;
+}
+
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
@@ -310,14 +334,19 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
smc_wr_rx_demultiplex(&wc[i]);
smc_wr_rx_post(link); /* refill WR RX */
} else {
+ struct smc_link_group *lgr;
+
/* handle status errors */
switch (wc[i].status) {
case IB_WC_RETRY_EXC_ERR:
case IB_WC_RNR_RETRY_EXC_ERR:
case IB_WC_WR_FLUSH_ERR:
- /* tbd in future patch: terminate connections of this
- * link group abnormally
- */
+ /* terminate connections of this link group
+ * abnormally
+ */
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ smc_lgr_terminate(lgr);
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 124f857..0b9beed 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -92,6 +92,8 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
+bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
+ smc_wr_tx_filter filter, unsigned long data);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
--
2.8.4
^ permalink raw reply related
* [PATCH V3 net-next 11/15] smc: connection data control (CDC)
From: Ursula Braun @ 2016-11-24 15:06 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, utz.bacher,
ubraun
In-Reply-To: <20161124150645.90881-1-ubraun@linux.vnet.ibm.com>
send and receive CDC messages (via IB message send and CQE)
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
net/smc/Makefile | 1 +
net/smc/af_smc.c | 9 ++
net/smc/smc.h | 97 ++++++++++++++++++
net/smc/smc_cdc.c | 288 +++++++++++++++++++++++++++++++++++++++++++++++++++++
net/smc/smc_cdc.h | 217 ++++++++++++++++++++++++++++++++++++++++
net/smc/smc_core.c | 9 ++
net/smc/smc_wr.c | 19 ++++
net/smc/smc_wr.h | 9 ++
8 files changed, 649 insertions(+)
create mode 100644 net/smc/smc_cdc.c
create mode 100644 net/smc/smc_cdc.h
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 73320bf..ec0fd03 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_SMC) += smc.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
+smc-y += smc_cdc.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9b26772..36a111c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -26,12 +26,14 @@
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
+#include <linux/in.h>
#include <net/sock.h>
#include <net/tcp.h>
#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
+#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
@@ -291,6 +293,7 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
smc->conn.peer_conn_idx = clc->conn_idx;
+ smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}
@@ -1211,6 +1214,12 @@ static int __init smc_init(void)
goto out_pnet;
}
+ rc = smc_cdc_init();
+ if (rc) {
+ pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
+ goto out_pnet;
+ }
+
rc = proto_register(&smc_proto, 1);
if (rc) {
pr_err("%s: proto_register fails with %d\n", __func__, rc);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 36fb1fa..6cb9496 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,6 +18,10 @@
#define SMCPROTO_SMC 0 /* SMC protocol */
+#ifdef ATOMIC64_INIT
+#define KERNEL_HAS_ATOMIC64
+#endif
+
enum smc_state { /* possible states of an SMC socket */
SMC_ACTIVE = 1,
SMC_INIT = 2,
@@ -31,6 +35,67 @@ struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */
u8 type;
} __packed;
+struct smc_cdc_conn_state_flags {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 peer_done_writing : 1; /* Sending done indicator */
+ u8 peer_conn_closed : 1; /* Peer connection closed indicator */
+ u8 peer_conn_abort : 1; /* Abnormal close indicator */
+ u8 reserved : 5;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 5;
+ u8 peer_conn_abort : 1;
+ u8 peer_conn_closed : 1;
+ u8 peer_done_writing : 1;
+#endif
+} __packed;
+
+struct smc_cdc_producer_flags {
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 write_blocked : 1; /* Writing Blocked, no rx buf space */
+ u8 urg_data_pending : 1; /* Urgent Data Pending */
+ u8 urg_data_present : 1; /* Urgent Data Present */
+ u8 cons_curs_upd_req : 1; /* cursor update requested */
+ u8 failover_validation : 1;/* message replay due to failover */
+ u8 reserved : 3;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 reserved : 3;
+ u8 failover_validation : 1;
+ u8 cons_curs_upd_req : 1;
+ u8 urg_data_present : 1;
+ u8 urg_data_pending : 1;
+ u8 write_blocked : 1;
+#endif
+} __packed;
+
+/* in host byte order */
+union smc_host_cursor { /* SMC cursor - an offset in an RMBE */
+ struct {
+ u16 reserved;
+ u16 wrap; /* window wrap sequence number */
+ u32 count; /* cursor (= offset) part */
+ };
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t acurs; /* for atomic processing */
+#else
+ u64 acurs; /* for atomic processing */
+#endif
+} __aligned(8);
+
+/* in host byte order, except for flag bitfields in network byte order */
+struct smc_host_cdc_msg { /* Connection Data Control message */
+ struct smc_wr_rx_hdr common; /* .type = 0xFE */
+ u8 len; /* length = 44 */
+ u16 seqno; /* connection seq # */
+ u32 token; /* alert_token */
+ union smc_host_cursor prod; /* producer cursor */
+ union smc_host_cursor cons; /* consumer cursor,
+ * piggy backed "ack"
+ */
+ struct smc_cdc_producer_flags prod_flags; /* conn. tx/rx status */
+ struct smc_cdc_conn_state_flags conn_state_flags; /* peer conn. status*/
+ u8 reserved[18];
+} __packed __aligned(8);
+
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
@@ -47,6 +112,38 @@ struct smc_connection {
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
int rmbe_size; /* RMBE size <== sock rmem */
int rmbe_size_short;/* compressed notation */
+
+ struct smc_host_cdc_msg local_tx_ctrl; /* host byte order staging
+ * buffer for CDC msg send
+ * .prod cf. TCP snd_nxt
+ * .cons cf. TCP sends ack
+ */
+ union smc_host_cursor tx_curs_prep; /* tx - prepared data
+ * snd_max..wmem_alloc
+ */
+ union smc_host_cursor tx_curs_sent; /* tx - sent data
+ * snd_nxt ?
+ */
+ union smc_host_cursor tx_curs_fin; /* tx - confirmed by peer
+ * snd-wnd-begin ?
+ */
+ atomic_t sndbuf_space; /* remaining space in sndbuf */
+ u16 tx_cdc_seq; /* sequence # for CDC send */
+ spinlock_t send_lock; /* protect wr_sends */
+
+ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl.
+ * .prod cf. TCP rcv_nxt
+ * .cons cf. TCP snd_una
+ */
+ union smc_host_cursor rx_curs_confirmed; /* confirmed to peer
+ * source of snd_una ?
+ */
+ atomic_t bytes_to_rcv; /* arrived data,
+ * not yet received
+ */
+#ifndef KERNEL_HAS_ATOMIC64
+ spinlock_t acurs_lock; /* protect cursors */
+#endif
};
struct smc_sock { /* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
new file mode 100644
index 0000000..e14809e
--- /dev/null
+++ b/net/smc/smc_cdc.c
@@ -0,0 +1,288 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Connection Data Control (CDC)
+ * handles flow control
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <linux/spinlock.h>
+
+#include "smc.h"
+#include "smc_wr.h"
+#include "smc_cdc.h"
+
+/********************************** send *************************************/
+
+struct smc_cdc_tx_pend {
+ struct smc_connection *conn; /* socket connection */
+ union smc_host_cursor cursor; /* tx sndbuf cursor sent */
+ union smc_host_cursor p_cursor; /* rx RMBE cursor produced */
+ u16 ctrl_seq; /* conn. tx sequence # */
+};
+
+/* handler for send/transmission completion of a CDC msg */
+static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
+ struct smc_link *link,
+ enum ib_wc_status wc_status)
+{
+ struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd;
+ struct smc_sock *smc;
+ int diff;
+
+ if (!cdcpend->conn)
+ /* already dismissed */
+ return;
+
+ smc = container_of(cdcpend->conn, struct smc_sock, conn);
+ bh_lock_sock(&smc->sk);
+ if (!wc_status) {
+ diff = smc_curs_diff(cdcpend->conn->sndbuf_size,
+ &cdcpend->conn->tx_curs_fin,
+ &cdcpend->cursor);
+ /* sndbuf_space is decreased in smc_sendmsg */
+ smp_mb__before_atomic();
+ atomic_add(diff, &cdcpend->conn->sndbuf_space);
+ /* guarantee 0 <= sndbuf_space <= sndbuf_size */
+ smp_mb__after_atomic();
+ smc_curs_write(&cdcpend->conn->tx_curs_fin,
+ smc_curs_read(&cdcpend->cursor, cdcpend->conn),
+ cdcpend->conn);
+ }
+ /* subsequent patch: wake if send buffer space available */
+ bh_unlock_sock(&smc->sk);
+}
+
+int smc_cdc_get_free_slot(struct smc_link *link,
+ struct smc_wr_buf **wr_buf,
+ struct smc_cdc_tx_pend **pend)
+{
+ return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ (struct smc_wr_tx_pend_priv **)pend);
+}
+
+static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
+ struct smc_cdc_tx_pend *pend)
+{
+ BUILD_BUG_ON_MSG(
+ sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
+ "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
+ BUILD_BUG_ON_MSG(
+ sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+ "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
+ BUILD_BUG_ON_MSG(
+ sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
+ "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_cdc_tx_pend)");
+ pend->conn = conn;
+ pend->cursor = conn->tx_curs_sent;
+ pend->p_cursor = conn->local_tx_ctrl.prod;
+ pend->ctrl_seq = conn->tx_cdc_seq;
+}
+
+int smc_cdc_msg_send(struct smc_connection *conn,
+ struct smc_wr_buf *wr_buf,
+ struct smc_cdc_tx_pend *pend)
+{
+ struct smc_link *link;
+ int rc;
+
+ link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
+ smc_cdc_add_pending_send(conn, pend);
+
+ conn->tx_cdc_seq++;
+ conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
+ smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf,
+ &conn->local_tx_ctrl, conn);
+ rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+ if (!rc)
+ smc_curs_write(&conn->rx_curs_confirmed,
+ smc_curs_read(&conn->local_tx_ctrl.cons, conn),
+ conn);
+
+ return rc;
+}
+
+int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+{
+ struct smc_cdc_tx_pend *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
+ &pend);
+ if (rc)
+ return rc;
+
+ return smc_cdc_msg_send(conn, wr_buf, pend);
+}
+
+static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
+ unsigned long data)
+{
+ struct smc_connection *conn = (struct smc_connection *)data;
+ struct smc_cdc_tx_pend *cdc_pend =
+ (struct smc_cdc_tx_pend *)tx_pend;
+
+ return cdc_pend->conn == conn;
+}
+
+static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
+{
+ struct smc_cdc_tx_pend *cdc_pend =
+ (struct smc_cdc_tx_pend *)tx_pend;
+
+ cdc_pend->conn = NULL;
+}
+
+void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
+{
+ struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
+ smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
+ smc_cdc_tx_filter, smc_cdc_tx_dismisser,
+ (unsigned long)conn);
+}
+
+/********************************* receive ***********************************/
+
+static inline bool smc_cdc_before(u16 seq1, u16 seq2)
+{
+ return (s16)(seq1 - seq2) < 0;
+}
+
+static void smc_cdc_msg_recv_action(struct smc_sock *smc,
+ struct smc_link *link,
+ struct smc_cdc_msg *cdc)
+{
+ union smc_host_cursor cons_old, prod_old;
+ struct smc_connection *conn = &smc->conn;
+ int diff_cons, diff_prod;
+
+ if (!cdc->prod_flags.failover_validation) {
+ if (smc_cdc_before(ntohs(cdc->seqno),
+ conn->local_rx_ctrl.seqno))
+ /* received seqno is old */
+ return;
+ }
+ smc_curs_write(&prod_old,
+ smc_curs_read(&conn->local_rx_ctrl.prod, conn),
+ conn);
+ smc_curs_write(&cons_old,
+ smc_curs_read(&conn->local_rx_ctrl.cons, conn),
+ conn);
+ smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn);
+
+ diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old,
+ &conn->local_rx_ctrl.cons);
+ if (diff_cons) {
+ /* peer_rmbe_space is decreased during data transfer with RDMA
+ * write
+ */
+ smp_mb__before_atomic();
+ atomic_add(diff_cons, &conn->peer_rmbe_space);
+ /* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
+ smp_mb__after_atomic();
+ }
+
+ diff_prod = smc_curs_diff(conn->rmbe_size, &prod_old,
+ &conn->local_rx_ctrl.prod);
+ if (diff_prod) {
+ /* bytes_to_rcv is decreased in smc_recvmsg */
+ smp_mb__before_atomic();
+ atomic_add(diff_prod, &conn->bytes_to_rcv);
+ /* guarantee 0 <= bytes_to_rcv <= rmbe_size */
+ smp_mb__after_atomic();
+ }
+
+ if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ smc->sk.sk_err = ECONNRESET;
+ if (smc_cdc_rxed_any_close_or_senddone(conn)) {
+ smc->sk.sk_shutdown |= RCV_SHUTDOWN;
+ sock_set_flag(&smc->sk, SOCK_DONE);
+
+ /* subsequent patch: terminate connection */
+ }
+
+ /* piggy backed tx info */
+ /* subsequent patch: wake receivers if receive buffer space available */
+
+ /* subsequent patch: trigger socket release if connection closed */
+
+ /* socket connected but not accepted */
+ if (!smc->sk.sk_socket)
+ return;
+
+ /* data available */
+ /* subsequent patch: send delayed ack, wake receivers */
+}
+
+/* called under tasklet context */
+static inline void smc_cdc_msg_recv(struct smc_cdc_msg *cdc,
+ struct smc_link *link, u64 wr_id)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ struct smc_connection *connection;
+ struct smc_sock *smc;
+
+ /* lookup connection */
+ read_lock_bh(&lgr->conns_lock);
+ connection = smc_lgr_find_conn(ntohl(cdc->token), lgr);
+ if (!connection) {
+ read_unlock_bh(&lgr->conns_lock);
+ return;
+ }
+ smc = container_of(connection, struct smc_sock, conn);
+ if (smc->sk.sk_state == SMC_CLOSED) {
+ read_unlock_bh(&lgr->conns_lock);
+ return;
+ }
+ sock_hold(&smc->sk);
+ read_unlock_bh(&lgr->conns_lock);
+ bh_lock_sock(&smc->sk);
+ smc_cdc_msg_recv_action(smc, link, cdc);
+ bh_unlock_sock(&smc->sk);
+ sock_put(&smc->sk); /* no free sk in softirq-context */
+}
+
+/***************************** init, exit, misc ******************************/
+
+static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
+{
+ struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
+ struct smc_cdc_msg *cdc = buf;
+
+ if (wc->byte_len < sizeof(*cdc))
+ return; /* short message */
+ if (cdc->len != sizeof(*cdc))
+ return; /* invalid message */
+ smc_cdc_msg_recv(cdc, link, wc->wr_id);
+}
+
+static struct smc_wr_rx_handler smc_cdc_rx_handlers[] = {
+ {
+ .handler = smc_cdc_rx_handler,
+ .type = SMC_CDC_MSG_TYPE
+ },
+ {
+ .handler = NULL,
+ }
+};
+
+int __init smc_cdc_init(void)
+{
+ struct smc_wr_rx_handler *handler;
+ int rc = 0;
+
+ for (handler = smc_cdc_rx_handlers; handler->handler; handler++) {
+ INIT_HLIST_NODE(&handler->list);
+ rc = smc_wr_rx_register_handler(handler);
+ if (rc)
+ break;
+ }
+ return rc;
+}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
new file mode 100644
index 0000000..8028489
--- /dev/null
+++ b/net/smc/smc_cdc.h
@@ -0,0 +1,217 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Connection Data Control (CDC)
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#ifndef SMC_CDC_H
+#define SMC_CDC_H
+
+#include <linux/kernel.h> /* max_t */
+#include <linux/compiler.h> /* __packed */
+#include <linux/atomic.h> /* xchg */
+#include <linux/in.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_wr.h"
+
+#define SMC_CDC_MSG_TYPE 0xFE
+
+/* in network byte order */
+union smc_cdc_cursor { /* SMC cursor */
+ struct {
+ __be16 reserved;
+ __be16 wrap;
+ __be32 count;
+ };
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t acurs; /* for atomic processing */
+#else
+ u64 acurs; /* for atomic processing */
+#endif
+} __packed __aligned(8);
+
+/* in network byte order */
+struct smc_cdc_msg {
+ struct smc_wr_rx_hdr common; /* .type = 0xFE */
+ u8 len; /* 44 */
+ __be16 seqno;
+ __be32 token;
+ union smc_cdc_cursor prod;
+ union smc_cdc_cursor cons; /* piggy backed "ack" */
+ struct smc_cdc_producer_flags prod_flags;
+ struct smc_cdc_conn_state_flags conn_state_flags;
+ u8 reserved[18];
+} __packed;
+
+static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
+{
+ return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_closed;
+}
+
+static inline bool smc_cdc_rxed_any_close_or_senddone(
+ struct smc_connection *conn)
+{
+ return smc_cdc_rxed_any_close(conn) ||
+ conn->local_rx_ctrl.conn_state_flags.peer_done_writing;
+}
+
+static inline void smc_curs_add(int size, union smc_host_cursor *curs,
+ int value)
+{
+ curs->count += value;
+ if (curs->count >= size) {
+ curs->wrap++;
+ curs->count -= size;
+ }
+}
+
+/* SMC cursors are 8 bytes long and require atomic reading and writing */
+static inline u64 smc_curs_read(union smc_host_cursor *curs,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+ u64 ret;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ ret = curs->acurs;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+ return ret;
+#else
+ return atomic64_read(&curs->acurs);
+#endif
+}
+
+static inline u64 smc_curs_read_net(union smc_cdc_cursor *curs,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+ u64 ret;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ ret = curs->acurs;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+ return ret;
+#else
+ return atomic64_read(&curs->acurs);
+#endif
+}
+
+static inline void smc_curs_write(union smc_host_cursor *curs, u64 val,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ curs->acurs = val;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+ atomic64_set(&curs->acurs, val);
+#endif
+}
+
+static inline void smc_curs_write_net(union smc_cdc_cursor *curs, u64 val,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ curs->acurs = val;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+ atomic64_set(&curs->acurs, val);
+#endif
+}
+
+/* calculate cursor difference between old and new, where old <= new */
+static inline int smc_curs_diff(unsigned int size,
+ union smc_host_cursor *old,
+ union smc_host_cursor *new)
+{
+ if (old->wrap != new->wrap)
+ return max_t(int, 0,
+ ((size - old->count) + new->count));
+
+ return max_t(int, 0, (new->count - old->count));
+}
+
+static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,
+ union smc_host_cursor *local,
+ struct smc_connection *conn)
+{
+ union smc_host_cursor temp;
+
+ smc_curs_write(&temp, smc_curs_read(local, conn), conn);
+ peer->count = htonl(temp.count);
+ peer->wrap = htons(temp.wrap);
+ /* peer->reserved = htons(0); must be ensured by caller */
+}
+
+static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer,
+ struct smc_host_cdc_msg *local,
+ struct smc_connection *conn)
+{
+ peer->common.type = local->common.type;
+ peer->len = local->len;
+ peer->seqno = htons(local->seqno);
+ peer->token = htonl(local->token);
+ smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn);
+ smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn);
+ peer->prod_flags = local->prod_flags;
+ peer->conn_state_flags = local->conn_state_flags;
+}
+
+static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
+ union smc_cdc_cursor *peer,
+ struct smc_connection *conn)
+{
+ union smc_host_cursor temp, old;
+ union smc_cdc_cursor net;
+
+ smc_curs_write(&old, smc_curs_read(local, conn), conn);
+ smc_curs_write_net(&net, smc_curs_read_net(peer, conn), conn);
+ temp.count = ntohl(net.count);
+ temp.wrap = ntohs(net.wrap);
+ if ((old.wrap > temp.wrap) && temp.wrap)
+ return;
+ if ((old.wrap == temp.wrap) &&
+ (old.count > temp.count))
+ return;
+ smc_curs_write(local, smc_curs_read(&temp, conn), conn);
+}
+
+static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+ struct smc_cdc_msg *peer,
+ struct smc_connection *conn)
+{
+ local->common.type = peer->common.type;
+ local->len = peer->len;
+ local->seqno = ntohs(peer->seqno);
+ local->token = ntohl(peer->token);
+ smc_cdc_cursor_to_host(&local->prod, &peer->prod, conn);
+ smc_cdc_cursor_to_host(&local->cons, &peer->cons, conn);
+ local->prod_flags = peer->prod_flags;
+ local->conn_state_flags = peer->conn_state_flags;
+}
+
+struct smc_cdc_tx_pend;
+
+int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf,
+ struct smc_cdc_tx_pend **pend);
+void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
+int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
+ struct smc_cdc_tx_pend *pend);
+int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+int smc_cdc_init(void) __init;
+
+#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4404037..bbb29a3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -22,6 +22,7 @@
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
+#include "smc_cdc.h"
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY (600 * HZ)
@@ -226,6 +227,7 @@ void smc_conn_free(struct smc_connection *conn)
if (!lgr)
return;
+ smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
smc_rmb_unuse(conn);
smc_sndbuf_unuse(conn);
@@ -433,6 +435,11 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr,
smc_lgr_register_conn(conn); /* add smc conn to lgr */
rc = smc_link_determine_gid(conn->lgr);
}
+ conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
+ conn->local_tx_ctrl.len = sizeof(struct smc_cdc_msg);
+#ifndef KERNEL_HAS_ATOMIC64
+ spin_lock_init(&conn->acurs_lock);
+#endif
out:
return rc ? rc : local_contact;
@@ -533,6 +540,7 @@ int smc_sndbuf_create(struct smc_sock *smc)
conn->sndbuf_desc = sndbuf_desc;
conn->sndbuf_size = tmp_bufsize;
smc->sk.sk_sndbuf = tmp_bufsize * 2;
+ atomic_set(&conn->sndbuf_space, tmp_bufsize);
return 0;
} else {
return -ENOMEM;
@@ -609,6 +617,7 @@ int smc_rmb_create(struct smc_sock *smc)
conn->rmbe_size = tmp_bufsize;
conn->rmbe_size_short = tmp_bufsize_short;
smc->sk.sk_rcvbuf = tmp_bufsize * 2;
+ atomic_set(&conn->bytes_to_rcv, 0);
return 0;
} else {
return -ENOMEM;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 5cbc1e5..14f3f3f 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -237,6 +237,25 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
return rc;
}
+void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
+ smc_wr_tx_filter filter,
+ smc_wr_tx_dismisser dismisser,
+ unsigned long data)
+{
+ struct smc_wr_tx_pend_priv *tx_pend;
+ struct smc_wr_rx_hdr *wr_rx;
+ int i;
+
+ for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
+ wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
+ if (wr_rx->type != wr_rx_hdr_type)
+ continue;
+ tx_pend = &link->wr_tx_pends[i].priv;
+ if (filter(tx_pend, data))
+ dismisser(tx_pend);
+ }
+}
+
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 0b62672..124f857 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -36,6 +36,11 @@ typedef void (*smc_wr_tx_handler)(struct smc_wr_tx_pend_priv *,
struct smc_link *,
enum ib_wc_status);
+typedef bool (*smc_wr_tx_filter)(struct smc_wr_tx_pend_priv *,
+ unsigned long);
+
+typedef void (*smc_wr_tx_dismisser)(struct smc_wr_tx_pend_priv *);
+
struct smc_wr_rx_handler {
struct hlist_node list; /* hash table collision resolution */
void (*handler)(struct ib_wc *, void *);
@@ -87,6 +92,10 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
+void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
+ smc_wr_tx_filter filter,
+ smc_wr_tx_dismisser dismisser,
+ unsigned long data);
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
int smc_wr_rx_post_init(struct smc_link *link);
--
2.8.4
^ permalink raw reply related
* [PATCH V3 net-next 15/15] smc: netlink interface for SMC sockets
From: Ursula Braun @ 2016-11-24 15:06 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, utz.bacher,
ubraun
In-Reply-To: <20161124150645.90881-1-ubraun@linux.vnet.ibm.com>
Support for SMC socket monitoring via netlink sockets of protocol
NETLINK_SOCK_DIAG.
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
include/net/smc.h | 20 ++++
include/net/sock.h | 3 +
include/uapi/linux/netlink.h | 1 +
include/uapi/linux/smc_diag.h | 85 +++++++++++++++++
net/smc/Kconfig | 9 ++
net/smc/Makefile | 1 +
net/smc/af_smc.c | 43 ++++++++-
net/smc/smc.h | 2 +
net/smc/smc_close.c | 1 +
net/smc/smc_diag.c | 215 ++++++++++++++++++++++++++++++++++++++++++
10 files changed, 379 insertions(+), 1 deletion(-)
create mode 100644 include/net/smc.h
create mode 100644 include/uapi/linux/smc_diag.h
create mode 100644 net/smc/smc_diag.c
diff --git a/include/net/smc.h b/include/net/smc.h
new file mode 100644
index 0000000..12d2635
--- /dev/null
+++ b/include/net/smc.h
@@ -0,0 +1,20 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Definitions for the SMC module (socket related)
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+#ifndef _SMC_H
+#define _SMC_H
+
+struct smc_hashinfo {
+ rwlock_t lock;
+ struct hlist_head ht;
+};
+
+int smc_hash_sk(struct sock *sk);
+void smc_unhash_sk(struct sock *sk);
+#endif /* _SMC_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index cb4359b..e6bddcf 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -70,6 +70,7 @@
#include <net/checksum.h>
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
+#include <net/smc.h>
/*
* This structure really needs to be cleaned up.
@@ -955,6 +956,7 @@ struct request_sock_ops;
struct timewait_sock_ops;
struct inet_hashinfo;
struct raw_hashinfo;
+struct smc_hashinfo;
struct module;
/*
@@ -1063,6 +1065,7 @@ struct proto {
struct inet_hashinfo *hashinfo;
struct udp_table *udp_table;
struct raw_hashinfo *raw_hash;
+ struct smc_hashinfo *smc_hash;
} h;
struct module *owner;
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 0dba4e4..f3946a2 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -27,6 +27,7 @@
#define NETLINK_ECRYPTFS 19
#define NETLINK_RDMA 20
#define NETLINK_CRYPTO 21 /* Crypto layer */
+#define NETLINK_SMC 22 /* SMC monitoring */
#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
new file mode 100644
index 0000000..0063919
--- /dev/null
+++ b/include/uapi/linux/smc_diag.h
@@ -0,0 +1,85 @@
+#ifndef _UAPI_SMC_DIAG_H_
+#define _UAPI_SMC_DIAG_H_
+
+#include <linux/types.h>
+#include <linux/inet_diag.h>
+#include <rdma/ib_verbs.h>
+
+/* Request structure */
+struct smc_diag_req {
+ __u8 diag_family;
+ __u8 pad[2];
+ __u8 diag_ext; /* Query extended information */
+ struct inet_diag_sockid id;
+};
+
+/* Base info structure. It contains socket identity (addrs/ports/cookie) based
+ * on the internal clcsock, and more SMC-related socket data
+ */
+struct smc_diag_msg {
+ __u8 diag_family;
+ __u8 diag_state;
+ __u8 diag_fallback;
+ __u8 diag_shutdown;
+ struct inet_diag_sockid id;
+
+ __u32 diag_uid;
+ __u64 diag_inode;
+};
+
+/* Extensions */
+
+enum {
+ SMC_DIAG_NONE,
+ SMC_DIAG_CONNINFO,
+ SMC_DIAG_LGRINFO,
+ SMC_DIAG_SHUTDOWN,
+ __SMC_DIAG_MAX,
+};
+
+#define SMC_DIAG_MAX (__SMC_DIAG_MAX - 1)
+
+/* SMC_DIAG_CONNINFO */
+
+struct smc_diag_cursor {
+ __u16 reserved;
+ __u16 wrap;
+ __u32 count;
+};
+
+struct smc_diag_conninfo {
+ __u32 token; /* unique connection id */
+ __u32 sndbuf_size; /* size of send buffer */
+ __u32 rmbe_size; /* size of RMB element */
+ __u32 peer_rmbe_size; /* size of peer RMB element */
+ /* local RMB element cursors */
+ struct smc_diag_cursor rx_prod; /* received producer cursor */
+ struct smc_diag_cursor rx_cons; /* received consumer cursor */
+ /* peer RMB element cursors */
+ struct smc_diag_cursor tx_prod; /* sent producer cursor */
+ struct smc_diag_cursor tx_cons; /* sent consumer cursor */
+ __u8 rx_prod_flags; /* received producer flags */
+ __u8 rx_conn_state_flags; /* recvd connection flags*/
+ __u8 tx_prod_flags; /* sent producer flags */
+ __u8 tx_conn_state_flags; /* sent connection flags*/
+ /* send buffer cursors */
+ struct smc_diag_cursor tx_prep; /* prepared to be sent cursor */
+ struct smc_diag_cursor tx_sent; /* sent cursor */
+ struct smc_diag_cursor tx_fin; /* confirmed sent cursor */
+};
+
+/* SMC_DIAG_LINKINFO */
+
+struct smc_diag_linkinfo {
+ __u8 link_id; /* link identifier */
+ __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */
+ __u8 ibport; /* RDMA device port number */
+ __u8 gid[40]; /* local GID */
+ __u8 peer_gid[40]; /* peer GID */
+};
+
+struct smc_diag_lgrinfo {
+ struct smc_diag_linkinfo lnk[1];
+ __u8 role;
+};
+#endif /* _UAPI_SMC_DIAG_H_ */
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index bc02980..c717ef0 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -9,3 +9,12 @@ config SMC
a separate socket family SMC.
Select this option if you want to run SMC socket applications
+
+config SMC_DIAG
+ tristate "SMC: socket monitoring interface"
+ depends on SMC
+ ---help---
+ Support for SMC socket monitoring interface used by tools such as
+ smcss.
+
+ if unsure, say Y.
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 5cf0caf..1881046 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_SMC) += smc.o
+obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 23f7d65..53cdef6 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -29,6 +29,7 @@
#include <linux/in.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/smc.h>
#include "smc.h"
#include "smc_clc.h"
@@ -59,13 +60,48 @@ static void smc_set_keepalive(struct sock *sk, int val)
smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}
-static struct proto smc_proto = {
+static struct smc_hashinfo smc_v4_hashinfo = {
+ .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
+};
+
+int smc_hash_sk(struct sock *sk)
+{
+ struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
+ struct hlist_head *head;
+
+ head = &h->ht;
+
+ write_lock_bh(&h->lock);
+ sk_add_node(sk, head);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ write_unlock_bh(&h->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(smc_hash_sk);
+
+void smc_unhash_sk(struct sock *sk)
+{
+ struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
+
+ write_lock_bh(&h->lock);
+ if (sk_del_node_init(sk))
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ write_unlock_bh(&h->lock);
+}
+EXPORT_SYMBOL_GPL(smc_unhash_sk);
+
+struct proto smc_proto = {
.name = "SMC",
.owner = THIS_MODULE,
.keepalive = smc_set_keepalive,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
.obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v4_hashinfo,
.slab_flags = SLAB_DESTROY_BY_RCU,
};
+EXPORT_SYMBOL_GPL(smc_proto);
static int smc_release(struct socket *sock)
{
@@ -109,6 +145,7 @@ static int smc_release(struct socket *sock)
schedule_delayed_work(&smc->sock_put_work,
SMC_CLOSE_SOCK_PUT_DELAY);
}
+ sk->sk_prot->unhash(sk);
release_sock(sk);
sock_put(sk);
@@ -139,6 +176,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = SMCPROTO_SMC;
+ sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
smc = smc_sk(sk);
@@ -546,6 +584,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
lsmc->sk.sk_err = -rc;
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
+ sk->sk_prot->unhash(new_sk);
sock_put(new_sk);
*new_smc = NULL;
goto out;
@@ -555,6 +594,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
+ sk->sk_prot->unhash(new_sk);
sock_put(new_sk);
*new_smc = NULL;
goto out;
@@ -1330,6 +1370,7 @@ static int __init smc_init(void)
pr_err("%s: sock_register fails with %d\n", __func__, rc);
goto out_proto;
}
+ INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
rc = smc_ib_register_client();
if (rc) {
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 963b60f..38c3644 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -18,6 +18,8 @@
#define SMCPROTO_SMC 0 /* SMC protocol */
+extern struct proto smc_proto;
+
#ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64
#endif
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index d70c05b..03dfcc6 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -384,6 +384,7 @@ void smc_close_sock_put_work(struct work_struct *work)
struct smc_sock,
sock_put_work);
+ smc->sk.sk_prot->unhash(&smc->sk);
sock_put(&smc->sk);
}
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
new file mode 100644
index 0000000..d2d01cf
--- /dev/null
+++ b/net/smc/smc_diag.c
@@ -0,0 +1,215 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Monitoring SMC transport protocol sockets
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sock_diag.h>
+#include <linux/inet_diag.h>
+#include <linux/smc_diag.h>
+#include <net/netlink.h>
+#include <net/smc.h>
+
+#include "smc.h"
+#include "smc_core.h"
+
+static void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
+{
+ sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x",
+ be16_to_cpu(((__be16 *)gid_raw)[0]),
+ be16_to_cpu(((__be16 *)gid_raw)[1]),
+ be16_to_cpu(((__be16 *)gid_raw)[2]),
+ be16_to_cpu(((__be16 *)gid_raw)[3]),
+ be16_to_cpu(((__be16 *)gid_raw)[4]),
+ be16_to_cpu(((__be16 *)gid_raw)[5]),
+ be16_to_cpu(((__be16 *)gid_raw)[6]),
+ be16_to_cpu(((__be16 *)gid_raw)[7]));
+}
+
+static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk)
+{
+ struct smc_sock *smc = smc_sk(sk);
+
+ r->diag_family = sk->sk_family;
+ if (!smc->clcsock)
+ return;
+ r->id.idiag_sport = htons(smc->clcsock->sk->sk_num);
+ r->id.idiag_dport = smc->clcsock->sk->sk_dport;
+ r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if;
+ sock_diag_save_cookie(sk, r->id.idiag_cookie);
+ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+ r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr;
+ r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr;
+}
+
+static int smc_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+ struct smc_diag_msg *r,
+ struct user_namespace *user_ns)
+{
+ if (nla_put_u8(skb, SMC_DIAG_SHUTDOWN, sk->sk_shutdown))
+ return 1;
+
+ r->diag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
+ r->diag_inode = sock_i_ino(sk);
+ return 0;
+}
+
+static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ const struct smc_diag_req *req,
+ struct nlattr *bc)
+{
+ struct smc_sock *smc = smc_sk(sk);
+ struct user_namespace *user_ns;
+ struct smc_diag_msg *r;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ cb->nlh->nlmsg_type, sizeof(*r), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ r = nlmsg_data(nlh);
+ smc_diag_msg_common_fill(r, sk);
+ r->diag_state = sk->sk_state;
+ r->diag_fallback = smc->use_fallback;
+ user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);
+ if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
+ goto errout;
+
+ if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+ struct smc_connection *conn = &smc->conn;
+ struct smc_diag_conninfo cinfo = {
+ .token = conn->alert_token_local,
+ .sndbuf_size = conn->sndbuf_size,
+ .rmbe_size = conn->rmbe_size,
+ .peer_rmbe_size = conn->peer_rmbe_size,
+
+ .rx_prod.wrap = conn->local_rx_ctrl.prod.wrap,
+ .rx_prod.count = conn->local_rx_ctrl.prod.count,
+ .rx_cons.wrap = conn->local_rx_ctrl.cons.wrap,
+ .rx_cons.count = conn->local_rx_ctrl.cons.count,
+
+ .tx_prod.wrap = conn->local_tx_ctrl.prod.wrap,
+ .tx_prod.count = conn->local_tx_ctrl.prod.count,
+ .tx_cons.wrap = conn->local_tx_ctrl.cons.wrap,
+ .tx_cons.count = conn->local_tx_ctrl.cons.count,
+
+ .tx_prod_flags =
+ *(u8 *)&conn->local_tx_ctrl.prod_flags,
+ .tx_conn_state_flags =
+ *(u8 *)&conn->local_tx_ctrl.conn_state_flags,
+ .rx_prod_flags = *(u8 *)&conn->local_rx_ctrl.prod_flags,
+ .rx_conn_state_flags =
+ *(u8 *)&conn->local_rx_ctrl.conn_state_flags,
+
+ .tx_prep.wrap = conn->tx_curs_prep.wrap,
+ .tx_prep.count = conn->tx_curs_prep.count,
+ .tx_sent.wrap = conn->tx_curs_sent.wrap,
+ .tx_sent.count = conn->tx_curs_sent.count,
+ .tx_fin.wrap = conn->tx_curs_fin.wrap,
+ .tx_fin.count = conn->tx_curs_fin.count,
+ };
+
+ if (nla_put(skb, SMC_DIAG_CONNINFO, sizeof(cinfo), &cinfo) < 0)
+ goto errout;
+ }
+
+ if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+ struct smc_diag_lgrinfo linfo = {
+ .role = smc->conn.lgr->role,
+ .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
+ .lnk[0].link_id = smc->conn.lgr->lnk[0].link_id,
+ };
+
+ memcpy(linfo.lnk[0].ibname,
+ smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
+ sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));
+ smc_gid_be16_convert(linfo.lnk[0].gid,
+ smc->conn.lgr->lnk[0].gid.raw);
+ smc_gid_be16_convert(linfo.lnk[0].peer_gid,
+ smc->conn.lgr->lnk[0].peer_gid);
+
+ if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
+ goto errout;
+ }
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+errout:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int smc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *bc = NULL;
+ struct hlist_head *head;
+ struct sock *sk;
+ int rc = 0;
+
+ read_lock(&smc_proto.h.smc_hash->lock);
+ head = &smc_proto.h.smc_hash->ht;
+ if (hlist_empty(head))
+ goto out;
+
+ sk_for_each(sk, head) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ rc = __smc_diag_dump(sk, skb, cb, nlmsg_data(cb->nlh), bc);
+ if (rc)
+ break;
+ }
+
+out:
+ read_unlock(&smc_proto.h.smc_hash->lock);
+ return rc;
+}
+
+static int smc_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ struct net *net = sock_net(skb->sk);
+
+ if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+ h->nlmsg_flags & NLM_F_DUMP) {
+ {
+ struct netlink_dump_control c = {
+ .dump = smc_diag_dump,
+ .min_dump_alloc = SKB_WITH_OVERHEAD(32768),
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ }
+ }
+ return 0;
+}
+
+static const struct sock_diag_handler smc_diag_handler = {
+ .family = AF_SMC,
+ .dump = smc_diag_handler_dump,
+};
+
+static int __init smc_diag_init(void)
+{
+ return sock_diag_register(&smc_diag_handler);
+}
+
+static void __exit smc_diag_exit(void)
+{
+ sock_diag_unregister(&smc_diag_handler);
+}
+
+module_init(smc_diag_init);
+module_exit(smc_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */);
--
2.8.4
^ permalink raw reply related
* [PATCH V3 net-next 10/15] smc: link layer control (LLC)
From: Ursula Braun @ 2016-11-24 15:06 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, utz.bacher,
ubraun
In-Reply-To: <20161124150645.90881-1-ubraun@linux.vnet.ibm.com>
send and receive LLC messages CONFIRM_LINK (via IB message send and CQE)
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
net/smc/Makefile | 2 +-
net/smc/af_smc.c | 94 ++++++++++++++++++++++++++++++-
net/smc/smc_clc.h | 2 +
net/smc/smc_core.c | 8 +++
net/smc/smc_core.h | 4 ++
net/smc/smc_llc.c | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++++
net/smc/smc_llc.h | 63 +++++++++++++++++++++
7 files changed, 328 insertions(+), 3 deletions(-)
create mode 100644 net/smc/smc_llc.c
create mode 100644 net/smc/smc_llc.h
diff --git a/net/smc/Makefile b/net/smc/Makefile
index b19120e..73320bf 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,2 +1,2 @@
obj-$(CONFIG_SMC) += smc.o
-smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o
+smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fc7ef1e..9b26772 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -31,6 +31,7 @@
#include "smc.h"
#include "smc_clc.h"
+#include "smc_llc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
@@ -251,6 +252,41 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock,
return rc;
}
+static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
+{
+ struct smc_link_group *lgr = smc->conn.lgr;
+ struct smc_link *link;
+ int rest;
+ int rc;
+
+ link = &lgr->lnk[SMC_SINGLE_LINK];
+ /* receive CONFIRM LINK request from server over RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(
+ &link->llc_confirm,
+ SMC_LLC_WAIT_FIRST_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ return rc;
+ }
+
+ rc = smc_ib_modify_qp_rts(link);
+ if (rc)
+ return SMC_CLC_DECL_INTERR;
+
+ smc_wr_remember_qp_attr(link);
+ /* send CONFIRM LINK response over RoCE fabric */
+ rc = smc_llc_send_confirm_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ gid, SMC_LLC_RESP);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ return rc;
+}
+
static void smc_conn_save_peer_info(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
@@ -368,7 +404,17 @@ static int smc_connect_rdma(struct smc_sock *smc)
if (rc)
goto out_err_unlock;
- /* tbd in follow-on patch: llc_confirm */
+ if (local_contact == SMC_FIRST_CONTACT) {
+ /* QP confirmation over RoCE fabric */
+ reason_code = smc_clnt_conf_first_link(
+ smc, &smcibdev->gid[ibport - 1]);
+ if (reason_code < 0) {
+ rc = reason_code;
+ goto out_err_unlock;
+ }
+ if (reason_code > 0)
+ goto decline_rdma_unlock;
+ }
mutex_unlock(&smc_create_lgr_pending);
out_connected:
@@ -553,6 +599,36 @@ static void smc_close_non_accepted(struct sock *sk)
sock_put(sk);
}
+static int smc_serv_conf_first_link(struct smc_sock *smc)
+{
+ struct smc_link_group *lgr = smc->conn.lgr;
+ struct smc_link *link;
+ int rest;
+ int rc;
+
+ link = &lgr->lnk[SMC_SINGLE_LINK];
+ /* send CONFIRM LINK request to client over the RoCE fabric */
+ rc = smc_llc_send_confirm_link(link,
+ link->smcibdev->mac[link->ibport - 1],
+ &link->smcibdev->gid[link->ibport - 1],
+ SMC_LLC_REQ);
+ if (rc < 0)
+ return SMC_CLC_DECL_TCL;
+
+ /* receive CONFIRM LINK response from client over the RoCE fabric */
+ rest = wait_for_completion_interruptible_timeout(
+ &link->llc_confirm_resp,
+ SMC_LLC_WAIT_FIRST_TIME);
+ if (rest <= 0) {
+ struct smc_clc_msg_decline dclc;
+
+ rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
+ SMC_CLC_DECLINE);
+ }
+
+ return rc;
+}
+
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
@@ -665,13 +741,21 @@ static void smc_listen_work(struct work_struct *work)
goto decline_rdma;
}
- /* tbd in follow-on patch: modify_qp, llc_confirm */
if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link);
if (rc) {
reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma;
}
+ /* QP confirmation over RoCE fabric */
+ reason_code = smc_serv_conf_first_link(new_smc);
+ if (reason_code < 0) {
+ /* peer is not aware of a problem */
+ rc = reason_code;
+ goto out_err;
+ }
+ if (reason_code > 0)
+ goto decline_rdma;
}
out_connected:
@@ -1121,6 +1205,12 @@ static int __init smc_init(void)
if (rc)
return rc;
+ rc = smc_llc_init();
+ if (rc) {
+ pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
+ goto out_pnet;
+ }
+
rc = proto_register(&smc_proto, 1);
if (rc) {
pr_err("%s: proto_register fails with %d\n", __func__, rc);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index eca7cce..bab98bc 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -33,6 +33,8 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_REPLY 0x06000000 /* reply to a received decline */
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
+#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */
+#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2b2e739..4404037 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -21,9 +21,13 @@
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
+#include "smc_llc.h"
+#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY (600 * HZ)
+static u32 smc_lgr_num; /* unique link group number */
+
/* Register connection's alert token in our lookup structure.
* To use rbtrees we have to implement our own insert core.
* Requires @conns_lock
@@ -152,6 +156,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
}
+ smc_lgr_num += SMC_LGR_NUM_INCR;
+ lgr->id = smc_lgr_num;
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
lgr->conns_all = RB_ROOT;
@@ -175,6 +181,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr,
rc = smc_wr_create_link(lnk);
if (rc)
goto destroy_qp;
+ init_completion(&lnk->llc_confirm);
+ init_completion(&lnk->llc_confirm_resp);
smc->conn.lgr = lgr;
rwlock_init(&lgr->conns_lock);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 97805b2..35aaae3 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -73,6 +73,9 @@ struct smc_link {
u32 peer_psn; /* QP rx initial packet seqno */
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/
+ u8 link_id; /* unique # within link group */
+ struct completion llc_confirm; /* wait for rx of conf link */
+ struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -125,6 +128,7 @@ struct smc_link_group {
SMC_RMBS_PER_LGR_MAX)];
/* used rtoken elements */
+ u32 id; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
bool sync_err; /* lgr no longer fits to peer */
};
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
new file mode 100644
index 0000000..7ee2835
--- /dev/null
+++ b/net/smc/smc_llc.c
@@ -0,0 +1,158 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Link Layer Control (LLC)
+ *
+ * For now, we only support the necessary "confirm link" functionality
+ * which happens for the first RoCE link after successful CLC handshake.
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
+ * Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <net/tcp.h>
+#include <rdma/ib_verbs.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_clc.h"
+#include "smc_llc.h"
+
+/********************************** send *************************************/
+
+struct smc_llc_tx_pend {
+};
+
+/* handler for send/transmission completion of an LLC msg */
+static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
+ struct smc_link *link,
+ enum ib_wc_status wc_status)
+{
+ /* future work: handle wc_status error for recovery and failover */
+}
+
+/**
+ * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
+ * @link: Pointer to SMC link used for sending LLC control message.
+ * @wr_buf: Out variable returning pointer to work request payload buffer.
+ * @pend: Out variable returning pointer to private pending WR tracking.
+ * It's the context the transmit complete handler will get.
+ *
+ * Reserves and pre-fills an entry for a pending work request send/tx.
+ * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
+ * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
+ *
+ * Return: 0 on success, otherwise an error value.
+ */
+static int smc_llc_add_pending_send(struct smc_link *link,
+ struct smc_wr_buf **wr_buf,
+ struct smc_wr_tx_pend_priv **pend)
+{
+ int rc;
+
+ rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
+ if (rc < 0)
+ return rc;
+ BUILD_BUG_ON_MSG(
+ sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
+ "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
+ BUILD_BUG_ON_MSG(
+ sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
+ "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
+ BUILD_BUG_ON_MSG(
+ sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
+ "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
+ return 0;
+}
+
+/* high-level API to send LLC confirm link */
+int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
+ union ib_gid *gid,
+ enum smc_llc_reqresp reqresp)
+{
+ struct smc_link_group *lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ struct smc_llc_msg_confirm_link *confllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
+ memset(confllc, 0, sizeof(*confllc));
+ confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
+ confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
+ if (reqresp == SMC_LLC_RESP)
+ confllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(confllc->sender_mac, mac, ETH_ALEN);
+ memcpy(&confllc->sender_gid, gid, SMC_GID_SIZE);
+ hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
+ /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */
+ confllc->link_uid = htonl(lgr->id);
+ confllc->max_links = SMC_LINKS_PER_LGR_MAX;
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
+/********************************* receive ***********************************/
+
+static void smc_llc_rx_confirm_link(struct smc_link *link,
+ struct smc_llc_msg_confirm_link *llc)
+{
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+ if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
+ if (lgr->role == SMC_SERV)
+ complete(&link->llc_confirm_resp);
+ } else {
+ if (lgr->role == SMC_CLNT) {
+ link->link_id = llc->link_num;
+ complete(&link->llc_confirm);
+ }
+ }
+}
+
+static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+{
+ struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
+ union smc_llc_msg *llc = buf;
+
+ if (wc->byte_len < sizeof(*llc))
+ return; /* short message */
+ if (llc->raw.hdr.length != sizeof(*llc))
+ return; /* invalid message */
+ if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK)
+ smc_llc_rx_confirm_link(link, &llc->confirm_link);
+}
+
+/***************************** init, exit, misc ******************************/
+
+static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
+ {
+ .handler = smc_llc_rx_handler,
+ .type = SMC_LLC_CONFIRM_LINK
+ },
+ {
+ .handler = NULL,
+ }
+};
+
+int __init smc_llc_init(void)
+{
+ struct smc_wr_rx_handler *handler;
+ int rc = 0;
+
+ for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
+ INIT_HLIST_NODE(&handler->list);
+ rc = smc_wr_rx_register_handler(handler);
+ if (rc)
+ break;
+ }
+ return rc;
+}
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
new file mode 100644
index 0000000..68acee0
--- /dev/null
+++ b/net/smc/smc_llc.h
@@ -0,0 +1,63 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Definitions for LLC (link layer control) message handling
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com>
+ * Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#ifndef SMC_LLC_H
+#define SMC_LLC_H
+
+#include "smc_wr.h"
+
+#define SMC_LLC_FLAG_RESP 0x80
+
+#define SMC_LLC_WAIT_FIRST_TIME (5 * HZ)
+
+enum smc_llc_reqresp {
+ SMC_LLC_REQ,
+ SMC_LLC_RESP
+};
+
+enum smc_llc_msg_type {
+ SMC_LLC_CONFIRM_LINK = 0x01,
+};
+
+#define SMC_LLC_DATA_LEN 40
+
+struct smc_llc_hdr {
+ struct smc_wr_rx_hdr common;
+ u8 length; /* 44 */
+ u8 reserved;
+ u8 flags;
+} __packed;
+
+struct smc_llc_msg_confirm_link { /* type 0x01 */
+ struct smc_llc_hdr hd;
+ u8 sender_mac[ETH_ALEN];
+ union ib_gid sender_gid;
+ u8 sender_qp_num[3];
+ u8 link_num;
+ __be32 link_uid;
+ u8 max_links;
+ u8 reserved[9];
+} __packed;
+
+union smc_llc_msg {
+ struct smc_llc_msg_confirm_link confirm_link;
+ struct {
+ struct smc_llc_hdr hdr;
+ u8 data[SMC_LLC_DATA_LEN];
+ } __packed raw;
+} __packed;
+
+/* transmit */
+int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
+ enum smc_llc_reqresp reqresp);
+int smc_llc_init(void) __init;
+
+#endif /* SMC_LLC_H */
--
2.8.4
^ permalink raw reply related
* [PATCH V3 net-next 13/15] smc: receive data from RMBE
From: Ursula Braun @ 2016-11-24 15:06 UTC (permalink / raw)
To: davem; +Cc: netdev, linux-s390, schwidefsky, heiko.carstens, utz.bacher,
ubraun
In-Reply-To: <20161124150645.90881-1-ubraun@linux.vnet.ibm.com>
move RMBE data into user space buffer and update managing cursors
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
---
net/smc/Makefile | 2 +-
net/smc/af_smc.c | 7 +-
net/smc/smc.h | 4 +
net/smc/smc_cdc.c | 6 +-
net/smc/smc_core.c | 10 +++
net/smc/smc_rx.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++++
net/smc/smc_rx.h | 23 ++++++
net/smc/smc_tx.c | 37 +++++++++
net/smc/smc_tx.h | 1 +
9 files changed, 304 insertions(+), 3 deletions(-)
create mode 100644 net/smc/smc_rx.c
create mode 100644 net/smc/smc_rx.h
diff --git a/net/smc/Makefile b/net/smc/Makefile
index fc28d79..6255e29 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_SMC) += smc.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 4738f5a..5959c84 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -38,6 +38,7 @@
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
+#include "smc_rx.h"
static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
* creation
@@ -422,6 +423,7 @@ static int smc_connect_rdma(struct smc_sock *smc)
mutex_unlock(&smc_create_lgr_pending);
smc_tx_init(smc);
+ smc_rx_init(smc);
out_connected:
smc_copy_sock_settings_to_clc(smc);
@@ -765,6 +767,7 @@ static void smc_listen_work(struct work_struct *work)
}
smc_tx_init(new_smc);
+ smc_rx_init(new_smc);
out_connected:
sk_refcnt_debug_inc(newsmcsk);
@@ -960,7 +963,7 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (smc->use_fallback)
rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
else
- rc = sock_no_recvmsg(sock, msg, len, flags);
+ rc = smc_rx_recvmsg(smc, msg, len, flags);
out:
release_sock(sk);
return rc;
@@ -1026,6 +1029,8 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
}
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
/* for now - to be enhanced in follow-on patch */
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2956135..e5e47fa 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -112,6 +112,10 @@ struct smc_connection {
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
int rmbe_size; /* RMBE size <== sock rmem */
int rmbe_size_short;/* compressed notation */
+ int rmbe_update_limit;
+ /* lower limit for consumer
+ * cursor update
+ */
struct smc_host_cdc_msg local_tx_ctrl; /* host byte order staging
* buffer for CDC msg send
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 22af635..47d6d96 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -15,6 +15,7 @@
#include "smc_wr.h"
#include "smc_cdc.h"
#include "smc_tx.h"
+#include "smc_rx.h"
/********************************** send *************************************/
@@ -197,6 +198,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
atomic_add(diff_prod, &conn->bytes_to_rcv);
/* guarantee 0 <= bytes_to_rcv <= rmbe_size */
smp_mb__after_atomic();
+ smc->sk.sk_data_ready(&smc->sk);
}
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
@@ -220,7 +222,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
return;
/* data available */
- /* subsequent patch: send delayed ack, wake receivers */
+ if ((conn->local_rx_ctrl.prod_flags.write_blocked) ||
+ (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req))
+ smc_tx_consumer_update(conn);
}
/* called under tasklet context */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bbb29a3..8dbe42a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -487,6 +487,15 @@ struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr,
return NULL;
}
+/* one of the conditions for announcing a receiver's current window size is
+ * that it "results in a minimum increase in the window size of 10% of the
+ * receive buffer space" [RFC7609]
+ */
+static inline int smc_rmb_wnd_update_limit(int rmbe_size)
+{
+ return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+}
+
/* create the tx buffer for an SMC socket */
int smc_sndbuf_create(struct smc_sock *smc)
{
@@ -618,6 +627,7 @@ int smc_rmb_create(struct smc_sock *smc)
conn->rmbe_size_short = tmp_bufsize_short;
smc->sk.sk_rcvbuf = tmp_bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
+ conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize);
return 0;
} else {
return -ENOMEM;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
new file mode 100644
index 0000000..5d18787
--- /dev/null
+++ b/net/smc/smc_rx.c
@@ -0,0 +1,217 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Manage RMBE
+ * copy new RMBE data into user space
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#include <linux/net.h>
+#include <linux/rcupdate.h>
+#include <net/sock.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_cdc.h"
+#include "smc_tx.h" /* smc_tx_consumer_update() */
+#include "smc_rx.h"
+
+/* callback implementation for sk.sk_data_ready()
+ * to wakeup rcvbuf consumers that blocked with smc_rx_wait_data().
+ * indirectly called by smc_cdc_msg_recv_action().
+ */
+static void smc_rx_data_ready(struct sock *sk)
+{
+ struct socket_wq *wq;
+
+ /* derived from sock_def_readable() */
+ /* called already in smc_listen_work() */
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
+ POLLRDNORM | POLLRDBAND);
+ if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
+ (sk->sk_state == SMC_CLOSED))
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
+ else
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ rcu_read_unlock();
+}
+
+/* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
+ * @smc smc socket
+ * @timeo pointer to max seconds to wait, pointer to value 0 for no timeout
+ * Returns:
+ * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
+ * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
+ */
+static int smc_rx_wait_data(struct smc_sock *smc, long *timeo)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct smc_connection *conn = &smc->conn;
+ struct sock *sk = &smc->sk;
+ int rc;
+
+ if (atomic_read(&conn->bytes_to_rcv))
+ return 1;
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ add_wait_queue(sk_sleep(sk), &wait);
+ rc = sk_wait_event(sk, timeo,
+ sk->sk_err ||
+ sk->sk_shutdown & RCV_SHUTDOWN ||
+ sock_flag(sk, SOCK_DONE) ||
+ atomic_read(&conn->bytes_to_rcv) ||
+ smc_cdc_rxed_any_close_or_senddone(conn),
+ &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ return rc;
+}
+
+/* rcvbuf consumer: main API called by socket layer.
+ * called under sk lock.
+ */
+int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
+ int flags)
+{
+ size_t copylen, read_done = 0, read_remaining = len;
+ size_t chunk_len, chunk_off, chunk_len_sum;
+ struct smc_connection *conn = &smc->conn;
+ union smc_host_cursor cons;
+ int readable, chunk;
+ char *rcvbuf_base;
+ struct sock *sk;
+ long timeo;
+ int target; /* Read at least these many bytes */
+ int rc;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return -EINVAL; /* future work for sk.sk_family == AF_SMC */
+ if (flags & MSG_OOB)
+ return -EINVAL; /* future work */
+
+ sk = &smc->sk;
+ if (sk->sk_state == SMC_LISTEN)
+ return -ENOTCONN;
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+
+ msg->msg_namelen = 0;
+ /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
+ rcvbuf_base = conn->rmb_desc->cpu_addr;
+
+ do { /* while (read_remaining) */
+ if (read_done >= target)
+ break;
+
+ if (atomic_read(&conn->bytes_to_rcv))
+ goto copy;
+
+ if (read_done) {
+ if (sk->sk_err ||
+ sk->sk_state == SMC_CLOSED ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ !timeo ||
+ signal_pending(current) ||
+ smc_cdc_rxed_any_close_or_senddone(conn) ||
+ conn->local_tx_ctrl.conn_state_flags.
+ peer_conn_abort)
+ break;
+ } else {
+ if (sock_flag(sk, SOCK_DONE))
+ break;
+ if (sk->sk_err) {
+ read_done = sock_error(sk);
+ break;
+ }
+ if (sk->sk_shutdown & RCV_SHUTDOWN ||
+ smc_cdc_rxed_any_close_or_senddone(conn) ||
+ conn->local_tx_ctrl.conn_state_flags.
+ peer_conn_abort)
+ break;
+ if (sk->sk_state == SMC_CLOSED) {
+ if (!sock_flag(sk, SOCK_DONE)) {
+ /* This occurs when user tries to read
+ * from never connected socket.
+ */
+ read_done = -ENOTCONN;
+ break;
+ }
+ break;
+ }
+ if (signal_pending(current)) {
+ read_done = sock_intr_errno(timeo);
+ break;
+ }
+ }
+
+ if (!atomic_read(&conn->bytes_to_rcv)) {
+ smc_rx_wait_data(smc, &timeo);
+ continue;
+ }
+
+copy:
+ /* initialize variables for 1st iteration of subsequent loop */
+ /* could be just 1 byte, even after smc_rx_wait_data above */
+ readable = atomic_read(&conn->bytes_to_rcv);
+ /* not more than what user space asked for */
+ copylen = min_t(size_t, read_remaining, readable);
+ smc_curs_write(&cons,
+ smc_curs_read(&conn->local_tx_ctrl.cons, conn),
+ conn);
+ /* determine chunks where to read from rcvbuf */
+ /* either unwrapped case, or 1st chunk of wrapped case */
+ chunk_len = min_t(size_t,
+ copylen, conn->rmbe_size - cons.count);
+ chunk_len_sum = chunk_len;
+ chunk_off = cons.count;
+ for (chunk = 0; chunk < 2; chunk++) {
+ if (!(flags & MSG_TRUNC)) {
+ rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off,
+ chunk_len);
+ if (rc) {
+ if (!read_done)
+ read_done = -EFAULT;
+ goto out;
+ }
+ }
+ read_remaining -= chunk_len;
+ read_done += chunk_len;
+
+ if (chunk_len_sum == copylen)
+ break; /* either on 1st or 2nd iteration */
+ /* prepare next (== 2nd) iteration */
+ chunk_len = copylen - chunk_len; /* remainder */
+ chunk_len_sum += chunk_len;
+ chunk_off = 0; /* modulo offset in recv ring buffer */
+ }
+
+ /* update cursors */
+ if (!(flags & MSG_PEEK)) {
+ smc_curs_add(conn->rmbe_size, &cons, copylen);
+ /* increased in recv tasklet smc_cdc_msg_rcv() */
+ smp_mb__before_atomic();
+ atomic_sub(copylen, &conn->bytes_to_rcv);
+ /* guarantee 0 <= bytes_to_rcv <= rmbe_size */
+ smp_mb__after_atomic();
+ smc_curs_write(&conn->local_tx_ctrl.cons,
+ smc_curs_read(&cons, conn),
+ conn);
+ /* send consumer cursor update if required */
+ /* similar to advertising new TCP rcv_wnd if required */
+ smc_tx_consumer_update(conn);
+ }
+ } while (read_remaining);
+out:
+ return read_done;
+}
+
+/* Initialize receive properties on connection establishment. NB: not __init! */
+void smc_rx_init(struct smc_sock *smc)
+{
+ smc->sk.sk_data_ready = smc_rx_data_ready;
+}
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
new file mode 100644
index 0000000..b5b80e1
--- /dev/null
+++ b/net/smc/smc_rx.h
@@ -0,0 +1,23 @@
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Manage RMBE
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
+ */
+
+#ifndef SMC_RX_H
+#define SMC_RX_H
+
+#include <linux/socket.h>
+#include <linux/types.h>
+
+#include "smc.h"
+
+void smc_rx_init(struct smc_sock *smc);
+int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len,
+ int flags);
+
+#endif /* SMC_RX_H */
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index d86bef6..7e8799f 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -427,6 +427,43 @@ static void smc_tx_work(struct work_struct *work)
release_sock(&smc->sk);
}
+void smc_tx_consumer_update(struct smc_connection *conn)
+{
+ union smc_host_cursor cfed, cons;
+ struct smc_cdc_tx_pend *pend;
+ struct smc_wr_buf *wr_buf;
+ int to_confirm, rc;
+
+ smc_curs_write(&cons,
+ smc_curs_read(&conn->local_tx_ctrl.cons, conn),
+ conn);
+ smc_curs_write(&cfed,
+ smc_curs_read(&conn->rx_curs_confirmed, conn),
+ conn);
+ to_confirm = smc_curs_diff(conn->rmbe_size, &cfed, &cons);
+
+ if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
+ ((to_confirm > conn->rmbe_update_limit) &&
+ ((to_confirm > (conn->rmbe_size / 2)) ||
+ conn->local_rx_ctrl.prod_flags.write_blocked))) {
+ rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
+ &wr_buf, &pend);
+ if (!rc)
+ rc = smc_cdc_msg_send(conn, wr_buf, pend);
+ if (rc < 0) {
+ schedule_work(&conn->tx_work);
+ return;
+ }
+ smc_curs_write(&conn->rx_curs_confirmed,
+ smc_curs_read(&conn->local_tx_ctrl.cons, conn),
+ conn);
+ conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+ }
+ if (conn->local_rx_ctrl.prod_flags.write_blocked &&
+ !atomic_read(&conn->bytes_to_rcv))
+ conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+}
+
/***************************** send initialize *******************************/
/* Initialize send properties on connection establishment. NB: not __init! */
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 931c666..1d6a0dc 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -30,5 +30,6 @@ void smc_tx_init(struct smc_sock *smc);
int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
+void smc_tx_consumer_update(struct smc_connection *conn);
#endif /* SMC_TX_H */
--
2.8.4
^ permalink raw reply related
* Re: [PATCH net-next 1/4] net: mvneta: Convert to be 64 bits compatible
From: Marcin Wojtas @ 2016-11-24 15:09 UTC (permalink / raw)
To: Gregory CLEMENT
Cc: Arnd Bergmann, linux-arm-kernel@lists.infradead.org,
Jisheng Zhang, Thomas Petazzoni, Andrew Lunn, Jason Cooper,
netdev, linux-kernel, David S. Miller, Sebastian Hesselbarth
In-Reply-To: <8760ncly5s.fsf@free-electrons.com>
Hi Gregory,
2016-11-24 16:01 GMT+01:00 Gregory CLEMENT <gregory.clement@free-electrons.com>:
> Hi Arnd,
>
> On jeu., nov. 24 2016, Arnd Bergmann <arnd@arndb.de> wrote:
>
>> On Thursday, November 24, 2016 4:37:36 PM CET Jisheng Zhang wrote:
>>> solB (a SW shadow cookie) perhaps gives a better performance: in hot path,
>>> such as mvneta_rx(), the driver accesses buf_cookie and buf_phys_addr of
>>> rx_desc which is allocated by dma_alloc_coherent, it's noncacheable if the
>>> device isn't cache-coherent. I didn't measure the performance difference,
>>> because in fact we take solA as well internally. From your experience,
>>> can the performance gain deserve the complex code?
>>
>> Yes, a read from uncached memory is fairly slow, so if you have a chance
>> to avoid that it will probably help. When adding complexity to the code,
>> it probably makes sense to take a runtime profile anyway quantify how
>> much it gains.
>>
>> On machines that have cache-coherent DMA, accessing the descriptor
>> should be fine, as you already have to load the entire cache line
>> to read the status field.
>>
>> Looking at this snippet:
>>
>> rx_status = rx_desc->status;
>> rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
>> data = (unsigned char *)rx_desc->buf_cookie;
>> phys_addr = rx_desc->buf_phys_addr;
>> pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
>> bm_pool = &pp->bm_priv->bm_pools[pool_id];
>>
>> if (!mvneta_rxq_desc_is_first_last(rx_status) ||
>> (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
>> err_drop_frame_ret_pool:
>> /* Return the buffer to the pool */
>> mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
>> rx_desc->buf_phys_addr);
>> err_drop_frame:
>>
>>
>> I think there is more room for optimizing if you start: you read
>> the status field twice (the second one in MVNETA_RX_GET_BM_POOL_ID)
>> and you can cache the buf_phys_addr along with the virtual address
>> once you add that.
>
> I agree we can optimize this code but it is not related to the 64 bits
> conversion. Indeed this part is running when we use the HW buffer
> management, however currently this part is not ready at all for 64
> bits. The virtual address is directly handled by the hardware but it has
> only 32 bits to store it in the cookie. So if we want to use the HWBM in
> 64 bits we need to redesign the code, (maybe by storing the virtual
> address in a array and pass the index in the cookie).
>
How about storing data (virt address and maybe other stuff) as a part
of data buffer and using rx_packet_offset? It has to be used for a3700
anyway. No need of additional rings whatsoever.
Best regards,
Marcin
^ permalink raw reply
* Re: wl1251 & mac address & calibration data
From: Sebastian Reichel @ 2016-11-24 15:13 UTC (permalink / raw)
To: Pali Rohár
Cc: Pavel Machek, Michal Kazior, Kalle Valo, Ivaylo Dimitrov,
Aaro Koskinen, Tony Lindgren, linux-wireless, Network Development,
linux-kernel
In-Reply-To: <20161124083329.GB13735@pali>
[-- Attachment #1: Type: text/plain, Size: 1504 bytes --]
Hi,
On Thu, Nov 24, 2016 at 09:33:29AM +0100, Pali Rohár wrote:
> On Thursday 24 November 2016 08:51:04 Pavel Machek wrote:
> > Hi!
> >
> > > > "ifconfig hw ether XX" normally sets the address. I guess that's
> > > > ioctl?
> > >
> > > This sets temporary address and it is ioctl. IIRC same as what ethtool
> > > uses. (ifconfig is already deprecated).
> > >
> > > > And I guess we should use similar mechanism for permanent
> > > > address.
> > >
> > > I'm not sure here... Above ioctl ↑↑↑ is for changing temporary mac
> > > address. But here we do not want to change permanent mac address. We
> > > want to tell kernel driver current permanent mac address which is
> > > stored
> >
> > Well... I'd still use similar mechanism :-).
>
> Thats problematic, because in time when wlan0 interface is registered
> into system and visible in ifconfig output it already needs to have
> permanent mac address assigned.
>
> We should assign permanent mac address before wlan0 of wl1251 is
> registered into system.
You can just add the MAC address to the NVS data, which is also
required for the device initialization.
I wonder if those information could be put into DT. Iirc some
network devices get their MAC address from DT. Maybe we can add
all NVS info to DT? How much data is it?
Userspace application can add all those information to the DT
using a DT overlay. Also the u-boot could parse and add it at
some point in the future.
-- Sebastian
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [Patch net-next] net_sched: move the empty tp check from ->destroy() to ->delete()
From: Daniel Borkmann @ 2016-11-24 15:20 UTC (permalink / raw)
To: Roi Dayan, Cong Wang, netdev; +Cc: jiri, John Fastabend
In-Reply-To: <5836C87E.8050506@mellanox.com>
On 11/24/2016 12:01 PM, Roi Dayan wrote:
> On 24/11/2016 12:14, Daniel Borkmann wrote:
>> On 11/24/2016 09:29 AM, Roi Dayan wrote:
>>> Hi,
>>>
>>> I'm testing this patch with KASAN enabled and got into a new kernel crash I didn't hit before.
>>>
>>> [ 1860.725065] ==================================================================
>>> [ 1860.733893] BUG: KASAN: use-after-free in __netif_receive_skb_core+0x1ebe/0x29a0 at addr ffff880a68b04028
>>> [ 1860.745415] Read of size 8 by task CPU 0/KVM/5334
>>> [ 1860.751368] CPU: 8 PID: 5334 Comm: CPU 0/KVM Tainted: G O 4.9.0-rc3+ #18
(Btw, your kernel is tainted with o-o-tree module? Anything relevant?)
>>> [ 1860.760547] Hardware name: HP ProLiant DL380p Gen8, BIOS P70 07/01/2015
>>> [ 1860.768036] Call Trace:
>>> [ 1860.771307] [<ffffffffa9b6dc42>] dump_stack+0x63/0x81
>>> [ 1860.777167] [<ffffffffa95fb751>] kasan_object_err+0x21/0x70
>>> [ 1860.783826] [<ffffffffa95fb9dd>] kasan_report_error+0x1ed/0x4e0
>>> [ 1860.790640] [<ffffffffa9b9b841>] ? csum_partial+0x11/0x20
>>> [ 1860.796871] [<ffffffffaa44a6b9>] ? csum_partial_ext+0x9/0x10
>>> [ 1860.803571] [<ffffffffaa453155>] ? __skb_checksum+0x115/0x8d0
>>> [ 1860.810370] [<ffffffffa95fbe81>] __asan_report_load8_noabort+0x61/0x70
>>> [ 1860.818263] [<ffffffffaa49c3fe>] ? __netif_receive_skb_core+0x1ebe/0x29a0
>>> [ 1860.826215] [<ffffffffaa49c3fe>] __netif_receive_skb_core+0x1ebe/0x29a0
>>> [ 1860.833991] [<ffffffffaa49a540>] ? netdev_info+0x100/0x100
>>> [ 1860.840529] [<ffffffffaa671792>] ? udp4_gro_receive+0x802/0x1090
>>> [ 1860.847783] [<ffffffffa9bb9a08>] ? find_next_bit+0x18/0x20
>>> [ 1860.854126] [<ffffffffaa49cf04>] __netif_receive_skb+0x24/0x150
>>> [ 1860.861695] [<ffffffffaa49d0d1>] netif_receive_skb_internal+0xa1/0x1d0
>>> [ 1860.869366] [<ffffffffaa49d030>] ? __netif_receive_skb+0x150/0x150
>>> [ 1860.876464] [<ffffffffaa49f7e9>] ? dev_gro_receive+0x969/0x1660
>>> [ 1860.883924] [<ffffffffaa4a0e1f>] napi_gro_receive+0x1df/0x300
>>> [ 1860.890744] [<ffffffffc02e885d>] mlx5e_handle_rx_cqe_rep+0x83d/0xd30 [mlx5_core]
>>>
>>> checking with gdb
>>>
>>> (gdb) l *(__netif_receive_skb_core+0x1ebe)
>>> 0xffffffff8249c3fe is in __netif_receive_skb_core (net/core/dev.c:3937).
>>> 3932 *pt_prev = NULL;
>>> 3933 }
>>> 3934
>>> 3935 qdisc_skb_cb(skb)->pkt_len = skb->len;
>>> 3936 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
>>> 3937 qdisc_bstats_cpu_update(cl->q, skb);
>>> 3938
>>> 3939 switch (tc_classify(skb, cl, &cl_res, false)) {
>>> 3940 case TC_ACT_OK:
>>> 3941 case TC_ACT_RECLASSIFY:
>>
>> Can you elaborate some more on your test-case? Adding/dropping ingress qdisc with
>> some classifier on it in a loop while traffic goes through?
>
> I first delete the qdisc ingress from the relevant interface
> I start traffic on it then I add the qdisc ingress to the relevant interface and start adding tc flower rules to match the traffic.
Ok, strange, qdisc_destroy() calls into ops->destroy(), where ingress
drops its entire chain via tcf_destroy_chain(), so that will be NULL
eventually. The tps are freed by call_rcu() as well as qdisc itself
later on via qdisc_rcu_free(), where it frees per-cpu bstats as well.
Outstanding readers should either bail out due to if (!cl) or can still
process the chain until read section ends, but during that time, cl->q
resp. bstats should be good. Do you happen to know what's at address
ffff880a68b04028? I was wondering wrt call_rcu() vs call_rcu_bh(), but
at least on ingress (netif_receive_skb_internal()) we hold rcu_read_lock()
here. The KASAN report is reliably happening at this location, right?
^ permalink raw reply
* Re: wl1251 & mac address & calibration data
From: Pali Rohár @ 2016-11-24 15:20 UTC (permalink / raw)
To: Sebastian Reichel
Cc: Pavel Machek, Michal Kazior, Kalle Valo, Ivaylo Dimitrov,
Aaro Koskinen, Tony Lindgren, linux-wireless, Network Development,
linux-kernel
In-Reply-To: <20161124151317.34yoza3dzuh46oa4@earth>
On Thursday 24 November 2016 16:13:17 Sebastian Reichel wrote:
> Hi,
>
> On Thu, Nov 24, 2016 at 09:33:29AM +0100, Pali Rohár wrote:
> > On Thursday 24 November 2016 08:51:04 Pavel Machek wrote:
> > > Hi!
> > >
> > > > > "ifconfig hw ether XX" normally sets the address. I guess that's
> > > > > ioctl?
> > > >
> > > > This sets temporary address and it is ioctl. IIRC same as what ethtool
> > > > uses. (ifconfig is already deprecated).
> > > >
> > > > > And I guess we should use similar mechanism for permanent
> > > > > address.
> > > >
> > > > I'm not sure here... Above ioctl ↑↑↑ is for changing temporary mac
> > > > address. But here we do not want to change permanent mac address. We
> > > > want to tell kernel driver current permanent mac address which is
> > > > stored
> > >
> > > Well... I'd still use similar mechanism :-).
> >
> > Thats problematic, because in time when wlan0 interface is registered
> > into system and visible in ifconfig output it already needs to have
> > permanent mac address assigned.
> >
> > We should assign permanent mac address before wlan0 of wl1251 is
> > registered into system.
>
> You can just add the MAC address to the NVS data, which is also
> required for the device initialization.
NVS data file has fixed size, there is IIRC no place for it.
But one of my suggestion was to use another request_firmware for MAC
address. So this is similar to what you are proposing, as NVS data are
loaded by request_firmware too...
> I wonder if those information could be put into DT. Iirc some
> network devices get their MAC address from DT. Maybe we can add
> all NVS info to DT? How much data is it?
Proprietary, signed and closed bootloader NOLO does not support DT. So
for booting you need to append DTS file to kernel image.
U-Boot is optional and can be used as intermediate bootloader between
NOLO and kernel. But still it has problems with reading from nand, so
cannot read NVS data nor MAC address.
> Userspace application can add all those information to the DT
> using a DT overlay. Also the u-boot could parse and add it at
> some point in the future.
In case when wl1251 is statically linked into kernel image, it is loaded
and initialized before even userspace applications starts.
So no... adding NVS data or MAC address into DT or DT overlay is not a
solution.
--
Pali Rohár
pali.rohar@gmail.com
^ permalink raw reply
* Re: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable
From: Mark Lord @ 2016-11-24 15:24 UTC (permalink / raw)
To: Hayes Wang, netdev@vger.kernel.org
Cc: nic_swsd, linux-kernel@vger.kernel.org, linux-usb@vger.kernel.org
In-Reply-To: <0835B3720019904CB8F7AA43166CEEB201055ED8@RTITMBSV03.realtek.com.tw>
[-- Attachment #1: Type: text/plain, Size: 1958 bytes --]
On 16-11-24 08:26 AM, Hayes Wang wrote:
..
> Besides, it doesn't seem to occur for all platforms. I have
> tested the iperf more than 26 hours, and it still works fine.
> I think I would get the same result on x86 or x86_64 platform.
..
x86 has near fully-coherent memory, so it is the "easy" platform
to get things working on. But Linux supports a very diverse number
of platforms, with varying degrees of cache/memory coherency,
and it can be tricky for things to work correctly on all of them.
If you are testing with the driver as currently in 4.4.34,
then you won't even notice when things are screwing up,
because the driver just silently drops packets.
Or it passes them on without noticing that they have bad data.
Here (attached) is the instrumented driver I am using here now.
I suggest you use it or something similar when testing,
and not the stock driver.
This one has also been converted to use non-cacheable RAM for the
receive buffers -- something that is probably a Good Thing
for it to do regardless of this investigation.
It also never drops a packet without logging the event,
so we can see just how often there's an issue.
This version behaves almost perfectly here, but I am still experimenting
to see what is actually necessary, and what is not. In particular,
there are some mb() calls I had put in there that shouldn't be required,
so I have yet to try removing them again and see what changes.
It takes at least an overnight run to pop up one or two errors,
so do expect to hear back again until after the weekend at this point.
Also, unrelated, but inside r8152_submit_rx() there is this code:
/* The rx would be stopped, so skip submitting */
if (test_bit(RTL8152_UNPLUG, &tp->flags) ||
!test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev))
return 0;
If that "return 0" statement is ever executed, doesn't it result
in the loss/leak of a buffer?
Thanks
[-- Attachment #2: r8152.c --]
[-- Type: text/x-csrc, Size: 102577 bytes --]
/*
* Copyright (c) 2014 Realtek Semiconductor Corp. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*
*/
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/usb.h>
#include <linux/crc32.h>
#include <linux/if_vlan.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <uapi/linux/mdio.h>
#include <linux/mdio.h>
#include <linux/usb/cdc.h>
/* Information for net-next */
#define NETNEXT_VERSION "08"
/* Information for net */
#define NET_VERSION "2"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
#define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
#define MODULENAME "r8152"
#define R8152_PHY_ID 32
#define PLA_IDR 0xc000
#define PLA_RCR 0xc010
#define PLA_RMS 0xc016
#define PLA_RXFIFO_CTRL0 0xc0a0
#define PLA_RXFIFO_CTRL1 0xc0a4
#define PLA_RXFIFO_CTRL2 0xc0a8
#define PLA_DMY_REG0 0xc0b0
#define PLA_FMC 0xc0b4
#define PLA_CFG_WOL 0xc0b6
#define PLA_TEREDO_CFG 0xc0bc
#define PLA_MAR 0xcd00
#define PLA_BACKUP 0xd000
#define PAL_BDC_CR 0xd1a0
#define PLA_TEREDO_TIMER 0xd2cc
#define PLA_REALWOW_TIMER 0xd2e8
#define PLA_LEDSEL 0xdd90
#define PLA_LED_FEATURE 0xdd92
#define PLA_PHYAR 0xde00
#define PLA_BOOT_CTRL 0xe004
#define PLA_GPHY_INTR_IMR 0xe022
#define PLA_EEE_CR 0xe040
#define PLA_EEEP_CR 0xe080
#define PLA_MAC_PWR_CTRL 0xe0c0
#define PLA_MAC_PWR_CTRL2 0xe0ca
#define PLA_MAC_PWR_CTRL3 0xe0cc
#define PLA_MAC_PWR_CTRL4 0xe0ce
#define PLA_WDT6_CTRL 0xe428
#define PLA_TCR0 0xe610
#define PLA_TCR1 0xe612
#define PLA_MTPS 0xe615
#define PLA_TXFIFO_CTRL 0xe618
#define PLA_RSTTALLY 0xe800
#define PLA_CR 0xe813
#define PLA_CRWECR 0xe81c
#define PLA_CONFIG12 0xe81e /* CONFIG1, CONFIG2 */
#define PLA_CONFIG34 0xe820 /* CONFIG3, CONFIG4 */
#define PLA_CONFIG5 0xe822
#define PLA_PHY_PWR 0xe84c
#define PLA_OOB_CTRL 0xe84f
#define PLA_CPCR 0xe854
#define PLA_MISC_0 0xe858
#define PLA_MISC_1 0xe85a
#define PLA_OCP_GPHY_BASE 0xe86c
#define PLA_TALLYCNT 0xe890
#define PLA_SFF_STS_7 0xe8de
#define PLA_PHYSTATUS 0xe908
#define PLA_BP_BA 0xfc26
#define PLA_BP_0 0xfc28
#define PLA_BP_1 0xfc2a
#define PLA_BP_2 0xfc2c
#define PLA_BP_3 0xfc2e
#define PLA_BP_4 0xfc30
#define PLA_BP_5 0xfc32
#define PLA_BP_6 0xfc34
#define PLA_BP_7 0xfc36
#define PLA_BP_EN 0xfc38
#define USB_USB2PHY 0xb41e
#define USB_SSPHYLINK2 0xb428
#define USB_U2P3_CTRL 0xb460
#define USB_CSR_DUMMY1 0xb464
#define USB_CSR_DUMMY2 0xb466
#define USB_DEV_STAT 0xb808
#define USB_CONNECT_TIMER 0xcbf8
#define USB_BURST_SIZE 0xcfc0
#define USB_USB_CTRL 0xd406
#define USB_PHY_CTRL 0xd408
#define USB_TX_AGG 0xd40a
#define USB_RX_BUF_TH 0xd40c
#define USB_USB_TIMER 0xd428
#define USB_RX_EARLY_TIMEOUT 0xd42c
#define USB_RX_EARLY_SIZE 0xd42e
#define USB_PM_CTRL_STATUS 0xd432
#define USB_TX_DMA 0xd434
#define USB_TOLERANCE 0xd490
#define USB_LPM_CTRL 0xd41a
#define USB_UPS_CTRL 0xd800
#define USB_MISC_0 0xd81a
#define USB_POWER_CUT 0xd80a
#define USB_AFE_CTRL2 0xd824
#define USB_WDT11_CTRL 0xe43c
#define USB_BP_BA 0xfc26
#define USB_BP_0 0xfc28
#define USB_BP_1 0xfc2a
#define USB_BP_2 0xfc2c
#define USB_BP_3 0xfc2e
#define USB_BP_4 0xfc30
#define USB_BP_5 0xfc32
#define USB_BP_6 0xfc34
#define USB_BP_7 0xfc36
#define USB_BP_EN 0xfc38
/* OCP Registers */
#define OCP_ALDPS_CONFIG 0x2010
#define OCP_EEE_CONFIG1 0x2080
#define OCP_EEE_CONFIG2 0x2092
#define OCP_EEE_CONFIG3 0x2094
#define OCP_BASE_MII 0xa400
#define OCP_EEE_AR 0xa41a
#define OCP_EEE_DATA 0xa41c
#define OCP_PHY_STATUS 0xa420
#define OCP_POWER_CFG 0xa430
#define OCP_EEE_CFG 0xa432
#define OCP_SRAM_ADDR 0xa436
#define OCP_SRAM_DATA 0xa438
#define OCP_DOWN_SPEED 0xa442
#define OCP_EEE_ABLE 0xa5c4
#define OCP_EEE_ADV 0xa5d0
#define OCP_EEE_LPABLE 0xa5d2
#define OCP_PHY_STATE 0xa708 /* nway state for 8153 */
#define OCP_ADC_CFG 0xbc06
/* SRAM Register */
#define SRAM_LPF_CFG 0x8012
#define SRAM_10M_AMP1 0x8080
#define SRAM_10M_AMP2 0x8082
#define SRAM_IMPEDANCE 0x8084
/* PLA_RCR */
#define RCR_AAP 0x00000001
#define RCR_APM 0x00000002
#define RCR_AM 0x00000004
#define RCR_AB 0x00000008
#define RCR_ACPT_ALL (RCR_AAP | RCR_APM | RCR_AM | RCR_AB)
/* PLA_RXFIFO_CTRL0 */
#define RXFIFO_THR1_NORMAL 0x00080002
#define RXFIFO_THR1_OOB 0x01800003
/* PLA_RXFIFO_CTRL1 */
#define RXFIFO_THR2_FULL 0x00000060
#define RXFIFO_THR2_HIGH 0x00000038
#define RXFIFO_THR2_OOB 0x0000004a
#define RXFIFO_THR2_NORMAL 0x00a0
/* PLA_RXFIFO_CTRL2 */
#define RXFIFO_THR3_FULL 0x00000078
#define RXFIFO_THR3_HIGH 0x00000048
#define RXFIFO_THR3_OOB 0x0000005a
#define RXFIFO_THR3_NORMAL 0x0110
/* PLA_TXFIFO_CTRL */
#define TXFIFO_THR_NORMAL 0x00400008
#define TXFIFO_THR_NORMAL2 0x01000008
/* PLA_DMY_REG0 */
#define ECM_ALDPS 0x0002
/* PLA_FMC */
#define FMC_FCR_MCU_EN 0x0001
/* PLA_EEEP_CR */
#define EEEP_CR_EEEP_TX 0x0002
/* PLA_WDT6_CTRL */
#define WDT6_SET_MODE 0x0010
/* PLA_TCR0 */
#define TCR0_TX_EMPTY 0x0800
#define TCR0_AUTO_FIFO 0x0080
/* PLA_TCR1 */
#define VERSION_MASK 0x7cf0
/* PLA_MTPS */
#define MTPS_JUMBO (12 * 1024 / 64)
#define MTPS_DEFAULT (6 * 1024 / 64)
/* PLA_RSTTALLY */
#define TALLY_RESET 0x0001
/* PLA_CR */
#define CR_RST 0x10
#define CR_RE 0x08
#define CR_TE 0x04
/* PLA_CRWECR */
#define CRWECR_NORAML 0x00
#define CRWECR_CONFIG 0xc0
/* PLA_OOB_CTRL */
#define NOW_IS_OOB 0x80
#define TXFIFO_EMPTY 0x20
#define RXFIFO_EMPTY 0x10
#define LINK_LIST_READY 0x02
#define DIS_MCU_CLROOB 0x01
#define FIFO_EMPTY (TXFIFO_EMPTY | RXFIFO_EMPTY)
/* PLA_MISC_1 */
#define RXDY_GATED_EN 0x0008
/* PLA_SFF_STS_7 */
#define RE_INIT_LL 0x8000
#define MCU_BORW_EN 0x4000
/* PLA_CPCR */
#define CPCR_RX_VLAN 0x0040
/* PLA_CFG_WOL */
#define MAGIC_EN 0x0001
/* PLA_TEREDO_CFG */
#define TEREDO_SEL 0x8000
#define TEREDO_WAKE_MASK 0x7f00
#define TEREDO_RS_EVENT_MASK 0x00fe
#define OOB_TEREDO_EN 0x0001
/* PAL_BDC_CR */
#define ALDPS_PROXY_MODE 0x0001
/* PLA_CONFIG34 */
#define LINK_ON_WAKE_EN 0x0010
#define LINK_OFF_WAKE_EN 0x0008
/* PLA_CONFIG5 */
#define BWF_EN 0x0040
#define MWF_EN 0x0020
#define UWF_EN 0x0010
#define LAN_WAKE_EN 0x0002
/* PLA_LED_FEATURE */
#define LED_MODE_MASK 0x0700
/* PLA_PHY_PWR */
#define TX_10M_IDLE_EN 0x0080
#define PFM_PWM_SWITCH 0x0040
/* PLA_MAC_PWR_CTRL */
#define D3_CLK_GATED_EN 0x00004000
#define MCU_CLK_RATIO 0x07010f07
#define MCU_CLK_RATIO_MASK 0x0f0f0f0f
#define ALDPS_SPDWN_RATIO 0x0f87
/* PLA_MAC_PWR_CTRL2 */
#define EEE_SPDWN_RATIO 0x8007
/* PLA_MAC_PWR_CTRL3 */
#define PKT_AVAIL_SPDWN_EN 0x0100
#define SUSPEND_SPDWN_EN 0x0004
#define U1U2_SPDWN_EN 0x0002
#define L1_SPDWN_EN 0x0001
/* PLA_MAC_PWR_CTRL4 */
#define PWRSAVE_SPDWN_EN 0x1000
#define RXDV_SPDWN_EN 0x0800
#define TX10MIDLE_EN 0x0100
#define TP100_SPDWN_EN 0x0020
#define TP500_SPDWN_EN 0x0010
#define TP1000_SPDWN_EN 0x0008
#define EEE_SPDWN_EN 0x0001
/* PLA_GPHY_INTR_IMR */
#define GPHY_STS_MSK 0x0001
#define SPEED_DOWN_MSK 0x0002
#define SPDWN_RXDV_MSK 0x0004
#define SPDWN_LINKCHG_MSK 0x0008
/* PLA_PHYAR */
#define PHYAR_FLAG 0x80000000
/* PLA_EEE_CR */
#define EEE_RX_EN 0x0001
#define EEE_TX_EN 0x0002
/* PLA_BOOT_CTRL */
#define AUTOLOAD_DONE 0x0002
/* USB_USB2PHY */
#define USB2PHY_SUSPEND 0x0001
#define USB2PHY_L1 0x0002
/* USB_SSPHYLINK2 */
#define pwd_dn_scale_mask 0x3ffe
#define pwd_dn_scale(x) ((x) << 1)
/* USB_CSR_DUMMY1 */
#define DYNAMIC_BURST 0x0001
/* USB_CSR_DUMMY2 */
#define EP4_FULL_FC 0x0001
/* USB_DEV_STAT */
#define STAT_SPEED_MASK 0x0006
#define STAT_SPEED_HIGH 0x0000
#define STAT_SPEED_FULL 0x0002
/* USB_TX_AGG */
#define TX_AGG_MAX_THRESHOLD 0x03
/* USB_RX_BUF_TH */
#define RX_THR_SUPPER 0x0c350180
#define RX_THR_HIGH 0x7a120180
#define RX_THR_SLOW 0xffff0180
/* USB_TX_DMA */
#define TEST_MODE_DISABLE 0x00000001
#define TX_SIZE_ADJUST1 0x00000100
/* USB_UPS_CTRL */
#define POWER_CUT 0x0100
/* USB_PM_CTRL_STATUS */
#define RESUME_INDICATE 0x0001
/* USB_USB_CTRL */
#define RX_AGG_DISABLE 0x0010
#define RX_ZERO_EN 0x0080
/* USB_U2P3_CTRL */
#define U2P3_ENABLE 0x0001
/* USB_POWER_CUT */
#define PWR_EN 0x0001
#define PHASE2_EN 0x0008
/* USB_MISC_0 */
#define PCUT_STATUS 0x0001
/* USB_RX_EARLY_TIMEOUT */
#define COALESCE_SUPER 85000U
#define COALESCE_HIGH 250000U
#define COALESCE_SLOW 524280U
/* USB_WDT11_CTRL */
#define TIMER11_EN 0x0001
/* USB_LPM_CTRL */
/* bit 4 ~ 5: fifo empty boundary */
#define FIFO_EMPTY_1FB 0x30 /* 0x1fb * 64 = 32448 bytes */
/* bit 2 ~ 3: LMP timer */
#define LPM_TIMER_MASK 0x0c
#define LPM_TIMER_500MS 0x04 /* 500 ms */
#define LPM_TIMER_500US 0x0c /* 500 us */
#define ROK_EXIT_LPM 0x02
/* USB_AFE_CTRL2 */
#define SEN_VAL_MASK 0xf800
#define SEN_VAL_NORMAL 0xa000
#define SEL_RXIDLE 0x0100
/* OCP_ALDPS_CONFIG */
#define ENPWRSAVE 0x8000
#define ENPDNPS 0x0200
#define LINKENA 0x0100
#define DIS_SDSAVE 0x0010
/* OCP_PHY_STATUS */
#define PHY_STAT_MASK 0x0007
#define PHY_STAT_LAN_ON 3
#define PHY_STAT_PWRDN 5
/* OCP_POWER_CFG */
#define EEE_CLKDIV_EN 0x8000
#define EN_ALDPS 0x0004
#define EN_10M_PLLOFF 0x0001
/* OCP_EEE_CONFIG1 */
#define RG_TXLPI_MSK_HFDUP 0x8000
#define RG_MATCLR_EN 0x4000
#define EEE_10_CAP 0x2000
#define EEE_NWAY_EN 0x1000
#define TX_QUIET_EN 0x0200
#define RX_QUIET_EN 0x0100
#define sd_rise_time_mask 0x0070
#define sd_rise_time(x) (min(x, 7) << 4) /* bit 4 ~ 6 */
#define RG_RXLPI_MSK_HFDUP 0x0008
#define SDFALLTIME 0x0007 /* bit 0 ~ 2 */
/* OCP_EEE_CONFIG2 */
#define RG_LPIHYS_NUM 0x7000 /* bit 12 ~ 15 */
#define RG_DACQUIET_EN 0x0400
#define RG_LDVQUIET_EN 0x0200
#define RG_CKRSEL 0x0020
#define RG_EEEPRG_EN 0x0010
/* OCP_EEE_CONFIG3 */
#define fast_snr_mask 0xff80
#define fast_snr(x) (min(x, 0x1ff) << 7) /* bit 7 ~ 15 */
#define RG_LFS_SEL 0x0060 /* bit 6 ~ 5 */
#define MSK_PH 0x0006 /* bit 0 ~ 3 */
/* OCP_EEE_AR */
/* bit[15:14] function */
#define FUN_ADDR 0x0000
#define FUN_DATA 0x4000
/* bit[4:0] device addr */
/* OCP_EEE_CFG */
#define CTAP_SHORT_EN 0x0040
#define EEE10_EN 0x0010
/* OCP_DOWN_SPEED */
#define EN_10M_BGOFF 0x0080
/* OCP_PHY_STATE */
#define TXDIS_STATE 0x01
#define ABD_STATE 0x02
/* OCP_ADC_CFG */
#define CKADSEL_L 0x0100
#define ADC_EN 0x0080
#define EN_EMI_L 0x0040
/* SRAM_LPF_CFG */
#define LPF_AUTO_TUNE 0x8000
/* SRAM_10M_AMP1 */
#define GDAC_IB_UPALL 0x0008
/* SRAM_10M_AMP2 */
#define AMP_DN 0x0200
/* SRAM_IMPEDANCE */
#define RX_DRIVING_MASK 0x6000
enum rtl_register_content {
_1000bps = 0x10,
_100bps = 0x08,
_10bps = 0x04,
LINK_STATUS = 0x02,
FULL_DUP = 0x01,
};
#define RTL8152_MAX_TX 4
#define RTL8152_MAX_RX 10
#define INTBUFSIZE 2
#define CRC_SIZE 4
#define TX_ALIGN 4
#define RX_ALIGN 8
#define INTR_LINK 0x0004
#define RTL8152_REQT_READ 0xc0
#define RTL8152_REQT_WRITE 0x40
#define RTL8152_REQ_GET_REGS 0x05
#define RTL8152_REQ_SET_REGS 0x05
#define BYTE_EN_DWORD 0xff
#define BYTE_EN_WORD 0x33
#define BYTE_EN_BYTE 0x11
#define BYTE_EN_SIX_BYTES 0x3f
#define BYTE_EN_START_MASK 0x0f
#define BYTE_EN_END_MASK 0xf0
#define RTL8153_MAX_PACKET 9216 /* 9K */
#define RTL8153_MAX_MTU (RTL8153_MAX_PACKET - VLAN_ETH_HLEN - VLAN_HLEN)
#define RTL8152_RMS (VLAN_ETH_FRAME_LEN + VLAN_HLEN)
#define RTL8153_RMS RTL8153_MAX_PACKET
#define RTL8152_TX_TIMEOUT (5 * HZ)
#define RTL8152_NAPI_WEIGHT 64
/* rtl8152 flags */
enum rtl8152_flags {
RTL8152_UNPLUG = 0,
RTL8152_SET_RX_MODE,
WORK_ENABLE,
RTL8152_LINK_CHG,
SELECTIVE_SUSPEND,
PHY_RESET,
SCHEDULE_NAPI,
};
/* Define these values to match your device */
#define VENDOR_ID_REALTEK 0x0bda
#define VENDOR_ID_SAMSUNG 0x04e8
#define VENDOR_ID_LENOVO 0x17ef
#define VENDOR_ID_NVIDIA 0x0955
#define MCU_TYPE_PLA 0x0100
#define MCU_TYPE_USB 0x0000
struct tally_counter {
__le64 tx_packets;
__le64 rx_packets;
__le64 tx_errors;
__le32 rx_errors;
__le16 rx_missed;
__le16 align_errors;
__le32 tx_one_collision;
__le32 tx_multi_collision;
__le64 rx_unicast;
__le64 rx_broadcast;
__le32 rx_multicast;
__le16 tx_aborted;
__le16 tx_underrun;
};
struct rx_desc {
__le32 opts1;
#define RX_LEN_MASK 0x7fff
__le32 opts2;
#define RD_UDP_CS BIT(23)
#define RD_TCP_CS BIT(22)
#define RD_IPV6_CS BIT(20)
#define RD_IPV4_CS BIT(19)
__le32 opts3;
#define IPF BIT(23) /* IP checksum fail */
#define UDPF BIT(22) /* UDP checksum fail */
#define TCPF BIT(21) /* TCP checksum fail */
#define RX_VLAN_TAG BIT(16)
__le32 opts4;
__le32 opts5;
__le32 opts6;
};
struct tx_desc {
__le32 opts1;
#define TX_FS BIT(31) /* First segment of a packet */
#define TX_LS BIT(30) /* Final segment of a packet */
#define GTSENDV4 BIT(28)
#define GTSENDV6 BIT(27)
#define GTTCPHO_SHIFT 18
#define GTTCPHO_MAX 0x7fU
#define TX_LEN_MAX 0x3ffffU
__le32 opts2;
#define UDP_CS BIT(31) /* Calculate UDP/IP checksum */
#define TCP_CS BIT(30) /* Calculate TCP/IP checksum */
#define IPV4_CS BIT(29) /* Calculate IPv4 checksum */
#define IPV6_CS BIT(28) /* Calculate IPv6 checksum */
#define MSS_SHIFT 17
#define MSS_MAX 0x7ffU
#define TCPHO_SHIFT 17
#define TCPHO_MAX 0x7ffU
#define TX_VLAN_TAG BIT(16)
};
struct r8152;
struct rx_agg {
struct list_head list;
struct urb *urb;
struct r8152 *context;
dma_addr_t transfer_dma;
void *buffer;
void *head;
};
struct tx_agg {
struct list_head list;
struct urb *urb;
struct r8152 *context;
void *buffer;
void *head;
u32 skb_num;
u32 skb_len;
};
struct r8152 {
unsigned long flags;
struct usb_device *udev;
struct napi_struct napi;
struct usb_interface *intf;
struct net_device *netdev;
struct urb *intr_urb;
struct tx_agg tx_info[RTL8152_MAX_TX];
struct rx_agg rx_info[RTL8152_MAX_RX];
struct list_head rx_done, tx_free;
struct sk_buff_head tx_queue, rx_queue;
spinlock_t rx_lock, tx_lock;
struct delayed_work schedule;
struct mii_if_info mii;
struct mutex control; /* use for hw setting */
struct rtl_ops {
void (*init)(struct r8152 *);
int (*enable)(struct r8152 *);
void (*disable)(struct r8152 *);
void (*up)(struct r8152 *);
void (*down)(struct r8152 *);
void (*unload)(struct r8152 *);
int (*eee_get)(struct r8152 *, struct ethtool_eee *);
int (*eee_set)(struct r8152 *, struct ethtool_eee *);
bool (*in_nway)(struct r8152 *);
} rtl_ops;
int intr_interval;
u32 saved_wolopts;
u32 msg_enable;
u32 tx_qlen;
u32 coalesce;
u16 ocp_base;
u8 *intr_buff;
u8 version;
};
enum rtl_version {
RTL_VER_UNKNOWN = 0,
RTL_VER_01,
RTL_VER_02,
RTL_VER_03,
RTL_VER_04,
RTL_VER_05,
RTL_VER_06,
RTL_VER_MAX
};
enum tx_csum_stat {
TX_CSUM_SUCCESS = 0,
TX_CSUM_TSO,
TX_CSUM_NONE
};
/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
* The RTL chips use a 64 element hash table based on the Ethernet CRC.
*/
static const int multicast_filter_limit = 32;
static unsigned int agg_buf_sz = 16384;
#define RTL_LIMITED_TSO_SIZE (agg_buf_sz - sizeof(struct tx_desc) - \
VLAN_ETH_HLEN - VLAN_HLEN)
static
int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
{
int ret;
void *tmp;
tmp = kmalloc(size, GFP_KERNEL);
if (!tmp)
return -ENOMEM;
ret = usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0),
RTL8152_REQ_GET_REGS, RTL8152_REQT_READ,
value, index, tmp, size, 500);
memcpy(data, tmp, size);
kfree(tmp);
return ret;
}
static
int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
{
int ret;
void *tmp;
tmp = kmemdup(data, size, GFP_KERNEL);
if (!tmp)
return -ENOMEM;
ret = usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0),
RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE,
value, index, tmp, size, 500);
kfree(tmp);
return ret;
}
static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size,
void *data, u16 type)
{
u16 limit = 64;
int ret = 0;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return -ENODEV;
/* both size and indix must be 4 bytes align */
if ((size & 3) || !size || (index & 3) || !data)
return -EPERM;
if ((u32)index + (u32)size > 0xffff)
return -EPERM;
while (size) {
if (size > limit) {
ret = get_registers(tp, index, type, limit, data);
if (ret < 0)
break;
index += limit;
data += limit;
size -= limit;
} else {
ret = get_registers(tp, index, type, size, data);
if (ret < 0)
break;
index += size;
data += size;
size = 0;
break;
}
}
if (ret == -ENODEV)
set_bit(RTL8152_UNPLUG, &tp->flags);
return ret;
}
static int generic_ocp_write(struct r8152 *tp, u16 index, u16 byteen,
u16 size, void *data, u16 type)
{
int ret;
u16 byteen_start, byteen_end, byen;
u16 limit = 512;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return -ENODEV;
/* both size and indix must be 4 bytes align */
if ((size & 3) || !size || (index & 3) || !data)
return -EPERM;
if ((u32)index + (u32)size > 0xffff)
return -EPERM;
byteen_start = byteen & BYTE_EN_START_MASK;
byteen_end = byteen & BYTE_EN_END_MASK;
byen = byteen_start | (byteen_start << 4);
ret = set_registers(tp, index, type | byen, 4, data);
if (ret < 0)
goto error1;
index += 4;
data += 4;
size -= 4;
if (size) {
size -= 4;
while (size) {
if (size > limit) {
ret = set_registers(tp, index,
type | BYTE_EN_DWORD,
limit, data);
if (ret < 0)
goto error1;
index += limit;
data += limit;
size -= limit;
} else {
ret = set_registers(tp, index,
type | BYTE_EN_DWORD,
size, data);
if (ret < 0)
goto error1;
index += size;
data += size;
size = 0;
break;
}
}
byen = byteen_end | (byteen_end >> 4);
ret = set_registers(tp, index, type | byen, 4, data);
if (ret < 0)
goto error1;
}
error1:
if (ret == -ENODEV)
set_bit(RTL8152_UNPLUG, &tp->flags);
return ret;
}
static inline
int pla_ocp_read(struct r8152 *tp, u16 index, u16 size, void *data)
{
return generic_ocp_read(tp, index, size, data, MCU_TYPE_PLA);
}
static inline
int pla_ocp_write(struct r8152 *tp, u16 index, u16 byteen, u16 size, void *data)
{
return generic_ocp_write(tp, index, byteen, size, data, MCU_TYPE_PLA);
}
static inline
int usb_ocp_read(struct r8152 *tp, u16 index, u16 size, void *data)
{
return generic_ocp_read(tp, index, size, data, MCU_TYPE_USB);
}
static inline
int usb_ocp_write(struct r8152 *tp, u16 index, u16 byteen, u16 size, void *data)
{
return generic_ocp_write(tp, index, byteen, size, data, MCU_TYPE_USB);
}
static u32 ocp_read_dword(struct r8152 *tp, u16 type, u16 index)
{
__le32 data;
generic_ocp_read(tp, index, sizeof(data), &data, type);
return __le32_to_cpu(data);
}
static void ocp_write_dword(struct r8152 *tp, u16 type, u16 index, u32 data)
{
__le32 tmp = __cpu_to_le32(data);
generic_ocp_write(tp, index, BYTE_EN_DWORD, sizeof(tmp), &tmp, type);
}
static u16 ocp_read_word(struct r8152 *tp, u16 type, u16 index)
{
u32 data;
__le32 tmp;
u8 shift = index & 2;
index &= ~3;
generic_ocp_read(tp, index, sizeof(tmp), &tmp, type);
data = __le32_to_cpu(tmp);
data >>= (shift * 8);
data &= 0xffff;
return (u16)data;
}
static void ocp_write_word(struct r8152 *tp, u16 type, u16 index, u32 data)
{
u32 mask = 0xffff;
__le32 tmp;
u16 byen = BYTE_EN_WORD;
u8 shift = index & 2;
data &= mask;
if (index & 2) {
byen <<= shift;
mask <<= (shift * 8);
data <<= (shift * 8);
index &= ~3;
}
tmp = __cpu_to_le32(data);
generic_ocp_write(tp, index, byen, sizeof(tmp), &tmp, type);
}
static u8 ocp_read_byte(struct r8152 *tp, u16 type, u16 index)
{
u32 data;
__le32 tmp;
u8 shift = index & 3;
index &= ~3;
generic_ocp_read(tp, index, sizeof(tmp), &tmp, type);
data = __le32_to_cpu(tmp);
data >>= (shift * 8);
data &= 0xff;
return (u8)data;
}
static void ocp_write_byte(struct r8152 *tp, u16 type, u16 index, u32 data)
{
u32 mask = 0xff;
__le32 tmp;
u16 byen = BYTE_EN_BYTE;
u8 shift = index & 3;
data &= mask;
if (index & 3) {
byen <<= shift;
mask <<= (shift * 8);
data <<= (shift * 8);
index &= ~3;
}
tmp = __cpu_to_le32(data);
generic_ocp_write(tp, index, byen, sizeof(tmp), &tmp, type);
}
static u16 ocp_reg_read(struct r8152 *tp, u16 addr)
{
u16 ocp_base, ocp_index;
ocp_base = addr & 0xf000;
if (ocp_base != tp->ocp_base) {
ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, ocp_base);
tp->ocp_base = ocp_base;
}
ocp_index = (addr & 0x0fff) | 0xb000;
return ocp_read_word(tp, MCU_TYPE_PLA, ocp_index);
}
static void ocp_reg_write(struct r8152 *tp, u16 addr, u16 data)
{
u16 ocp_base, ocp_index;
ocp_base = addr & 0xf000;
if (ocp_base != tp->ocp_base) {
ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, ocp_base);
tp->ocp_base = ocp_base;
}
ocp_index = (addr & 0x0fff) | 0xb000;
ocp_write_word(tp, MCU_TYPE_PLA, ocp_index, data);
}
static inline void r8152_mdio_write(struct r8152 *tp, u32 reg_addr, u32 value)
{
ocp_reg_write(tp, OCP_BASE_MII + reg_addr * 2, value);
}
static inline int r8152_mdio_read(struct r8152 *tp, u32 reg_addr)
{
return ocp_reg_read(tp, OCP_BASE_MII + reg_addr * 2);
}
static void sram_write(struct r8152 *tp, u16 addr, u16 data)
{
ocp_reg_write(tp, OCP_SRAM_ADDR, addr);
ocp_reg_write(tp, OCP_SRAM_DATA, data);
}
static int read_mii_word(struct net_device *netdev, int phy_id, int reg)
{
struct r8152 *tp = netdev_priv(netdev);
int ret;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return -ENODEV;
if (phy_id != R8152_PHY_ID)
return -EINVAL;
ret = r8152_mdio_read(tp, reg);
return ret;
}
static
void write_mii_word(struct net_device *netdev, int phy_id, int reg, int val)
{
struct r8152 *tp = netdev_priv(netdev);
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
if (phy_id != R8152_PHY_ID)
return;
r8152_mdio_write(tp, reg, val);
}
static int
r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags);
static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
{
struct r8152 *tp = netdev_priv(netdev);
struct sockaddr *addr = p;
int ret = -EADDRNOTAVAIL;
if (!is_valid_ether_addr(addr->sa_data))
goto out1;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out1;
mutex_lock(&tp->control);
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
pla_ocp_write(tp, PLA_IDR, BYTE_EN_SIX_BYTES, 8, addr->sa_data);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out1:
return ret;
}
static int set_ethernet_addr(struct r8152 *tp)
{
struct net_device *dev = tp->netdev;
struct sockaddr sa;
int ret;
if (tp->version == RTL_VER_01)
ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
else
ret = pla_ocp_read(tp, PLA_BACKUP, 8, sa.sa_data);
if (ret < 0) {
netif_err(tp, probe, dev, "Get ether addr fail\n");
} else if (!is_valid_ether_addr(sa.sa_data)) {
netif_err(tp, probe, dev, "Invalid ether addr %pM\n",
sa.sa_data);
eth_hw_addr_random(dev);
ether_addr_copy(sa.sa_data, dev->dev_addr);
ret = rtl8152_set_mac_address(dev, &sa);
netif_info(tp, probe, dev, "Random ether addr %pM\n",
sa.sa_data);
} else {
if (tp->version == RTL_VER_01)
ether_addr_copy(dev->dev_addr, sa.sa_data);
else
ret = rtl8152_set_mac_address(dev, &sa);
}
return ret;
}
static void read_bulk_callback(struct urb *urb)
{
struct net_device *netdev;
int status = urb->status;
struct rx_agg *agg;
struct r8152 *tp;
agg = urb->context;
if (!agg)
return;
tp = agg->context;
if (!tp)
return;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
if (!test_bit(WORK_ENABLE, &tp->flags))
return;
netdev = tp->netdev;
/* When link down, the driver would cancel all bulks. */
/* This avoid the re-submitting bulk */
if (!netif_carrier_ok(netdev))
return;
usb_mark_last_busy(tp->udev);
switch (status) {
case 0:
mb();
if (urb->actual_length < (sizeof(struct rx_desc) + ETH_ZLEN)) {
printk(KERN_INFO "r8152_read_bulk_callback: actual_length (%u) too short\n", urb->actual_length);
break;
}
spin_lock(&tp->rx_lock);
list_add_tail(&agg->list, &tp->rx_done);
spin_unlock(&tp->rx_lock);
napi_schedule(&tp->napi);
return;
case -ESHUTDOWN:
set_bit(RTL8152_UNPLUG, &tp->flags);
netif_device_detach(tp->netdev);
return;
case -ENOENT:
return; /* the urb is in unlink state */
case -ETIME:
if (net_ratelimit())
netdev_warn(netdev, "maybe reset is needed?\n");
break;
default:
if (net_ratelimit())
netdev_warn(netdev, "Rx status %d\n", status);
break;
}
r8152_submit_rx(tp, agg, GFP_ATOMIC);
}
static void write_bulk_callback(struct urb *urb)
{
struct net_device_stats *stats;
struct net_device *netdev;
struct tx_agg *agg;
struct r8152 *tp;
int status = urb->status;
agg = urb->context;
if (!agg)
return;
tp = agg->context;
if (!tp)
return;
netdev = tp->netdev;
stats = &netdev->stats;
if (status) {
if (net_ratelimit())
netdev_warn(netdev, "Tx status %d\n", status);
stats->tx_errors += agg->skb_num;
} else {
stats->tx_packets += agg->skb_num;
stats->tx_bytes += agg->skb_len;
}
spin_lock(&tp->tx_lock);
list_add_tail(&agg->list, &tp->tx_free);
spin_unlock(&tp->tx_lock);
usb_autopm_put_interface_async(tp->intf);
if (!netif_carrier_ok(netdev))
return;
if (!test_bit(WORK_ENABLE, &tp->flags))
return;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
if (!skb_queue_empty(&tp->tx_queue))
napi_schedule(&tp->napi);
}
static void intr_callback(struct urb *urb)
{
struct r8152 *tp;
__le16 *d;
int status = urb->status;
int res;
tp = urb->context;
if (!tp)
return;
if (!test_bit(WORK_ENABLE, &tp->flags))
return;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
switch (status) {
case 0: /* success */
break;
case -ECONNRESET: /* unlink */
case -ESHUTDOWN:
netif_device_detach(tp->netdev);
case -ENOENT:
case -EPROTO:
netif_info(tp, intr, tp->netdev,
"Stop submitting intr, status %d\n", status);
return;
case -EOVERFLOW:
netif_info(tp, intr, tp->netdev, "intr status -EOVERFLOW\n");
goto resubmit;
/* -EPIPE: should clear the halt */
default:
netif_info(tp, intr, tp->netdev, "intr status %d\n", status);
goto resubmit;
}
d = urb->transfer_buffer;
if (INTR_LINK & __le16_to_cpu(d[0])) {
if (!netif_carrier_ok(tp->netdev)) {
set_bit(RTL8152_LINK_CHG, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
}
} else {
if (netif_carrier_ok(tp->netdev)) {
set_bit(RTL8152_LINK_CHG, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
}
}
resubmit:
res = usb_submit_urb(urb, GFP_ATOMIC);
if (res == -ENODEV) {
set_bit(RTL8152_UNPLUG, &tp->flags);
netif_device_detach(tp->netdev);
} else if (res) {
netif_err(tp, intr, tp->netdev,
"can't resubmit intr, status %d\n", res);
}
}
static inline void *rx_agg_align(void *data)
{
return (void *)ALIGN((uintptr_t)data, RX_ALIGN);
}
static inline void *tx_agg_align(void *data)
{
return (void *)ALIGN((uintptr_t)data, TX_ALIGN);
}
static void free_all_mem(struct r8152 *tp)
{
int i;
for (i = 0; i < RTL8152_MAX_RX; i++) {
usb_free_urb(tp->rx_info[i].urb);
tp->rx_info[i].urb = NULL;
usb_free_coherent(tp->udev, agg_buf_sz, tp->rx_info[i].buffer, tp->rx_info[i].transfer_dma);
tp->rx_info[i].buffer = NULL;
tp->rx_info[i].head = NULL;
}
for (i = 0; i < RTL8152_MAX_TX; i++) {
usb_free_urb(tp->tx_info[i].urb);
tp->tx_info[i].urb = NULL;
kfree(tp->tx_info[i].buffer);
tp->tx_info[i].buffer = NULL;
tp->tx_info[i].head = NULL;
}
usb_free_urb(tp->intr_urb);
tp->intr_urb = NULL;
kfree(tp->intr_buff);
tp->intr_buff = NULL;
}
static int alloc_all_mem(struct r8152 *tp)
{
struct net_device *netdev = tp->netdev;
struct usb_interface *intf = tp->intf;
struct usb_host_interface *alt = intf->cur_altsetting;
struct usb_host_endpoint *ep_intr = alt->endpoint + 2;
struct urb *urb;
int node, i;
u8 *buf;
node = netdev->dev.parent ? dev_to_node(netdev->dev.parent) : -1;
spin_lock_init(&tp->rx_lock);
spin_lock_init(&tp->tx_lock);
INIT_LIST_HEAD(&tp->tx_free);
skb_queue_head_init(&tp->tx_queue);
skb_queue_head_init(&tp->rx_queue);
for (i = 0; i < RTL8152_MAX_RX; i++) {
dma_addr_t transfer_dma = 0;
buf = usb_alloc_coherent(tp->udev, agg_buf_sz, GFP_KERNEL, &transfer_dma);
if (!buf)
goto err1;
if (buf != rx_agg_align(buf)) {
kfree(buf);
buf = kmalloc_node(agg_buf_sz + RX_ALIGN, GFP_KERNEL,
node);
if (!buf)
goto err1;
}
urb = usb_alloc_urb(0, GFP_KERNEL);
if (!urb) {
kfree(buf);
goto err1;
}
INIT_LIST_HEAD(&tp->rx_info[i].list);
tp->rx_info[i].context = tp;
tp->rx_info[i].urb = urb;
tp->rx_info[i].transfer_dma = transfer_dma;
tp->rx_info[i].buffer = buf;
tp->rx_info[i].head = rx_agg_align(buf);
}
for (i = 0; i < RTL8152_MAX_TX; i++) {
buf = kmalloc_node(agg_buf_sz, GFP_KERNEL, node);
if (!buf)
goto err1;
if (buf != tx_agg_align(buf)) {
kfree(buf);
buf = kmalloc_node(agg_buf_sz + TX_ALIGN, GFP_KERNEL,
node);
if (!buf)
goto err1;
}
urb = usb_alloc_urb(0, GFP_KERNEL);
if (!urb) {
kfree(buf);
goto err1;
}
INIT_LIST_HEAD(&tp->tx_info[i].list);
tp->tx_info[i].context = tp;
tp->tx_info[i].urb = urb;
tp->tx_info[i].buffer = buf;
tp->tx_info[i].head = tx_agg_align(buf);
list_add_tail(&tp->tx_info[i].list, &tp->tx_free);
}
tp->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!tp->intr_urb)
goto err1;
tp->intr_buff = kmalloc(INTBUFSIZE, GFP_KERNEL);
if (!tp->intr_buff)
goto err1;
tp->intr_interval = (int)ep_intr->desc.bInterval;
usb_fill_int_urb(tp->intr_urb, tp->udev, usb_rcvintpipe(tp->udev, 3),
tp->intr_buff, INTBUFSIZE, intr_callback,
tp, tp->intr_interval);
return 0;
err1:
free_all_mem(tp);
return -ENOMEM;
}
static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
{
struct tx_agg *agg = NULL;
unsigned long flags;
if (list_empty(&tp->tx_free))
return NULL;
spin_lock_irqsave(&tp->tx_lock, flags);
if (!list_empty(&tp->tx_free)) {
struct list_head *cursor;
cursor = tp->tx_free.next;
list_del_init(cursor);
agg = list_entry(cursor, struct tx_agg, list);
}
spin_unlock_irqrestore(&tp->tx_lock, flags);
return agg;
}
/* r8152_csum_workaround()
* The hw limites the value the transport offset. When the offset is out of the
* range, calculate the checksum by sw.
*/
static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
struct sk_buff_head *list)
{
if (skb_shinfo(skb)->gso_size) {
netdev_features_t features = tp->netdev->features;
struct sk_buff_head seg_list;
struct sk_buff *segs, *nskb;
features &= ~(NETIF_F_SG | NETIF_F_IPV6_CSUM | NETIF_F_TSO6);
segs = skb_gso_segment(skb, features);
if (IS_ERR(segs) || !segs)
goto drop;
__skb_queue_head_init(&seg_list);
do {
nskb = segs;
segs = segs->next;
nskb->next = NULL;
__skb_queue_tail(&seg_list, nskb);
} while (segs);
skb_queue_splice(&seg_list, list);
dev_kfree_skb(skb);
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (skb_checksum_help(skb) < 0)
goto drop;
__skb_queue_head(list, skb);
} else {
struct net_device_stats *stats;
drop:
stats = &tp->netdev->stats;
stats->tx_dropped++;
dev_kfree_skb(skb);
}
}
/* msdn_giant_send_check()
* According to the document of microsoft, the TCP Pseudo Header excludes the
* packet length for IPv6 TCP large packets.
*/
static int msdn_giant_send_check(struct sk_buff *skb)
{
const struct ipv6hdr *ipv6h;
struct tcphdr *th;
int ret;
ret = skb_cow_head(skb, 0);
if (ret)
return ret;
ipv6h = ipv6_hdr(skb);
th = tcp_hdr(skb);
th->check = 0;
th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0);
return ret;
}
static inline void rtl_tx_vlan_tag(struct tx_desc *desc, struct sk_buff *skb)
{
if (skb_vlan_tag_present(skb)) {
u32 opts2;
opts2 = TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb));
desc->opts2 |= cpu_to_le32(opts2);
}
}
static inline void rtl_rx_vlan_tag(struct rx_desc *desc, struct sk_buff *skb)
{
u32 opts2 = le32_to_cpu(desc->opts2);
if (opts2 & RX_VLAN_TAG)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
swab16(opts2 & 0xffff));
}
static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
struct sk_buff *skb, u32 len, u32 transport_offset)
{
u32 mss = skb_shinfo(skb)->gso_size;
u32 opts1, opts2 = 0;
int ret = TX_CSUM_SUCCESS;
WARN_ON_ONCE(len > TX_LEN_MAX);
opts1 = len | TX_FS | TX_LS;
if (mss) {
if (transport_offset > GTTCPHO_MAX) {
netif_warn(tp, tx_err, tp->netdev,
"Invalid transport offset 0x%x for TSO\n",
transport_offset);
ret = TX_CSUM_TSO;
goto unavailable;
}
switch (vlan_get_protocol(skb)) {
case htons(ETH_P_IP):
opts1 |= GTSENDV4;
break;
case htons(ETH_P_IPV6):
if (msdn_giant_send_check(skb)) {
ret = TX_CSUM_TSO;
goto unavailable;
}
opts1 |= GTSENDV6;
break;
default:
WARN_ON_ONCE(1);
break;
}
opts1 |= transport_offset << GTTCPHO_SHIFT;
opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
u8 ip_protocol;
if (transport_offset > TCPHO_MAX) {
netif_warn(tp, tx_err, tp->netdev,
"Invalid transport offset 0x%x\n",
transport_offset);
ret = TX_CSUM_NONE;
goto unavailable;
}
switch (vlan_get_protocol(skb)) {
case htons(ETH_P_IP):
opts2 |= IPV4_CS;
ip_protocol = ip_hdr(skb)->protocol;
break;
case htons(ETH_P_IPV6):
opts2 |= IPV6_CS;
ip_protocol = ipv6_hdr(skb)->nexthdr;
break;
default:
ip_protocol = IPPROTO_RAW;
break;
}
if (ip_protocol == IPPROTO_TCP)
opts2 |= TCP_CS;
else if (ip_protocol == IPPROTO_UDP)
opts2 |= UDP_CS;
else
WARN_ON_ONCE(1);
opts2 |= transport_offset << TCPHO_SHIFT;
}
desc->opts2 = cpu_to_le32(opts2);
desc->opts1 = cpu_to_le32(opts1);
unavailable:
return ret;
}
static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
{
struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
int remain, ret;
u8 *tx_data;
__skb_queue_head_init(&skb_head);
spin_lock(&tx_queue->lock);
skb_queue_splice_init(tx_queue, &skb_head);
spin_unlock(&tx_queue->lock);
tx_data = agg->head;
agg->skb_num = 0;
agg->skb_len = 0;
remain = agg_buf_sz;
while (remain >= ETH_ZLEN + sizeof(struct tx_desc)) {
struct tx_desc *tx_desc;
struct sk_buff *skb;
unsigned int len;
u32 offset;
skb = __skb_dequeue(&skb_head);
if (!skb)
break;
len = skb->len + sizeof(*tx_desc);
if (len > remain) {
__skb_queue_head(&skb_head, skb);
break;
}
tx_data = tx_agg_align(tx_data);
tx_desc = (struct tx_desc *)tx_data;
offset = (u32)skb_transport_offset(skb);
if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
r8152_csum_workaround(tp, skb, &skb_head);
continue;
}
rtl_tx_vlan_tag(tx_desc, skb);
tx_data += sizeof(*tx_desc);
len = skb->len;
if (skb_copy_bits(skb, 0, tx_data, len) < 0) {
struct net_device_stats *stats = &tp->netdev->stats;
stats->tx_dropped++;
dev_kfree_skb_any(skb);
tx_data -= sizeof(*tx_desc);
continue;
}
tx_data += len;
agg->skb_len += len;
agg->skb_num++;
dev_kfree_skb_any(skb);
remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
}
if (!skb_queue_empty(&skb_head)) {
spin_lock(&tx_queue->lock);
skb_queue_splice(&skb_head, tx_queue);
spin_unlock(&tx_queue->lock);
}
netif_tx_lock(tp->netdev);
if (netif_queue_stopped(tp->netdev) &&
skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
netif_wake_queue(tp->netdev);
netif_tx_unlock(tp->netdev);
ret = usb_autopm_get_interface_async(tp->intf);
if (ret < 0)
goto out_tx_fill;
usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2),
agg->head, (int)(tx_data - (u8 *)agg->head),
(usb_complete_t)write_bulk_callback, agg);
ret = usb_submit_urb(agg->urb, GFP_ATOMIC);
if (ret < 0)
usb_autopm_put_interface_async(tp->intf);
out_tx_fill:
return ret;
}
static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
{
u8 checksum = CHECKSUM_NONE;
u32 opts2, opts3;
if (tp->version == RTL_VER_01)
goto return_result;
opts2 = le32_to_cpu(rx_desc->opts2);
opts3 = le32_to_cpu(rx_desc->opts3);
if (opts2 & RD_IPV4_CS) {
if (opts3 & IPF)
checksum = CHECKSUM_NONE;
else if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
checksum = CHECKSUM_NONE;
else if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
checksum = CHECKSUM_NONE;
else
checksum = CHECKSUM_UNNECESSARY;
} else if (opts2 & RD_IPV6_CS) {
if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
checksum = CHECKSUM_UNNECESSARY;
else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
checksum = CHECKSUM_UNNECESSARY;
}
return_result:
return checksum;
}
static void r8152_dump_rx_desc(struct rx_desc *rx_desc)
{
int rx_len = (le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK);
printk(KERN_INFO "%s: %08x %08x %08x %08x %08x %08x rx_len=%d\n",
__func__,
le32_to_cpu(rx_desc->opts1),
le32_to_cpu(rx_desc->opts2),
le32_to_cpu(rx_desc->opts3),
le32_to_cpu(rx_desc->opts4),
le32_to_cpu(rx_desc->opts5),
le32_to_cpu(rx_desc->opts6),
rx_len);
}
static int r8152_check_rx_desc(struct r8152 *tp, struct rx_desc *rx_desc)
{
u32 opts1, opts2, opts3, opts4, opts5, opts6;
int pkt_len;
if (tp->version == RTL_VER_01)
return 0; /* rx_desc looks okay */
opts1 = le32_to_cpu(rx_desc->opts1);
opts2 = le32_to_cpu(rx_desc->opts2);
opts3 = le32_to_cpu(rx_desc->opts3);
opts4 = le32_to_cpu(rx_desc->opts4);
opts5 = le32_to_cpu(rx_desc->opts5);
opts6 = le32_to_cpu(rx_desc->opts6);
pkt_len = (opts1 & RX_LEN_MASK) - CRC_SIZE;
if ( !opts1
|| ((opts1 & 0x0ff3f000) != 0x04400000 && (opts1 & 0xffff0000) != 0x00040000 && (opts1 & 0xffff0000) != 0x00080000)
|| (opts2 & ~(BIT(30)|RD_UDP_CS|RD_TCP_CS|RD_IPV6_CS|RD_IPV4_CS))
|| ((opts2 & RD_IPV6_CS) && (opts2 & RD_IPV4_CS))
|| ((opts3 & 0xffff0000) & ~(IPF|UDPF|TCPF|RX_VLAN_TAG)) // 0xff170000
|| (opts4 & 0x060cfff8) != 0x06000000
|| (opts5 | opts6)
|| pkt_len > (tp->netdev->mtu + 42)
){
printk(KERN_WARNING "%s: rx_desc looks bad.\n", __func__);
return -EIO; /* rx_desc looks bad */
}
return 0; /* rx_desc looks okay */
}
static int rx_bottom(struct r8152 *tp, int budget)
{
unsigned long flags;
struct list_head *cursor, *next, rx_queue;
int ret = 0, work_done = 0;
if (!skb_queue_empty(&tp->rx_queue)) {
while (work_done < budget) {
struct sk_buff *skb = __skb_dequeue(&tp->rx_queue);
struct net_device *netdev = tp->netdev;
struct net_device_stats *stats = &netdev->stats;
unsigned int pkt_len;
if (!skb)
break;
pkt_len = skb->len;
napi_gro_receive(&tp->napi, skb);
work_done++;
stats->rx_packets++;
stats->rx_bytes += pkt_len;
}
}
if (list_empty(&tp->rx_done))
goto out1;
INIT_LIST_HEAD(&rx_queue);
spin_lock_irqsave(&tp->rx_lock, flags);
list_splice_init(&tp->rx_done, &rx_queue);
spin_unlock_irqrestore(&tp->rx_lock, flags);
list_for_each_safe(cursor, next, &rx_queue) {
struct rx_desc *rx_desc;
struct rx_agg *agg;
int len_used = 0;
struct urb *urb;
u8 *rx_data;
list_del_init(cursor);
agg = list_entry(cursor, struct rx_agg, list);
urb = agg->urb;
if (urb->actual_length < (sizeof(struct rx_desc) + ETH_ZLEN)) {
printk(KERN_WARNING "r8152_rx_bottom: URB too small: actual_length=%u\n", urb->actual_length);
goto submit;
}
rx_desc = agg->head;
rx_data = agg->head;
mb();
while (urb->actual_length > len_used) {
struct net_device *netdev = tp->netdev;
struct net_device_stats *stats = &netdev->stats;
unsigned int pkt_len;
struct sk_buff *skb;
if ((len_used + sizeof(struct rx_desc)) > urb->actual_length) {
printk(KERN_WARNING "r8152_rx_bottom: offset=%u/%u too small for rx_desc\n",
len_used, urb->actual_length);
break;
}
len_used += sizeof(struct rx_desc);
if (r8152_check_rx_desc(tp, rx_desc)) {
printk(KERN_WARNING "r8152_rx_bottom: offset=%u/%u bad rx_desc\n",
len_used - sizeof(struct rx_desc), urb->actual_length);
r8152_dump_rx_desc(rx_desc);
}
pkt_len = le32_to_cpu(rx_desc->opts1) & RX_LEN_MASK;
if (pkt_len < ETH_ZLEN) {
printk(KERN_WARNING "r8152_rx_bottom: offset=%u/%u pkt_len(%u) < ETH_ZLEN\n",
len_used, urb->actual_length, pkt_len);
r8152_dump_rx_desc(rx_desc);
break;
}
len_used += pkt_len;
if (urb->actual_length < len_used) {
printk(KERN_WARNING "r8152_rx_bottom: offset=%u/%u pkt_len(%u) exceeds buffer\n",
len_used - pkt_len, urb->actual_length, pkt_len);
r8152_dump_rx_desc(rx_desc);
break;
}
pkt_len -= CRC_SIZE;
rx_data += sizeof(struct rx_desc);
skb = netdev_alloc_skb_ip_align(netdev, pkt_len);
if (!skb) {
printk(KERN_WARNING "r8152_rx_bottom: netdev_alloc_skb_ip_align(%u) failed\n", pkt_len);
stats->rx_dropped++;
goto find_next_rx;
}
skb->ip_summed = r8152_rx_csum(tp, rx_desc);
memcpy(skb->data, rx_data, pkt_len);
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, netdev);
rtl_rx_vlan_tag(rx_desc, skb);
if (work_done < budget) {
napi_gro_receive(&tp->napi, skb);
work_done++;
stats->rx_packets++;
stats->rx_bytes += pkt_len;
} else {
__skb_queue_tail(&tp->rx_queue, skb);
}
find_next_rx:
rx_data = rx_agg_align(rx_data + pkt_len + CRC_SIZE);
rx_desc = (struct rx_desc *)rx_data;
len_used = (int)(rx_data - (u8 *)agg->head);
}
submit:
if (!ret) {
ret = r8152_submit_rx(tp, agg, GFP_ATOMIC);
} else {
urb->actual_length = 0;
list_add_tail(&agg->list, next);
}
}
if (!list_empty(&rx_queue)) {
spin_lock_irqsave(&tp->rx_lock, flags);
list_splice_tail(&rx_queue, &tp->rx_done);
spin_unlock_irqrestore(&tp->rx_lock, flags);
}
out1:
return work_done;
}
static void tx_bottom(struct r8152 *tp)
{
int res;
do {
struct tx_agg *agg;
if (skb_queue_empty(&tp->tx_queue))
break;
agg = r8152_get_tx_agg(tp);
if (!agg)
break;
res = r8152_tx_agg_fill(tp, agg);
if (res) {
struct net_device *netdev = tp->netdev;
if (res == -ENODEV) {
set_bit(RTL8152_UNPLUG, &tp->flags);
netif_device_detach(netdev);
} else {
struct net_device_stats *stats = &netdev->stats;
unsigned long flags;
netif_warn(tp, tx_err, netdev,
"failed tx_urb %d\n", res);
stats->tx_dropped += agg->skb_num;
spin_lock_irqsave(&tp->tx_lock, flags);
list_add_tail(&agg->list, &tp->tx_free);
spin_unlock_irqrestore(&tp->tx_lock, flags);
}
}
} while (res == 0);
}
static void bottom_half(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
if (!test_bit(WORK_ENABLE, &tp->flags))
return;
/* When link down, the driver would cancel all bulks. */
/* This avoid the re-submitting bulk */
if (!netif_carrier_ok(tp->netdev))
return;
clear_bit(SCHEDULE_NAPI, &tp->flags);
tx_bottom(tp);
}
static int r8152_poll(struct napi_struct *napi, int budget)
{
struct r8152 *tp = container_of(napi, struct r8152, napi);
int work_done;
work_done = rx_bottom(tp, budget);
bottom_half(tp);
if (work_done < budget) {
napi_complete(napi);
if (!list_empty(&tp->rx_done))
napi_schedule(napi);
}
return work_done;
}
static
int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags)
{
int ret;
/* The rx would be stopped, so skip submitting */
if (test_bit(RTL8152_UNPLUG, &tp->flags) ||
!test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev))
return 0; /* FIXME: memory leak? */
usb_fill_bulk_urb(agg->urb, tp->udev, usb_rcvbulkpipe(tp->udev, 1),
agg->head, agg_buf_sz,
(usb_complete_t)read_bulk_callback, agg);
agg->urb->transfer_dma = agg->transfer_dma;
agg->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
mb();
ret = usb_submit_urb(agg->urb, mem_flags);
if (ret == -ENODEV) {
set_bit(RTL8152_UNPLUG, &tp->flags);
netif_device_detach(tp->netdev);
} else if (ret) {
struct urb *urb = agg->urb;
unsigned long flags;
urb->actual_length = 0;
spin_lock_irqsave(&tp->rx_lock, flags);
list_add_tail(&agg->list, &tp->rx_done);
spin_unlock_irqrestore(&tp->rx_lock, flags);
netif_err(tp, rx_err, tp->netdev,
"Couldn't submit rx[%p], ret = %d\n", agg, ret);
napi_schedule(&tp->napi);
}
return ret;
}
static void rtl_drop_queued_tx(struct r8152 *tp)
{
struct net_device_stats *stats = &tp->netdev->stats;
struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
struct sk_buff *skb;
if (skb_queue_empty(tx_queue))
return;
__skb_queue_head_init(&skb_head);
spin_lock_bh(&tx_queue->lock);
skb_queue_splice_init(tx_queue, &skb_head);
spin_unlock_bh(&tx_queue->lock);
while ((skb = __skb_dequeue(&skb_head))) {
dev_kfree_skb(skb);
stats->tx_dropped++;
}
}
static void rtl8152_tx_timeout(struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
netif_warn(tp, tx_err, netdev, "Tx timeout\n");
usb_queue_reset_device(tp->intf);
}
static void rtl8152_set_rx_mode(struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
if (netif_carrier_ok(netdev)) {
set_bit(RTL8152_SET_RX_MODE, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
}
}
static void _rtl8152_set_rx_mode(struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
u32 mc_filter[2]; /* Multicast hash filter */
__le32 tmp[2];
u32 ocp_data;
clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
netif_stop_queue(netdev);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data &= ~RCR_ACPT_ALL;
ocp_data |= RCR_AB | RCR_APM;
if (netdev->flags & IFF_PROMISC) {
/* Unconditionally log net taps. */
netif_notice(tp, link, netdev, "Promiscuous mode enabled\n");
ocp_data |= RCR_AM | RCR_AAP;
mc_filter[1] = 0xffffffff;
mc_filter[0] = 0xffffffff;
} else if ((netdev_mc_count(netdev) > multicast_filter_limit) ||
(netdev->flags & IFF_ALLMULTI)) {
/* Too many to filter perfectly -- accept all multicasts. */
ocp_data |= RCR_AM;
mc_filter[1] = 0xffffffff;
mc_filter[0] = 0xffffffff;
} else {
struct netdev_hw_addr *ha;
mc_filter[1] = 0;
mc_filter[0] = 0;
netdev_for_each_mc_addr(ha, netdev) {
int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
ocp_data |= RCR_AM;
}
}
tmp[0] = __cpu_to_le32(swab32(mc_filter[1]));
tmp[1] = __cpu_to_le32(swab32(mc_filter[0]));
pla_ocp_write(tp, PLA_MAR, BYTE_EN_DWORD, sizeof(tmp), tmp);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
netif_wake_queue(netdev);
}
static netdev_features_t
rtl8152_features_check(struct sk_buff *skb, struct net_device *dev,
netdev_features_t features)
{
u32 mss = skb_shinfo(skb)->gso_size;
int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX;
int offset = skb_transport_offset(skb);
if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset)
features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz)
features &= ~NETIF_F_GSO_MASK;
return features;
}
static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
skb_tx_timestamp(skb);
skb_queue_tail(&tp->tx_queue, skb);
if (!list_empty(&tp->tx_free)) {
if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
set_bit(SCHEDULE_NAPI, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
} else {
usb_mark_last_busy(tp->udev);
napi_schedule(&tp->napi);
}
} else if (skb_queue_len(&tp->tx_queue) > tp->tx_qlen) {
netif_stop_queue(netdev);
}
return NETDEV_TX_OK;
}
static void r8152b_reset_packet_filter(struct r8152 *tp)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_FMC);
ocp_data &= ~FMC_FCR_MCU_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_FMC, ocp_data);
ocp_data |= FMC_FCR_MCU_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_FMC, ocp_data);
}
static void rtl8152_nic_reset(struct r8152 *tp)
{
int i;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, CR_RST);
for (i = 0; i < 1000; i++) {
if (!(ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR) & CR_RST))
break;
usleep_range(100, 400);
}
}
static void set_tx_qlen(struct r8152 *tp)
{
struct net_device *netdev = tp->netdev;
tp->tx_qlen = agg_buf_sz / (netdev->mtu + VLAN_ETH_HLEN + VLAN_HLEN +
sizeof(struct tx_desc));
}
static inline u8 rtl8152_get_speed(struct r8152 *tp)
{
return ocp_read_byte(tp, MCU_TYPE_PLA, PLA_PHYSTATUS);
}
static void rtl_set_eee_plus(struct r8152 *tp)
{
u32 ocp_data;
u8 speed;
speed = rtl8152_get_speed(tp);
if (speed & _10bps) {
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR);
ocp_data |= EEEP_CR_EEEP_TX;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data);
} else {
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR);
ocp_data &= ~EEEP_CR_EEEP_TX;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data);
}
}
static void rxdy_gated_en(struct r8152 *tp, bool enable)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_MISC_1);
if (enable)
ocp_data |= RXDY_GATED_EN;
else
ocp_data &= ~RXDY_GATED_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MISC_1, ocp_data);
}
static int rtl_start_rx(struct r8152 *tp)
{
int i, ret = 0;
INIT_LIST_HEAD(&tp->rx_done);
for (i = 0; i < RTL8152_MAX_RX; i++) {
INIT_LIST_HEAD(&tp->rx_info[i].list);
ret = r8152_submit_rx(tp, &tp->rx_info[i], GFP_KERNEL);
if (ret)
break;
}
if (ret && ++i < RTL8152_MAX_RX) {
struct list_head rx_queue;
unsigned long flags;
INIT_LIST_HEAD(&rx_queue);
do {
struct rx_agg *agg = &tp->rx_info[i++];
struct urb *urb = agg->urb;
urb->actual_length = 0;
list_add_tail(&agg->list, &rx_queue);
} while (i < RTL8152_MAX_RX);
spin_lock_irqsave(&tp->rx_lock, flags);
list_splice_tail(&rx_queue, &tp->rx_done);
spin_unlock_irqrestore(&tp->rx_lock, flags);
}
return ret;
}
static int rtl_stop_rx(struct r8152 *tp)
{
int i;
for (i = 0; i < RTL8152_MAX_RX; i++)
usb_kill_urb(tp->rx_info[i].urb);
while (!skb_queue_empty(&tp->rx_queue))
dev_kfree_skb(__skb_dequeue(&tp->rx_queue));
return 0;
}
static int rtl_enable(struct r8152 *tp)
{
u32 ocp_data;
r8152b_reset_packet_filter(tp);
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CR);
ocp_data |= CR_RE | CR_TE;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, ocp_data);
rxdy_gated_en(tp, false);
return 0;
}
static int rtl8152_enable(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return -ENODEV;
set_tx_qlen(tp);
rtl_set_eee_plus(tp);
return rtl_enable(tp);
}
static void r8153_set_rx_early_timeout(struct r8152 *tp)
{
u32 ocp_data = tp->coalesce / 8;
ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_TIMEOUT, ocp_data);
}
static void r8153_set_rx_early_size(struct r8152 *tp)
{
u32 mtu = tp->netdev->mtu;
u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4;
ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
}
static int rtl8153_enable(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return -ENODEV;
usb_disable_lpm(tp->udev);
set_tx_qlen(tp);
rtl_set_eee_plus(tp);
r8153_set_rx_early_timeout(tp);
r8153_set_rx_early_size(tp);
return rtl_enable(tp);
}
static void rtl_disable(struct r8152 *tp)
{
u32 ocp_data;
int i;
if (test_bit(RTL8152_UNPLUG, &tp->flags)) {
rtl_drop_queued_tx(tp);
return;
}
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data &= ~RCR_ACPT_ALL;
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
rtl_drop_queued_tx(tp);
for (i = 0; i < RTL8152_MAX_TX; i++)
usb_kill_urb(tp->tx_info[i].urb);
rxdy_gated_en(tp, true);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if ((ocp_data & FIFO_EMPTY) == FIFO_EMPTY)
break;
usleep_range(1000, 2000);
}
for (i = 0; i < 1000; i++) {
if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0) & TCR0_TX_EMPTY)
break;
usleep_range(1000, 2000);
}
rtl_stop_rx(tp);
rtl8152_nic_reset(tp);
}
static void r8152_power_cut_en(struct r8152 *tp, bool enable)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_UPS_CTRL);
if (enable)
ocp_data |= POWER_CUT;
else
ocp_data &= ~POWER_CUT;
ocp_write_word(tp, MCU_TYPE_USB, USB_UPS_CTRL, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_PM_CTRL_STATUS);
ocp_data &= ~RESUME_INDICATE;
ocp_write_word(tp, MCU_TYPE_USB, USB_PM_CTRL_STATUS, ocp_data);
}
static void rtl_rx_vlan_en(struct r8152 *tp, bool enable)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CPCR);
if (enable)
ocp_data |= CPCR_RX_VLAN;
else
ocp_data &= ~CPCR_RX_VLAN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_CPCR, ocp_data);
}
static int rtl8152_set_features(struct net_device *dev,
netdev_features_t features)
{
netdev_features_t changed = features ^ dev->features;
struct r8152 *tp = netdev_priv(dev);
int ret;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
mutex_lock(&tp->control);
if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
if (features & NETIF_F_HW_VLAN_CTAG_RX)
rtl_rx_vlan_en(tp, true);
else
rtl_rx_vlan_en(tp, false);
}
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return ret;
}
#define WAKE_ANY (WAKE_PHY | WAKE_MAGIC | WAKE_UCAST | WAKE_BCAST | WAKE_MCAST)
static u32 __rtl_get_wol(struct r8152 *tp)
{
u32 ocp_data;
u32 wolopts = 0;
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG5);
if (!(ocp_data & LAN_WAKE_EN))
return 0;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
if (ocp_data & LINK_ON_WAKE_EN)
wolopts |= WAKE_PHY;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5);
if (ocp_data & UWF_EN)
wolopts |= WAKE_UCAST;
if (ocp_data & BWF_EN)
wolopts |= WAKE_BCAST;
if (ocp_data & MWF_EN)
wolopts |= WAKE_MCAST;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CFG_WOL);
if (ocp_data & MAGIC_EN)
wolopts |= WAKE_MAGIC;
return wolopts;
}
static void __rtl_set_wol(struct r8152 *tp, u32 wolopts)
{
u32 ocp_data;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
ocp_data &= ~LINK_ON_WAKE_EN;
if (wolopts & WAKE_PHY)
ocp_data |= LINK_ON_WAKE_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG5);
ocp_data &= ~(UWF_EN | BWF_EN | MWF_EN | LAN_WAKE_EN);
if (wolopts & WAKE_UCAST)
ocp_data |= UWF_EN;
if (wolopts & WAKE_BCAST)
ocp_data |= BWF_EN;
if (wolopts & WAKE_MCAST)
ocp_data |= MWF_EN;
if (wolopts & WAKE_ANY)
ocp_data |= LAN_WAKE_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG5, ocp_data);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CFG_WOL);
ocp_data &= ~MAGIC_EN;
if (wolopts & WAKE_MAGIC)
ocp_data |= MAGIC_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_CFG_WOL, ocp_data);
if (wolopts & WAKE_ANY)
device_set_wakeup_enable(&tp->udev->dev, true);
else
device_set_wakeup_enable(&tp->udev->dev, false);
}
static void r8153_u1u2en(struct r8152 *tp, bool enable)
{
u8 u1u2[8];
if (enable)
memset(u1u2, 0xff, sizeof(u1u2));
else
memset(u1u2, 0x00, sizeof(u1u2));
usb_ocp_write(tp, USB_TOLERANCE, BYTE_EN_SIX_BYTES, sizeof(u1u2), u1u2);
}
static void r8153_u2p3en(struct r8152 *tp, bool enable)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL);
if (enable && tp->version != RTL_VER_03 && tp->version != RTL_VER_04)
ocp_data |= U2P3_ENABLE;
else
ocp_data &= ~U2P3_ENABLE;
ocp_write_word(tp, MCU_TYPE_USB, USB_U2P3_CTRL, ocp_data);
}
static void r8153_power_cut_en(struct r8152 *tp, bool enable)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_POWER_CUT);
if (enable)
ocp_data |= PWR_EN | PHASE2_EN;
else
ocp_data &= ~(PWR_EN | PHASE2_EN);
ocp_write_word(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0);
ocp_data &= ~PCUT_STATUS;
ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data);
}
static bool rtl_can_wakeup(struct r8152 *tp)
{
struct usb_device *udev = tp->udev;
return (udev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_WAKEUP);
}
static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable)
{
if (enable) {
u32 ocp_data;
r8153_u1u2en(tp, false);
r8153_u2p3en(tp, false);
__rtl_set_wol(tp, WAKE_ANY);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34);
ocp_data |= LINK_OFF_WAKE_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
} else {
__rtl_set_wol(tp, tp->saved_wolopts);
r8153_u2p3en(tp, true);
r8153_u1u2en(tp, true);
}
}
static void rtl_phy_reset(struct r8152 *tp)
{
u16 data;
int i;
clear_bit(PHY_RESET, &tp->flags);
data = r8152_mdio_read(tp, MII_BMCR);
/* don't reset again before the previous one complete */
if (data & BMCR_RESET)
return;
data |= BMCR_RESET;
r8152_mdio_write(tp, MII_BMCR, data);
for (i = 0; i < 50; i++) {
msleep(20);
if ((r8152_mdio_read(tp, MII_BMCR) & BMCR_RESET) == 0)
break;
}
}
static void r8153_teredo_off(struct r8152 *tp)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
ocp_data &= ~(TEREDO_SEL | TEREDO_RS_EVENT_MASK | OOB_TEREDO_EN);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, ocp_data);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_WDT6_CTRL, WDT6_SET_MODE);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_REALWOW_TIMER, 0);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0);
}
static void r8152b_disable_aldps(struct r8152 *tp)
{
ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPDNPS | LINKENA | DIS_SDSAVE);
msleep(20);
}
static inline void r8152b_enable_aldps(struct r8152 *tp)
{
ocp_reg_write(tp, OCP_ALDPS_CONFIG, ENPWRSAVE | ENPDNPS |
LINKENA | DIS_SDSAVE);
}
static void rtl8152_disable(struct r8152 *tp)
{
r8152b_disable_aldps(tp);
rtl_disable(tp);
r8152b_enable_aldps(tp);
}
static void r8152b_hw_phy_cfg(struct r8152 *tp)
{
u16 data;
data = r8152_mdio_read(tp, MII_BMCR);
if (data & BMCR_PDOWN) {
data &= ~BMCR_PDOWN;
r8152_mdio_write(tp, MII_BMCR, data);
}
set_bit(PHY_RESET, &tp->flags);
}
static void r8152b_exit_oob(struct r8152 *tp)
{
u32 ocp_data;
int i;
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data &= ~RCR_ACPT_ALL;
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
rxdy_gated_en(tp, true);
r8153_teredo_off(tp);
r8152b_hw_phy_cfg(tp);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CR, 0x00);
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
ocp_data &= ~MCU_BORW_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
ocp_data |= RE_INIT_LL;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
rtl8152_nic_reset(tp);
/* rx share fifo credit full threshold */
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_NORMAL);
if (tp->udev->speed == USB_SPEED_FULL ||
tp->udev->speed == USB_SPEED_LOW) {
/* rx share fifo credit near full threshold */
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1,
RXFIFO_THR2_FULL);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2,
RXFIFO_THR3_FULL);
} else {
/* rx share fifo credit near full threshold */
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1,
RXFIFO_THR2_HIGH);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2,
RXFIFO_THR3_HIGH);
}
/* TX share fifo free credit full threshold */
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, TXFIFO_THR_NORMAL);
ocp_write_byte(tp, MCU_TYPE_USB, USB_TX_AGG, TX_AGG_MAX_THRESHOLD);
ocp_write_dword(tp, MCU_TYPE_USB, USB_RX_BUF_TH, RX_THR_HIGH);
ocp_write_dword(tp, MCU_TYPE_USB, USB_TX_DMA,
TEST_MODE_DISABLE | TX_SIZE_ADJUST1);
rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8152_RMS);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0);
ocp_data |= TCR0_AUTO_FIFO;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR0, ocp_data);
}
static void r8152b_enter_oob(struct r8152 *tp)
{
u32 ocp_data;
int i;
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_OOB);
rtl_disable(tp);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
ocp_data |= RE_INIT_LL;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8152_RMS);
rtl_rx_vlan_en(tp, true);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PAL_BDC_CR);
ocp_data |= ALDPS_PROXY_MODE;
ocp_write_word(tp, MCU_TYPE_PLA, PAL_BDC_CR, ocp_data);
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
rxdy_gated_en(tp, false);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data |= RCR_APM | RCR_AM | RCR_AB;
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
static void r8153_hw_phy_cfg(struct r8152 *tp)
{
u32 ocp_data;
u16 data;
if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
tp->version == RTL_VER_05)
ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
data = r8152_mdio_read(tp, MII_BMCR);
if (data & BMCR_PDOWN) {
data &= ~BMCR_PDOWN;
r8152_mdio_write(tp, MII_BMCR, data);
}
if (tp->version == RTL_VER_03) {
data = ocp_reg_read(tp, OCP_EEE_CFG);
data &= ~CTAP_SHORT_EN;
ocp_reg_write(tp, OCP_EEE_CFG, data);
}
data = ocp_reg_read(tp, OCP_POWER_CFG);
data |= EEE_CLKDIV_EN;
ocp_reg_write(tp, OCP_POWER_CFG, data);
data = ocp_reg_read(tp, OCP_DOWN_SPEED);
data |= EN_10M_BGOFF;
ocp_reg_write(tp, OCP_DOWN_SPEED, data);
data = ocp_reg_read(tp, OCP_POWER_CFG);
data |= EN_10M_PLLOFF;
ocp_reg_write(tp, OCP_POWER_CFG, data);
sram_write(tp, SRAM_IMPEDANCE, 0x0b13);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR);
ocp_data |= PFM_PWM_SWITCH;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data);
/* Enable LPF corner auto tune */
sram_write(tp, SRAM_LPF_CFG, 0xf70f);
/* Adjust 10M Amplitude */
sram_write(tp, SRAM_10M_AMP1, 0x00af);
sram_write(tp, SRAM_10M_AMP2, 0x0208);
set_bit(PHY_RESET, &tp->flags);
}
static void r8153_first_init(struct r8152 *tp)
{
u32 ocp_data;
int i;
rxdy_gated_en(tp, true);
r8153_teredo_off(tp);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data &= ~RCR_ACPT_ALL;
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
r8153_hw_phy_cfg(tp);
rtl8152_nic_reset(tp);
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
ocp_data &= ~MCU_BORW_EN;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
ocp_data |= RE_INIT_LL;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0);
ocp_data |= TCR0_AUTO_FIFO;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_TCR0, ocp_data);
rtl8152_nic_reset(tp);
/* rx share fifo credit full threshold */
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_NORMAL);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_NORMAL);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_NORMAL);
/* TX share fifo free credit full threshold */
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TXFIFO_CTRL, TXFIFO_THR_NORMAL2);
/* rx aggregation */
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
}
static void r8153_enter_oob(struct r8152 *tp)
{
u32 ocp_data;
int i;
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
rtl_disable(tp);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
ocp_data |= RE_INIT_LL;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
for (i = 0; i < 1000; i++) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
if (ocp_data & LINK_LIST_READY)
break;
usleep_range(1000, 2000);
}
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG);
ocp_data &= ~TEREDO_WAKE_MASK;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG, ocp_data);
rtl_rx_vlan_en(tp, true);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PAL_BDC_CR);
ocp_data |= ALDPS_PROXY_MODE;
ocp_write_word(tp, MCU_TYPE_PLA, PAL_BDC_CR, ocp_data);
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
rxdy_gated_en(tp, false);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
ocp_data |= RCR_APM | RCR_AM | RCR_AB;
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
}
static void r8153_disable_aldps(struct r8152 *tp)
{
u16 data;
data = ocp_reg_read(tp, OCP_POWER_CFG);
data &= ~EN_ALDPS;
ocp_reg_write(tp, OCP_POWER_CFG, data);
msleep(20);
}
static void r8153_enable_aldps(struct r8152 *tp)
{
u16 data;
data = ocp_reg_read(tp, OCP_POWER_CFG);
data |= EN_ALDPS;
ocp_reg_write(tp, OCP_POWER_CFG, data);
}
static void rtl8153_disable(struct r8152 *tp)
{
r8153_disable_aldps(tp);
rtl_disable(tp);
r8153_enable_aldps(tp);
usb_enable_lpm(tp->udev);
}
static int rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u16 speed, u8 duplex)
{
u16 bmcr, anar, gbcr;
int ret = 0;
cancel_delayed_work_sync(&tp->schedule);
anar = r8152_mdio_read(tp, MII_ADVERTISE);
anar &= ~(ADVERTISE_10HALF | ADVERTISE_10FULL |
ADVERTISE_100HALF | ADVERTISE_100FULL);
if (tp->mii.supports_gmii) {
gbcr = r8152_mdio_read(tp, MII_CTRL1000);
gbcr &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
} else {
gbcr = 0;
}
if (autoneg == AUTONEG_DISABLE) {
if (speed == SPEED_10) {
bmcr = 0;
anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
} else if (speed == SPEED_100) {
bmcr = BMCR_SPEED100;
anar |= ADVERTISE_100HALF | ADVERTISE_100FULL;
} else if (speed == SPEED_1000 && tp->mii.supports_gmii) {
bmcr = BMCR_SPEED1000;
gbcr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF;
} else {
ret = -EINVAL;
goto out;
}
if (duplex == DUPLEX_FULL)
bmcr |= BMCR_FULLDPLX;
} else {
if (speed == SPEED_10) {
if (duplex == DUPLEX_FULL)
anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
else
anar |= ADVERTISE_10HALF;
} else if (speed == SPEED_100) {
if (duplex == DUPLEX_FULL) {
anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
anar |= ADVERTISE_100HALF | ADVERTISE_100FULL;
} else {
anar |= ADVERTISE_10HALF;
anar |= ADVERTISE_100HALF;
}
} else if (speed == SPEED_1000 && tp->mii.supports_gmii) {
if (duplex == DUPLEX_FULL) {
anar |= ADVERTISE_10HALF | ADVERTISE_10FULL;
anar |= ADVERTISE_100HALF | ADVERTISE_100FULL;
gbcr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF;
} else {
anar |= ADVERTISE_10HALF;
anar |= ADVERTISE_100HALF;
gbcr |= ADVERTISE_1000HALF;
}
} else {
ret = -EINVAL;
goto out;
}
bmcr = BMCR_ANENABLE | BMCR_ANRESTART;
}
if (test_bit(PHY_RESET, &tp->flags))
bmcr |= BMCR_RESET;
if (tp->mii.supports_gmii)
r8152_mdio_write(tp, MII_CTRL1000, gbcr);
r8152_mdio_write(tp, MII_ADVERTISE, anar);
r8152_mdio_write(tp, MII_BMCR, bmcr);
if (test_bit(PHY_RESET, &tp->flags)) {
int i;
clear_bit(PHY_RESET, &tp->flags);
for (i = 0; i < 50; i++) {
msleep(20);
if ((r8152_mdio_read(tp, MII_BMCR) & BMCR_RESET) == 0)
break;
}
}
out:
return ret;
}
static void rtl8152_up(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
r8152b_disable_aldps(tp);
r8152b_exit_oob(tp);
r8152b_enable_aldps(tp);
}
static void rtl8152_down(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags)) {
rtl_drop_queued_tx(tp);
return;
}
r8152_power_cut_en(tp, false);
r8152b_disable_aldps(tp);
r8152b_enter_oob(tp);
r8152b_enable_aldps(tp);
}
static void rtl8153_up(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
r8153_u1u2en(tp, false);
r8153_disable_aldps(tp);
r8153_first_init(tp);
r8153_enable_aldps(tp);
r8153_u2p3en(tp, true);
r8153_u1u2en(tp, true);
usb_enable_lpm(tp->udev);
}
static void rtl8153_down(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags)) {
rtl_drop_queued_tx(tp);
return;
}
r8153_u1u2en(tp, false);
r8153_u2p3en(tp, false);
r8153_power_cut_en(tp, false);
r8153_disable_aldps(tp);
r8153_enter_oob(tp);
r8153_enable_aldps(tp);
}
static bool rtl8152_in_nway(struct r8152 *tp)
{
u16 nway_state;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, 0x2000);
tp->ocp_base = 0x2000;
ocp_write_byte(tp, MCU_TYPE_PLA, 0xb014, 0x4c); /* phy state */
nway_state = ocp_read_word(tp, MCU_TYPE_PLA, 0xb01a);
/* bit 15: TXDIS_STATE, bit 14: ABD_STATE */
if (nway_state & 0xc000)
return false;
else
return true;
}
static bool rtl8153_in_nway(struct r8152 *tp)
{
u16 phy_state = ocp_reg_read(tp, OCP_PHY_STATE) & 0xff;
if (phy_state == TXDIS_STATE || phy_state == ABD_STATE)
return false;
else
return true;
}
static void set_carrier(struct r8152 *tp)
{
struct net_device *netdev = tp->netdev;
u8 speed;
clear_bit(RTL8152_LINK_CHG, &tp->flags);
speed = rtl8152_get_speed(tp);
if (speed & LINK_STATUS) {
if (!netif_carrier_ok(netdev)) {
tp->rtl_ops.enable(tp);
set_bit(RTL8152_SET_RX_MODE, &tp->flags);
napi_disable(&tp->napi);
netif_carrier_on(netdev);
rtl_start_rx(tp);
napi_enable(&tp->napi);
}
} else {
if (netif_carrier_ok(netdev)) {
netif_carrier_off(netdev);
napi_disable(&tp->napi);
tp->rtl_ops.disable(tp);
napi_enable(&tp->napi);
}
}
}
static void rtl_work_func_t(struct work_struct *work)
{
struct r8152 *tp = container_of(work, struct r8152, schedule.work);
/* If the device is unplugged or !netif_running(), the workqueue
* doesn't need to wake the device, and could return directly.
*/
if (test_bit(RTL8152_UNPLUG, &tp->flags) || !netif_running(tp->netdev))
return;
if (usb_autopm_get_interface(tp->intf) < 0)
return;
if (!test_bit(WORK_ENABLE, &tp->flags))
goto out1;
if (!mutex_trylock(&tp->control)) {
schedule_delayed_work(&tp->schedule, 0);
goto out1;
}
if (test_bit(RTL8152_LINK_CHG, &tp->flags))
set_carrier(tp);
if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
_rtl8152_set_rx_mode(tp->netdev);
/* don't schedule napi before linking */
if (test_bit(SCHEDULE_NAPI, &tp->flags) &&
netif_carrier_ok(tp->netdev)) {
clear_bit(SCHEDULE_NAPI, &tp->flags);
napi_schedule(&tp->napi);
}
if (test_bit(PHY_RESET, &tp->flags))
rtl_phy_reset(tp);
mutex_unlock(&tp->control);
out1:
usb_autopm_put_interface(tp->intf);
}
static int rtl8152_open(struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
int res = 0;
res = alloc_all_mem(tp);
if (res)
goto out;
netif_carrier_off(netdev);
res = usb_autopm_get_interface(tp->intf);
if (res < 0) {
free_all_mem(tp);
goto out;
}
mutex_lock(&tp->control);
tp->rtl_ops.up(tp);
rtl8152_set_speed(tp, AUTONEG_ENABLE,
tp->mii.supports_gmii ? SPEED_1000 : SPEED_100,
DUPLEX_FULL);
netif_carrier_off(netdev);
netif_start_queue(netdev);
set_bit(WORK_ENABLE, &tp->flags);
res = usb_submit_urb(tp->intr_urb, GFP_KERNEL);
if (res) {
if (res == -ENODEV)
netif_device_detach(tp->netdev);
netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n",
res);
free_all_mem(tp);
} else {
napi_enable(&tp->napi);
}
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return res;
}
static int rtl8152_close(struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
int res = 0;
napi_disable(&tp->napi);
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
cancel_delayed_work_sync(&tp->schedule);
netif_stop_queue(netdev);
res = usb_autopm_get_interface(tp->intf);
if (res < 0 || test_bit(RTL8152_UNPLUG, &tp->flags)) {
rtl_drop_queued_tx(tp);
rtl_stop_rx(tp);
} else {
mutex_lock(&tp->control);
tp->rtl_ops.down(tp);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
}
free_all_mem(tp);
return res;
}
static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
{
ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
ocp_reg_write(tp, OCP_EEE_DATA, reg);
ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
}
static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
{
u16 data;
r8152_mmd_indirect(tp, dev, reg);
data = ocp_reg_read(tp, OCP_EEE_DATA);
ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
return data;
}
static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
{
r8152_mmd_indirect(tp, dev, reg);
ocp_reg_write(tp, OCP_EEE_DATA, data);
ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
}
static void r8152_eee_en(struct r8152 *tp, bool enable)
{
u16 config1, config2, config3;
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
if (enable) {
ocp_data |= EEE_RX_EN | EEE_TX_EN;
config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
config1 |= sd_rise_time(1);
config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
config3 |= fast_snr(42);
} else {
ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
RX_QUIET_EN);
config1 |= sd_rise_time(7);
config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
config3 |= fast_snr(511);
}
ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
}
static void r8152b_enable_eee(struct r8152 *tp)
{
r8152_eee_en(tp, true);
r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
}
static void r8153_eee_en(struct r8152 *tp, bool enable)
{
u32 ocp_data;
u16 config;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
config = ocp_reg_read(tp, OCP_EEE_CFG);
if (enable) {
ocp_data |= EEE_RX_EN | EEE_TX_EN;
config |= EEE10_EN;
} else {
ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
config &= ~EEE10_EN;
}
ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
ocp_reg_write(tp, OCP_EEE_CFG, config);
}
static void r8153_enable_eee(struct r8152 *tp)
{
r8153_eee_en(tp, true);
ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
}
static void r8152b_enable_fc(struct r8152 *tp)
{
u16 anar;
anar = r8152_mdio_read(tp, MII_ADVERTISE);
anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
r8152_mdio_write(tp, MII_ADVERTISE, anar);
}
static void rtl_tally_reset(struct r8152 *tp)
{
u32 ocp_data;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY);
ocp_data |= TALLY_RESET;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_RSTTALLY, ocp_data);
}
static void r8152b_init(struct r8152 *tp)
{
u32 ocp_data;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
r8152b_disable_aldps(tp);
if (tp->version == RTL_VER_01) {
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE);
ocp_data &= ~LED_MODE_MASK;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, ocp_data);
}
r8152_power_cut_en(tp, false);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR);
ocp_data |= TX_10M_IDLE_EN | PFM_PWM_SWITCH;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL);
ocp_data &= ~MCU_CLK_RATIO_MASK;
ocp_data |= MCU_CLK_RATIO | D3_CLK_GATED_EN;
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ocp_data);
ocp_data = GPHY_STS_MSK | SPEED_DOWN_MSK |
SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
r8152b_enable_eee(tp);
r8152b_enable_aldps(tp);
r8152b_enable_fc(tp);
rtl_tally_reset(tp);
/* enable rx aggregation */
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
}
static void r8153_init(struct r8152 *tp)
{
u32 ocp_data;
int i;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
r8153_disable_aldps(tp);
r8153_u1u2en(tp, false);
for (i = 0; i < 500; i++) {
if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) &
AUTOLOAD_DONE)
break;
msleep(20);
}
for (i = 0; i < 500; i++) {
ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK;
if (ocp_data == PHY_STAT_LAN_ON || ocp_data == PHY_STAT_PWRDN)
break;
msleep(20);
}
usb_disable_lpm(tp->udev);
r8153_u2p3en(tp, false);
if (tp->version == RTL_VER_04) {
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2);
ocp_data &= ~pwd_dn_scale_mask;
ocp_data |= pwd_dn_scale(96);
ocp_write_word(tp, MCU_TYPE_USB, USB_SSPHYLINK2, ocp_data);
ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_USB2PHY);
ocp_data |= USB2PHY_L1 | USB2PHY_SUSPEND;
ocp_write_byte(tp, MCU_TYPE_USB, USB_USB2PHY, ocp_data);
} else if (tp->version == RTL_VER_05) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0);
ocp_data &= ~ECM_ALDPS;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_DMY_REG0, ocp_data);
ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1);
if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0)
ocp_data &= ~DYNAMIC_BURST;
else
ocp_data |= DYNAMIC_BURST;
ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data);
} else if (tp->version == RTL_VER_06) {
ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1);
if (ocp_read_word(tp, MCU_TYPE_USB, USB_BURST_SIZE) == 0)
ocp_data &= ~DYNAMIC_BURST;
else
ocp_data |= DYNAMIC_BURST;
ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY1, ocp_data);
}
ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2);
ocp_data |= EP4_FULL_FC;
ocp_write_byte(tp, MCU_TYPE_USB, USB_CSR_DUMMY2, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL);
ocp_data &= ~TIMER11_EN;
ocp_write_word(tp, MCU_TYPE_USB, USB_WDT11_CTRL, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE);
ocp_data &= ~LED_MODE_MASK;
ocp_write_word(tp, MCU_TYPE_PLA, PLA_LED_FEATURE, ocp_data);
ocp_data = FIFO_EMPTY_1FB | ROK_EXIT_LPM;
if (tp->version == RTL_VER_04 && tp->udev->speed != USB_SPEED_SUPER)
ocp_data |= LPM_TIMER_500MS;
else
ocp_data |= LPM_TIMER_500US;
ocp_write_byte(tp, MCU_TYPE_USB, USB_LPM_CTRL, ocp_data);
ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2);
ocp_data &= ~SEN_VAL_MASK;
ocp_data |= SEN_VAL_NORMAL | SEL_RXIDLE;
ocp_write_word(tp, MCU_TYPE_USB, USB_AFE_CTRL2, ocp_data);
ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001);
r8153_power_cut_en(tp, false);
r8153_u1u2en(tp, true);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ALDPS_SPDWN_RATIO);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, EEE_SPDWN_RATIO);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3,
PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN |
U1U2_SPDWN_EN | L1_SPDWN_EN);
ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4,
PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN |
TP100_SPDWN_EN | TP500_SPDWN_EN | TP1000_SPDWN_EN |
EEE_SPDWN_EN);
r8153_enable_eee(tp);
r8153_enable_aldps(tp);
r8152b_enable_fc(tp);
rtl_tally_reset(tp);
r8153_u2p3en(tp, true);
}
static int rtl8152_pre_reset(struct usb_interface *intf)
{
struct r8152 *tp = usb_get_intfdata(intf);
struct net_device *netdev;
if (!tp)
return 0;
netdev = tp->netdev;
if (!netif_running(netdev))
return 0;
napi_disable(&tp->napi);
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
cancel_delayed_work_sync(&tp->schedule);
if (netif_carrier_ok(netdev)) {
netif_stop_queue(netdev);
mutex_lock(&tp->control);
tp->rtl_ops.disable(tp);
mutex_unlock(&tp->control);
}
return 0;
}
static int rtl8152_post_reset(struct usb_interface *intf)
{
struct r8152 *tp = usb_get_intfdata(intf);
struct net_device *netdev;
if (!tp)
return 0;
netdev = tp->netdev;
if (!netif_running(netdev))
return 0;
set_bit(WORK_ENABLE, &tp->flags);
if (netif_carrier_ok(netdev)) {
mutex_lock(&tp->control);
tp->rtl_ops.enable(tp);
rtl8152_set_rx_mode(netdev);
mutex_unlock(&tp->control);
netif_wake_queue(netdev);
}
napi_enable(&tp->napi);
return 0;
}
static bool delay_autosuspend(struct r8152 *tp)
{
bool sw_linking = !!netif_carrier_ok(tp->netdev);
bool hw_linking = !!(rtl8152_get_speed(tp) & LINK_STATUS);
/* This means a linking change occurs and the driver doesn't detect it,
* yet. If the driver has disabled tx/rx and hw is linking on, the
* device wouldn't wake up by receiving any packet.
*/
if (work_busy(&tp->schedule.work) || sw_linking != hw_linking)
return true;
/* If the linking down is occurred by nway, the device may miss the
* linking change event. And it wouldn't wake when linking on.
*/
if (!sw_linking && tp->rtl_ops.in_nway(tp))
return true;
else
return false;
}
static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message)
{
struct r8152 *tp = usb_get_intfdata(intf);
struct net_device *netdev = tp->netdev;
int ret = 0;
mutex_lock(&tp->control);
if (PMSG_IS_AUTO(message)) {
if (netif_running(netdev) && delay_autosuspend(tp)) {
ret = -EBUSY;
goto out1;
}
set_bit(SELECTIVE_SUSPEND, &tp->flags);
} else {
netif_device_detach(netdev);
}
if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
napi_disable(&tp->napi);
if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
rtl_stop_rx(tp);
rtl_runtime_suspend_enable(tp, true);
} else {
cancel_delayed_work_sync(&tp->schedule);
tp->rtl_ops.down(tp);
}
napi_enable(&tp->napi);
}
out1:
mutex_unlock(&tp->control);
return ret;
}
static int rtl8152_resume(struct usb_interface *intf)
{
struct r8152 *tp = usb_get_intfdata(intf);
mutex_lock(&tp->control);
if (!test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
tp->rtl_ops.init(tp);
netif_device_attach(tp->netdev);
}
if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
rtl_runtime_suspend_enable(tp, false);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
napi_disable(&tp->napi);
set_bit(WORK_ENABLE, &tp->flags);
if (netif_carrier_ok(tp->netdev))
rtl_start_rx(tp);
napi_enable(&tp->napi);
} else {
tp->rtl_ops.up(tp);
rtl8152_set_speed(tp, AUTONEG_ENABLE,
tp->mii.supports_gmii ?
SPEED_1000 : SPEED_100,
DUPLEX_FULL);
netif_carrier_off(tp->netdev);
set_bit(WORK_ENABLE, &tp->flags);
}
usb_submit_urb(tp->intr_urb, GFP_KERNEL);
} else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
if (tp->netdev->flags & IFF_UP)
rtl_runtime_suspend_enable(tp, false);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
}
mutex_unlock(&tp->control);
return 0;
}
static int rtl8152_reset_resume(struct usb_interface *intf)
{
struct r8152 *tp = usb_get_intfdata(intf);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
return rtl8152_resume(intf);
}
static void rtl8152_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct r8152 *tp = netdev_priv(dev);
if (usb_autopm_get_interface(tp->intf) < 0)
return;
if (!rtl_can_wakeup(tp)) {
wol->supported = 0;
wol->wolopts = 0;
} else {
mutex_lock(&tp->control);
wol->supported = WAKE_ANY;
wol->wolopts = __rtl_get_wol(tp);
mutex_unlock(&tp->control);
}
usb_autopm_put_interface(tp->intf);
}
static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
{
struct r8152 *tp = netdev_priv(dev);
int ret;
if (!rtl_can_wakeup(tp))
return -EOPNOTSUPP;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out_set_wol;
mutex_lock(&tp->control);
__rtl_set_wol(tp, wol->wolopts);
tp->saved_wolopts = wol->wolopts & WAKE_ANY;
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out_set_wol:
return ret;
}
static u32 rtl8152_get_msglevel(struct net_device *dev)
{
struct r8152 *tp = netdev_priv(dev);
return tp->msg_enable;
}
static void rtl8152_set_msglevel(struct net_device *dev, u32 value)
{
struct r8152 *tp = netdev_priv(dev);
tp->msg_enable = value;
}
static void rtl8152_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *info)
{
struct r8152 *tp = netdev_priv(netdev);
strlcpy(info->driver, MODULENAME, sizeof(info->driver));
strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
usb_make_path(tp->udev, info->bus_info, sizeof(info->bus_info));
}
static
int rtl8152_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
{
struct r8152 *tp = netdev_priv(netdev);
int ret;
if (!tp->mii.mdio_read)
return -EOPNOTSUPP;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
mutex_lock(&tp->control);
ret = mii_ethtool_gset(&tp->mii, cmd);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return ret;
}
static int rtl8152_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
struct r8152 *tp = netdev_priv(dev);
int ret;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
mutex_lock(&tp->control);
ret = rtl8152_set_speed(tp, cmd->autoneg, cmd->speed, cmd->duplex);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return ret;
}
static const char rtl8152_gstrings[][ETH_GSTRING_LEN] = {
"tx_packets",
"rx_packets",
"tx_errors",
"rx_errors",
"rx_missed",
"align_errors",
"tx_single_collisions",
"tx_multi_collisions",
"rx_unicast",
"rx_broadcast",
"rx_multicast",
"tx_aborted",
"tx_underrun",
};
static int rtl8152_get_sset_count(struct net_device *dev, int sset)
{
switch (sset) {
case ETH_SS_STATS:
return ARRAY_SIZE(rtl8152_gstrings);
default:
return -EOPNOTSUPP;
}
}
static void rtl8152_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct r8152 *tp = netdev_priv(dev);
struct tally_counter tally;
if (usb_autopm_get_interface(tp->intf) < 0)
return;
generic_ocp_read(tp, PLA_TALLYCNT, sizeof(tally), &tally, MCU_TYPE_PLA);
usb_autopm_put_interface(tp->intf);
data[0] = le64_to_cpu(tally.tx_packets);
data[1] = le64_to_cpu(tally.rx_packets);
data[2] = le64_to_cpu(tally.tx_errors);
data[3] = le32_to_cpu(tally.rx_errors);
data[4] = le16_to_cpu(tally.rx_missed);
data[5] = le16_to_cpu(tally.align_errors);
data[6] = le32_to_cpu(tally.tx_one_collision);
data[7] = le32_to_cpu(tally.tx_multi_collision);
data[8] = le64_to_cpu(tally.rx_unicast);
data[9] = le64_to_cpu(tally.rx_broadcast);
data[10] = le32_to_cpu(tally.rx_multicast);
data[11] = le16_to_cpu(tally.tx_aborted);
data[12] = le16_to_cpu(tally.tx_underrun);
}
static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
switch (stringset) {
case ETH_SS_STATS:
memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings));
break;
}
}
static int r8152_get_eee(struct r8152 *tp, struct ethtool_eee *eee)
{
u32 ocp_data, lp, adv, supported = 0;
u16 val;
val = r8152_mmd_read(tp, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
supported = mmd_eee_cap_to_ethtool_sup_t(val);
val = r8152_mmd_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
adv = mmd_eee_adv_to_ethtool_adv_t(val);
val = r8152_mmd_read(tp, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE);
lp = mmd_eee_adv_to_ethtool_adv_t(val);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
ocp_data &= EEE_RX_EN | EEE_TX_EN;
eee->eee_enabled = !!ocp_data;
eee->eee_active = !!(supported & adv & lp);
eee->supported = supported;
eee->advertised = adv;
eee->lp_advertised = lp;
return 0;
}
static int r8152_set_eee(struct r8152 *tp, struct ethtool_eee *eee)
{
u16 val = ethtool_adv_to_mmd_eee_adv_t(eee->advertised);
r8152_eee_en(tp, eee->eee_enabled);
if (!eee->eee_enabled)
val = 0;
r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val);
return 0;
}
static int r8153_get_eee(struct r8152 *tp, struct ethtool_eee *eee)
{
u32 ocp_data, lp, adv, supported = 0;
u16 val;
val = ocp_reg_read(tp, OCP_EEE_ABLE);
supported = mmd_eee_cap_to_ethtool_sup_t(val);
val = ocp_reg_read(tp, OCP_EEE_ADV);
adv = mmd_eee_adv_to_ethtool_adv_t(val);
val = ocp_reg_read(tp, OCP_EEE_LPABLE);
lp = mmd_eee_adv_to_ethtool_adv_t(val);
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
ocp_data &= EEE_RX_EN | EEE_TX_EN;
eee->eee_enabled = !!ocp_data;
eee->eee_active = !!(supported & adv & lp);
eee->supported = supported;
eee->advertised = adv;
eee->lp_advertised = lp;
return 0;
}
static int r8153_set_eee(struct r8152 *tp, struct ethtool_eee *eee)
{
u16 val = ethtool_adv_to_mmd_eee_adv_t(eee->advertised);
r8153_eee_en(tp, eee->eee_enabled);
if (!eee->eee_enabled)
val = 0;
ocp_reg_write(tp, OCP_EEE_ADV, val);
return 0;
}
static int
rtl_ethtool_get_eee(struct net_device *net, struct ethtool_eee *edata)
{
struct r8152 *tp = netdev_priv(net);
int ret;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
mutex_lock(&tp->control);
ret = tp->rtl_ops.eee_get(tp, edata);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return ret;
}
static int
rtl_ethtool_set_eee(struct net_device *net, struct ethtool_eee *edata)
{
struct r8152 *tp = netdev_priv(net);
int ret;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
mutex_lock(&tp->control);
ret = tp->rtl_ops.eee_set(tp, edata);
if (!ret)
ret = mii_nway_restart(&tp->mii);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return ret;
}
static int rtl8152_nway_reset(struct net_device *dev)
{
struct r8152 *tp = netdev_priv(dev);
int ret;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
goto out;
mutex_lock(&tp->control);
ret = mii_nway_restart(&tp->mii);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
out:
return ret;
}
static int rtl8152_get_coalesce(struct net_device *netdev,
struct ethtool_coalesce *coalesce)
{
struct r8152 *tp = netdev_priv(netdev);
switch (tp->version) {
case RTL_VER_01:
case RTL_VER_02:
return -EOPNOTSUPP;
default:
break;
}
coalesce->rx_coalesce_usecs = tp->coalesce;
return 0;
}
static int rtl8152_set_coalesce(struct net_device *netdev,
struct ethtool_coalesce *coalesce)
{
struct r8152 *tp = netdev_priv(netdev);
int ret;
switch (tp->version) {
case RTL_VER_01:
case RTL_VER_02:
return -EOPNOTSUPP;
default:
break;
}
if (coalesce->rx_coalesce_usecs > COALESCE_SLOW)
return -EINVAL;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
return ret;
mutex_lock(&tp->control);
if (tp->coalesce != coalesce->rx_coalesce_usecs) {
tp->coalesce = coalesce->rx_coalesce_usecs;
if (netif_running(tp->netdev) && netif_carrier_ok(netdev))
r8153_set_rx_early_timeout(tp);
}
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
return ret;
}
static struct ethtool_ops ops = {
.get_drvinfo = rtl8152_get_drvinfo,
.get_settings = rtl8152_get_settings,
.set_settings = rtl8152_set_settings,
.get_link = ethtool_op_get_link,
.nway_reset = rtl8152_nway_reset,
.get_msglevel = rtl8152_get_msglevel,
.set_msglevel = rtl8152_set_msglevel,
.get_wol = rtl8152_get_wol,
.set_wol = rtl8152_set_wol,
.get_strings = rtl8152_get_strings,
.get_sset_count = rtl8152_get_sset_count,
.get_ethtool_stats = rtl8152_get_ethtool_stats,
.get_coalesce = rtl8152_get_coalesce,
.set_coalesce = rtl8152_set_coalesce,
.get_eee = rtl_ethtool_get_eee,
.set_eee = rtl_ethtool_set_eee,
};
static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
{
struct r8152 *tp = netdev_priv(netdev);
struct mii_ioctl_data *data = if_mii(rq);
int res;
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return -ENODEV;
res = usb_autopm_get_interface(tp->intf);
if (res < 0)
goto out;
switch (cmd) {
case SIOCGMIIPHY:
data->phy_id = R8152_PHY_ID; /* Internal PHY */
break;
case SIOCGMIIREG:
mutex_lock(&tp->control);
data->val_out = r8152_mdio_read(tp, data->reg_num);
mutex_unlock(&tp->control);
break;
case SIOCSMIIREG:
if (!capable(CAP_NET_ADMIN)) {
res = -EPERM;
break;
}
mutex_lock(&tp->control);
r8152_mdio_write(tp, data->reg_num, data->val_in);
mutex_unlock(&tp->control);
break;
default:
res = -EOPNOTSUPP;
}
usb_autopm_put_interface(tp->intf);
out:
return res;
}
static int rtl8152_change_mtu(struct net_device *dev, int new_mtu)
{
struct r8152 *tp = netdev_priv(dev);
int ret;
switch (tp->version) {
case RTL_VER_01:
case RTL_VER_02:
return eth_change_mtu(dev, new_mtu);
default:
break;
}
if (new_mtu < 68 || new_mtu > RTL8153_MAX_MTU)
return -EINVAL;
ret = usb_autopm_get_interface(tp->intf);
if (ret < 0)
return ret;
mutex_lock(&tp->control);
dev->mtu = new_mtu;
if (netif_running(dev) && netif_carrier_ok(dev))
r8153_set_rx_early_size(tp);
mutex_unlock(&tp->control);
usb_autopm_put_interface(tp->intf);
return ret;
}
static const struct net_device_ops rtl8152_netdev_ops = {
.ndo_open = rtl8152_open,
.ndo_stop = rtl8152_close,
.ndo_do_ioctl = rtl8152_ioctl,
.ndo_start_xmit = rtl8152_start_xmit,
.ndo_tx_timeout = rtl8152_tx_timeout,
.ndo_set_features = rtl8152_set_features,
.ndo_set_rx_mode = rtl8152_set_rx_mode,
.ndo_set_mac_address = rtl8152_set_mac_address,
.ndo_change_mtu = rtl8152_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_features_check = rtl8152_features_check,
};
static void r8152b_get_version(struct r8152 *tp)
{
u32 ocp_data;
u16 version;
ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR1);
version = (u16)(ocp_data & VERSION_MASK);
switch (version) {
case 0x4c00:
tp->version = RTL_VER_01;
break;
case 0x4c10:
tp->version = RTL_VER_02;
break;
case 0x5c00:
tp->version = RTL_VER_03;
tp->mii.supports_gmii = 1;
break;
case 0x5c10:
tp->version = RTL_VER_04;
tp->mii.supports_gmii = 1;
break;
case 0x5c20:
tp->version = RTL_VER_05;
tp->mii.supports_gmii = 1;
break;
case 0x5c30:
tp->version = RTL_VER_06;
tp->mii.supports_gmii = 1;
break;
default:
netif_info(tp, probe, tp->netdev,
"Unknown version 0x%04x\n", version);
break;
}
}
static void rtl8152_unload(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
if (tp->version != RTL_VER_01)
r8152_power_cut_en(tp, true);
}
static void rtl8153_unload(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return;
r8153_power_cut_en(tp, false);
}
static int rtl_ops_init(struct r8152 *tp)
{
struct rtl_ops *ops = &tp->rtl_ops;
int ret = 0;
switch (tp->version) {
case RTL_VER_01:
case RTL_VER_02:
ops->init = r8152b_init;
ops->enable = rtl8152_enable;
ops->disable = rtl8152_disable;
ops->up = rtl8152_up;
ops->down = rtl8152_down;
ops->unload = rtl8152_unload;
ops->eee_get = r8152_get_eee;
ops->eee_set = r8152_set_eee;
ops->in_nway = rtl8152_in_nway;
break;
case RTL_VER_03:
case RTL_VER_04:
case RTL_VER_05:
case RTL_VER_06:
ops->init = r8153_init;
ops->enable = rtl8153_enable;
ops->disable = rtl8153_disable;
ops->up = rtl8153_up;
ops->down = rtl8153_down;
ops->unload = rtl8153_unload;
ops->eee_get = r8153_get_eee;
ops->eee_set = r8153_set_eee;
ops->in_nway = rtl8153_in_nway;
break;
default:
ret = -ENODEV;
netif_err(tp, probe, tp->netdev, "Unknown Device\n");
break;
}
return ret;
}
static int rtl8152_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
struct usb_device *udev = interface_to_usbdev(intf);
struct r8152 *tp;
struct net_device *netdev;
int ret;
if (udev->actconfig->desc.bConfigurationValue != 1) {
usb_driver_set_configuration(udev, 1);
return -ENODEV;
}
usb_reset_device(udev);
netdev = alloc_etherdev(sizeof(struct r8152));
if (!netdev) {
dev_err(&intf->dev, "Out of memory\n");
return -ENOMEM;
}
SET_NETDEV_DEV(netdev, &intf->dev);
tp = netdev_priv(netdev);
tp->msg_enable = 0x7FFF;
tp->udev = udev;
tp->netdev = netdev;
tp->intf = intf;
r8152b_get_version(tp);
ret = rtl_ops_init(tp);
if (ret)
goto out;
mutex_init(&tp->control);
INIT_DELAYED_WORK(&tp->schedule, rtl_work_func_t);
netdev->netdev_ops = &rtl8152_netdev_ops;
netdev->watchdog_timeo = RTL8152_TX_TIMEOUT;
netdev->features |= NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM |
NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_TX;
netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_FRAGLIST |
NETIF_F_IPV6_CSUM | NETIF_F_TSO6 |
NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX;
netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
netdev->ethtool_ops = &ops;
netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
tp->mii.dev = netdev;
tp->mii.mdio_read = read_mii_word;
tp->mii.mdio_write = write_mii_word;
tp->mii.phy_id_mask = 0x3f;
tp->mii.reg_num_mask = 0x1f;
tp->mii.phy_id = R8152_PHY_ID;
switch (udev->speed) {
case USB_SPEED_SUPER:
tp->coalesce = COALESCE_SUPER;
break;
case USB_SPEED_HIGH:
tp->coalesce = COALESCE_HIGH;
break;
default:
tp->coalesce = COALESCE_SLOW;
break;
}
intf->needs_remote_wakeup = 1;
tp->rtl_ops.init(tp);
set_ethernet_addr(tp);
usb_set_intfdata(intf, tp);
netif_napi_add(netdev, &tp->napi, r8152_poll, RTL8152_NAPI_WEIGHT);
ret = register_netdev(netdev);
if (ret != 0) {
netif_err(tp, probe, netdev, "couldn't register the device\n");
goto out1;
}
if (!rtl_can_wakeup(tp))
__rtl_set_wol(tp, 0);
tp->saved_wolopts = __rtl_get_wol(tp);
if (tp->saved_wolopts)
device_set_wakeup_enable(&udev->dev, true);
else
device_set_wakeup_enable(&udev->dev, false);
netif_info(tp, probe, netdev, "%s\n", DRIVER_VERSION);
return 0;
out1:
netif_napi_del(&tp->napi);
usb_set_intfdata(intf, NULL);
out:
free_netdev(netdev);
return ret;
}
static void rtl8152_disconnect(struct usb_interface *intf)
{
struct r8152 *tp = usb_get_intfdata(intf);
usb_set_intfdata(intf, NULL);
if (tp) {
struct usb_device *udev = tp->udev;
if (udev->state == USB_STATE_NOTATTACHED)
set_bit(RTL8152_UNPLUG, &tp->flags);
netif_napi_del(&tp->napi);
unregister_netdev(tp->netdev);
tp->rtl_ops.unload(tp);
free_netdev(tp->netdev);
}
}
#define REALTEK_USB_DEVICE(vend, prod) \
.match_flags = USB_DEVICE_ID_MATCH_DEVICE | \
USB_DEVICE_ID_MATCH_INT_CLASS, \
.idVendor = (vend), \
.idProduct = (prod), \
.bInterfaceClass = USB_CLASS_VENDOR_SPEC \
}, \
{ \
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO | \
USB_DEVICE_ID_MATCH_DEVICE, \
.idVendor = (vend), \
.idProduct = (prod), \
.bInterfaceClass = USB_CLASS_COMM, \
.bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \
.bInterfaceProtocol = USB_CDC_PROTO_NONE
/* table of devices that work with this driver */
static struct usb_device_id rtl8152_table[] = {
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
{REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)},
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)},
{REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)},
{}
};
MODULE_DEVICE_TABLE(usb, rtl8152_table);
static struct usb_driver rtl8152_driver = {
.name = MODULENAME,
.id_table = rtl8152_table,
.probe = rtl8152_probe,
.disconnect = rtl8152_disconnect,
.suspend = rtl8152_suspend,
.resume = rtl8152_resume,
.reset_resume = rtl8152_reset_resume,
.pre_reset = rtl8152_pre_reset,
.post_reset = rtl8152_post_reset,
.supports_autosuspend = 1,
.disable_hub_initiated_lpm = 1,
};
module_usb_driver(rtl8152_driver);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
^ permalink raw reply
* Re: [PATCH] net/mlx5: drop duplicate header delay.h
From: Saeed Mahameed @ 2016-11-24 15:25 UTC (permalink / raw)
To: Geliang Tang
Cc: Saeed Mahameed, Matan Barak, Leon Romanovsky, Linux Netdev List,
linux-rdma, linux-kernel
In-Reply-To: <03d5a2a0f03458cdb4f2b139cfabc11b5c334f95.1479990943.git.geliangtang@gmail.com>
On Thu, Nov 24, 2016 at 3:58 PM, Geliang Tang <geliangtang@gmail.com> wrote:
> Drop duplicate header delay.h from mlx5/core/main.c.
>
> Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
^ permalink raw reply
* Re: wl1251 & mac address & calibration data
From: Ivaylo Dimitrov @ 2016-11-24 15:31 UTC (permalink / raw)
To: Pali Rohár, Sebastian Reichel
Cc: Pavel Machek, Michal Kazior, Kalle Valo, Aaro Koskinen,
Tony Lindgren, linux-wireless, Network Development, linux-kernel
In-Reply-To: <20161124152045.GK13735@pali>
Hi,
On 24.11.2016 17:20, Pali Rohár wrote:
>
> In case when wl1251 is statically linked into kernel image, it is loaded
> and initialized before even userspace applications starts.
>
Which leaves no option, but integrating libcal into kernel :).
Ivo
^ permalink raw reply
* Re: [PATCH iproute2 0/2] tc/cls_flower: Support for ip tunnel metadata set/release/classify
From: Jiri Benc @ 2016-11-24 15:33 UTC (permalink / raw)
To: Amir Vadai
Cc: Stephen Hemminger, David S. Miller, netdev, Or Gerlitz,
Hadar Har-Zion, Roi Dayan
In-Reply-To: <20161124150633.GA27727@office.localdomain>
On Thu, 24 Nov 2016 17:06:33 +0200, Amir Vadai wrote:
> So you mean to just unconditionally call skb_dst_drop() from
> act_mirred()?
That's one option. Or just leave the dst there, it shouldn't matter?
(Except for forwarding to a different tunnel but as I said, it's a
corner case and we may have a "tunnel_key unset" action for that.)
> The use case we already have that uses the release action is the
> hardware offload support, which is already in the kernel.
> It is using the "tunnel_key release" action to signal the hardware to
> strip off the ip tunnel headers.
The tunnel headers must be removed upon reception on the tunnel
interface without specifying anything, because that's how the Linux
kernel behaves currently. If this is offloaded, this behavior must be
preserved. I don't see how "tunnel_key release" might be used for
stripping the headers.
Jiri
^ permalink raw reply
* Re: [patch] net/mlx5: remove a duplicate condition
From: Saeed Mahameed @ 2016-11-24 15:30 UTC (permalink / raw)
To: Dan Carpenter
Cc: Saeed Mahameed, Matan Barak, Leon Romanovsky, Linux Netdev List,
linux-rdma, linux-kernel, kernel-janitors
In-Reply-To: <20161124110345.GC17225@mwanda>
On Thu, Nov 24, 2016 at 1:03 PM, Dan Carpenter <dan.carpenter@oracle.com> wrote:
> We verified that MLX5_FLOW_CONTEXT_ACTION_COUNT was set on the first
> line of the function so we don't need to check again here.
>
> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
^ permalink raw reply
* Re: [PATCH v2] cpsw: ethtool: add support for getting/setting EEE registers
From: Andrew Lunn @ 2016-11-24 15:38 UTC (permalink / raw)
To: Yegor Yefremov
Cc: Florian Fainelli, netdev, linux-omap@vger.kernel.org,
Grygorii Strashko, N, Mugunthan V, Rami Rosen
In-Reply-To: <CAGm1_ktuAbt0pr_NJu2GLDOavFV_bvMBmH5RvDxr5AyXvfwB3A@mail.gmail.com>
> As for enabling advertising and correct working of cpsw do you mean it
> would be better to disable EEE in any PHY on cpsw initialization as
> long as cpsw doesn't provide support for EEE?
>
> We observe some strange behavior with our gigabit PHYs and a link
> partner in a EEE-capable unmanaged NetGear switch. Disabling
> advertising seems to help. Though we're still investigating the issue.
Hi Florian
Am i right in saying, a PHY should not advertise EEE until the MAC
driver calls phy_init_eee(), indicating the MAC supports EEE?
If so, it looks like we need to change a few of the PHY drivers, in
particular, the bcm-*.c.
Andrew
^ permalink raw reply
* [PATCH net 1/1] driver: ipvlan: Fix one possible memleak in ipvlan_link_new
From: fgao @ 2016-11-24 15:39 UTC (permalink / raw)
To: davem, maheshb, edumazet, netdev, gfree.wind; +Cc: Gao Feng
From: Gao Feng <fgao@ikuai8.com>
When ipvlan_link_new fails and creates one ipvlan port, it does not
destroy the ipvlan port created. It causes mem leak and the physical
device contains invalid ipvlan data.
Signed-off-by: Gao Feng <fgao@ikuai8.com>
---
drivers/net/ipvlan/ipvlan_main.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index f442eb3..0fef178 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -497,6 +497,7 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
struct net_device *phy_dev;
int err;
u16 mode = IPVLAN_MODE_L3;
+ bool create = false;
if (!tb[IFLA_LINK])
return -EINVAL;
@@ -513,6 +514,7 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
err = ipvlan_port_create(phy_dev);
if (err < 0)
return err;
+ create = true;
}
if (data && data[IFLA_IPVLAN_MODE])
@@ -536,22 +538,27 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
err = register_netdevice(dev);
if (err < 0)
- return err;
+ goto destroy_ipvlan_port;
err = netdev_upper_dev_link(phy_dev, dev);
if (err) {
- unregister_netdevice(dev);
- return err;
+ goto unregister_netdev;
}
err = ipvlan_set_port_mode(port, mode);
if (err) {
- unregister_netdevice(dev);
- return err;
+ goto unregister_netdev;
}
list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
+
+unregister_netdev:
+ unregister_netdevice(dev);
+destroy_ipvlan_port:
+ if (create)
+ ipvlan_port_destroy(phy_dev);
+ return err;
}
static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
--
1.9.1
^ permalink raw reply related
* Re: [net-next PATCH v1 1/2] net: dt-bindings: add RGMII TX delay configuration to meson8b-dwmac
From: Andrew Lunn @ 2016-11-24 15:48 UTC (permalink / raw)
To: Martin Blumenstingl
Cc: linux-amlogic, devicetree, netdev, davem, khilman, mark.rutland,
robh+dt, linux-arm-kernel, alexandre.torgue, peppe.cavallaro,
carlo, jbrunet
In-Reply-To: <20161124143417.10178-2-martin.blumenstingl@googlemail.com>
> The configuration values are provided as preprocessor macros to make the
> devicetree files easier to read.
Hi Martin
If i'm reading the code/comments correctly, you can set the delay to
0, 2, 4 or 6ns? So calling this property amlogic,tx-delay-ns would be
even easier to read.
Andrew
^ permalink raw reply
* Re: [PATCH net-next 1/2] openvswitch: Add a missing break statement.
From: Jiri Benc @ 2016-11-24 15:55 UTC (permalink / raw)
To: Jarno Rajahalme; +Cc: netdev
In-Reply-To: <1479874174-75329-1-git-send-email-jarno@ovn.org>
On Tue, 22 Nov 2016 20:09:33 -0800, Jarno Rajahalme wrote:
> Add a break statement to prevent fall-through from
> OVS_KEY_ATTR_ETHERNET to OVS_KEY_ATTR_TUNNEL. Without the break
> actions setting ethernet addresses fail to validate with log messages
> complaining about invalid tunnel attributes.
>
> Fixes: 0a6410fbde ("openvswitch: netlink: support L3 packets")
> Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Jiri Benc <jbenc@redhat.com>
It's a good practice to CC the one who messed up :-) Please do that
next time.
Thanks for noticing the bug and fixing it.
Jiri
^ permalink raw reply
* Re: [net-next PATCH v1 0/2] stmmac: dwmac-meson8b: configurable RGMII TX delay
From: Jerome Brunet @ 2016-11-24 15:56 UTC (permalink / raw)
To: Martin Blumenstingl, linux-amlogic, devicetree, netdev, davem,
khilman, mark.rutland, robh+dt
Cc: linux-arm-kernel, alexandre.torgue, peppe.cavallaro, carlo
In-Reply-To: <20161124143417.10178-1-martin.blumenstingl@googlemail.com>
On Thu, 2016-11-24 at 15:34 +0100, Martin Blumenstingl wrote:
> Currently the dwmac-meson8b stmmac glue driver uses a hardcoded 1/4
> cycle TX clock delay. This seems to work fine for many boards (for
> example Odroid-C2 or Amlogic's reference boards) but there are some
> others where TX traffic is simply broken.
> There are probably multiple reasons why it's working on some boards
> while it's broken on others:
> - some of Amlogic's reference boards are using a Micrel PHY
> - hardware circuit design
> - maybe more...
>
> This raises a question though:
> Which device is supposed to enable the TX delay when both MAC and PHY
> support it? And should we implement it for each PHY / MAC separately
> or should we think about a more generic solution (currently it's not
> possible to disable the TX delay generated by the RTL8211F PHY via
> devicetree when using phy-mode "rgmii")?
Actually you can skip the part which activate the Tx-delay on the phy
by setting "phy-mode = "rgmii-id" instead of "rgmii"
phy->interface will no longer be PHY_INTERFACE_MODE_RGMII
but PHY_INTERFACE_MODE_RGMII_ID.
>
> iperf3 results on my Mecool BB2 board (Meson GXM, RTL8211F PHY) with
> TX clock delay disabled on the MAC (as it's enabled in the PHY
> driver).
> TX throughput was virtually zero before:
> $ iperf3 -c 192.168.1.100 -R
> Connecting to host 192.168.1.100, port 5201
> Reverse mode, remote host 192.168.1.100 is sending
> [ 4] local 192.168.1.206 port 52828 connected to 192.168.1.100 port
> 5201
> [ ID] Interval Transfer Bandwidth
> [ 4] 0.00-1.00 sec 108 MBytes 901
> Mbits/sec
> [ 4] 1.00-2.00 sec 94.2 MBytes 791
> Mbits/sec
> [ 4] 2.00-3.00 sec 96.5 MBytes 810
> Mbits/sec
> [ 4] 3.00-4.00 sec 96.2 MBytes 808
> Mbits/sec
> [ 4] 4.00-5.00 sec 96.6 MBytes 810
> Mbits/sec
> [ 4] 5.00-6.00 sec 96.5 MBytes 810
> Mbits/sec
> [ 4] 6.00-7.00 sec 96.6 MBytes 810
> Mbits/sec
> [ 4] 7.00-8.00 sec 96.5 MBytes 809
> Mbits/sec
> [ 4] 8.00-9.00 sec 105 MBytes 884
> Mbits/sec
> [ 4] 9.00-10.00 sec 111 MBytes 934
> Mbits/sec
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bandwidth Retr
> [ 4] 0.00-10.00 sec 1000 MBytes 839
> Mbits/sec 0 sender
> [ 4] 0.00-10.00 sec 998 MBytes 837
> Mbits/sec receiver
>
> iperf Done.
> $ iperf3 -c 192.168.1.100
> Connecting to host 192.168.1.100, port 5201
> [ 4] local 192.168.1.206 port 52832 connected to 192.168.1.100 port
> 5201
> [ ID] Interval Transfer Bandwidth Retr Cwnd
> [ 4] 0.00-1.01 sec 99.5 MBytes 829 Mbits/sec 117 139
> KBytes
> [ 4] 1.01-2.00 sec 105 MBytes 884 Mbits/sec 129 70.7
> KBytes
> [ 4] 2.00-3.01 sec 107 MBytes 889 Mbits/sec 106 187
> KBytes
> [ 4] 3.01-4.01 sec 105 MBytes 878 Mbits/sec 92 143
> KBytes
> [ 4] 4.01-5.00 sec 105 MBytes 882 Mbits/sec 140 129
> KBytes
> [ 4] 5.00-6.01 sec 106 MBytes 883 Mbits/sec 115 195
> KBytes
> [ 4] 6.01-7.00 sec 102 MBytes 863 Mbits/sec 133 70.7
> KBytes
> [ 4] 7.00-8.01 sec 106 MBytes 884 Mbits/sec 143 97.6
> KBytes
> [ 4] 8.01-9.01 sec 104 MBytes 875 Mbits/sec 124 107
> KBytes
> [ 4] 9.01-10.01 sec 105 MBytes 876 Mbits/sec 90 139
> KBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bandwidth Retr
> [ 4] 0.00-10.01 sec 1.02 GBytes 874
> Mbits/sec 1189 sender
> [ 4] 0.00-10.01 sec 1.02 GBytes 873
> Mbits/sec receiver
>
> iperf Done.
>
>
> Martin Blumenstingl (2):
> net: dt-bindings: add RGMII TX delay configuration to meson8b-dwmac
> net: stmmac: dwmac-meson8b: make the RGMII TX delay configurable
>
> Documentation/devicetree/bindings/net/meson-dwmac.txt | 11
> +++++++++++
> drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 16
> +++++++++++-----
> include/dt-bindings/net/dwmac-meson8b.h | 18
> ++++++++++++++++++
> 3 files changed, 40 insertions(+), 5 deletions(-)
> create mode 100644 include/dt-bindings/net/dwmac-meson8b.h
>
^ permalink raw reply
* Re: [RFC PATCH net v2 0/3] Fix OdroidC2 Gigabit Tx link issue
From: Jerome Brunet @ 2016-11-24 16:01 UTC (permalink / raw)
To: Martin Blumenstingl
Cc: netdev-u79uwXL29TY76Z2rM5mHXA, devicetree-u79uwXL29TY76Z2rM5mHXA,
Florian Fainelli, Carlo Caione, Kevin Hilman, Giuseppe Cavallaro,
Alexandre TORGUE, Andre Roth, Neil Armstrong,
linux-amlogic-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <CAFBinCCexmS_z9FCX-ud5NgGhhP7xJ_cLxpC7TEc=mLAdafosg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
On Thu, 2016-11-24 at 15:40 +0100, Martin Blumenstingl wrote:
> Hi Jerome,
>
> On Mon, Nov 21, 2016 at 4:35 PM, Jerome Brunet <jbrunet-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
> wrote:
> >
> > This patchset fixes an issue with the OdroidC2 board (DWMAC +
> > RTL8211F).
> > Initially reported as a low Tx throughput issue at gigabit speed,
> > the
> > platform enters LPI too often. This eventually break the link (both
> > Tx
> > and Rx), and require to bring the interface down and up again to
> > get the
> > Rx path working again.
> >
> > The root cause of this issue is not fully understood yet but
> > disabling EEE
> > advertisement on the PHY prevent this feature to be negotiated.
> > With this change, the link is stable and reliable, with the
> > expected
> > throughput performance.
> I have just sent a series which allows configuring the TX delay on
> the
> MAC (dwmac-meson8b glue) side: [0]
> Disabling the TX delay generated by the MAC fixes TX throughput for
> me, even when leaving EEE enabled in the RTL8211F PHY driver!
>
> Unfortunately the RTL8211F PHY is a black-box for the community
> because there is no public datasheeet available.
> *maybe* (pure speculation!) they're enabling the TX delay based on
> some internal magic only when EEE is enabled.
Hi already tried acting on the register setting the TX_delay. I also
tried on the PHY. I never been able to improve situation on the
Odroic2. Only disabling EEE improved the situation.
To make sure, i tried again with your patch but the result remains
unchanged. With Tx_delay disabled (either the mac or the phy), the
situation is even worse, it seems that nothing gets through
>
> Jerome, could you please re-test the behavior on your Odroid-C2 when
> you have EEE still enabled but the TX-delay disabled?
> In my case throughput is fine, and "$ ethtool -S eth0 | grep lpi"
> gives:
> irq_tx_path_in_lpi_mode_n: 0
> irq_tx_path_exit_lpi_mode_n: 0
> irq_rx_path_in_lpi_mode_n: 0
> irq_rx_path_exit_lpi_mode_n: 0
>
I still have lpi interrupts on my side. I don't get how a properly
configured tx_delay would disable EEE. I must be missing something
here.
>
> Regards,
> Martin
>
>
> [0] http://lists.infradead.org/pipermail/linux-amlogic/2016-November/
> 001674.html
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: stmmac ethernet in kernel 4.9-rc6: coalescing related pauses.
From: David Miller @ 2016-11-24 16:04 UTC (permalink / raw)
To: pavel; +Cc: peppe.cavallaro, netdev, linux-kernel
In-Reply-To: <20161124085506.GA25007@amd>
From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 24 Nov 2016 09:55:06 +0100
> Hi!
>
>> I'm debugging strange delays during transmit in stmmac driver. They
>> seem to be present in 4.4 kernel (and older kernels, too). Workload is
>> burst of udp packets being sent, pause, burst of udp packets, ...
>>
>> Test code is attached, I use these parameters for testing:
>>
>> ./udp-test raw 10.0.0.6 1234 1000 100 30
>>
>> The delays seem to be related to coalescing:
>>
>> drivers/net/ethernet/stmicro/stmmac/common.h
>> #define STMMAC_COAL_TX_TIMER 40000
>> #define STMMAC_MAX_COAL_TX_TICK 100000
>> #define STMMAC_TX_MAX_FRAMES 256
>>
>> If I lower the parameters, delays are gone, but I get netdev watchdog
>> backtrace followed by broken driver.
>>
>> Any ideas what is going on there?
>
> 4.9-rc6 still has the delays. With the
>
> #define STMMAC_COAL_TX_TIMER 1000
> #define STMMAC_TX_MAX_FRAMES 2
>
> settings, delays go away, and driver still works. (It fails fairly
> fast in 4.4). Good news. But the question still is: what is going on
> there?
256 packets looks way too large for being a trigger for aborting the
TX coalescing timer.
Looking more deeply into this, the driver is using non-highres timers
to implement the TX coalescing. This simply cannot work.
1 HZ, which is the lowest granularity of non-highres timers in the
kernel, is variable as well as already too large of a delay for
effective TX coalescing.
I seriously think that the TX coalescing support should be ripped out
or disabled entirely until it is implemented properly in this driver.
^ permalink raw reply
* Re: [net-next PATCH v1 0/2] stmmac: dwmac-meson8b: configurable RGMII TX delay
From: Jerome Brunet @ 2016-11-24 16:08 UTC (permalink / raw)
To: Martin Blumenstingl, linux-amlogic, devicetree, netdev, davem,
khilman, mark.rutland, robh+dt
Cc: linux-arm-kernel, alexandre.torgue, peppe.cavallaro, carlo
In-Reply-To: <20161124143417.10178-1-martin.blumenstingl@googlemail.com>
On Thu, 2016-11-24 at 15:34 +0100, Martin Blumenstingl wrote:
> Currently the dwmac-meson8b stmmac glue driver uses a hardcoded 1/4
> cycle TX clock delay. This seems to work fine for many boards (for
> example Odroid-C2 or Amlogic's reference boards) but there are some
> others where TX traffic is simply broken.
> There are probably multiple reasons why it's working on some boards
> while it's broken on others:
> - some of Amlogic's reference boards are using a Micrel PHY
> - hardware circuit design
> - maybe more...
>
> This raises a question though:
> Which device is supposed to enable the TX delay when both MAC and PHY
> support it? And should we implement it for each PHY / MAC separately
> or should we think about a more generic solution (currently it's not
> possible to disable the TX delay generated by the RTL8211F PHY via
> devicetree when using phy-mode "rgmii")?
>
> iperf3 results on my Mecool BB2 board (Meson GXM, RTL8211F PHY) with
> TX clock delay disabled on the MAC (as it's enabled in the PHY
> driver).
> TX throughput was virtually zero before:
> $ iperf3 -c 192.168.1.100 -R
> Connecting to host 192.168.1.100, port 5201
> Reverse mode, remote host 192.168.1.100 is sending
> [ 4] local 192.168.1.206 port 52828 connected to 192.168.1.100 port
> 5201
> [ ID] Interval Transfer Bandwidth
> [ 4] 0.00-1.00 sec 108 MBytes 901
> Mbits/sec
> [ 4] 1.00-2.00 sec 94.2 MBytes 791
> Mbits/sec
> [ 4] 2.00-3.00 sec 96.5 MBytes 810
> Mbits/sec
> [ 4] 3.00-4.00 sec 96.2 MBytes 808
> Mbits/sec
> [ 4] 4.00-5.00 sec 96.6 MBytes 810
> Mbits/sec
> [ 4] 5.00-6.00 sec 96.5 MBytes 810
> Mbits/sec
> [ 4] 6.00-7.00 sec 96.6 MBytes 810
> Mbits/sec
> [ 4] 7.00-8.00 sec 96.5 MBytes 809
> Mbits/sec
> [ 4] 8.00-9.00 sec 105 MBytes 884
> Mbits/sec
> [ 4] 9.00-10.00 sec 111 MBytes 934
> Mbits/sec
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bandwidth Retr
> [ 4] 0.00-10.00 sec 1000 MBytes 839
> Mbits/sec 0 sender
> [ 4] 0.00-10.00 sec 998 MBytes 837
> Mbits/sec receiver
>
> iperf Done.
> $ iperf3 -c 192.168.1.100
> Connecting to host 192.168.1.100, port 5201
> [ 4] local 192.168.1.206 port 52832 connected to 192.168.1.100 port
> 5201
> [ ID] Interval Transfer Bandwidth Retr Cwnd
> [ 4] 0.00-1.01 sec 99.5 MBytes 829 Mbits/sec 117 139
> KBytes
> [ 4] 1.01-2.00 sec 105 MBytes 884 Mbits/sec 129 70.7
> KBytes
> [ 4] 2.00-3.01 sec 107 MBytes 889 Mbits/sec 106 187
> KBytes
> [ 4] 3.01-4.01 sec 105 MBytes 878 Mbits/sec 92 143
> KBytes
> [ 4] 4.01-5.00 sec 105 MBytes 882 Mbits/sec 140 129
> KBytes
> [ 4] 5.00-6.01 sec 106 MBytes 883 Mbits/sec 115 195
> KBytes
> [ 4] 6.01-7.00 sec 102 MBytes 863 Mbits/sec 133 70.7
> KBytes
> [ 4] 7.00-8.01 sec 106 MBytes 884 Mbits/sec 143 97.6
> KBytes
> [ 4] 8.01-9.01 sec 104 MBytes 875 Mbits/sec 124 107
> KBytes
> [ 4] 9.01-10.01 sec 105 MBytes 876 Mbits/sec 90 139
> KBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bandwidth Retr
> [ 4] 0.00-10.01 sec 1.02 GBytes 874
> Mbits/sec 1189 sender
> [ 4] 0.00-10.01 sec 1.02 GBytes 873
> Mbits/sec receiver
>
Cool, one more board working ;)
I tried your patch on another board (MXQ_V2.1, with sheep printed on
the PCB). It 's not improving the situation for this one unfortunately.
Actually I already tried playing with the TX delay on the MAC and PHY
but I could get any good results with the boards I have.
It is strange that we can adjust the delay by 2ns steps, when delay
seens by the phy should be 2ns ...
> iperf Done.
>
>
> Martin Blumenstingl (2):
> net: dt-bindings: add RGMII TX delay configuration to meson8b-dwmac
> net: stmmac: dwmac-meson8b: make the RGMII TX delay configurable
>
> Documentation/devicetree/bindings/net/meson-dwmac.txt | 11
> +++++++++++
> drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 16
> +++++++++++-----
> include/dt-bindings/net/dwmac-meson8b.h | 18
> ++++++++++++++++++
> 3 files changed, 40 insertions(+), 5 deletions(-)
> create mode 100644 include/dt-bindings/net/dwmac-meson8b.h
>
^ permalink raw reply
* Re: wl1251 & mac address & calibration data
From: Sebastian Reichel @ 2016-11-24 16:08 UTC (permalink / raw)
To: Pali Rohár
Cc: Pavel Machek, Michal Kazior, Kalle Valo, Ivaylo Dimitrov,
Aaro Koskinen, Tony Lindgren, linux-wireless, Network Development,
linux-kernel
In-Reply-To: <20161124152045.GK13735@pali>
[-- Attachment #1: Type: text/plain, Size: 2902 bytes --]
Hi,
On Thu, Nov 24, 2016 at 04:20:45PM +0100, Pali Rohár wrote:
> On Thursday 24 November 2016 16:13:17 Sebastian Reichel wrote:
> > On Thu, Nov 24, 2016 at 09:33:29AM +0100, Pali Rohár wrote:
> > > On Thursday 24 November 2016 08:51:04 Pavel Machek wrote:
> > > > > > "ifconfig hw ether XX" normally sets the address. I guess that's
> > > > > > ioctl?
> > > > >
> > > > > This sets temporary address and it is ioctl. IIRC same as what ethtool
> > > > > uses. (ifconfig is already deprecated).
> > > > >
> > > > > > And I guess we should use similar mechanism for permanent
> > > > > > address.
> > > > >
> > > > > I'm not sure here... Above ioctl ↑↑↑ is for changing temporary mac
> > > > > address. But here we do not want to change permanent mac address. We
> > > > > want to tell kernel driver current permanent mac address which is
> > > > > stored
> > > >
> > > > Well... I'd still use similar mechanism :-).
> > >
> > > Thats problematic, because in time when wlan0 interface is registered
> > > into system and visible in ifconfig output it already needs to have
> > > permanent mac address assigned.
> > >
> > > We should assign permanent mac address before wlan0 of wl1251 is
> > > registered into system.
> >
> > You can just add the MAC address to the NVS data, which is also
> > required for the device initialization.
>
> NVS data file has fixed size, there is IIRC no place for it.
>
> But one of my suggestion was to use another request_firmware for MAC
> address. So this is similar to what you are proposing, as NVS data are
> loaded by request_firmware too...
Just append it to NVS data and modify the size check accordingly?
> > I wonder if those information could be put into DT. Iirc some
> > network devices get their MAC address from DT. Maybe we can add
> > all NVS info to DT? How much data is it?
>
> Proprietary, signed and closed bootloader NOLO does not support DT. So
> for booting you need to append DTS file to kernel image.
Yeah, so NOLO without U-Boot will depend on userspace to fixup DT.
> U-Boot is optional and can be used as intermediate bootloader between
> NOLO and kernel. But still it has problems with reading from nand, so
> cannot read NVS data nor MAC address.
It may in the future?
> > Userspace application can add all those information to the DT
> > using a DT overlay. Also the u-boot could parse and add it at
> > some point in the future.
>
> In case when wl1251 is statically linked into kernel image, it is loaded
> and initialized before even userspace applications starts.
>
> So no... adding NVS data or MAC address into DT or DT overlay is not a
> solution.
Actually with data loaded from DT you *can* load data quite early in
the boot process, while your suggestions always require userspace.
So you argument against yourself?
-- Sebastian
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* [PATCH] net/iucv: use explicit clean up labels in iucv_init()
From: Sebastian Andrzej Siewior @ 2016-11-24 16:10 UTC (permalink / raw)
To: Ursula Braun, tglx; +Cc: linux-kernel, rt, David S. Miller, linux-s390, netdev
In-Reply-To: <20161124141415.llqki5vo7nqdusqv@linutronix.de>
Ursula suggested to use explicit labels for clean up in the error path
instead of one `out_free' label which handles multiple exits.
Since the previous patch got already applied, here is a follow up patch.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
net/iucv/iucv.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index f0d6afc5d4a9..8f7ef167c45a 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -2038,16 +2038,16 @@ static int __init iucv_init(void)
rc = cpuhp_setup_state(CPUHP_NET_IUCV_PREPARE, "net/iucv:prepare",
iucv_cpu_prepare, iucv_cpu_dead);
if (rc)
- goto out_free;
+ goto out_dev;
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "net/iucv:online",
iucv_cpu_online, iucv_cpu_down_prep);
if (rc < 0)
- goto out_free;
+ goto out_prep;
iucv_online = rc;
rc = register_reboot_notifier(&iucv_reboot_notifier);
if (rc)
- goto out_free;
+ goto out_remove_hp;
ASCEBC(iucv_error_no_listener, 16);
ASCEBC(iucv_error_no_memory, 16);
ASCEBC(iucv_error_pathid, 16);
@@ -2061,11 +2061,11 @@ static int __init iucv_init(void)
out_reboot:
unregister_reboot_notifier(&iucv_reboot_notifier);
-out_free:
- if (iucv_online)
- cpuhp_remove_state(iucv_online);
+out_remove_hp:
+ cpuhp_remove_state(iucv_online);
+out_prep:
cpuhp_remove_state(CPUHP_NET_IUCV_PREPARE);
-
+out_dev:
root_device_unregister(iucv_root);
out_int:
unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt);
--
2.10.2
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox