* [RFC v2 PATCH 2/2] SCTP: Convert bind_addr_list locking to RCU
From: Vlad Yasevich @ 2007-09-12 19:46 UTC (permalink / raw)
To: netdev; +Cc: lksctp-developers, Vlad Yasevich
In-Reply-To: <11896263983281-git-send-email-vladislav.yasevich@hp.com>
Since the sctp_sockaddr_entry is now RCU enabled as part of
the patch to synchronize sctp_localaddr_list, it makes sense to
change all handling of these entries to RCU. This includes the
sctp_bind_addrs structure and it's list of bound addresses.
This list is currently protected by an external rw_lock and that
looks like an overkill. There are only 2 writers to the list:
bind()/bindx() calls, and BH processing of ASCONF-ACK chunks.
These are already seriealized via the socket lock, so they will
not step on each other. These are also relatively rare, so we
should be good with RCU.
The readers are varied and they are easily converted to RCU.
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
include/net/sctp/structs.h | 7 +--
net/sctp/associola.c | 14 +-----
net/sctp/bind_addr.c | 68 ++++++++++++++++++++----------
net/sctp/endpointola.c | 27 +++---------
net/sctp/ipv6.c | 12 ++---
net/sctp/protocol.c | 25 ++++-------
net/sctp/sm_make_chunk.c | 18 +++-----
net/sctp/socket.c | 98 ++++++++++++-------------------------------
8 files changed, 106 insertions(+), 163 deletions(-)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a89e361..c2fe2dc 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1155,7 +1155,9 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
int flags);
int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
__u8 use_as_src, gfp_t gfp);
-int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
+int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
+ void (*rcu_call)(struct rcu_head *,
+ void (*func)(struct rcu_head *)));
int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
struct sctp_sock *);
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
@@ -1226,9 +1228,6 @@ struct sctp_ep_common {
* bind_addr.address_list is our set of local IP addresses.
*/
struct sctp_bind_addr bind_addr;
-
- /* Protection during address list comparisons. */
- rwlock_t addr_lock;
};
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2ad1caf..9bad8ba 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the bind addr area. */
sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
- rwlock_init(&asoc->base.addr_lock);
asoc->state = SCTP_STATE_CLOSED;
@@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
{
struct sctp_transport *transport;
- sctp_read_lock(&asoc->base.addr_lock);
-
if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
(htons(asoc->peer.port) == paddr->v4.sin_port)) {
transport = sctp_assoc_lookup_paddr(asoc, paddr);
@@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
transport = NULL;
out:
- sctp_read_unlock(&asoc->base.addr_lock);
return transport;
}
@@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
const union sctp_addr *laddr)
{
- int found;
+ int found = 0;
- sctp_read_lock(&asoc->base.addr_lock);
if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) &&
sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
- sctp_sk(asoc->base.sk))) {
+ sctp_sk(asoc->base.sk)))
found = 1;
- goto out;
- }
- found = 0;
-out:
- sctp_read_unlock(&asoc->base.addr_lock);
return found;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 7fc369f..14f4c02 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -167,7 +167,11 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
INIT_LIST_HEAD(&addr->list);
INIT_RCU_HEAD(&addr->rcu);
- list_add_tail(&addr->list, &bp->address_list);
+
+ /* We always hold a socket lock when calling this function,
+ * so rcu_read_lock is not needed.
+ */
+ list_add_tail_rcu(&addr->list, &bp->address_list);
SCTP_DBG_OBJCNT_INC(addr);
return 0;
@@ -176,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
/* Delete an address from the bind address list in the SCTP_bind_addr
* structure.
*/
-int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
+int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr,
+ void (*rcu_call)(struct rcu_head *head,
+ void (*func)(struct rcu_head *head)))
{
- struct list_head *pos, *temp;
- struct sctp_sockaddr_entry *addr;
+ struct sctp_sockaddr_entry *addr, *temp;
- list_for_each_safe(pos, temp, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* We hold the socket lock when calling this function, so
+ * rcu_read_lock is not needed.
+ */
+ list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&addr->a, del_addr)) {
/* Found the exact match. */
- list_del(pos);
- kfree(addr);
- SCTP_DBG_OBJCNT_DEC(addr);
-
- return 0;
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
+ break;
}
}
+ /* Call the rcu callback provided in the args. This function is
+ * called by both BH packet processing and user side socket option
+ * processing, but it works on different lists in those 2 contexts.
+ * Each context provides it's own callback, whether call_rc_bh()
+ * or call_rcu(), to make sure that we wait an for appropriate time.
+ */
+ if (addr && !addr->valid) {
+ rcu_call(&addr->rcu, sctp_local_addr_free);
+ SCTP_DBG_OBJCNT_DEC(addr);
+ }
+
return -EINVAL;
}
@@ -302,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
struct sctp_sock *opt)
{
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
-
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (opt->pf->cmp_addr(&laddr->a, addr, opt))
- return 1;
+ int match = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
+ if (opt->pf->cmp_addr(&laddr->a, addr, opt)) {
+ match = 1;
+ break;
+ }
}
+ rcu_read_unlock();
- return 0;
+ return match;
}
/* Find the first address in the bind address list that is not present in
@@ -325,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
union sctp_addr *addr;
void *addr_buf;
struct sctp_af *af;
- struct list_head *pos;
int i;
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+ /* This is only called sctp_send_asconf_del_ip() and we hold
+ * the socket lock in that code patch, so that address list
+ * can't change.
+ */
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
addr_buf = (union sctp_addr *)addrs;
for (i = 0; i < addrcnt; i++) {
addr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(addr->v4.sin_family);
if (!af)
- return NULL;
+ break;
if (opt->pf->cmp_addr(&laddr->a, addr, opt))
break;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1404a9e..d888332 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);
- rwlock_init(&ep->base.addr_lock);
/* Remember who we are attached to. */
ep->base.sk = sk;
@@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
const union sctp_addr *laddr)
{
- struct sctp_endpoint *retval;
+ struct sctp_endpoint *retval = NULL;
- sctp_read_lock(&ep->base.addr_lock);
if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
- sctp_sk(ep->base.sk))) {
+ sctp_sk(ep->base.sk)))
retval = ep;
- goto out;
- }
}
- retval = NULL;
-
-out:
- sctp_read_unlock(&ep->base.addr_lock);
return retval;
}
@@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
list_for_each(pos, &ep->asocs) {
asoc = list_entry(pos, struct sctp_association, asocs);
if (rport == asoc->peer.port) {
- sctp_read_lock(&asoc->base.addr_lock);
*transport = sctp_assoc_lookup_paddr(asoc, paddr);
- sctp_read_unlock(&asoc->base.addr_lock);
if (*transport)
return asoc;
@@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
- struct list_head *pos;
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
- sctp_read_lock(&ep->base.addr_lock);
bp = &ep->base.bind_addr;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (sctp_has_association(&addr->a, paddr)) {
- sctp_read_unlock(&ep->base.addr_lock);
+ /* This function is called whith the socket lock held,
+ * so the address_list can not change.
+ */
+ list_for_each_entry_rcu(addr, &bp->address_list, list) {
+ if (sctp_has_association(&addr->a, paddr))
return 1;
- }
}
- sctp_read_unlock(&ep->base.addr_lock);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 54ff472..c8b0115 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -302,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
union sctp_addr *saddr)
{
struct sctp_bind_addr *bp;
- rwlock_t *addr_lock;
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
sctp_scope_t scope;
union sctp_addr *baddr = NULL;
__u8 matchlen = 0;
@@ -324,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
scope = sctp_scope(daddr);
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
/* Go through the bind address list and find the best source address
* that matches the scope of the destination address.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
if ((laddr->use_as_src) &&
(laddr->a.sa.sa_family == AF_INET6) &&
(scope <= sctp_scope(&laddr->a))) {
@@ -353,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
__FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
}
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
}
/* Make a copy of all potential local addresses. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4688559..35af75b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -223,7 +223,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
(copy_flags & SCTP_ADDR6_ALLOWED) &&
(copy_flags & SCTP_ADDR6_PEERSUPP)))) {
error = sctp_add_bind_addr(bp, &addr->a, 1,
- GFP_ATOMIC);
+ GFP_ATOMIC);
if (error)
goto end_copy;
}
@@ -427,9 +427,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
struct rtable *rt;
struct flowi fl;
struct sctp_bind_addr *bp;
- rwlock_t *addr_lock;
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
struct dst_entry *dst = NULL;
union sctp_addr dst_saddr;
@@ -458,23 +456,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
goto out;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
if (dst) {
/* Walk through the bind address list and look for a bind
* address that matches the source address of the returned dst.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry,
- list);
- if (!laddr->use_as_src)
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid || !laddr->use_as_src)
continue;
sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
}
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
/* None of the bound addresses match the source address of the
* dst. So release it.
@@ -486,10 +481,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
/* Walk through the bind address list and try to get a dst that
* matches a bind address as the source address.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
if ((laddr->use_as_src) &&
(AF_INET == laddr->a.sa.sa_family)) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
@@ -501,7 +496,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
}
out_unlock:
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
out:
if (dst)
SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 79856c9..0dc965c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1531,7 +1531,7 @@ no_hmac:
/* Also, add the destination address. */
if (list_empty(&retval->base.bind_addr.address_list)) {
sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
- GFP_ATOMIC);
+ GFP_ATOMIC);
}
retval->next_tsn = retval->c.initial_tsn;
@@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
- list_for_each(pos, &bp->address_list) {
- saddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* This is always done in BH context with a socket lock
+ * held, so the list can not change.
+ */
+ list_for_each_entry_rcu(saddr, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&saddr->a, &addr))
saddr->use_as_src = 1;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
break;
case SCTP_PARAM_DEL_IP:
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
- retval = sctp_del_bind_addr(bp, &addr);
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
+ retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh);
list_for_each(pos, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport,
transports);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a3acf78..cb253ab 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
if (!bp->port)
bp->port = inet_sk(sk)->num;
- /* Add the address to the bind address list. */
- sctp_local_bh_disable();
- sctp_write_lock(&ep->base.addr_lock);
-
- /* Use GFP_ATOMIC since BHs are disabled. */
+ /* Add the address to the bind address list.
+ * Use GFP_ATOMIC since BHs will be disabled.
+ */
ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
- sctp_write_unlock(&ep->base.addr_lock);
- sctp_local_bh_enable();
/* Copy back into socket for getsockname() use. */
if (!ret) {
@@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
if (i < addrcnt)
continue;
- /* Use the first address in bind addr list of association as
- * Address Parameter of ASCONF CHUNK.
+ /* Use the first valid address in bind addr list of
+ * association as Address Parameter of ASCONF CHUNK.
*/
- sctp_read_lock(&asoc->base.addr_lock);
bp = &asoc->base.bind_addr;
p = bp->address_list.next;
laddr = list_entry(p, struct sctp_sockaddr_entry, list);
- sctp_read_unlock(&asoc->base.addr_lock);
-
chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
addrcnt, SCTP_PARAM_ADD_IP);
if (!chunk) {
@@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
/* Add the new addresses to the bind address list with
* use_as_src set to 0.
*/
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
addr = (union sctp_addr *)addr_buf;
@@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
GFP_ATOMIC);
addr_buf += af->sockaddr_len;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
}
out:
@@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
* socket routing and failover schemes. Refer to comments in
* sctp_do_bind(). -daisy
*/
- sctp_local_bh_disable();
- sctp_write_lock(&ep->base.addr_lock);
-
- retval = sctp_del_bind_addr(bp, sa_addr);
-
- sctp_write_unlock(&ep->base.addr_lock);
- sctp_local_bh_enable();
+ retval = sctp_del_bind_addr(bp, sa_addr, call_rcu);
addr_buf += af->sockaddr_len;
err_bindx_rem:
@@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
* make sure that we do not delete all the addresses in the
* association.
*/
- sctp_read_lock(&asoc->base.addr_lock);
bp = &asoc->base.bind_addr;
laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
addrcnt, sp);
- sctp_read_unlock(&asoc->base.addr_lock);
if (!laddr)
continue;
+ /* We do not need RCU protection throughout this loop
+ * because this is done under a socket lock from the
+ * setsockopt call.
+ */
chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt,
SCTP_PARAM_DEL_IP);
if (!chunk) {
@@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
/* Reset use_as_src flag for the addresses in the bind address
* list that are to be deleted.
*/
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
laddr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(laddr->v4.sin_family);
- list_for_each(pos1, &bp->address_list) {
- saddr = list_entry(pos1,
- struct sctp_sockaddr_entry,
- list);
+ list_for_each_entry_rcu(saddr, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&saddr->a, laddr))
saddr->use_as_src = 0;
}
addr_buf += af->sockaddr_len;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
/* Update the route and saddr entries for all the transports
* as some of the addresses in the bind address list are
@@ -4057,11 +4035,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
int __user *optlen)
{
sctp_assoc_t id;
- struct list_head *pos;
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
struct sctp_sockaddr_entry *addr;
- rwlock_t *addr_lock;
int cnt = 0;
if (len < sizeof(sctp_assoc_t))
@@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
*/
if (0 == id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, count the valid
* addresses from the global local address list.
*/
@@ -4115,12 +4087,14 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
goto done;
}
- list_for_each(pos, &bp->address_list) {
+ /* Protection on the bound address list is not needed,
+ * since in the socket option context we hold the socket lock,
+ * so there is no way that the bound address list can change.
+ */
+ list_for_each_entry_rcu(addr, &bp->address_list, list) {
cnt ++;
}
-
done:
- sctp_read_unlock(addr_lock);
return cnt;
}
@@ -4204,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
{
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
int cnt = 0;
struct sctp_getaddrs_old getaddrs;
struct sctp_sockaddr_entry *addr;
@@ -4212,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
union sctp_addr temp;
struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
- rwlock_t *addr_lock;
int err = 0;
void *addrs;
void *buf;
@@ -4234,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
*/
if (0 == getaddrs.assoc_id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
to = getaddrs.addrs;
@@ -4254,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
if (!addrs)
return -ENOMEM;
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
* addresses from the global local address list.
*/
@@ -4271,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
}
buf = addrs;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* Protection on the bound address list is not needed since
+ * in the socket option context we hold a socket lock and
+ * thus the bound address list can't change.
+ */
+ list_for_each_entry_rcu(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
@@ -4284,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
}
copy_getaddrs:
- sctp_read_unlock(addr_lock);
-
/* copy the entire address list into the user provided space */
if (copy_to_user(to, addrs, bytes_copied)) {
err = -EFAULT;
@@ -4307,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
{
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
int cnt = 0;
struct sctp_getaddrs getaddrs;
struct sctp_sockaddr_entry *addr;
@@ -4315,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
union sctp_addr temp;
struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
- rwlock_t *addr_lock;
int err = 0;
size_t space_left;
int bytes_copied = 0;
@@ -4336,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
*/
if (0 == getaddrs.assoc_id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
to = optval + offsetof(struct sctp_getaddrs,addrs);
@@ -4352,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
if (!addrs)
return -ENOMEM;
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
* addresses from the global local address list.
*/
@@ -4365,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
space_left, &bytes_copied);
if (cnt < 0) {
err = cnt;
- goto error_lock;
+ goto out;
}
goto copy_getaddrs;
}
}
buf = addrs;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* Protection on the bound address list is not needed since
+ * in the socket option context we hold a socket lock and
+ * thus the bound address list can't change.
+ */
+ list_for_each_entry_rcu(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if (space_left < addrlen) {
err = -ENOMEM; /*fixme: right error?*/
- goto error_lock;
+ goto out;
}
memcpy(buf, &temp, addrlen);
buf += addrlen;
@@ -4389,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
}
copy_getaddrs:
- sctp_read_unlock(addr_lock);
-
if (copy_to_user(to, addrs, bytes_copied)) {
err = -EFAULT;
goto out;
@@ -4401,12 +4365,6 @@ copy_getaddrs:
}
if (put_user(bytes_copied, optlen))
err = -EFAULT;
-
- goto out;
-
-error_lock:
- sctp_read_unlock(addr_lock);
-
out:
kfree(addrs);
return err;
--
1.5.2.4
^ permalink raw reply related
* [RFC v2 PATCH 1/2] SCTP: Add RCU synchronization around sctp_localaddr_list
From: Vlad Yasevich @ 2007-09-12 19:46 UTC (permalink / raw)
To: netdev; +Cc: lksctp-developers, Vlad Yasevich
In-Reply-To: <11896263983281-git-send-email-vladislav.yasevich@hp.com>
sctp_localaddr_list is modified dynamically via NETDEV_UP
and NETDEV_DOWN events, but there is not synchronization
between writer (even handler) and readers. As a result,
the readers can access an entry that has been freed and
crash the sytem.
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
include/net/sctp/sctp.h | 1 +
include/net/sctp/structs.h | 6 +++++
net/sctp/bind_addr.c | 2 +
net/sctp/ipv6.c | 34 ++++++++++++++++++++++---------
net/sctp/protocol.c | 46 ++++++++++++++++++++++++++++++-------------
net/sctp/socket.c | 38 +++++++++++++++++++++++------------
6 files changed, 90 insertions(+), 37 deletions(-)
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d529045..c9cc00c 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -123,6 +123,7 @@
* sctp/protocol.c
*/
extern struct sock *sctp_get_ctl_sock(void);
+extern void sctp_local_addr_free(struct rcu_head *head);
extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
sctp_scope_t, gfp_t gfp,
int flags);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c0d5848..a89e361 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -207,6 +207,9 @@ extern struct sctp_globals {
* It is a list of sctp_sockaddr_entry.
*/
struct list_head local_addr_list;
+
+ /* Lock that protects the local_addr_list writers */
+ spinlock_t addr_list_lock;
/* Flag to indicate if addip is enabled. */
int addip_enable;
@@ -242,6 +245,7 @@ extern struct sctp_globals {
#define sctp_port_alloc_lock (sctp_globals.port_alloc_lock)
#define sctp_port_hashtable (sctp_globals.port_hashtable)
#define sctp_local_addr_list (sctp_globals.local_addr_list)
+#define sctp_local_addr_lock (sctp_globals.addr_list_lock)
#define sctp_addip_enable (sctp_globals.addip_enable)
#define sctp_prsctp_enable (sctp_globals.prsctp_enable)
@@ -737,8 +741,10 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
/* This is a structure for holding either an IPv6 or an IPv4 address. */
struct sctp_sockaddr_entry {
struct list_head list;
+ struct rcu_head rcu;
union sctp_addr a;
__u8 use_as_src;
+ __u8 valid;
};
typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index fdb287a..7fc369f 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -163,8 +163,10 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
addr->a.v4.sin_port = htons(bp->port);
addr->use_as_src = use_as_src;
+ addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
+ INIT_RCU_HEAD(&addr->rcu);
list_add_tail(&addr->list, &bp->address_list);
SCTP_DBG_OBJCNT_INC(addr);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f8aa23d..54ff472 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -77,13 +77,18 @@
#include <asm/uaccess.h>
-/* Event handler for inet6 address addition/deletion events. */
+/* Event handler for inet6 address addition/deletion events.
+ * This even is part of the atomic notifier call chain
+ * and thus happens atomically and can NOT sleep. As a result
+ * we can't and really don't need to add any locks to guard the
+ * RCU.
+ */
static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
void *ptr)
{
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct sctp_sockaddr_entry *addr;
- struct list_head *pos, *temp;
+ struct sctp_sockaddr_entry *addr = NULL;
+ struct sctp_sockaddr_entry *temp;
switch (ev) {
case NETDEV_UP:
@@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
memcpy(&addr->a.v6.sin6_addr, &ifa->addr,
sizeof(struct in6_addr));
addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
- list_add_tail(&addr->list, &sctp_local_addr_list);
+ addr->valid = 1;
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+ spin_unlock_bh(&sctp_local_addr_lock);
}
break;
case NETDEV_DOWN:
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) {
- list_del(pos);
- kfree(addr);
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_for_each_entry_safe(addr, temp,
+ &sctp_local_addr_list, list) {
+ if (ipv6_addr_equal(&addr->a.v6.sin6_addr,
+ &ifa->addr)) {
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
break;
}
}
-
+ spin_unlock_bh(&sctp_local_addr_lock);
+ if (addr && !addr->valid)
+ call_rcu(&addr->rcu, sctp_local_addr_free);
break;
}
@@ -367,7 +379,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
addr->a.v6.sin6_port = 0;
addr->a.v6.sin6_addr = ifp->addr;
addr->a.v6.sin6_scope_id = dev->ifindex;
+ addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
+ INIT_RCU_HEAD(&addr->rcu);
list_add_tail(&addr->list, addrlist);
}
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e98579b..4688559 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -153,6 +153,8 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
addr->a.v4.sin_family = AF_INET;
addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
+ addr->valid = 1;
+ INIT_RCU_HEAD(&addr->rcu);
list_add_tail(&addr->list, addrlist);
}
}
@@ -192,16 +194,24 @@ static void sctp_free_local_addr_list(void)
}
}
+void sctp_local_addr_free(struct rcu_head *head)
+{
+ struct sctp_sockaddr_entry *e = container_of(head,
+ struct sctp_sockaddr_entry, rcu);
+ kfree(e);
+}
+
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
int error = 0;
- struct list_head *pos, *temp;
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
if (sctp_in_scope(&addr->a, scope)) {
/* Now that the address is in scope, check to see if
* the address type is really supported by the local
@@ -221,6 +231,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
}
end_copy:
+ rcu_read_unlock();
return error;
}
@@ -605,8 +616,8 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct sctp_sockaddr_entry *addr;
- struct list_head *pos, *temp;
+ struct sctp_sockaddr_entry *addr = NULL;
+ struct sctp_sockaddr_entry *temp;
switch (ev) {
case NETDEV_UP:
@@ -615,19 +626,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
addr->a.v4.sin_family = AF_INET;
addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
- list_add_tail(&addr->list, &sctp_local_addr_list);
+ addr->valid = 1;
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+ spin_unlock_bh(&sctp_local_addr_lock);
}
break;
case NETDEV_DOWN:
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_for_each_entry_safe(addr, temp,
+ &sctp_local_addr_list, list) {
if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) {
- list_del(pos);
- kfree(addr);
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
break;
}
}
-
+ spin_unlock_bh(&sctp_local_addr_lock);
+ if (addr && !addr->valid)
+ call_rcu(&addr->rcu, sctp_local_addr_free);
break;
}
@@ -1160,6 +1177,7 @@ SCTP_STATIC __init int sctp_init(void)
/* Initialize the local address list. */
INIT_LIST_HEAD(&sctp_local_addr_list);
+ spin_lock_init(&sctp_local_addr_lock);
sctp_get_local_addr_list();
/* Register notifier for inet address additions/deletions. */
@@ -1227,6 +1245,9 @@ SCTP_STATIC __exit void sctp_exit(void)
sctp_v6_del_protocol();
inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+ /* Unregister notifier for inet address additions/deletions. */
+ unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+
/* Free the local address list. */
sctp_free_local_addr_list();
@@ -1240,9 +1261,6 @@ SCTP_STATIC __exit void sctp_exit(void)
inet_unregister_protosw(&sctp_stream_protosw);
inet_unregister_protosw(&sctp_seqpacket_protosw);
- /* Unregister notifier for inet address additions/deletions. */
- unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
-
sctp_sysctl_unregister();
list_del(&sctp_ipv4_specific.list);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3335460..a3acf78 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4057,9 +4057,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
int __user *optlen)
{
sctp_assoc_t id;
+ struct list_head *pos;
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos, *temp;
struct sctp_sockaddr_entry *addr;
rwlock_t *addr_lock;
int cnt = 0;
@@ -4096,15 +4096,19 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos,
- struct sctp_sockaddr_entry,
- list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr,
+ &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
+
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
continue;
+
cnt++;
}
+ rcu_read_unlock();
} else {
cnt = 1;
}
@@ -4127,14 +4131,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
int max_addrs, void *to,
int *bytes_copied)
{
- struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
union sctp_addr temp;
int cnt = 0;
int addrlen;
- list_for_each_safe(pos, next, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
+
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
continue;
@@ -4149,6 +4155,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
cnt ++;
if (cnt >= max_addrs) break;
}
+ rcu_read_unlock();
return cnt;
}
@@ -4156,14 +4163,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
size_t space_left, int *bytes_copied)
{
- struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
union sctp_addr temp;
int cnt = 0;
int addrlen;
- list_for_each_safe(pos, next, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
+
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
continue;
@@ -4171,8 +4180,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if (space_left < addrlen)
- return -ENOMEM;
+ if (space_left < addrlen) {
+ cnt = -ENOMEM;
+ break;
+ }
memcpy(to, &temp, addrlen);
to += addrlen;
@@ -4180,6 +4191,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
space_left -= addrlen;
*bytes_copied += addrlen;
}
+ rcu_read_unlock();
return cnt;
}
--
1.5.2.4
^ permalink raw reply related
* Re: [patch] sunrpc: make closing of old temporary sockets work (was: problems with lockd in 2.6.22.6)
From: J. Bruce Fields @ 2007-09-12 19:55 UTC (permalink / raw)
To: Wolfgang Walter; +Cc: Neil Brown, netdev, nfs, linux-kernel, trond.myklebust
In-Reply-To: <200709122140.57783.wolfgang.walter@studentenwerk.mhn.de>
On Wed, Sep 12, 2007 at 09:40:57PM +0200, Wolfgang Walter wrote:
> On Wednesday 12 September 2007, J. Bruce Fields wrote:
> > On Wed, Sep 12, 2007 at 04:14:06PM +0200, Neil Brown wrote:
> > > So it is in 2.6.21 and later and should probably go to .stable for .21
> > > and .22.
> > >
> > > Bruce: for you :-)
> >
> > OK, thanks! But, (as is alas often the case) I'm still confused:
> >
> > > if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
> > > continue;
> > > - if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags))
> > > + if (atomic_read(&svsk->sk_inuse) > 1
> > > + || test_bit(SK_BUSY, &svsk->sk_flags))
> > > continue;
> > > atomic_inc(&svsk->sk_inuse);
> > > list_move(le, &to_be_aged);
> >
> > What is it that ensures svsk->sk_inuse isn't incremented or SK_BUSY set
> > after that test? Not all the code that does either of those is under
> > the same serv->sv_lock lock that this code is.
> >
>
> This should not matter - SK_CLOSED may be set at any time.
>
> svc_age_temp_sockets only detaches the socket, sets SK_CLOSED and then
> enqueues it. If SK_BUSY is set its already enqueued and svc_sock_enqueue
> ensures that it is not enqueued twice.
Oh, got it. And the list manipulation is safe thanks to sv_lock. Neat,
thanks. Can you verify that this solves your problem?
--b.
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply
* Re: [PATCH] Fix e100 on systems that have cache incoherent DMA
From: David Acker @ 2007-09-12 20:11 UTC (permalink / raw)
To: James Chapman
Cc: Jeff Garzik, Kok, Auke, John Ronciak, Jesse Brandeburg,
Jeff Kirsher, Milton Miller, netdev, e1000-devel, Scott Feldman
In-Reply-To: <46E7CDD5.3060306@katalix.com>
James Chapman wrote:
> David Acker wrote:
>> Jeff Garzik wrote:
>>> pktgen outputs for the various cases modified/unmodified[/others?]
>>> would be nice, if you have a spot of time.
>>>
>>> Jeff
>>
>> I am not familiar with pktgen but I seem to have it working for a
>> simple test.
>> I edited the 1-1 example from
>> ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
>> . The results with and without the patch are below.
>
> It looks like you ran pktgen on the embedded system, which exercised
> only the transmit path. Auke indicated that the lockup was in the RU.
> Have you run pktgen on a test system to fire packets at the embedded
> system at max rate? Also test what happens when you fire packets in both
> directions simultaneously.
>
It appears that running pktgen between my pc and the embedded device at the same time eventually hangs the RU. I will
be looking into this failure in detail and report back when I know more. Thanks for bearing with me on this!
-Ack
^ permalink raw reply
* Re: skb configured but can't get data allocated
From: Vlad Yasevich @ 2007-09-12 20:12 UTC (permalink / raw)
To: DHAJOGLO; +Cc: netdev, kernelnewbies
In-Reply-To: <200709121731456120c5e4ad@mail.smumn.edu>
You may want to read this page:
http://vger.kernel.org/~davem/skb.html
You will then see you problem.
Essentially, you are writing your data into an
area that is never sent.
What you are end up is this:
---------------------------------------------
| your data| space | ll-header | IPv4 header|
---------------------------------------------
-vlad
^ permalink raw reply
* Re: [PATCH] [RFC] allow admin/users to specify rto_min in milliseconds rather than jiffies
From: Rick Jones @ 2007-09-12 20:28 UTC (permalink / raw)
To: Stephen Hemminger, David Miller; +Cc: Rick Jones, netdev
In-Reply-To: <46E06E7C.4000407@hp.com>
>> The api in netlink should be in milliseconds rather than compensating
>> in the application (iproute2).
>
>
> My understanding of the in-kernel rtnetlink code is far from complete,
> but it doesn't seem to have much in the way of provisions for unit
> conversion, which would suggest no nice suffix-based ui as in tc, and ip
> is already doing some massaging of units on the display side for a
> couple of the other parameters, so I'm at something of a loss.
So, I used the source and looked and saw that tc seems to convert
everything to nanoseconds and passes that to the kernel. The user can
give it seconds, milliseconds, microseconds or nanoseconds by using a
suffix. It then does something ostensibly intelligent to display those
to the user.
Ip converts nothing when passing things to the kernel (rtt rttvar or rto
- when/if at least the intial rto changes are included - were they?),
but when they come-out of the kernel ip converts them to milliseconds.
So the units in != the units out.
Tc seems much more friendly and less prone to user error.
I'm still not sure how "easily" rtnetlink can do conversions itself -
feedback there would be _very_ welcome - but at the very least, having
ip provide at least the illusion of what tc does would seem to be a good
thing.
rick jones
^ permalink raw reply
* Re: PROBLEM: 2.6.23-rc "NETDEV WATCHDOG: eth0: transmit timed out"
From: Francois Romieu @ 2007-09-12 20:50 UTC (permalink / raw)
To: Karl Meyer; +Cc: Michal Piotrowski, linux-kernel, netdev
In-Reply-To: <be1b757c0709021521q850f2bdq3f88c24c02c26b1b@mail.gmail.com>
Karl Meyer <adhocrocker@gmail.com> :
[...]
> am am looking for this issue for some time now, but there where no
> errors in 2.6.22-r2 (gentoo speak, I guess this is 2.6.22.2
> officially), I also ran git-bisect (for more information see the older
> messages in this thread).
2.6.22-r2 in gentoo is based on 2.6.22.1. It is way before
0e4851502f846b13b29b7f88f1250c980d57e944 that you reported to work.
Thus it is not surprizing that it works.
Any update regarding the patchkit that I sent on 2007/08/16 ?
It would help to narrow the culprit.
--
Ueimor
^ permalink raw reply
* Re: net/bluetooth/hci_sock.c:352: error: storage size of 'ctv' isn't known
From: Randy Dunlap @ 2007-09-12 20:58 UTC (permalink / raw)
To: Robert P. J. Day; +Cc: netdev
In-Reply-To: <Pine.LNX.4.64.0709120612110.476@localhost.localdomain>
On Wed, 12 Sep 2007 06:15:20 -0400 (EDT) Robert P. J. Day wrote:
> p.s. dumb question -- what locale should i be using to get those
> quotes to not make such a mess of my screen? thanks.
right or wrong, I use "LC_ALL=C" and it works for me.
---
~Randy
^ permalink raw reply
* Re: [RFC v2 PATCH 0/2] Add RCU locking to SCTP address management
From: Vlad Yasevich @ 2007-09-12 20:59 UTC (permalink / raw)
To: netdev; +Cc: lksctp-developers
In-Reply-To: <11896263983281-git-send-email-vladislav.yasevich@hp.com>
Vlad Yasevich wrote:
> Ok, this is version 2 of the patch that incorporates comments from
> Sridhar Samudrala and Paul McKenney.
>
> The changes icorporated are:
> 1. Add locking around the modification of the global sctp_local_addr_list
> when processing the notifiers. After looking around, it is possible for
> the IPv4 and IPv6 notifiers to be called at the same time, which means that
> we need a spin lock.
>
> 2. After the Paul's explanation of why writers would would to call
> rcu_read_lock, it's apparent that we really don't need that in our usage.
> I've removed all that I could find and conser safe.
>
> 3. I took Paul's suggestiong of passing an explicit rcu callback when
> removing entries from the list since these can be done it different
> contexts. This made the removal code rather simple.
Paul Moore just pointed out that using rcu versions of list traversal
macros inside the writer protected sections is just plain silly, so
consider that another change.
I'll update patch 2/2 with that comment.
-vlad
>
> Things I've left behind:
> 1. The valid flag remains. After discussing the virtues with Paul Moore
> (who used the same functionality in Netlabel code), I think that the
> valid flag slightly reduces the possibility that the reader will use
> an entry that's about to be removed. It's a good thing in our case.
> It doesn't really harm anything if a reader used a !valid entry, but
> I'd like to reduce that chance.
>
> I would appreciate any further comments
^ permalink raw reply
* [RFC v3 PATCH 2/21] SCTP: Convert bind_addr_list locking to RCU
From: Vlad Yasevich @ 2007-09-12 21:03 UTC (permalink / raw)
To: netdev; +Cc: lksctp-developers, Vlad Yasevich
In-Reply-To: <46E85344.1030402@hp.com>
[... and here is the updated version as promissed ...]
Since the sctp_sockaddr_entry is now RCU enabled as part of
the patch to synchronize sctp_localaddr_list, it makes sense to
change all handling of these entries to RCU. This includes the
sctp_bind_addrs structure and it's list of bound addresses.
This list is currently protected by an external rw_lock and that
looks like an overkill. There are only 2 writers to the list:
bind()/bindx() calls, and BH processing of ASCONF-ACK chunks.
These are already seriealized via the socket lock, so they will
not step on each other. These are also relatively rare, so we
should be good with RCU.
The readers are varied and they are easily converted to RCU.
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
include/net/sctp/structs.h | 7 +--
net/sctp/associola.c | 14 +-----
net/sctp/bind_addr.c | 68 ++++++++++++++++++++----------
net/sctp/endpointola.c | 27 +++---------
net/sctp/ipv6.c | 12 ++---
net/sctp/protocol.c | 25 ++++-------
net/sctp/sm_make_chunk.c | 18 +++-----
net/sctp/socket.c | 98 ++++++++++++-------------------------------
8 files changed, 106 insertions(+), 163 deletions(-)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a89e361..c2fe2dc 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1155,7 +1155,9 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
int flags);
int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
__u8 use_as_src, gfp_t gfp);
-int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
+int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
+ void (*rcu_call)(struct rcu_head *,
+ void (*func)(struct rcu_head *)));
int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
struct sctp_sock *);
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
@@ -1226,9 +1228,6 @@ struct sctp_ep_common {
* bind_addr.address_list is our set of local IP addresses.
*/
struct sctp_bind_addr bind_addr;
-
- /* Protection during address list comparisons. */
- rwlock_t addr_lock;
};
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2ad1caf..9bad8ba 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the bind addr area. */
sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
- rwlock_init(&asoc->base.addr_lock);
asoc->state = SCTP_STATE_CLOSED;
@@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
{
struct sctp_transport *transport;
- sctp_read_lock(&asoc->base.addr_lock);
-
if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
(htons(asoc->peer.port) == paddr->v4.sin_port)) {
transport = sctp_assoc_lookup_paddr(asoc, paddr);
@@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
transport = NULL;
out:
- sctp_read_unlock(&asoc->base.addr_lock);
return transport;
}
@@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
const union sctp_addr *laddr)
{
- int found;
+ int found = 0;
- sctp_read_lock(&asoc->base.addr_lock);
if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) &&
sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
- sctp_sk(asoc->base.sk))) {
+ sctp_sk(asoc->base.sk)))
found = 1;
- goto out;
- }
- found = 0;
-out:
- sctp_read_unlock(&asoc->base.addr_lock);
return found;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 7fc369f..d16055f 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -167,7 +167,11 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
INIT_LIST_HEAD(&addr->list);
INIT_RCU_HEAD(&addr->rcu);
- list_add_tail(&addr->list, &bp->address_list);
+
+ /* We always hold a socket lock when calling this function,
+ * so rcu_read_lock is not needed.
+ */
+ list_add_tail_rcu(&addr->list, &bp->address_list);
SCTP_DBG_OBJCNT_INC(addr);
return 0;
@@ -176,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
/* Delete an address from the bind address list in the SCTP_bind_addr
* structure.
*/
-int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
+int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr,
+ void (*rcu_call)(struct rcu_head *head,
+ void (*func)(struct rcu_head *head)))
{
- struct list_head *pos, *temp;
- struct sctp_sockaddr_entry *addr;
+ struct sctp_sockaddr_entry *addr, *temp;
- list_for_each_safe(pos, temp, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* We hold the socket lock when calling this function, so
+ * rcu_read_lock is not needed.
+ */
+ list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&addr->a, del_addr)) {
/* Found the exact match. */
- list_del(pos);
- kfree(addr);
- SCTP_DBG_OBJCNT_DEC(addr);
-
- return 0;
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
+ break;
}
}
+ /* Call the rcu callback provided in the args. This function is
+ * called by both BH packet processing and user side socket option
+ * processing, but it works on different lists in those 2 contexts.
+ * Each context provides it's own callback, whether call_rc_bh()
+ * or call_rcu(), to make sure that we wait an for appropriate time.
+ */
+ if (addr && !addr->valid) {
+ rcu_call(&addr->rcu, sctp_local_addr_free);
+ SCTP_DBG_OBJCNT_DEC(addr);
+ }
+
return -EINVAL;
}
@@ -302,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
struct sctp_sock *opt)
{
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
-
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (opt->pf->cmp_addr(&laddr->a, addr, opt))
- return 1;
+ int match = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
+ if (opt->pf->cmp_addr(&laddr->a, addr, opt)) {
+ match = 1;
+ break;
+ }
}
+ rcu_read_unlock();
- return 0;
+ return match;
}
/* Find the first address in the bind address list that is not present in
@@ -325,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
union sctp_addr *addr;
void *addr_buf;
struct sctp_af *af;
- struct list_head *pos;
int i;
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+ /* This is only called sctp_send_asconf_del_ip() and we hold
+ * the socket lock in that code patch, so that address list
+ * can't change.
+ */
+ list_for_each_entry(laddr, &bp->address_list, list) {
addr_buf = (union sctp_addr *)addrs;
for (i = 0; i < addrcnt; i++) {
addr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(addr->v4.sin_family);
if (!af)
- return NULL;
+ break;
if (opt->pf->cmp_addr(&laddr->a, addr, opt))
break;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1404a9e..110d912 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);
- rwlock_init(&ep->base.addr_lock);
/* Remember who we are attached to. */
ep->base.sk = sk;
@@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
const union sctp_addr *laddr)
{
- struct sctp_endpoint *retval;
+ struct sctp_endpoint *retval = NULL;
- sctp_read_lock(&ep->base.addr_lock);
if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
- sctp_sk(ep->base.sk))) {
+ sctp_sk(ep->base.sk)))
retval = ep;
- goto out;
- }
}
- retval = NULL;
-
-out:
- sctp_read_unlock(&ep->base.addr_lock);
return retval;
}
@@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
list_for_each(pos, &ep->asocs) {
asoc = list_entry(pos, struct sctp_association, asocs);
if (rport == asoc->peer.port) {
- sctp_read_lock(&asoc->base.addr_lock);
*transport = sctp_assoc_lookup_paddr(asoc, paddr);
- sctp_read_unlock(&asoc->base.addr_lock);
if (*transport)
return asoc;
@@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
- struct list_head *pos;
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
- sctp_read_lock(&ep->base.addr_lock);
bp = &ep->base.bind_addr;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (sctp_has_association(&addr->a, paddr)) {
- sctp_read_unlock(&ep->base.addr_lock);
+ /* This function is called whith the socket lock held,
+ * so the address_list can not change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
+ if (sctp_has_association(&addr->a, paddr))
return 1;
- }
}
- sctp_read_unlock(&ep->base.addr_lock);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 54ff472..c8b0115 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -302,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
union sctp_addr *saddr)
{
struct sctp_bind_addr *bp;
- rwlock_t *addr_lock;
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
sctp_scope_t scope;
union sctp_addr *baddr = NULL;
__u8 matchlen = 0;
@@ -324,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
scope = sctp_scope(daddr);
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
/* Go through the bind address list and find the best source address
* that matches the scope of the destination address.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
if ((laddr->use_as_src) &&
(laddr->a.sa.sa_family == AF_INET6) &&
(scope <= sctp_scope(&laddr->a))) {
@@ -353,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
__FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
}
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
}
/* Make a copy of all potential local addresses. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4688559..35af75b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -223,7 +223,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
(copy_flags & SCTP_ADDR6_ALLOWED) &&
(copy_flags & SCTP_ADDR6_PEERSUPP)))) {
error = sctp_add_bind_addr(bp, &addr->a, 1,
- GFP_ATOMIC);
+ GFP_ATOMIC);
if (error)
goto end_copy;
}
@@ -427,9 +427,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
struct rtable *rt;
struct flowi fl;
struct sctp_bind_addr *bp;
- rwlock_t *addr_lock;
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
struct dst_entry *dst = NULL;
union sctp_addr dst_saddr;
@@ -458,23 +456,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
goto out;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
if (dst) {
/* Walk through the bind address list and look for a bind
* address that matches the source address of the returned dst.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry,
- list);
- if (!laddr->use_as_src)
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid || !laddr->use_as_src)
continue;
sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
}
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
/* None of the bound addresses match the source address of the
* dst. So release it.
@@ -486,10 +481,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
/* Walk through the bind address list and try to get a dst that
* matches a bind address as the source address.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
if ((laddr->use_as_src) &&
(AF_INET == laddr->a.sa.sa_family)) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
@@ -501,7 +496,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
}
out_unlock:
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
out:
if (dst)
SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 79856c9..2e34220 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1531,7 +1531,7 @@ no_hmac:
/* Also, add the destination address. */
if (list_empty(&retval->base.bind_addr.address_list)) {
sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
- GFP_ATOMIC);
+ GFP_ATOMIC);
}
retval->next_tsn = retval->c.initial_tsn;
@@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
- list_for_each(pos, &bp->address_list) {
- saddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* This is always done in BH context with a socket lock
+ * held, so the list can not change.
+ */
+ list_for_each_entry(saddr, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&saddr->a, &addr))
saddr->use_as_src = 1;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
break;
case SCTP_PARAM_DEL_IP:
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
- retval = sctp_del_bind_addr(bp, &addr);
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
+ retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh);
list_for_each(pos, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport,
transports);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a3acf78..772fbfb 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
if (!bp->port)
bp->port = inet_sk(sk)->num;
- /* Add the address to the bind address list. */
- sctp_local_bh_disable();
- sctp_write_lock(&ep->base.addr_lock);
-
- /* Use GFP_ATOMIC since BHs are disabled. */
+ /* Add the address to the bind address list.
+ * Use GFP_ATOMIC since BHs will be disabled.
+ */
ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
- sctp_write_unlock(&ep->base.addr_lock);
- sctp_local_bh_enable();
/* Copy back into socket for getsockname() use. */
if (!ret) {
@@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
if (i < addrcnt)
continue;
- /* Use the first address in bind addr list of association as
- * Address Parameter of ASCONF CHUNK.
+ /* Use the first valid address in bind addr list of
+ * association as Address Parameter of ASCONF CHUNK.
*/
- sctp_read_lock(&asoc->base.addr_lock);
bp = &asoc->base.bind_addr;
p = bp->address_list.next;
laddr = list_entry(p, struct sctp_sockaddr_entry, list);
- sctp_read_unlock(&asoc->base.addr_lock);
-
chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
addrcnt, SCTP_PARAM_ADD_IP);
if (!chunk) {
@@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
/* Add the new addresses to the bind address list with
* use_as_src set to 0.
*/
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
addr = (union sctp_addr *)addr_buf;
@@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
GFP_ATOMIC);
addr_buf += af->sockaddr_len;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
}
out:
@@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
* socket routing and failover schemes. Refer to comments in
* sctp_do_bind(). -daisy
*/
- sctp_local_bh_disable();
- sctp_write_lock(&ep->base.addr_lock);
-
- retval = sctp_del_bind_addr(bp, sa_addr);
-
- sctp_write_unlock(&ep->base.addr_lock);
- sctp_local_bh_enable();
+ retval = sctp_del_bind_addr(bp, sa_addr, call_rcu);
addr_buf += af->sockaddr_len;
err_bindx_rem:
@@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
* make sure that we do not delete all the addresses in the
* association.
*/
- sctp_read_lock(&asoc->base.addr_lock);
bp = &asoc->base.bind_addr;
laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
addrcnt, sp);
- sctp_read_unlock(&asoc->base.addr_lock);
if (!laddr)
continue;
+ /* We do not need RCU protection throughout this loop
+ * because this is done under a socket lock from the
+ * setsockopt call.
+ */
chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt,
SCTP_PARAM_DEL_IP);
if (!chunk) {
@@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
/* Reset use_as_src flag for the addresses in the bind address
* list that are to be deleted.
*/
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
laddr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(laddr->v4.sin_family);
- list_for_each(pos1, &bp->address_list) {
- saddr = list_entry(pos1,
- struct sctp_sockaddr_entry,
- list);
+ list_for_each_entry(saddr, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&saddr->a, laddr))
saddr->use_as_src = 0;
}
addr_buf += af->sockaddr_len;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
/* Update the route and saddr entries for all the transports
* as some of the addresses in the bind address list are
@@ -4057,11 +4035,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
int __user *optlen)
{
sctp_assoc_t id;
- struct list_head *pos;
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
struct sctp_sockaddr_entry *addr;
- rwlock_t *addr_lock;
int cnt = 0;
if (len < sizeof(sctp_assoc_t))
@@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
*/
if (0 == id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, count the valid
* addresses from the global local address list.
*/
@@ -4115,12 +4087,14 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
goto done;
}
- list_for_each(pos, &bp->address_list) {
+ /* Protection on the bound address list is not needed,
+ * since in the socket option context we hold the socket lock,
+ * so there is no way that the bound address list can change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
cnt ++;
}
-
done:
- sctp_read_unlock(addr_lock);
return cnt;
}
@@ -4204,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
{
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
int cnt = 0;
struct sctp_getaddrs_old getaddrs;
struct sctp_sockaddr_entry *addr;
@@ -4212,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
union sctp_addr temp;
struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
- rwlock_t *addr_lock;
int err = 0;
void *addrs;
void *buf;
@@ -4234,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
*/
if (0 == getaddrs.assoc_id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
to = getaddrs.addrs;
@@ -4254,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
if (!addrs)
return -ENOMEM;
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
* addresses from the global local address list.
*/
@@ -4271,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
}
buf = addrs;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* Protection on the bound address list is not needed since
+ * in the socket option context we hold a socket lock and
+ * thus the bound address list can't change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
@@ -4284,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
}
copy_getaddrs:
- sctp_read_unlock(addr_lock);
-
/* copy the entire address list into the user provided space */
if (copy_to_user(to, addrs, bytes_copied)) {
err = -EFAULT;
@@ -4307,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
{
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
int cnt = 0;
struct sctp_getaddrs getaddrs;
struct sctp_sockaddr_entry *addr;
@@ -4315,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
union sctp_addr temp;
struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
- rwlock_t *addr_lock;
int err = 0;
size_t space_left;
int bytes_copied = 0;
@@ -4336,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
*/
if (0 == getaddrs.assoc_id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
to = optval + offsetof(struct sctp_getaddrs,addrs);
@@ -4352,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
if (!addrs)
return -ENOMEM;
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
* addresses from the global local address list.
*/
@@ -4365,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
space_left, &bytes_copied);
if (cnt < 0) {
err = cnt;
- goto error_lock;
+ goto out;
}
goto copy_getaddrs;
}
}
buf = addrs;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* Protection on the bound address list is not needed since
+ * in the socket option context we hold a socket lock and
+ * thus the bound address list can't change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if (space_left < addrlen) {
err = -ENOMEM; /*fixme: right error?*/
- goto error_lock;
+ goto out;
}
memcpy(buf, &temp, addrlen);
buf += addrlen;
@@ -4389,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
}
copy_getaddrs:
- sctp_read_unlock(addr_lock);
-
if (copy_to_user(to, addrs, bytes_copied)) {
err = -EFAULT;
goto out;
@@ -4401,12 +4365,6 @@ copy_getaddrs:
}
if (put_user(bytes_copied, optlen))
err = -EFAULT;
-
- goto out;
-
-error_lock:
- sctp_read_unlock(addr_lock);
-
out:
kfree(addrs);
return err;
--
1.5.2.4
^ permalink raw reply related
* Re: [PATCH] Add IP1000A Driver
From: Francois Romieu @ 2007-09-12 21:44 UTC (permalink / raw)
To: =?unknown-8bit?B?6buD5bu66IiILUplc3Nl?=
Cc: jeff, akpm, netdev, Stephen Hemminger
In-Reply-To: <AA68EB0EBA29BA40A06B700C33343EEF019012D2@fileserver.icplus.com.tw>
-Jesse <Jesse@icplus.com.tw> :
[...]
> Because lot of patch was created by you, could I write your name on
> MAINTAINERS file.
No objection/volunteer in the room ?
--
Ueimor
^ permalink raw reply
* Re: [PATCH] [-MM, FIX] ixgbe: incorporate napi_struct changes from net-2.6.24.git
From: Andrew Morton @ 2007-09-12 22:16 UTC (permalink / raw)
To: Auke Kok; +Cc: davem, netdev, jeff, jesse.brandeburg
In-Reply-To: <20070912181307.13189.33476.stgit@localhost.localdomain>
On Wed, 12 Sep 2007 11:13:07 -0700
Auke Kok <auke-jan.h.kok@intel.com> wrote:
> This incorporates the new napi_struct changes into ixgbe.
I get a reject storm.
> --- a/drivers/net/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ixgbe/ixgbe_main.c
> @@ -557,14 +557,15 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
> struct ixgbe_adapter *adapter = rxr->adapter;
>
> IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value);
> - netif_rx_schedule(adapter->netdev);
> + netif_rx_schedule(adapter->netdev, &adapter->napi);
> return IRQ_HANDLED;
> }
>
For example, my copy of ixgbe_msix_clean_rx(), from
git://lost.foo-projects.org/~aveerani/git/linux-2.6#ixgbe is:
static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
{
struct ixgbe_ring *rxr = data;
struct ixgbe_adapter *adapter = rxr->adapter;
#ifndef CONFIG_IXGBE_NAPI
int i;
for (i = 0; i < IXGBE_MAX_INTR; i++)
if (unlikely(!ixgbe_clean_rx_irq(adapter, rxr)))
break;
#else
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value);
netif_rx_schedule(adapter->netdev);
#endif
return IRQ_HANDLED;
}
which is quite different from the function whcih you're altering here?
^ permalink raw reply
* Re: [patch] sunrpc: make closing of old temporary sockets work (was: problems with lockd in 2.6.22.6)
From: Wolfgang Walter @ 2007-09-12 22:18 UTC (permalink / raw)
To: J. Bruce Fields; +Cc: Neil Brown, trond.myklebust, netdev, nfs, linux-kernel
In-Reply-To: <20070912195512.GC13792@fieldses.org>
On Wednesday 12 September 2007, J. Bruce Fields wrote:
> On Wed, Sep 12, 2007 at 09:40:57PM +0200, Wolfgang Walter wrote:
> > On Wednesday 12 September 2007, J. Bruce Fields wrote:
> > > On Wed, Sep 12, 2007 at 04:14:06PM +0200, Neil Brown wrote:
> > > > So it is in 2.6.21 and later and should probably go to .stable for .21
> > > > and .22.
> > > >
> > > > Bruce: for you :-)
> > >
> > > OK, thanks! But, (as is alas often the case) I'm still confused:
> > >
> > > > if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
> > > > continue;
> > > > - if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY,
&svsk->sk_flags))
> > > > + if (atomic_read(&svsk->sk_inuse) > 1
> > > > + || test_bit(SK_BUSY, &svsk->sk_flags))
> > > > continue;
> > > > atomic_inc(&svsk->sk_inuse);
> > > > list_move(le, &to_be_aged);
> > >
> > > What is it that ensures svsk->sk_inuse isn't incremented or SK_BUSY set
> > > after that test? Not all the code that does either of those is under
> > > the same serv->sv_lock lock that this code is.
> > >
> >
> > This should not matter - SK_CLOSED may be set at any time.
> >
> > svc_age_temp_sockets only detaches the socket, sets SK_CLOSED and then
> > enqueues it. If SK_BUSY is set its already enqueued and svc_sock_enqueue
> > ensures that it is not enqueued twice.
>
> Oh, got it. And the list manipulation is safe thanks to sv_lock. Neat,
> thanks. Can you verify that this solves your problem?
>
I'll test it tomorrow. So friday morning I'll know and mail you for sure.
Regards,
--
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts
^ permalink raw reply
* Re: [PATCH] [-MM, FIX] ixgbe: incorporate napi_struct changes from net-2.6.24.git
From: Kok, Auke @ 2007-09-12 22:20 UTC (permalink / raw)
To: Andrew Morton; +Cc: davem, netdev, jeff, jesse.brandeburg
In-Reply-To: <20070912151643.cb0e070b.akpm@linux-foundation.org>
Andrew Morton wrote:
> On Wed, 12 Sep 2007 11:13:07 -0700
> Auke Kok <auke-jan.h.kok@intel.com> wrote:
>
>> This incorporates the new napi_struct changes into ixgbe.
>
> I get a reject storm.
>
>> --- a/drivers/net/ixgbe/ixgbe_main.c
>> +++ b/drivers/net/ixgbe/ixgbe_main.c
>> @@ -557,14 +557,15 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
>> struct ixgbe_adapter *adapter = rxr->adapter;
>>
>> IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value);
>> - netif_rx_schedule(adapter->netdev);
>> + netif_rx_schedule(adapter->netdev, &adapter->napi);
>> return IRQ_HANDLED;
>> }
>>
>
> For example, my copy of ixgbe_msix_clean_rx(), from
> git://lost.foo-projects.org/~aveerani/git/linux-2.6#ixgbe is:
please drop that tree, and get the one I posted last week instead:
git://lost.foo-projects.org/~ahkok/linux-2.6 ixgbe-20070905-submission
or:
http://foo-projects.org/~sofar/ixgbe-20070905-submission.patch
http://foo-projects.org/~sofar/ixgbe-20070905-submission.patch.bz2
Cheers,
Auke
^ permalink raw reply
* Re: [RFC v2 PATCH 1/2] SCTP: Add RCU synchronization around sctp_localaddr_list
From: Paul E. McKenney @ 2007-09-12 22:26 UTC (permalink / raw)
To: Vlad Yasevich; +Cc: netdev, lksctp-developers
In-Reply-To: <11896263982464-git-send-email-vladislav.yasevich@hp.com>
On Wed, Sep 12, 2007 at 03:46:37PM -0400, Vlad Yasevich wrote:
> sctp_localaddr_list is modified dynamically via NETDEV_UP
> and NETDEV_DOWN events, but there is not synchronization
> between writer (even handler) and readers. As a result,
> the readers can access an entry that has been freed and
> crash the sytem.
Looks much better!!! Typo in comment noted below.
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
> ---
> include/net/sctp/sctp.h | 1 +
> include/net/sctp/structs.h | 6 +++++
> net/sctp/bind_addr.c | 2 +
> net/sctp/ipv6.c | 34 ++++++++++++++++++++++---------
> net/sctp/protocol.c | 46 ++++++++++++++++++++++++++++++-------------
> net/sctp/socket.c | 38 +++++++++++++++++++++++------------
> 6 files changed, 90 insertions(+), 37 deletions(-)
>
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index d529045..c9cc00c 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -123,6 +123,7 @@
> * sctp/protocol.c
> */
> extern struct sock *sctp_get_ctl_sock(void);
> +extern void sctp_local_addr_free(struct rcu_head *head);
> extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
> sctp_scope_t, gfp_t gfp,
> int flags);
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index c0d5848..a89e361 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -207,6 +207,9 @@ extern struct sctp_globals {
> * It is a list of sctp_sockaddr_entry.
> */
> struct list_head local_addr_list;
> +
> + /* Lock that protects the local_addr_list writers */
> + spinlock_t addr_list_lock;
>
> /* Flag to indicate if addip is enabled. */
> int addip_enable;
> @@ -242,6 +245,7 @@ extern struct sctp_globals {
> #define sctp_port_alloc_lock (sctp_globals.port_alloc_lock)
> #define sctp_port_hashtable (sctp_globals.port_hashtable)
> #define sctp_local_addr_list (sctp_globals.local_addr_list)
> +#define sctp_local_addr_lock (sctp_globals.addr_list_lock)
> #define sctp_addip_enable (sctp_globals.addip_enable)
> #define sctp_prsctp_enable (sctp_globals.prsctp_enable)
>
> @@ -737,8 +741,10 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
> /* This is a structure for holding either an IPv6 or an IPv4 address. */
> struct sctp_sockaddr_entry {
> struct list_head list;
> + struct rcu_head rcu;
> union sctp_addr a;
> __u8 use_as_src;
> + __u8 valid;
> };
>
> typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
> diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
> index fdb287a..7fc369f 100644
> --- a/net/sctp/bind_addr.c
> +++ b/net/sctp/bind_addr.c
> @@ -163,8 +163,10 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
> addr->a.v4.sin_port = htons(bp->port);
>
> addr->use_as_src = use_as_src;
> + addr->valid = 1;
>
> INIT_LIST_HEAD(&addr->list);
> + INIT_RCU_HEAD(&addr->rcu);
> list_add_tail(&addr->list, &bp->address_list);
> SCTP_DBG_OBJCNT_INC(addr);
>
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index f8aa23d..54ff472 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -77,13 +77,18 @@
>
> #include <asm/uaccess.h>
>
> -/* Event handler for inet6 address addition/deletion events. */
> +/* Event handler for inet6 address addition/deletion events.
> + * This even is part of the atomic notifier call chain
s/even/event/
> + * and thus happens atomically and can NOT sleep. As a result
> + * we can't and really don't need to add any locks to guard the
> + * RCU.
> + */
> static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
> void *ptr)
> {
> struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
> - struct sctp_sockaddr_entry *addr;
> - struct list_head *pos, *temp;
> + struct sctp_sockaddr_entry *addr = NULL;
> + struct sctp_sockaddr_entry *temp;
>
> switch (ev) {
> case NETDEV_UP:
> @@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
> memcpy(&addr->a.v6.sin6_addr, &ifa->addr,
> sizeof(struct in6_addr));
> addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
> - list_add_tail(&addr->list, &sctp_local_addr_list);
> + addr->valid = 1;
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
> + spin_unlock_bh(&sctp_local_addr_lock);
> }
> break;
> case NETDEV_DOWN:
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> - if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) {
> - list_del(pos);
> - kfree(addr);
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_for_each_entry_safe(addr, temp,
> + &sctp_local_addr_list, list) {
> + if (ipv6_addr_equal(&addr->a.v6.sin6_addr,
> + &ifa->addr)) {
> + addr->valid = 0;
> + list_del_rcu(&addr->list);
> break;
> }
> }
> -
> + spin_unlock_bh(&sctp_local_addr_lock);
> + if (addr && !addr->valid)
> + call_rcu(&addr->rcu, sctp_local_addr_free);
> break;
> }
>
> @@ -367,7 +379,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
> addr->a.v6.sin6_port = 0;
> addr->a.v6.sin6_addr = ifp->addr;
> addr->a.v6.sin6_scope_id = dev->ifindex;
> + addr->valid = 1;
> INIT_LIST_HEAD(&addr->list);
> + INIT_RCU_HEAD(&addr->rcu);
> list_add_tail(&addr->list, addrlist);
> }
> }
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index e98579b..4688559 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -153,6 +153,8 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
> addr->a.v4.sin_family = AF_INET;
> addr->a.v4.sin_port = 0;
> addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
> + addr->valid = 1;
> + INIT_RCU_HEAD(&addr->rcu);
> list_add_tail(&addr->list, addrlist);
> }
> }
> @@ -192,16 +194,24 @@ static void sctp_free_local_addr_list(void)
> }
> }
>
> +void sctp_local_addr_free(struct rcu_head *head)
> +{
> + struct sctp_sockaddr_entry *e = container_of(head,
> + struct sctp_sockaddr_entry, rcu);
> + kfree(e);
> +}
> +
> /* Copy the local addresses which are valid for 'scope' into 'bp'. */
> int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
> gfp_t gfp, int copy_flags)
> {
> struct sctp_sockaddr_entry *addr;
> int error = 0;
> - struct list_head *pos, *temp;
>
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> if (sctp_in_scope(&addr->a, scope)) {
> /* Now that the address is in scope, check to see if
> * the address type is really supported by the local
> @@ -221,6 +231,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
> }
>
> end_copy:
> + rcu_read_unlock();
> return error;
> }
>
> @@ -605,8 +616,8 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
> void *ptr)
> {
> struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
> - struct sctp_sockaddr_entry *addr;
> - struct list_head *pos, *temp;
> + struct sctp_sockaddr_entry *addr = NULL;
> + struct sctp_sockaddr_entry *temp;
>
> switch (ev) {
> case NETDEV_UP:
> @@ -615,19 +626,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
> addr->a.v4.sin_family = AF_INET;
> addr->a.v4.sin_port = 0;
> addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
> - list_add_tail(&addr->list, &sctp_local_addr_list);
> + addr->valid = 1;
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
> + spin_unlock_bh(&sctp_local_addr_lock);
> }
> break;
> case NETDEV_DOWN:
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_for_each_entry_safe(addr, temp,
> + &sctp_local_addr_list, list) {
> if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) {
> - list_del(pos);
> - kfree(addr);
> + addr->valid = 0;
> + list_del_rcu(&addr->list);
> break;
> }
> }
> -
> + spin_unlock_bh(&sctp_local_addr_lock);
> + if (addr && !addr->valid)
> + call_rcu(&addr->rcu, sctp_local_addr_free);
> break;
> }
>
> @@ -1160,6 +1177,7 @@ SCTP_STATIC __init int sctp_init(void)
>
> /* Initialize the local address list. */
> INIT_LIST_HEAD(&sctp_local_addr_list);
> + spin_lock_init(&sctp_local_addr_lock);
> sctp_get_local_addr_list();
>
> /* Register notifier for inet address additions/deletions. */
> @@ -1227,6 +1245,9 @@ SCTP_STATIC __exit void sctp_exit(void)
> sctp_v6_del_protocol();
> inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
>
> + /* Unregister notifier for inet address additions/deletions. */
> + unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
> +
> /* Free the local address list. */
> sctp_free_local_addr_list();
>
> @@ -1240,9 +1261,6 @@ SCTP_STATIC __exit void sctp_exit(void)
> inet_unregister_protosw(&sctp_stream_protosw);
> inet_unregister_protosw(&sctp_seqpacket_protosw);
>
> - /* Unregister notifier for inet address additions/deletions. */
> - unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
> -
> sctp_sysctl_unregister();
> list_del(&sctp_ipv4_specific.list);
>
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 3335460..a3acf78 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -4057,9 +4057,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> int __user *optlen)
> {
> sctp_assoc_t id;
> + struct list_head *pos;
> struct sctp_bind_addr *bp;
> struct sctp_association *asoc;
> - struct list_head *pos, *temp;
> struct sctp_sockaddr_entry *addr;
> rwlock_t *addr_lock;
> int cnt = 0;
> @@ -4096,15 +4096,19 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> addr = list_entry(bp->address_list.next,
> struct sctp_sockaddr_entry, list);
> if (sctp_is_any(&addr->a)) {
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos,
> - struct sctp_sockaddr_entry,
> - list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr,
> + &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> +
> if ((PF_INET == sk->sk_family) &&
> (AF_INET6 == addr->a.sa.sa_family))
> continue;
> +
> cnt++;
> }
> + rcu_read_unlock();
> } else {
> cnt = 1;
> }
> @@ -4127,14 +4131,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
> int max_addrs, void *to,
> int *bytes_copied)
> {
> - struct list_head *pos, *next;
> struct sctp_sockaddr_entry *addr;
> union sctp_addr temp;
> int cnt = 0;
> int addrlen;
>
> - list_for_each_safe(pos, next, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> +
> if ((PF_INET == sk->sk_family) &&
> (AF_INET6 == addr->a.sa.sa_family))
> continue;
> @@ -4149,6 +4155,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
> cnt ++;
> if (cnt >= max_addrs) break;
> }
> + rcu_read_unlock();
>
> return cnt;
> }
> @@ -4156,14 +4163,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
> static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
> size_t space_left, int *bytes_copied)
> {
> - struct list_head *pos, *next;
> struct sctp_sockaddr_entry *addr;
> union sctp_addr temp;
> int cnt = 0;
> int addrlen;
>
> - list_for_each_safe(pos, next, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> +
> if ((PF_INET == sk->sk_family) &&
> (AF_INET6 == addr->a.sa.sa_family))
> continue;
> @@ -4171,8 +4180,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
> sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
> &temp);
> addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
> - if (space_left < addrlen)
> - return -ENOMEM;
> + if (space_left < addrlen) {
> + cnt = -ENOMEM;
> + break;
> + }
> memcpy(to, &temp, addrlen);
>
> to += addrlen;
> @@ -4180,6 +4191,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
> space_left -= addrlen;
> *bytes_copied += addrlen;
> }
> + rcu_read_unlock();
>
> return cnt;
> }
> --
> 1.5.2.4
>
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [RFC v3 PATCH 2/21] SCTP: Convert bind_addr_list locking to RCU
From: Paul E. McKenney @ 2007-09-12 22:33 UTC (permalink / raw)
To: Vlad Yasevich; +Cc: netdev, lksctp-developers
In-Reply-To: <11896310224146-git-send-email-vladislav.yasevich@hp.com>
On Wed, Sep 12, 2007 at 05:03:42PM -0400, Vlad Yasevich wrote:
> [... and here is the updated version as promissed ...]
>
> Since the sctp_sockaddr_entry is now RCU enabled as part of
> the patch to synchronize sctp_localaddr_list, it makes sense to
> change all handling of these entries to RCU. This includes the
> sctp_bind_addrs structure and it's list of bound addresses.
>
> This list is currently protected by an external rw_lock and that
> looks like an overkill. There are only 2 writers to the list:
> bind()/bindx() calls, and BH processing of ASCONF-ACK chunks.
> These are already seriealized via the socket lock, so they will
> not step on each other. These are also relatively rare, so we
> should be good with RCU.
>
> The readers are varied and they are easily converted to RCU.
Looks good from an RCU viewpoint -- I must defer to others on
the networking aspects.
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
> ---
> include/net/sctp/structs.h | 7 +--
> net/sctp/associola.c | 14 +-----
> net/sctp/bind_addr.c | 68 ++++++++++++++++++++----------
> net/sctp/endpointola.c | 27 +++---------
> net/sctp/ipv6.c | 12 ++---
> net/sctp/protocol.c | 25 ++++-------
> net/sctp/sm_make_chunk.c | 18 +++-----
> net/sctp/socket.c | 98 ++++++++++++-------------------------------
> 8 files changed, 106 insertions(+), 163 deletions(-)
>
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index a89e361..c2fe2dc 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -1155,7 +1155,9 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
> int flags);
> int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
> __u8 use_as_src, gfp_t gfp);
> -int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
> +int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
> + void (*rcu_call)(struct rcu_head *,
> + void (*func)(struct rcu_head *)));
> int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
> struct sctp_sock *);
> union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
> @@ -1226,9 +1228,6 @@ struct sctp_ep_common {
> * bind_addr.address_list is our set of local IP addresses.
> */
> struct sctp_bind_addr bind_addr;
> -
> - /* Protection during address list comparisons. */
> - rwlock_t addr_lock;
> };
>
>
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 2ad1caf..9bad8ba 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
>
> /* Initialize the bind addr area. */
> sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
> - rwlock_init(&asoc->base.addr_lock);
>
> asoc->state = SCTP_STATE_CLOSED;
>
> @@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
> {
> struct sctp_transport *transport;
>
> - sctp_read_lock(&asoc->base.addr_lock);
> -
> if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
> (htons(asoc->peer.port) == paddr->v4.sin_port)) {
> transport = sctp_assoc_lookup_paddr(asoc, paddr);
> @@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
> transport = NULL;
>
> out:
> - sctp_read_unlock(&asoc->base.addr_lock);
> return transport;
> }
>
> @@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
> int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
> const union sctp_addr *laddr)
> {
> - int found;
> + int found = 0;
>
> - sctp_read_lock(&asoc->base.addr_lock);
> if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) &&
> sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
> - sctp_sk(asoc->base.sk))) {
> + sctp_sk(asoc->base.sk)))
> found = 1;
> - goto out;
> - }
>
> - found = 0;
> -out:
> - sctp_read_unlock(&asoc->base.addr_lock);
> return found;
> }
>
> diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
> index 7fc369f..d16055f 100644
> --- a/net/sctp/bind_addr.c
> +++ b/net/sctp/bind_addr.c
> @@ -167,7 +167,11 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
>
> INIT_LIST_HEAD(&addr->list);
> INIT_RCU_HEAD(&addr->rcu);
> - list_add_tail(&addr->list, &bp->address_list);
> +
> + /* We always hold a socket lock when calling this function,
> + * so rcu_read_lock is not needed.
> + */
> + list_add_tail_rcu(&addr->list, &bp->address_list);
> SCTP_DBG_OBJCNT_INC(addr);
>
> return 0;
> @@ -176,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
> /* Delete an address from the bind address list in the SCTP_bind_addr
> * structure.
> */
> -int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
> +int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr,
> + void (*rcu_call)(struct rcu_head *head,
> + void (*func)(struct rcu_head *head)))
> {
> - struct list_head *pos, *temp;
> - struct sctp_sockaddr_entry *addr;
> + struct sctp_sockaddr_entry *addr, *temp;
>
> - list_for_each_safe(pos, temp, &bp->address_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + /* We hold the socket lock when calling this function, so
> + * rcu_read_lock is not needed.
> + */
> + list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
> if (sctp_cmp_addr_exact(&addr->a, del_addr)) {
> /* Found the exact match. */
> - list_del(pos);
> - kfree(addr);
> - SCTP_DBG_OBJCNT_DEC(addr);
> -
> - return 0;
> + addr->valid = 0;
> + list_del_rcu(&addr->list);
> + break;
> }
> }
>
> + /* Call the rcu callback provided in the args. This function is
> + * called by both BH packet processing and user side socket option
> + * processing, but it works on different lists in those 2 contexts.
> + * Each context provides it's own callback, whether call_rc_bh()
> + * or call_rcu(), to make sure that we wait an for appropriate time.
> + */
> + if (addr && !addr->valid) {
> + rcu_call(&addr->rcu, sctp_local_addr_free);
> + SCTP_DBG_OBJCNT_DEC(addr);
> + }
> +
> return -EINVAL;
> }
>
> @@ -302,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
> struct sctp_sock *opt)
> {
> struct sctp_sockaddr_entry *laddr;
> - struct list_head *pos;
> -
> - list_for_each(pos, &bp->address_list) {
> - laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
> - if (opt->pf->cmp_addr(&laddr->a, addr, opt))
> - return 1;
> + int match = 0;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(laddr, &bp->address_list, list) {
> + if (!laddr->valid)
> + continue;
> + if (opt->pf->cmp_addr(&laddr->a, addr, opt)) {
> + match = 1;
> + break;
> + }
> }
> + rcu_read_unlock();
>
> - return 0;
> + return match;
> }
>
> /* Find the first address in the bind address list that is not present in
> @@ -325,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
> union sctp_addr *addr;
> void *addr_buf;
> struct sctp_af *af;
> - struct list_head *pos;
> int i;
>
> - list_for_each(pos, &bp->address_list) {
> - laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
> -
> + /* This is only called sctp_send_asconf_del_ip() and we hold
> + * the socket lock in that code patch, so that address list
> + * can't change.
> + */
> + list_for_each_entry(laddr, &bp->address_list, list) {
> addr_buf = (union sctp_addr *)addrs;
> for (i = 0; i < addrcnt; i++) {
> addr = (union sctp_addr *)addr_buf;
> af = sctp_get_af_specific(addr->v4.sin_family);
> if (!af)
> - return NULL;
> + break;
>
> if (opt->pf->cmp_addr(&laddr->a, addr, opt))
> break;
> diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
> index 1404a9e..110d912 100644
> --- a/net/sctp/endpointola.c
> +++ b/net/sctp/endpointola.c
> @@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
>
> /* Initialize the bind addr area */
> sctp_bind_addr_init(&ep->base.bind_addr, 0);
> - rwlock_init(&ep->base.addr_lock);
>
> /* Remember who we are attached to. */
> ep->base.sk = sk;
> @@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
> struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
> const union sctp_addr *laddr)
> {
> - struct sctp_endpoint *retval;
> + struct sctp_endpoint *retval = NULL;
>
> - sctp_read_lock(&ep->base.addr_lock);
> if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
> if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
> - sctp_sk(ep->base.sk))) {
> + sctp_sk(ep->base.sk)))
> retval = ep;
> - goto out;
> - }
> }
>
> - retval = NULL;
> -
> -out:
> - sctp_read_unlock(&ep->base.addr_lock);
> return retval;
> }
>
> @@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
> list_for_each(pos, &ep->asocs) {
> asoc = list_entry(pos, struct sctp_association, asocs);
> if (rport == asoc->peer.port) {
> - sctp_read_lock(&asoc->base.addr_lock);
> *transport = sctp_assoc_lookup_paddr(asoc, paddr);
> - sctp_read_unlock(&asoc->base.addr_lock);
>
> if (*transport)
> return asoc;
> @@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
> int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
> const union sctp_addr *paddr)
> {
> - struct list_head *pos;
> struct sctp_sockaddr_entry *addr;
> struct sctp_bind_addr *bp;
>
> - sctp_read_lock(&ep->base.addr_lock);
> bp = &ep->base.bind_addr;
> - list_for_each(pos, &bp->address_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> - if (sctp_has_association(&addr->a, paddr)) {
> - sctp_read_unlock(&ep->base.addr_lock);
> + /* This function is called whith the socket lock held,
> + * so the address_list can not change.
> + */
> + list_for_each_entry(addr, &bp->address_list, list) {
> + if (sctp_has_association(&addr->a, paddr))
> return 1;
> - }
> }
> - sctp_read_unlock(&ep->base.addr_lock);
>
> return 0;
> }
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index 54ff472..c8b0115 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -302,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
> union sctp_addr *saddr)
> {
> struct sctp_bind_addr *bp;
> - rwlock_t *addr_lock;
> struct sctp_sockaddr_entry *laddr;
> - struct list_head *pos;
> sctp_scope_t scope;
> union sctp_addr *baddr = NULL;
> __u8 matchlen = 0;
> @@ -324,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
> scope = sctp_scope(daddr);
>
> bp = &asoc->base.bind_addr;
> - addr_lock = &asoc->base.addr_lock;
>
> /* Go through the bind address list and find the best source address
> * that matches the scope of the destination address.
> */
> - sctp_read_lock(addr_lock);
> - list_for_each(pos, &bp->address_list) {
> - laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(laddr, &bp->address_list, list) {
> + if (!laddr->valid)
> + continue;
> if ((laddr->use_as_src) &&
> (laddr->a.sa.sa_family == AF_INET6) &&
> (scope <= sctp_scope(&laddr->a))) {
> @@ -353,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
> __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
> }
>
> - sctp_read_unlock(addr_lock);
> + rcu_read_unlock();
> }
>
> /* Make a copy of all potential local addresses. */
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index 4688559..35af75b 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -223,7 +223,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
> (copy_flags & SCTP_ADDR6_ALLOWED) &&
> (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
> error = sctp_add_bind_addr(bp, &addr->a, 1,
> - GFP_ATOMIC);
> + GFP_ATOMIC);
> if (error)
> goto end_copy;
> }
> @@ -427,9 +427,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
> struct rtable *rt;
> struct flowi fl;
> struct sctp_bind_addr *bp;
> - rwlock_t *addr_lock;
> struct sctp_sockaddr_entry *laddr;
> - struct list_head *pos;
> struct dst_entry *dst = NULL;
> union sctp_addr dst_saddr;
>
> @@ -458,23 +456,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
> goto out;
>
> bp = &asoc->base.bind_addr;
> - addr_lock = &asoc->base.addr_lock;
>
> if (dst) {
> /* Walk through the bind address list and look for a bind
> * address that matches the source address of the returned dst.
> */
> - sctp_read_lock(addr_lock);
> - list_for_each(pos, &bp->address_list) {
> - laddr = list_entry(pos, struct sctp_sockaddr_entry,
> - list);
> - if (!laddr->use_as_src)
> + rcu_read_lock();
> + list_for_each_entry_rcu(laddr, &bp->address_list, list) {
> + if (!laddr->valid || !laddr->use_as_src)
> continue;
> sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
> if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
> goto out_unlock;
> }
> - sctp_read_unlock(addr_lock);
> + rcu_read_unlock();
>
> /* None of the bound addresses match the source address of the
> * dst. So release it.
> @@ -486,10 +481,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
> /* Walk through the bind address list and try to get a dst that
> * matches a bind address as the source address.
> */
> - sctp_read_lock(addr_lock);
> - list_for_each(pos, &bp->address_list) {
> - laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
> -
> + rcu_read_lock();
> + list_for_each_entry_rcu(laddr, &bp->address_list, list) {
> + if (!laddr->valid)
> + continue;
> if ((laddr->use_as_src) &&
> (AF_INET == laddr->a.sa.sa_family)) {
> fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
> @@ -501,7 +496,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
> }
>
> out_unlock:
> - sctp_read_unlock(addr_lock);
> + rcu_read_unlock();
> out:
> if (dst)
> SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
> diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
> index 79856c9..2e34220 100644
> --- a/net/sctp/sm_make_chunk.c
> +++ b/net/sctp/sm_make_chunk.c
> @@ -1531,7 +1531,7 @@ no_hmac:
> /* Also, add the destination address. */
> if (list_empty(&retval->base.bind_addr.address_list)) {
> sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
> - GFP_ATOMIC);
> + GFP_ATOMIC);
> }
>
> retval->next_tsn = retval->c.initial_tsn;
> @@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
>
> switch (asconf_param->param_hdr.type) {
> case SCTP_PARAM_ADD_IP:
> - sctp_local_bh_disable();
> - sctp_write_lock(&asoc->base.addr_lock);
> - list_for_each(pos, &bp->address_list) {
> - saddr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + /* This is always done in BH context with a socket lock
> + * held, so the list can not change.
> + */
> + list_for_each_entry(saddr, &bp->address_list, list) {
> if (sctp_cmp_addr_exact(&saddr->a, &addr))
> saddr->use_as_src = 1;
> }
> - sctp_write_unlock(&asoc->base.addr_lock);
> - sctp_local_bh_enable();
> break;
> case SCTP_PARAM_DEL_IP:
> - sctp_local_bh_disable();
> - sctp_write_lock(&asoc->base.addr_lock);
> - retval = sctp_del_bind_addr(bp, &addr);
> - sctp_write_unlock(&asoc->base.addr_lock);
> - sctp_local_bh_enable();
> + retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh);
> list_for_each(pos, &asoc->peer.transport_addr_list) {
> transport = list_entry(pos, struct sctp_transport,
> transports);
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index a3acf78..772fbfb 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
> if (!bp->port)
> bp->port = inet_sk(sk)->num;
>
> - /* Add the address to the bind address list. */
> - sctp_local_bh_disable();
> - sctp_write_lock(&ep->base.addr_lock);
> -
> - /* Use GFP_ATOMIC since BHs are disabled. */
> + /* Add the address to the bind address list.
> + * Use GFP_ATOMIC since BHs will be disabled.
> + */
> ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
> - sctp_write_unlock(&ep->base.addr_lock);
> - sctp_local_bh_enable();
>
> /* Copy back into socket for getsockname() use. */
> if (!ret) {
> @@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
> if (i < addrcnt)
> continue;
>
> - /* Use the first address in bind addr list of association as
> - * Address Parameter of ASCONF CHUNK.
> + /* Use the first valid address in bind addr list of
> + * association as Address Parameter of ASCONF CHUNK.
> */
> - sctp_read_lock(&asoc->base.addr_lock);
> bp = &asoc->base.bind_addr;
> p = bp->address_list.next;
> laddr = list_entry(p, struct sctp_sockaddr_entry, list);
> - sctp_read_unlock(&asoc->base.addr_lock);
> -
> chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
> addrcnt, SCTP_PARAM_ADD_IP);
> if (!chunk) {
> @@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
> /* Add the new addresses to the bind address list with
> * use_as_src set to 0.
> */
> - sctp_local_bh_disable();
> - sctp_write_lock(&asoc->base.addr_lock);
> addr_buf = addrs;
> for (i = 0; i < addrcnt; i++) {
> addr = (union sctp_addr *)addr_buf;
> @@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
> GFP_ATOMIC);
> addr_buf += af->sockaddr_len;
> }
> - sctp_write_unlock(&asoc->base.addr_lock);
> - sctp_local_bh_enable();
> }
>
> out:
> @@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
> * socket routing and failover schemes. Refer to comments in
> * sctp_do_bind(). -daisy
> */
> - sctp_local_bh_disable();
> - sctp_write_lock(&ep->base.addr_lock);
> -
> - retval = sctp_del_bind_addr(bp, sa_addr);
> -
> - sctp_write_unlock(&ep->base.addr_lock);
> - sctp_local_bh_enable();
> + retval = sctp_del_bind_addr(bp, sa_addr, call_rcu);
>
> addr_buf += af->sockaddr_len;
> err_bindx_rem:
> @@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
> * make sure that we do not delete all the addresses in the
> * association.
> */
> - sctp_read_lock(&asoc->base.addr_lock);
> bp = &asoc->base.bind_addr;
> laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
> addrcnt, sp);
> - sctp_read_unlock(&asoc->base.addr_lock);
> if (!laddr)
> continue;
>
> + /* We do not need RCU protection throughout this loop
> + * because this is done under a socket lock from the
> + * setsockopt call.
> + */
> chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt,
> SCTP_PARAM_DEL_IP);
> if (!chunk) {
> @@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
> /* Reset use_as_src flag for the addresses in the bind address
> * list that are to be deleted.
> */
> - sctp_local_bh_disable();
> - sctp_write_lock(&asoc->base.addr_lock);
> addr_buf = addrs;
> for (i = 0; i < addrcnt; i++) {
> laddr = (union sctp_addr *)addr_buf;
> af = sctp_get_af_specific(laddr->v4.sin_family);
> - list_for_each(pos1, &bp->address_list) {
> - saddr = list_entry(pos1,
> - struct sctp_sockaddr_entry,
> - list);
> + list_for_each_entry(saddr, &bp->address_list, list) {
> if (sctp_cmp_addr_exact(&saddr->a, laddr))
> saddr->use_as_src = 0;
> }
> addr_buf += af->sockaddr_len;
> }
> - sctp_write_unlock(&asoc->base.addr_lock);
> - sctp_local_bh_enable();
>
> /* Update the route and saddr entries for all the transports
> * as some of the addresses in the bind address list are
> @@ -4057,11 +4035,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> int __user *optlen)
> {
> sctp_assoc_t id;
> - struct list_head *pos;
> struct sctp_bind_addr *bp;
> struct sctp_association *asoc;
> struct sctp_sockaddr_entry *addr;
> - rwlock_t *addr_lock;
> int cnt = 0;
>
> if (len < sizeof(sctp_assoc_t))
> @@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> */
> if (0 == id) {
> bp = &sctp_sk(sk)->ep->base.bind_addr;
> - addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
> } else {
> asoc = sctp_id2assoc(sk, id);
> if (!asoc)
> return -EINVAL;
> bp = &asoc->base.bind_addr;
> - addr_lock = &asoc->base.addr_lock;
> }
>
> - sctp_read_lock(addr_lock);
> -
> /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid
> * addresses from the global local address list.
> */
> @@ -4115,12 +4087,14 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> goto done;
> }
>
> - list_for_each(pos, &bp->address_list) {
> + /* Protection on the bound address list is not needed,
> + * since in the socket option context we hold the socket lock,
> + * so there is no way that the bound address list can change.
> + */
> + list_for_each_entry(addr, &bp->address_list, list) {
> cnt ++;
> }
> -
> done:
> - sctp_read_unlock(addr_lock);
> return cnt;
> }
>
> @@ -4204,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
> {
> struct sctp_bind_addr *bp;
> struct sctp_association *asoc;
> - struct list_head *pos;
> int cnt = 0;
> struct sctp_getaddrs_old getaddrs;
> struct sctp_sockaddr_entry *addr;
> @@ -4212,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
> union sctp_addr temp;
> struct sctp_sock *sp = sctp_sk(sk);
> int addrlen;
> - rwlock_t *addr_lock;
> int err = 0;
> void *addrs;
> void *buf;
> @@ -4234,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
> */
> if (0 == getaddrs.assoc_id) {
> bp = &sctp_sk(sk)->ep->base.bind_addr;
> - addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
> } else {
> asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
> if (!asoc)
> return -EINVAL;
> bp = &asoc->base.bind_addr;
> - addr_lock = &asoc->base.addr_lock;
> }
>
> to = getaddrs.addrs;
> @@ -4254,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
> if (!addrs)
> return -ENOMEM;
>
> - sctp_read_lock(addr_lock);
> -
> /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
> * addresses from the global local address list.
> */
> @@ -4271,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
> }
>
> buf = addrs;
> - list_for_each(pos, &bp->address_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + /* Protection on the bound address list is not needed since
> + * in the socket option context we hold a socket lock and
> + * thus the bound address list can't change.
> + */
> + list_for_each_entry(addr, &bp->address_list, list) {
> memcpy(&temp, &addr->a, sizeof(temp));
> sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
> addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
> @@ -4284,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
> }
>
> copy_getaddrs:
> - sctp_read_unlock(addr_lock);
> -
> /* copy the entire address list into the user provided space */
> if (copy_to_user(to, addrs, bytes_copied)) {
> err = -EFAULT;
> @@ -4307,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
> {
> struct sctp_bind_addr *bp;
> struct sctp_association *asoc;
> - struct list_head *pos;
> int cnt = 0;
> struct sctp_getaddrs getaddrs;
> struct sctp_sockaddr_entry *addr;
> @@ -4315,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
> union sctp_addr temp;
> struct sctp_sock *sp = sctp_sk(sk);
> int addrlen;
> - rwlock_t *addr_lock;
> int err = 0;
> size_t space_left;
> int bytes_copied = 0;
> @@ -4336,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
> */
> if (0 == getaddrs.assoc_id) {
> bp = &sctp_sk(sk)->ep->base.bind_addr;
> - addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
> } else {
> asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
> if (!asoc)
> return -EINVAL;
> bp = &asoc->base.bind_addr;
> - addr_lock = &asoc->base.addr_lock;
> }
>
> to = optval + offsetof(struct sctp_getaddrs,addrs);
> @@ -4352,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
> if (!addrs)
> return -ENOMEM;
>
> - sctp_read_lock(addr_lock);
> -
> /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
> * addresses from the global local address list.
> */
> @@ -4365,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
> space_left, &bytes_copied);
> if (cnt < 0) {
> err = cnt;
> - goto error_lock;
> + goto out;
> }
> goto copy_getaddrs;
> }
> }
>
> buf = addrs;
> - list_for_each(pos, &bp->address_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + /* Protection on the bound address list is not needed since
> + * in the socket option context we hold a socket lock and
> + * thus the bound address list can't change.
> + */
> + list_for_each_entry(addr, &bp->address_list, list) {
> memcpy(&temp, &addr->a, sizeof(temp));
> sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
> addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
> if (space_left < addrlen) {
> err = -ENOMEM; /*fixme: right error?*/
> - goto error_lock;
> + goto out;
> }
> memcpy(buf, &temp, addrlen);
> buf += addrlen;
> @@ -4389,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
> }
>
> copy_getaddrs:
> - sctp_read_unlock(addr_lock);
> -
> if (copy_to_user(to, addrs, bytes_copied)) {
> err = -EFAULT;
> goto out;
> @@ -4401,12 +4365,6 @@ copy_getaddrs:
> }
> if (put_user(bytes_copied, optlen))
> err = -EFAULT;
> -
> - goto out;
> -
> -error_lock:
> - sctp_read_unlock(addr_lock);
> -
> out:
> kfree(addrs);
> return err;
> --
> 1.5.2.4
>
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH] net: Fix race when opening a proc file while a network namespace is exiting.
From: Paul E. McKenney @ 2007-09-12 22:46 UTC (permalink / raw)
To: Eric W. Biederman; +Cc: David Miller, netdev, Linux Containers
In-Reply-To: <m14phzam2l.fsf@ebiederm.dsl.xmission.com>
On Wed, Sep 12, 2007 at 10:24:34AM -0600, Eric W. Biederman wrote:
>
> The problem: proc_net files remember which network namespace the are
> against but do not remember hold a reference count (as that would pin
> the network namespace). So we currently have a small window where
> the reference count on a network namespace may be incremented when opening
> a /proc file when it has already gone to zero.
>
> To fix this introduce maybe_get_net and get_proc_net.
>
> maybe_get_net increments the network namespace reference count only if it is
> greater then zero, ensuring we don't increment a reference count after it
> has gone to zero.
>
> get_proc_net handles all of the magic to go from a proc inode to the network
> namespace instance and call maybe_get_net on it.
>
> PROC_NET the old accessor is removed so that we don't get confused and use
> the wrong helper function.
>
> Then I fix up the callers to use get_proc_net and handle the case case
> where get_proc_net returns NULL. In that case I return -ENXIO because
> effectively the network namespace has already gone away so the files
> we are trying to access don't exist anymore.
Looks much better!
Acked-by: Paul E. McKenney <paulmck@us.ibm.com>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
> fs/proc/proc_net.c | 6 ++++++
> include/linux/proc_fs.h | 5 +----
> include/net/net_namespace.h | 12 ++++++++++++
> net/core/dev.c | 6 +++++-
> net/core/dev_mcast.c | 6 +++++-
> net/netlink/af_netlink.c | 6 +++++-
> net/wireless/wext.c | 6 +++++-
> 7 files changed, 39 insertions(+), 8 deletions(-)
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 358930a..85cc8e8 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -51,6 +51,12 @@ void proc_net_remove(struct net *net, const char *name)
> }
> EXPORT_SYMBOL_GPL(proc_net_remove);
>
> +struct net *get_proc_net(const struct inode *inode)
> +{
> + return maybe_get_net(PDE_NET(PDE(inode)));
> +}
> +EXPORT_SYMBOL_GPL(get_proc_net);
> +
> static struct proc_dir_entry *proc_net_shadow;
>
> static struct dentry *proc_net_shadow_dentry(struct dentry *parent,
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index 5964670..20741f6 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -270,10 +270,7 @@ static inline struct net *PDE_NET(struct proc_dir_entry *pde)
> return pde->parent->data;
> }
>
> -static inline struct net *PROC_NET(const struct inode *inode)
> -{
> - return PDE_NET(PDE(inode));
> -}
> +struct net *get_proc_net(const struct inode *inode);
>
> struct proc_maps_private {
> struct pid *pid;
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index fac42db..dda03f3 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -39,6 +39,18 @@ static inline struct net *get_net(struct net *net)
> return net;
> }
>
> +static inline struct net *maybe_get_net(struct net *net)
> +{
> + /* Used when we know struct net exists but we
> + * aren't guaranteed a previous reference count
> + * exists. If the reference count is zero this
> + * function fails and returns NULL.
> + */
> + if (!atomic_inc_not_zero(&net->count))
> + net = NULL;
> + return net;
> +}
> +
> static inline void put_net(struct net *net)
> {
> if (atomic_dec_and_test(&net->count))
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a22a95d..f119dc0 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2446,7 +2446,11 @@ static int dev_seq_open(struct inode *inode, struct file *file)
> res = seq_open(file, &dev_seq_ops);
> if (!res) {
> seq = file->private_data;
> - seq->private = get_net(PROC_NET(inode));
> + seq->private = get_proc_net(inode);
> + if (!seq->private) {
> + seq_release(inode, file);
> + res = -ENXIO;
> + }
> }
> return res;
> }
> diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
> index 1c4f619..896b0ca 100644
> --- a/net/core/dev_mcast.c
> +++ b/net/core/dev_mcast.c
> @@ -246,7 +246,11 @@ static int dev_mc_seq_open(struct inode *inode, struct file *file)
> res = seq_open(file, &dev_mc_seq_ops);
> if (!res) {
> seq = file->private_data;
> - seq->private = get_net(PROC_NET(inode));
> + seq->private = get_proc_net(inode);
> + if (!seq->private) {
> + seq_release(inode, file);
> + res = -ENXIO;
> + }
> }
> return res;
> }
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 3029f86..dc9f8c2 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -1859,7 +1859,11 @@ static int netlink_seq_open(struct inode *inode, struct file *file)
>
> seq = file->private_data;
> seq->private = iter;
> - iter->net = get_net(PROC_NET(inode));
> + iter->net = get_proc_net(inode);
> + if (!iter->net) {
> + seq_release_private(inode, file);
> + return -ENXIO;
> + }
> return 0;
> }
>
> diff --git a/net/wireless/wext.c b/net/wireless/wext.c
> index e8b3409..85e5f9d 100644
> --- a/net/wireless/wext.c
> +++ b/net/wireless/wext.c
> @@ -678,7 +678,11 @@ static int wireless_seq_open(struct inode *inode, struct file *file)
> res = seq_open(file, &wireless_seq_ops);
> if (!res) {
> seq = file->private_data;
> - seq->private = get_net(PROC_NET(inode));
> + seq->private = get_proc_net(inode);
> + if (!seq->private) {
> + seq_release(inode, file);
> + res = -ENXIO;
> + }
> }
> return res;
> }
> --
> 1.5.3.rc6.17.g1911
>
^ permalink raw reply
* [PATCH] IPV6: fix source address selection
From: Jiri Kosina @ 2007-09-12 22:56 UTC (permalink / raw)
To: davem, nhorman; +Cc: netdev, Jiri Bohac, Petr Baudis
From: Jiri Kosina <jkosina@suse.cz>
[PATCH] IPV6: fix source address selection
The commit 95c385 broke proper source address selection for cases in which
there is a address which is makred 'deprecated'. The commit mistakenly
changed ifa->flags to ifa_result->flags (probably copy/paste error from a
few lines above) in the 'Rule 3' address selection code.
The patch below restores the previous RFC-compliant behavior, please
apply.
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Petr Baudis <pasky@suse.cz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91ef3be..45b4c82 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1021,7 +1021,7 @@ int ipv6_dev_get_saddr(struct net_device
hiscore.rule++;
}
if (ipv6_saddr_preferred(score.addr_type) ||
- (((ifa_result->flags &
+ (((ifa->flags &
(IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
^ permalink raw reply related
* Re: [RFC v2 PATCH 1/2] SCTP: Add RCU synchronization around sctp_localaddr_list
From: Sridhar Samudrala @ 2007-09-12 23:03 UTC (permalink / raw)
To: Vlad Yasevich; +Cc: netdev, lksctp-developers
In-Reply-To: <11896263982464-git-send-email-vladislav.yasevich@hp.com>
Vlad,
few minor comments inline.
otherwise, looks good.
Thanks
Sridhar
On Wed, 2007-09-12 at 15:46 -0400, Vlad Yasevich wrote:
> sctp_localaddr_list is modified dynamically via NETDEV_UP
> and NETDEV_DOWN events, but there is not synchronization
> between writer (even handler) and readers. As a result,
> the readers can access an entry that has been freed and
> crash the sytem.
>
> Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
> ---
> include/net/sctp/sctp.h | 1 +
> include/net/sctp/structs.h | 6 +++++
> net/sctp/bind_addr.c | 2 +
> net/sctp/ipv6.c | 34 ++++++++++++++++++++++---------
> net/sctp/protocol.c | 46 ++++++++++++++++++++++++++++++-------------
> net/sctp/socket.c | 38 +++++++++++++++++++++++------------
> 6 files changed, 90 insertions(+), 37 deletions(-)
>
> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
> index d529045..c9cc00c 100644
> --- a/include/net/sctp/sctp.h
> +++ b/include/net/sctp/sctp.h
> @@ -123,6 +123,7 @@
> * sctp/protocol.c
> */
> extern struct sock *sctp_get_ctl_sock(void);
> +extern void sctp_local_addr_free(struct rcu_head *head);
> extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
> sctp_scope_t, gfp_t gfp,
> int flags);
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index c0d5848..a89e361 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -207,6 +207,9 @@ extern struct sctp_globals {
> * It is a list of sctp_sockaddr_entry.
> */
> struct list_head local_addr_list;
> +
> + /* Lock that protects the local_addr_list writers */
> + spinlock_t addr_list_lock;
>
> /* Flag to indicate if addip is enabled. */
> int addip_enable;
> @@ -242,6 +245,7 @@ extern struct sctp_globals {
> #define sctp_port_alloc_lock (sctp_globals.port_alloc_lock)
> #define sctp_port_hashtable (sctp_globals.port_hashtable)
> #define sctp_local_addr_list (sctp_globals.local_addr_list)
> +#define sctp_local_addr_lock (sctp_globals.addr_list_lock)
> #define sctp_addip_enable (sctp_globals.addip_enable)
> #define sctp_prsctp_enable (sctp_globals.prsctp_enable)
>
> @@ -737,8 +741,10 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
> /* This is a structure for holding either an IPv6 or an IPv4 address. */
> struct sctp_sockaddr_entry {
> struct list_head list;
> + struct rcu_head rcu;
> union sctp_addr a;
> __u8 use_as_src;
> + __u8 valid;
> };
>
> typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
> diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
> index fdb287a..7fc369f 100644
> --- a/net/sctp/bind_addr.c
> +++ b/net/sctp/bind_addr.c
> @@ -163,8 +163,10 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
> addr->a.v4.sin_port = htons(bp->port);
>
> addr->use_as_src = use_as_src;
> + addr->valid = 1;
>
> INIT_LIST_HEAD(&addr->list);
> + INIT_RCU_HEAD(&addr->rcu);
> list_add_tail(&addr->list, &bp->address_list);
> SCTP_DBG_OBJCNT_INC(addr);
>
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index f8aa23d..54ff472 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -77,13 +77,18 @@
>
> #include <asm/uaccess.h>
>
> -/* Event handler for inet6 address addition/deletion events. */
> +/* Event handler for inet6 address addition/deletion events.
> + * This even is part of the atomic notifier call chain
> + * and thus happens atomically and can NOT sleep. As a result
> + * we can't and really don't need to add any locks to guard the
> + * RCU.
> + */
Now that we are adding a spin_lock, the above comment is not valid.
It should be fixed saying that we still need a lock because we use the
same list for both inet and inet6 address events and they can happen in
parallel.
> static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
> void *ptr)
> {
> struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
> - struct sctp_sockaddr_entry *addr;
> - struct list_head *pos, *temp;
> + struct sctp_sockaddr_entry *addr = NULL;
> + struct sctp_sockaddr_entry *temp;
>
> switch (ev) {
> case NETDEV_UP:
> @@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
> memcpy(&addr->a.v6.sin6_addr, &ifa->addr,
> sizeof(struct in6_addr));
> addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
> - list_add_tail(&addr->list, &sctp_local_addr_list);
> + addr->valid = 1;
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
> + spin_unlock_bh(&sctp_local_addr_lock);
> }
> break;
> case NETDEV_DOWN:
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> - if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) {
> - list_del(pos);
> - kfree(addr);
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_for_each_entry_safe(addr, temp,
> + &sctp_local_addr_list, list) {
> + if (ipv6_addr_equal(&addr->a.v6.sin6_addr,
> + &ifa->addr)) {
> + addr->valid = 0;
> + list_del_rcu(&addr->list);
> break;
> }
> }
> -
> + spin_unlock_bh(&sctp_local_addr_lock);
> + if (addr && !addr->valid)
> + call_rcu(&addr->rcu, sctp_local_addr_free);
> break;
> }
>
> @@ -367,7 +379,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
> addr->a.v6.sin6_port = 0;
> addr->a.v6.sin6_addr = ifp->addr;
> addr->a.v6.sin6_scope_id = dev->ifindex;
> + addr->valid = 1;
> INIT_LIST_HEAD(&addr->list);
> + INIT_RCU_HEAD(&addr->rcu);
> list_add_tail(&addr->list, addrlist);
> }
> }
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index e98579b..4688559 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -153,6 +153,8 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
> addr->a.v4.sin_family = AF_INET;
> addr->a.v4.sin_port = 0;
> addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
> + addr->valid = 1;
> + INIT_RCU_HEAD(&addr->rcu);
This has nothing to do with this patch, but i noticed that
INIT_LIST_HEAD(&addr->list) is missing here when comparing with
earlier v6 version of this routine.
> list_add_tail(&addr->list, addrlist);
> }
> }
> @@ -192,16 +194,24 @@ static void sctp_free_local_addr_list(void)
> }
> }
>
> +void sctp_local_addr_free(struct rcu_head *head)
> +{
> + struct sctp_sockaddr_entry *e = container_of(head,
> + struct sctp_sockaddr_entry, rcu);
> + kfree(e);
> +}
> +
> /* Copy the local addresses which are valid for 'scope' into 'bp'. */
> int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
> gfp_t gfp, int copy_flags)
> {
> struct sctp_sockaddr_entry *addr;
> int error = 0;
> - struct list_head *pos, *temp;
>
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> if (sctp_in_scope(&addr->a, scope)) {
> /* Now that the address is in scope, check to see if
> * the address type is really supported by the local
> @@ -221,6 +231,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
> }
>
> end_copy:
> + rcu_read_unlock();
> return error;
> }
>
> @@ -605,8 +616,8 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
> void *ptr)
> {
> struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
> - struct sctp_sockaddr_entry *addr;
> - struct list_head *pos, *temp;
> + struct sctp_sockaddr_entry *addr = NULL;
> + struct sctp_sockaddr_entry *temp;
>
> switch (ev) {
> case NETDEV_UP:
> @@ -615,19 +626,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
> addr->a.v4.sin_family = AF_INET;
> addr->a.v4.sin_port = 0;
> addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
> - list_add_tail(&addr->list, &sctp_local_addr_list);
> + addr->valid = 1;
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
> + spin_unlock_bh(&sctp_local_addr_lock);
> }
> break;
> case NETDEV_DOWN:
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + spin_lock_bh(&sctp_local_addr_lock);
> + list_for_each_entry_safe(addr, temp,
> + &sctp_local_addr_list, list) {
> if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) {
> - list_del(pos);
> - kfree(addr);
> + addr->valid = 0;
> + list_del_rcu(&addr->list);
> break;
> }
> }
> -
> + spin_unlock_bh(&sctp_local_addr_lock);
> + if (addr && !addr->valid)
> + call_rcu(&addr->rcu, sctp_local_addr_free);
> break;
> }
>
> @@ -1160,6 +1177,7 @@ SCTP_STATIC __init int sctp_init(void)
>
> /* Initialize the local address list. */
> INIT_LIST_HEAD(&sctp_local_addr_list);
> + spin_lock_init(&sctp_local_addr_lock);
> sctp_get_local_addr_list();
>
> /* Register notifier for inet address additions/deletions. */
> @@ -1227,6 +1245,9 @@ SCTP_STATIC __exit void sctp_exit(void)
> sctp_v6_del_protocol();
> inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
>
> + /* Unregister notifier for inet address additions/deletions. */
> + unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
> +
> /* Free the local address list. */
> sctp_free_local_addr_list();
>
> @@ -1240,9 +1261,6 @@ SCTP_STATIC __exit void sctp_exit(void)
> inet_unregister_protosw(&sctp_stream_protosw);
> inet_unregister_protosw(&sctp_seqpacket_protosw);
>
> - /* Unregister notifier for inet address additions/deletions. */
> - unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
> -
> sctp_sysctl_unregister();
> list_del(&sctp_ipv4_specific.list);
>
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index 3335460..a3acf78 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -4057,9 +4057,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> int __user *optlen)
> {
> sctp_assoc_t id;
> + struct list_head *pos;
> struct sctp_bind_addr *bp;
> struct sctp_association *asoc;
> - struct list_head *pos, *temp;
> struct sctp_sockaddr_entry *addr;
> rwlock_t *addr_lock;
> int cnt = 0;
> @@ -4096,15 +4096,19 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
> addr = list_entry(bp->address_list.next,
> struct sctp_sockaddr_entry, list);
> if (sctp_is_any(&addr->a)) {
> - list_for_each_safe(pos, temp, &sctp_local_addr_list) {
> - addr = list_entry(pos,
> - struct sctp_sockaddr_entry,
> - list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr,
> + &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> +
> if ((PF_INET == sk->sk_family) &&
> (AF_INET6 == addr->a.sa.sa_family))
> continue;
> +
> cnt++;
> }
> + rcu_read_unlock();
> } else {
> cnt = 1;
> }
> @@ -4127,14 +4131,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
> int max_addrs, void *to,
> int *bytes_copied)
> {
> - struct list_head *pos, *next;
> struct sctp_sockaddr_entry *addr;
> union sctp_addr temp;
> int cnt = 0;
> int addrlen;
>
> - list_for_each_safe(pos, next, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> +
> if ((PF_INET == sk->sk_family) &&
> (AF_INET6 == addr->a.sa.sa_family))
> continue;
> @@ -4149,6 +4155,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
> cnt ++;
> if (cnt >= max_addrs) break;
> }
> + rcu_read_unlock();
>
> return cnt;
> }
> @@ -4156,14 +4163,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
> static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
> size_t space_left, int *bytes_copied)
> {
> - struct list_head *pos, *next;
> struct sctp_sockaddr_entry *addr;
> union sctp_addr temp;
> int cnt = 0;
> int addrlen;
>
> - list_for_each_safe(pos, next, &sctp_local_addr_list) {
> - addr = list_entry(pos, struct sctp_sockaddr_entry, list);
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
> + if (!addr->valid)
> + continue;
> +
> if ((PF_INET == sk->sk_family) &&
> (AF_INET6 == addr->a.sa.sa_family))
> continue;
> @@ -4171,8 +4180,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
> sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
> &temp);
> addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
> - if (space_left < addrlen)
> - return -ENOMEM;
> + if (space_left < addrlen) {
> + cnt = -ENOMEM;
> + break;
> + }
> memcpy(to, &temp, addrlen);
>
> to += addrlen;
> @@ -4180,6 +4191,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
> space_left -= addrlen;
> *bytes_copied += addrlen;
> }
> + rcu_read_unlock();
>
> return cnt;
> }
^ permalink raw reply
* Re: [PATCH] [-MM, FIX] ixgbe: incorporate napi_struct changes from net-2.6.24.git
From: Andrew Morton @ 2007-09-12 23:14 UTC (permalink / raw)
To: Kok, Auke; +Cc: davem, netdev, jeff, jesse.brandeburg
In-Reply-To: <46E86611.60307@intel.com>
On Wed, 12 Sep 2007 15:20:01 -0700
"Kok, Auke" <auke-jan.h.kok@intel.com> wrote:
> Andrew Morton wrote:
> > On Wed, 12 Sep 2007 11:13:07 -0700
> > Auke Kok <auke-jan.h.kok@intel.com> wrote:
> >
> >> This incorporates the new napi_struct changes into ixgbe.
> >
> > I get a reject storm.
> >
> >> --- a/drivers/net/ixgbe/ixgbe_main.c
> >> +++ b/drivers/net/ixgbe/ixgbe_main.c
> >> @@ -557,14 +557,15 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
> >> struct ixgbe_adapter *adapter = rxr->adapter;
> >>
> >> IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value);
> >> - netif_rx_schedule(adapter->netdev);
> >> + netif_rx_schedule(adapter->netdev, &adapter->napi);
> >> return IRQ_HANDLED;
> >> }
> >>
> >
> > For example, my copy of ixgbe_msix_clean_rx(), from
> > git://lost.foo-projects.org/~aveerani/git/linux-2.6#ixgbe is:
>
> please drop that tree, and get the one I posted last week instead:
>
> git://lost.foo-projects.org/~ahkok/linux-2.6 ixgbe-20070905-submission
I think I just ignored that branch. It looks like some frozen-week-old
snapshot, whereas I like to get the latest tip-of-tree.
Or is that branch just misnamed?
^ permalink raw reply
* Re: [PATCH] [-MM, FIX] ixgbe: incorporate napi_struct changes from net-2.6.24.git
From: Kok, Auke @ 2007-09-12 23:29 UTC (permalink / raw)
To: Andrew Morton; +Cc: davem, netdev, jeff, jesse.brandeburg
In-Reply-To: <20070912161419.f6764d34.akpm@linux-foundation.org>
Andrew Morton wrote:
> On Wed, 12 Sep 2007 15:20:01 -0700
> "Kok, Auke" <auke-jan.h.kok@intel.com> wrote:
>
>> Andrew Morton wrote:
>>> On Wed, 12 Sep 2007 11:13:07 -0700
>>> Auke Kok <auke-jan.h.kok@intel.com> wrote:
>>>
>>>> This incorporates the new napi_struct changes into ixgbe.
>>> I get a reject storm.
>>>
>>>> --- a/drivers/net/ixgbe/ixgbe_main.c
>>>> +++ b/drivers/net/ixgbe/ixgbe_main.c
>>>> @@ -557,14 +557,15 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
>>>> struct ixgbe_adapter *adapter = rxr->adapter;
>>>>
>>>> IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value);
>>>> - netif_rx_schedule(adapter->netdev);
>>>> + netif_rx_schedule(adapter->netdev, &adapter->napi);
>>>> return IRQ_HANDLED;
>>>> }
>>>>
>>> For example, my copy of ixgbe_msix_clean_rx(), from
>>> git://lost.foo-projects.org/~aveerani/git/linux-2.6#ixgbe is:
>> please drop that tree, and get the one I posted last week instead:
>>
>> git://lost.foo-projects.org/~ahkok/linux-2.6 ixgbe-20070905-submission
>
> I think I just ignored that branch. It looks like some frozen-week-old
> snapshot, whereas I like to get the latest tip-of-tree.
I usually don't update the branches on that machine all that often, the machine
is horribly slow and a git-checkout of a new branch takes 5 minutes. However, it
*is* the very latest version of ixgbe and the one I intend to get Jeff to merge ;).
If you want to automatically pull all the changes I make for all the drivers, I
would probably prefer going to a git.kernel.org tree instead, which is something
I have been contemplating already, and I might just do so
in any case, you do want to drop the ~aveerani/git/linux-2.6#ixgbe branch, which
is quite a bit older than that.
Auke
^ permalink raw reply
* cc: Re: skb configured but can't get data allocated
From: DHAJOGLO @ 2007-09-12 23:41 UTC (permalink / raw)
To: Vlad Yasevich; +Cc: kernelnewbies, macnish, netdev
To All, Thanks for the help. I ended up adding my data packet after the ip headers and then updated the IP length:
/* Allocate space for our exp_packet data payload and set the data */
d_out = (struct exp_packet *)skb_put(skb,sizeof(struct exp_packet));
d_out->headbits = (reply<<4) & 0xF0;
d_out->sequence = pkthr.m_seq;
d_out->payload = htons(pkthr.p_hash);
/* adjust the total length of this IP packet */
iplen = skb->tail - (unsigned char *)skb->nh.iph;
iph->tot_len = htons(iplen);
ip_send_check(iph);
This is my first venture into the kernel!
On Wednesday, September 12, 2007 3:12 PM, Vlad Yasevich wrote:
>
>Date: Wed, 12 Sep 2007 16:12:37 -0400
>From: Vlad Yasevich
>To: DHAJOGLO <DHAJOGLO@smumn.edu>
>Subject: Re: skb configured but can't get data allocated
>
>You may want to read this page:
>http://vger.kernel.org/~davem/skb.html
>
>You will then see you problem.
>
>Essentially, you are writing your data into an
>area that is never sent.
>
>What you are end up is this:
>
>---------------------------------------------
>| your data| space | ll-header | IPv4 header|
>---------------------------------------------
>
>-vlad
^ permalink raw reply
* [PATCH 1/4] [IPV6]: Fix unbalanced socket reference with MSG_CONFIRM.
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2007-09-13 0:30 UTC (permalink / raw)
To: davem; +Cc: netdev, yoshfuji
Also applicable for stable releases.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
--
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a58459a..fc5cb83 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -858,11 +858,10 @@ back_from_confirm:
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
err = rawv6_push_pending_frames(sk, &fl, rp);
+ release_sock(sk);
}
done:
dst_release(dst);
- if (!inet->hdrincl)
- release_sock(sk);
out:
fl6_sock_release(flowlabel);
return err<0?err:len;
--
YOSHIFUJI Hideaki @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA
^ permalink raw reply related
* [PATCH 2/4] [IPV6]: Fix oops during flushing corked datagrams.
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2007-09-13 0:30 UTC (permalink / raw)
To: davem; +Cc: yoshfuji, netdev
When we corking sub-datagrams, we do not clone skb->dst for sub-datagrams
other than the first one, so we get oops if we have multiple sub-datagrams
here.
One possible way to fix this is to clone skb->dst for all sub-datagrams,
but we do not take this approach because skb->dst is not used in other
places and it is more natural to increment statistics once per a datagram.
Also applicable for stable releases.
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4704b5f..6530044 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1423,8 +1423,15 @@ void ip6_flush_pending_frames(struct sock *sk)
struct sk_buff *skb;
while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
- IP6_INC_STATS(ip6_dst_idev(skb->dst),
- IPSTATS_MIB_OUTDISCARDS);
+ if (skb->dst) {
+ /*
+ * Note: we count standard stats once per "datagram"
+ * and skb->dst is set only for the first
+ * sub-datagram of the datagram.
+ */
+ IP6_INC_STATS(ip6_dst_idev(skb->dst),
+ IPSTATS_MIB_OUTDISCARDS);
+ }
kfree_skb(skb);
}
--
YOSHIFUJI Hideaki @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA
^ permalink raw reply related
* [PATCH 3/4] [IPV6]: Just increment OutDatagrams once per a datagram.
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2007-09-13 0:30 UTC (permalink / raw)
To: davem; +Cc: netdev, yoshfuji
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4210951..c347f3e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -555,6 +555,8 @@ static int udp_v6_push_pending_frames(struct sock *sk)
out:
up->len = 0;
up->pending = 0;
+ if (!err)
+ UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag);
return err;
}
@@ -823,10 +825,8 @@ do_append_data:
release_sock(sk);
out:
fl6_sock_release(flowlabel);
- if (!err) {
- UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+ if (!err)
return len;
- }
/*
* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
* ENOBUFS might not be good (it's not tunable per se), but otherwise
--
YOSHIFUJI Hideaki @ USAGI Project <yoshfuji@linux-ipv6.org>
GPG-FP : 9022 65EB 1ECF 3AD1 0BDF 80D8 4807 F894 E062 0EEA
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox