Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] ehea: Fix a checksum issue on the receive path
From: Breno Leitao @ 2010-10-13 14:06 UTC (permalink / raw)
  To: David Miller; +Cc: Stephen Hemminger, eric.dumazet, netdev, fubar
In-Reply-To: <20101009103136.341104c5@nehalam>

On 10/09/2010 02:31 PM, Stephen Hemminger wrote:
> Also hardware checksum can be wrong/broken. By passing up a packet
> which the driver thinks is bad, the software can still work.
I see. Unfortunately ehea driver is dropping the packets that have a 
wrong checksum. I will work on the fix, and soon I will send the patch.

David,

Just to clarify, this patch that started this thead is not invalidated 
by this "problem". So, I'd like to see this patch committed on your 
tree. Does it make sense?

Thanks
Breno

^ permalink raw reply

* Re: [PATCH net-next 2/5] tipc: Simplify bearer shutdown logic
From: Neil Horman @ 2010-10-13 14:39 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: davem, netdev, allan.stephens
In-Reply-To: <1286929558-2954-2-git-send-email-paul.gortmaker@windriver.com>

On Tue, Oct 12, 2010 at 08:25:55PM -0400, Paul Gortmaker wrote:
> From: Allan Stephens <allan.stephens@windriver.com>
> 
> Disable all active bearers when TIPC is shut down without having to do
> a name-based search to locate each bearer object.
> 
It seems like you're doing a good deal more in this patch than just disabling
all active bearers without doing a name search.  The description is implemented
in the for loop of tipc_bearer_stop.  Whats the rest of it for?

> Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
>  net/tipc/bearer.c |   61 ++++++++++++++++++++--------------------------------
>  1 files changed, 24 insertions(+), 37 deletions(-)
> 
> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
> index 9c10c6b..9969ec6 100644
> --- a/net/tipc/bearer.c
> +++ b/net/tipc/bearer.c
> @@ -280,39 +280,39 @@ static int bearer_name_validate(const char *name,
>  }
>  
>  /**
> - * bearer_find - locates bearer object with matching bearer name
> + * tipc_bearer_find_interface - locates bearer object with matching interface name
>   */
>  
> -static struct bearer *bearer_find(const char *name)
> +struct bearer *tipc_bearer_find_interface(const char *if_name)
>  {
>  	struct bearer *b_ptr;
> +	char *b_if_name;
>  	u32 i;
>  
> -	if (tipc_mode != TIPC_NET_MODE)
> -		return NULL;
> -
>  	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
> -		if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
> +		if (!b_ptr->active)
> +			continue;
> +		b_if_name = strchr(b_ptr->publ.name, ':') + 1;
> +		if (!strcmp(b_if_name, if_name))
>  			return b_ptr;
>  	}
>  	return NULL;
>  }
>  
>  /**
> - * tipc_bearer_find_interface - locates bearer object with matching interface name
> + * bearer_find - locates bearer object with matching bearer name
>   */
>  
> -struct bearer *tipc_bearer_find_interface(const char *if_name)
> +static struct bearer *bearer_find(const char *name)
>  {
>  	struct bearer *b_ptr;
> -	char *b_if_name;
>  	u32 i;
>  
> +	if (tipc_mode != TIPC_NET_MODE)
> +		return NULL;
> +
I get why you're removing the tipc_mode check above, but why are you adding it
here?  commit d0021b252eaf65ca07ed14f0d66425dd9ccab9a6 modified tipc_bearers so
that calling this function should be tipc_mode safe (i.e. looking for a bearer
prior to entering TIPC_NET_MODE will return a NULL result).  Did you find a
subsequent problem?

>  	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
> -		if (!b_ptr->active)
> -			continue;
> -		b_if_name = strchr(b_ptr->publ.name, ':') + 1;
> -		if (!strcmp(b_if_name, if_name))
> +		if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
>  			return b_ptr;
>  	}
>  	return NULL;
> @@ -630,30 +630,17 @@ int tipc_block_bearer(const char *name)
>   * Note: This routine assumes caller holds tipc_net_lock.
>   */
>  
> -static int bearer_disable(const char *name)
> +static int bearer_disable(struct bearer *b_ptr)
>  {
> -	struct bearer *b_ptr;
>  	struct link *l_ptr;
>  	struct link *temp_l_ptr;
>  
> -	b_ptr = bearer_find(name);
> -	if (!b_ptr) {
> -		warn("Attempt to disable unknown bearer <%s>\n", name);
> -		return -EINVAL;
> -	}
> -
> -	info("Disabling bearer <%s>\n", name);
> +	info("Disabling bearer <%s>\n", b_ptr->publ.name);
>  	tipc_disc_stop_link_req(b_ptr->link_req);
>  	spin_lock_bh(&b_ptr->publ.lock);
>  	b_ptr->link_req = NULL;
>  	b_ptr->publ.blocked = 1;
> -	if (b_ptr->media->disable_bearer) {
> -		spin_unlock_bh(&b_ptr->publ.lock);
> -		write_unlock_bh(&tipc_net_lock);
> -		b_ptr->media->disable_bearer(&b_ptr->publ);
> -		write_lock_bh(&tipc_net_lock);
> -		spin_lock_bh(&b_ptr->publ.lock);
> -	}
> +	b_ptr->media->disable_bearer(&b_ptr->publ);
>  	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
>  		tipc_link_delete(l_ptr);
>  	}
> @@ -664,10 +651,16 @@ static int bearer_disable(const char *name)
>  
>  int tipc_disable_bearer(const char *name)
>  {
> +	struct bearer *b_ptr;
>  	int res;
>  
>  	write_lock_bh(&tipc_net_lock);
> -	res = bearer_disable(name);
> +	b_ptr = bearer_find(name);
> +	if (b_ptr == NULL) {
> +		warn("Attempt to disable unknown bearer <%s>\n", name);
> +		res = -EINVAL;
> +	} else
> +		res = bearer_disable(b_ptr);
>  	write_unlock_bh(&tipc_net_lock);
>  	return res;
>  }
> @@ -680,13 +673,7 @@ void tipc_bearer_stop(void)
>  
>  	for (i = 0; i < MAX_BEARERS; i++) {
>  		if (tipc_bearers[i].active)
> -			tipc_bearers[i].publ.blocked = 1;
> -	}
> -	for (i = 0; i < MAX_BEARERS; i++) {
> -		if (tipc_bearers[i].active)
> -			bearer_disable(tipc_bearers[i].publ.name);
> +			bearer_disable(&tipc_bearers[i]);
>  	}
>  	media_count = 0;
>  }
> -
> -
> -- 
> 1.7.0.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [PATCH net-next] fib: remove a useless synchronize_rcu() call
From: Eric Dumazet @ 2010-10-13 14:43 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

fib_nl_delrule() calls synchronize_rcu() for no apparent reason,
while rtnl is held.

I suspect it was done to avoid an atomic_inc_not_zero() in
fib_rules_lookup(), which commit 7fa7cb7109d07 added anyway.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/core/fib_rules.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 21698f8..1bc3f25 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -494,7 +494,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 			}
 		}
 
-		synchronize_rcu();
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);



^ permalink raw reply related

* Re: [PATCH net-next 3/5] tipc: Optimizations to bearer enabling logic
From: Neil Horman @ 2010-10-13 14:58 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: davem, netdev, allan.stephens
In-Reply-To: <1286929558-2954-3-git-send-email-paul.gortmaker@windriver.com>

On Tue, Oct 12, 2010 at 08:25:56PM -0400, Paul Gortmaker wrote:
> From: Allan Stephens <allan.stephens@windriver.com>
> 
> Introduces "enabling" state during activation of a new TIPC bearer,
> which supplements the existing "disabled" and "enabled" states.
> This change allows the new bearer to be added without having to
> temporarily block the processing of incoming packets on existing
> bearers during the binding of the new bearer to its associated
> interface. It also makes it unnecessary to zero out the entire
> bearer structure at the start of activation.
> 
> Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
>  net/tipc/bcast.c  |    2 +-
>  net/tipc/bearer.c |   27 +++++++++++++++++----------
>  net/tipc/bearer.h |    8 ++++++--
>  net/tipc/link.c   |    2 +-
>  4 files changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> index ecfaac1..ba6dcb2 100644
> --- a/net/tipc/bcast.c
> +++ b/net/tipc/bcast.c
> @@ -645,7 +645,7 @@ void tipc_bcbearer_sort(void)
>  	for (b_index = 0; b_index < MAX_BEARERS; b_index++) {
>  		struct bearer *b = &tipc_bearers[b_index];
>  
> -		if (!b->active || !b->nodes.count)
> +		if ((b->state != BEARER_ENABLED) || !b->nodes.count)
>  			continue;
>  
>  		if (!bp_temp[b->priority].primary)
> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
> index 9969ec6..379338f 100644
> --- a/net/tipc/bearer.c
> +++ b/net/tipc/bearer.c
> @@ -290,7 +290,7 @@ struct bearer *tipc_bearer_find_interface(const char *if_name)
>  	u32 i;
>  
>  	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
> -		if (!b_ptr->active)
> +		if (b_ptr->state != BEARER_ENABLED)
>  			continue;
>  		b_if_name = strchr(b_ptr->publ.name, ':') + 1;
>  		if (!strcmp(b_if_name, if_name))
> @@ -312,7 +312,8 @@ static struct bearer *bearer_find(const char *name)
>  		return NULL;
>  
>  	for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
> -		if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
> +		if ((b_ptr->state == BEARER_ENABLED) &&
> +		    (!strcmp(b_ptr->publ.name, name)))
>  			return b_ptr;
>  	}
>  	return NULL;
> @@ -337,7 +338,8 @@ struct sk_buff *tipc_bearer_get_names(void)
>  	for (i = 0, m_ptr = media_list; i < media_count; i++, m_ptr++) {
>  		for (j = 0; j < MAX_BEARERS; j++) {
>  			b_ptr = &tipc_bearers[j];
> -			if (b_ptr->active && (b_ptr->media == m_ptr)) {
> +			if ((b_ptr->state == BEARER_ENABLED) &&
> +			    (b_ptr->media == m_ptr)) {
>  				tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME,
>  						    b_ptr->publ.name,
>  						    strlen(b_ptr->publ.name) + 1);
> @@ -532,7 +534,7 @@ restart:
>  	bearer_id = MAX_BEARERS;
>  	with_this_prio = 1;
>  	for (i = MAX_BEARERS; i-- != 0; ) {
> -		if (!tipc_bearers[i].active) {
> +		if (tipc_bearers[i].state != BEARER_ENABLED) {
>  			bearer_id = i;
>  			continue;
>  		}
> @@ -559,21 +561,23 @@ restart:
>  	}
>  
>  	b_ptr = &tipc_bearers[bearer_id];
> -	memset(b_ptr, 0, sizeof(struct bearer));
> -
> +	b_ptr->state = BEARER_ENABLING;
>  	strcpy(b_ptr->publ.name, name);
> +	b_ptr->priority = priority;
> +
> +	write_unlock_bh(&tipc_net_lock);
Why the 3rd state?  Doesn't seem needed. 

^ permalink raw reply

* [-next Oct 13] Build failure drivers/s390/net/ctcm_mpc.o
From: Sachin Sant @ 2010-10-13 15:09 UTC (permalink / raw)
  To: netdev; +Cc: linux-next@vger.kernel.org, linux-s390, ursula.braun,
	Eric Dumazet

Today's next fails to build on a s390 box with following
error

drivers/s390/net/ctcm_mpc.c: In function ctc_mpc_dealloc_ch:
drivers/s390/net/ctcm_mpc.c:541: error: struct net_device has no member named refcnt

The code in question was changed by
commit 29b4433d991c88d86ca48a4c1cc33c671475be4b
net: percpu net_device refcount

The net_device struct member refcnt was changed from atomic_t to
int __percpu.

Thanks
-Sachin


-- 

---------------------------------
Sachin Sant
IBM Linux Technology Center
India Systems and Technology Labs
Bangalore, India
---------------------------------


^ permalink raw reply

* Re: [PATCH v2] xps-mp: Transmit Packet Steering for multiqueue
From: Tom Herbert @ 2010-10-13 15:41 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev
In-Reply-To: <1286948893.2703.88.camel@edumazet-laptop>

On Tue, Oct 12, 2010 at 10:48 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
> Le mardi 12 octobre 2010 à 17:20 -0700, Tom Herbert a écrit :
> > +#ifdef CONFIG_RPS
> > +       struct kobject          kobj;
> > +       struct netdev_queue     *first;
> > +       atomic_t                count;
> > +       struct xps_map          *xps_maps;
> > +#endif
>
> Please put kobj at the end of this block, because its a big object (64
> bytes)
>
Okay.

> +#ifdef CONFIG_RPS
> > +       struct xps_map          *xps_maps;
> > +       struct netdev_queue     *first;
> > +       atomic_t                count;
> > +       struct kobject          kobj;
> > +#endif
> >
>
> This way, we only use one cache line to access hot path fields, instead
> of two cache lines.
>
Okay.

>
>
> Tom, I believe you should split your patch in several parts, its really
> too hard to review.
>
>
> For example the netif_alloc_netdev_queues() stuff should be a patch on
> its own, as this stuff was already discussed on netdev some days ago, so
> that you could copy Ben and me on this one ;)
>
Ahh, that was some discussion about TX queues in the thread about RX
queue allocation.  I seemed to have missed that part.

> (Ie not allocating _tx in alloc_netdev_mq() but in register_netdevice())
>
Sorry about that.  The good changes in rx queue motivated these
changes.  I'll extract the parts that move the TX queue allocation
into a separate patch.

Thanks for reviewing,
Tom

>
>
> Its also a good thing to give diffstat output :
>
>  include/linux/netdevice.h |   28 +++
>  include/linux/skbuff.h    |    1
>  net/core/dev.c            |  191 ++++++++++++++++------
>  net/core/net-sysfs.c      |  305 +++++++++++++++++++++++++++++++++++-
>  net/core/net-sysfs.h      |    3
>  net/ipv4/tcp_output.c     |    4
>  6 files changed, 471 insertions(+), 61 deletions(-)
>
>
>

^ permalink raw reply

* Re: [PATCH net-next] net:  allocate skbs on local node
From: Christoph Lameter @ 2010-10-13 16:00 UTC (permalink / raw)
  To: David Rientjes
  Cc: Pekka Enberg, Andrew Morton, Eric Dumazet, David Miller, netdev,
	Michael Chan, Eilon Greenstein, Christoph Hellwig, LKML,
	Nick Piggin
In-Reply-To: <alpine.DEB.2.00.1010121234380.10165@chino.kir.corp.google.com>

On Tue, 12 Oct 2010, David Rientjes wrote:

> > Take the unified as a SLAB cleanup instead? Then at least we have
> > a large common code base and just differentiate through the locking
> > mechanism?
> >
>
> Will you be adding the extensive slub debugging to slab then?  It would be
> a shame to lose it because one allocator is chosen over another for
> performance reasons and then we need to recompile to debug issues as they
> arise.

Well basically we would copy SLUB to SLAB apply unification patches to
SLAB instead of SLUBB. We first have to make sure that the unified patches
have the same performance as SLAB.

It maybe much better to isolate the debug features and general bootstrap
from the particulars of the allocation strategy of either SLUB or SLAB.
That way a common code base exists and it would be easier to add different
allocation strategies.

Basically have slab.c with the basic functions and then slab_queueing.c
and slab_noqueue.c for SLAB/SLUB with the particulars of the allocation
strategy?



^ permalink raw reply

* Re: [-next Oct 13] Build failure drivers/s390/net/ctcm_mpc.o
From: David Miller @ 2010-10-13 16:12 UTC (permalink / raw)
  To: sachinp; +Cc: netdev, linux-next, linux-s390, ursula.braun, eric.dumazet
In-Reply-To: <4CB5CB9C.9030603@in.ibm.com>

From: Sachin Sant <sachinp@in.ibm.com>
Date: Wed, 13 Oct 2010 20:39:16 +0530

> Today's next fails to build on a s390 box with following
> error
> 
> drivers/s390/net/ctcm_mpc.c: In function ctc_mpc_dealloc_ch:
> drivers/s390/net/ctcm_mpc.c:541: error: struct net_device has no
> member named refcnt

This should fix it:

--------------------
s390: ctcm_mpc: Fix build after netdev refcount changes.

Reported-by: Sachin Sant <sachinp@in.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/ctcm_mpc.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c
index 2861e78..b64881f 100644
--- a/drivers/s390/net/ctcm_mpc.c
+++ b/drivers/s390/net/ctcm_mpc.c
@@ -540,7 +540,7 @@ void ctc_mpc_dealloc_ch(int port_num)
 
 	CTCM_DBF_TEXT_(MPC_SETUP, CTC_DBF_DEBUG,
 			"%s: %s: refcount = %d\n",
-			CTCM_FUNTAIL, dev->name, atomic_read(&dev->refcnt));
+			CTCM_FUNTAIL, dev->name, netdev_refcnt_read(dev));
 
 	fsm_deltimer(&priv->restart_timer);
 	grp->channels_terminating = 0;
-- 
1.7.3.1

^ permalink raw reply related

* Re: [PATCH net-next 4/5] tipc: Rework data structures that track neighboring nodes and links
From: Neil Horman @ 2010-10-13 16:24 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: davem, netdev, allan.stephens
In-Reply-To: <1286929558-2954-4-git-send-email-paul.gortmaker@windriver.com>

On Tue, Oct 12, 2010 at 08:25:57PM -0400, Paul Gortmaker wrote:
> From: Allan Stephens <allan.stephens@windriver.com>
> 
> Convert existing linked list of neighboring nodes to a standard
> doubly-linked list. Add counters that track total number of nodes
> in list and total number of links to these nodes, thereby allowing
> configuration message replies to allocate only space based on
> the actual number of nodes and links rather than the worst case.
> 
> Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
>  net/tipc/bcast.c |    5 ++-
>  net/tipc/link.c  |    3 +-
>  net/tipc/node.c  |   60 ++++++++++++++++++++++++++---------------------------
>  net/tipc/node.h  |    5 +--
>  4 files changed, 36 insertions(+), 37 deletions(-)
> 
NAK, this is completely broken.  You're casting tipc_node structure as
struct list_heads.  That basically makes the first 64 (or 128 bits on 64 bit
arches) act like a list_heads prev and next pointers.  So if you use node like a
list head, you're corrupting those first 64 or 128 bits.  If you modify
node->addr, node->lock, etc, you're corrupting your list pointers.  What you
want is an empty list head pointer defined somewhere to serve as the head of
your list, and to use the node_list member that you added to tipc_node to serve
as your list member in the use of list_add, list_del and list_for_each_entry
calls.

Neil


> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
> index ba6dcb2..e006678 100644
> --- a/net/tipc/bcast.c
> +++ b/net/tipc/bcast.c
> @@ -454,10 +454,11 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf)
>  			tipc_node_unlock(node);
>  			spin_lock_bh(&bc_lock);
>  			bcl->stats.recv_nacks++;
> -			bcl->owner->next = node;   /* remember requestor */
> +			/* remember retransmit requester */
> +			bcl->owner->node_list.next =
> +				(struct list_head *)node;
>  			bclink_retransmit_pkt(msg_bcgap_after(msg),
>  					      msg_bcgap_to(msg));
> -			bcl->owner->next = NULL;
>  			spin_unlock_bh(&bc_lock);
>  		} else {
>  			tipc_bclink_peek_nack(msg_destnode(msg),
> diff --git a/net/tipc/link.c b/net/tipc/link.c
> index 54bd99d..13bc0b6 100644
> --- a/net/tipc/link.c
> +++ b/net/tipc/link.c
> @@ -1652,7 +1652,8 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
>  		tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
>  				     (unsigned long) TIPC_SKB_CB(buf)->handle);
>  
> -		n_ptr = l_ptr->owner->next;
> +		/* recover retransmit requester */
> +		n_ptr = (struct tipc_node *)l_ptr->owner->node_list.next;
>  		tipc_node_lock(n_ptr);
>  
>  		tipc_addr_string_fill(addr_string, n_ptr->addr);
> diff --git a/net/tipc/node.c b/net/tipc/node.c
> index 7c49cd0..944b480 100644
> --- a/net/tipc/node.c
> +++ b/net/tipc/node.c
> @@ -50,7 +50,9 @@ void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
>  static void node_lost_contact(struct tipc_node *n_ptr);
>  static void node_established_contact(struct tipc_node *n_ptr);
>  
> -struct tipc_node *tipc_nodes = NULL;	/* sorted list of nodes within cluster */
> +static LIST_HEAD(nodes_list);	/* sorted list of neighboring nodes */
> +static int node_count;	/* number of neighboring nodes that exist */
> +static int link_count;	/* number of unicast links node currently has */
>  
>  static DEFINE_SPINLOCK(node_create_lock);
>  
> @@ -70,11 +72,11 @@ struct tipc_node *tipc_node_create(u32 addr)
>  {
>  	struct cluster *c_ptr;
>  	struct tipc_node *n_ptr;
> -	struct tipc_node **curr_node;
> +	struct tipc_node *new_n_ptr;
>  
>  	spin_lock_bh(&node_create_lock);
>  
> -	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
> +	list_for_each_entry(n_ptr, &nodes_list, node_list) {
>  		if (addr < n_ptr->addr)
>  			break;
>  		if (addr == n_ptr->addr) {
> @@ -83,8 +85,8 @@ struct tipc_node *tipc_node_create(u32 addr)
>  		}
>  	}
>  
> -	n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC);
> -	if (!n_ptr) {
> +	new_n_ptr = kzalloc(sizeof(*new_n_ptr), GFP_ATOMIC);
> +	if (!new_n_ptr) {
>  		spin_unlock_bh(&node_create_lock);
>  		warn("Node creation failed, no memory\n");
>  		return NULL;
> @@ -96,28 +98,22 @@ struct tipc_node *tipc_node_create(u32 addr)
>  	}
>  	if (!c_ptr) {
>  		spin_unlock_bh(&node_create_lock);
> -		kfree(n_ptr);
> +		kfree(new_n_ptr);
>  		return NULL;
>  	}
>  
> -	n_ptr->addr = addr;
> -		spin_lock_init(&n_ptr->lock);
> -	INIT_LIST_HEAD(&n_ptr->nsub);
> -	n_ptr->owner = c_ptr;
> -	tipc_cltr_attach_node(c_ptr, n_ptr);
> -	n_ptr->last_router = -1;
> +	new_n_ptr->addr = addr;
> +	spin_lock_init(&new_n_ptr->lock);
> +	INIT_LIST_HEAD(&new_n_ptr->nsub);
> +	new_n_ptr->owner = c_ptr;
> +	tipc_cltr_attach_node(c_ptr, new_n_ptr);
> +	new_n_ptr->last_router = -1;
> +
> +	list_add_tail(&new_n_ptr->node_list, &n_ptr->node_list);
> +	node_count++;
>  
> -	/* Insert node into ordered list */
> -	for (curr_node = &tipc_nodes; *curr_node;
> -	     curr_node = &(*curr_node)->next) {
> -		if (addr < (*curr_node)->addr) {
> -			n_ptr->next = *curr_node;
> -			break;
> -		}
> -	}
> -	(*curr_node) = n_ptr;
>  	spin_unlock_bh(&node_create_lock);
> -	return n_ptr;
> +	return new_n_ptr;
>  }
>  
>  void tipc_node_delete(struct tipc_node *n_ptr)
> @@ -136,6 +132,8 @@ void tipc_node_delete(struct tipc_node *n_ptr)
>  #endif
>  
>  	dbg("node %x deleted\n", n_ptr->addr);
> +	node_count--;
> +	list_del(&n_ptr->node_list);
>  	kfree(n_ptr);
>  }
>  
> @@ -275,6 +273,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
>  			n_ptr->links[bearer_id] = l_ptr;
>  			tipc_net.zones[tipc_zone(l_ptr->addr)]->links++;
>  			n_ptr->link_cnt++;
> +			link_count++;
>  			return n_ptr;
>  		}
>  		err("Attempt to establish second link on <%s> to %s\n",
> @@ -289,6 +288,7 @@ void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr)
>  	n_ptr->links[l_ptr->b_ptr->identity] = NULL;
>  	tipc_net.zones[tipc_zone(l_ptr->addr)]->links--;
>  	n_ptr->link_cnt--;
> +	link_count--;
>  }
>  
>  /*
> @@ -619,7 +619,7 @@ u32 tipc_available_nodes(const u32 domain)
>  	u32 cnt = 0;
>  
>  	read_lock_bh(&tipc_net_lock);
> -	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
> +	list_for_each_entry(n_ptr, &nodes_list, node_list) {
>  		if (!tipc_in_scope(domain, n_ptr->addr))
>  			continue;
>  		if (tipc_node_is_up(n_ptr))
> @@ -646,15 +646,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
>  						   " (network address)");
>  
>  	read_lock_bh(&tipc_net_lock);
> -	if (!tipc_nodes) {
> +	if (!node_count) {
>  		read_unlock_bh(&tipc_net_lock);
>  		return tipc_cfg_reply_none();
>  	}
>  
> -	/* For now, get space for all other nodes
> -	   (will need to modify this when slave nodes are supported */
> +	/* Get space for all neighboring nodes */
>  
> -	payload_size = TLV_SPACE(sizeof(node_info)) * (tipc_max_nodes - 1);
> +	payload_size = TLV_SPACE(sizeof(node_info)) * node_count;
>  	if (payload_size > 32768u) {
>  		read_unlock_bh(&tipc_net_lock);
>  		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
> @@ -668,7 +667,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
>  
>  	/* Add TLVs for all nodes in scope */
>  
> -	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
> +	list_for_each_entry(n_ptr, &nodes_list, node_list) {
>  		if (!tipc_in_scope(domain, n_ptr->addr))
>  			continue;
>  		node_info.addr = htonl(n_ptr->addr);
> @@ -704,8 +703,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
>  
>  	/* Get space for all unicast links + multicast link */
>  
> -	payload_size = TLV_SPACE(sizeof(link_info)) *
> -		(tipc_net.zones[tipc_zone(tipc_own_addr)]->links + 1);
> +	payload_size = TLV_SPACE(sizeof(link_info)) * (link_count + 1);
>  	if (payload_size > 32768u) {
>  		read_unlock_bh(&tipc_net_lock);
>  		return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
> @@ -726,7 +724,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
>  
>  	/* Add TLVs for any other links in scope */
>  
> -	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
> +	list_for_each_entry(n_ptr, &nodes_list, node_list) {
>  		u32 i;
>  
>  		if (!tipc_in_scope(domain, n_ptr->addr))
> diff --git a/net/tipc/node.h b/net/tipc/node.h
> index 45f3db3..26715dc 100644
> --- a/net/tipc/node.h
> +++ b/net/tipc/node.h
> @@ -47,7 +47,7 @@
>   * @addr: network address of node
>   * @lock: spinlock governing access to structure
>   * @owner: pointer to cluster that node belongs to
> - * @next: pointer to next node in sorted list of cluster's nodes
> + * @node_list: adjacent entries in sorted list of nodes
>   * @nsub: list of "node down" subscriptions monitoring node
>   * @active_links: pointers to active links to node
>   * @links: pointers to all links to node
> @@ -73,7 +73,7 @@ struct tipc_node {
>  	u32 addr;
>  	spinlock_t lock;
>  	struct cluster *owner;
> -	struct tipc_node *next;
> +	struct list_head node_list;
>  	struct list_head nsub;
>  	struct link *active_links[2];
>  	struct link *links[MAX_BEARERS];
> @@ -96,7 +96,6 @@ struct tipc_node {
>  	} bclink;
>  };
>  
> -extern struct tipc_node *tipc_nodes;
>  extern u32 tipc_own_tag;
>  
>  struct tipc_node *tipc_node_create(u32 addr);
> -- 
> 1.7.0.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [PATCH net-next 5/5] tipc: clean out all instances of #if 0'd unused code
From: Neil Horman @ 2010-10-13 16:26 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: davem, netdev, allan.stephens
In-Reply-To: <1286929558-2954-5-git-send-email-paul.gortmaker@windriver.com>

On Tue, Oct 12, 2010 at 08:25:58PM -0400, Paul Gortmaker wrote:
> Remove all instances of legacy, or as yet to be implemented code
> that is currently living within an #if 0 ... #endif block.
> In the rare instance that some of it be needed in the future,
> it can still be dragged out of history, but there is no need
> for it to sit in mainline.
> 
> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>


^ permalink raw reply

* Re: linux-next: Tree for October 13 (net/3c59x)
From: Randy Dunlap @ 2010-10-13 16:38 UTC (permalink / raw)
  To: Stephen Rothwell, netdev; +Cc: linux-next, LKML
In-Reply-To: <20101013154522.7b087a20.sfr@canb.auug.org.au>

On Wed, 13 Oct 2010 15:45:22 +1100 Stephen Rothwell wrote:

> Hi all,
> 
> Changes since 20101012:


when CONFIG_PCI is not enabled:

drivers/net/3c59x.c:3211: error:request for member 'current_state' in something not a structure or union

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

^ permalink raw reply

* Re: [PATCH] b44: fix resume, request_irq after hw reset
From: David Miller @ 2010-10-13 16:46 UTC (permalink / raw)
  To: james
  Cc: zambrano, jpirko, fujita.tomonori, hauke, Larry.Finger, netdev,
	linux-kernel
In-Reply-To: <201010120022.13171.james@albanarts.com>

From: James Hogan <james@albanarts.com>
Date: Tue, 12 Oct 2010 00:22:12 +0100

> @@ -2309,6 +2303,12 @@ static int b44_resume(struct ssb_device *sdev)
>  	netif_device_attach(bp->dev);
>  	spin_unlock_irq(&bp->lock);
>  
> +	rc = request_irq(dev->irq, b44_interrupt, IRQF_SHARED, dev->name, dev);
> +	if (rc) {
> +		netdev_err(dev, "request_irq failed\n");
> +		return rc;
> +	}
> +
>  	b44_enable_ints(bp);
>  	netif_wake_queue(dev);

Since you've moved the request_irq() down, you'll need to adjust
the error handling so that it undoes side effects made by this
function up until this point.

F.e. netif_device_attach() has to be undone for one thing.

Next, b44_init_rings() allocates memory that you must now free.

Etc. etc. etc.

This change is not so simple. :-)

^ permalink raw reply

* Re: linux-next: Tree for October 13 (net/3c59x)
From: David Miller @ 2010-10-13 16:54 UTC (permalink / raw)
  To: randy.dunlap; +Cc: sfr, netdev, linux-next, linux-kernel
In-Reply-To: <20101013093836.1ee6f2f9.randy.dunlap@oracle.com>

From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Wed, 13 Oct 2010 09:38:36 -0700

> On Wed, 13 Oct 2010 15:45:22 +1100 Stephen Rothwell wrote:
> 
>> Hi all,
>> 
>> Changes since 20101012:
> 
> 
> when CONFIG_PCI is not enabled:
> 
> drivers/net/3c59x.c:3211: error:request for member 'current_state' in something not a structure or union

Like all the other PCI interfaces that NOP out into empty inlines when
CONFIG_PCI is not enabled, it would be nice to have an interface that
provides pci_dev->current_state in a similar fashion so we don't have
to add a bunch of ifdefs here.

^ permalink raw reply

* Re: [PATCH] net: allow FEC driver to use fixed PHY support
From: David Miller @ 2010-10-13 16:56 UTC (permalink / raw)
  To: gerg; +Cc: netdev, gerg
In-Reply-To: <201010120703.o9C735Jd026818@goober.internal.moreton.com.au>

From: Greg Ungerer <gerg@snapgear.com>
Date: Tue, 12 Oct 2010 17:03:05 +1000

> At least one board using the FEC driver does not have a conventional
> PHY attached to it, it is directly connected to a somewhat simple
> ethernet switch (the board is the SnapGear/LITE, and the attached
> 4-port ethernet switch is a RealTek RTL8305). This switch does not
> present the usual register interface of a PHY, it presents nothing.
> So a PHY scan will find nothing - it finds ID's of 0 for each PHY
> on the attached MII bus.
> 
> After the FEC driver was changed to use phylib for supporting PHYs
> it no longer works on this particular board/switch setup.
> 
> Add code support to use a fixed phy if no PHY is found on the MII bus.
> This is based on the way the cpmac.c driver solved this same problem.
> 
> Signed-off-by: Greg Ungerer <gerg@uclinux.org>

Applied, thanks Greg.

^ permalink raw reply

* [PATCH net-next] fib_trie: use fls() instead of open coded loop
From: Eric Dumazet @ 2010-10-13 16:56 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Robert Olsson

fib_table_lookup() might use fls() to speedup an open coded loop.

Noticed while doing a profile analysis.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/fib_trie.c |   16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 271c89b..31494f3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1384,8 +1384,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
 	t_key cindex = 0;
 	int current_prefix_length = KEYLENGTH;
 	struct tnode *cn;
-	t_key node_prefix, key_prefix, pref_mismatch;
-	int mp;
+	t_key pref_mismatch;
 
 	rcu_read_lock();
 
@@ -1500,10 +1499,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
 		 * matching prefix.
 		 */
 
-		node_prefix = mask_pfx(cn->key, cn->pos);
-		key_prefix = mask_pfx(key, cn->pos);
-		pref_mismatch = key_prefix^node_prefix;
-		mp = 0;
+		pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
 
 		/*
 		 * In short: If skipped bits in this node do not match
@@ -1511,13 +1507,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
 		 * state.directly.
 		 */
 		if (pref_mismatch) {
-			while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
-				mp++;
-				pref_mismatch = pref_mismatch << 1;
-			}
-			key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
+			int mp = KEYLENGTH - fls(pref_mismatch);
 
-			if (key_prefix != 0)
+			if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
 				goto backtrace;
 
 			if (current_prefix_length >= cn->pos)



^ permalink raw reply related

* Question about PHY-less board and "fixed_phy"
From: Grant Edwards @ 2010-10-13 16:55 UTC (permalink / raw)
  To: netdev


I'm working with an Atmel ARM9 board (macb driver), that doesn't have
a PHY.

The MAC is connected to a switch via MII.  That MII link is hard-wired
to be 100M full-duplex.

>From what I've been able to google, the "fixed_phy" driver is intended
for this situation.  But from looking at the fixed.c source code it
appears to provide an emulated MDIO bus.

I don't want an emulated MDIO bus.

I have a real, working MDIO bus.

What I don't have is a PHY.

The switch presents a bunch of logical slave devices (23, IIRC)
attached to the MDIO bus, and I need use the MDIO bus to access those
"devices" (mainly from user-space via ioctl() calls).  If I use the
fixed_phy driver, it appears that it will hide the real MDIO bus and
replace it with a fake one that's connected to a fake set of PHY
registers.  That means I won't be able to access the the registers in
the switch to which my MAC is connected.

The ethernet/phy architecture seems to be based on several assumptions
that aren't true in my case:

 1) Every MAC is connected to a PHY.

 2) An MDIO bus is a point-to-point link between the MAC and the PHY.

 3) The MDIO bus belongs to the PHY.

 4) It's OK to go out and read arbitrary registers from every device
    on the MDIO bus when that bus is registered using mdiobus_register().
 
[Hmm, #4 may be true in my case, but it seems like a dangerous assumption.]
    
Anyway, I'm confused on how the MAC/PHY architecture is meant to deal
with the case where there is no PHY, but there are real devices
attached to a real MDIO bus which needs to be accessed both from the
Ethernet driver and from userspace using the normal user-space ioctl()
call.  

Do I need to write my own "fixed_phy" driver that presents a virtual
PHY without putting a fake MDIO bus in place?  

How _do_ you present a virtual PHY without faking an MDIO bus?

Do I need _two_ MDIO buses, the real one that is used to communicate
with the switch chip and a fake one that's used to talk to a fake PHY?
If so, how do I associate two MDIO buses with a single Ethernet
interface?

How do you register an mdio bus without having every device on the bus
accessed?

Can anybody loan me a clue?

Another thing I don't understand is that it looks to me like the
ioctl() to access devices on the mdio bus is handled by the PHY driver
when it should be handled by the Ethernet driver: the MDIO bus belongs
to the MAC, not the PHY.

The PHY driver apparently ignores the device ID specified by the user
and forces the device ID to that of the PHY, thus cutting of access to
any other devices on the bus.  [I've worked around that by kludging
the Ethernet driver's mdio_read/write routines so that I can specify
the device ID by placing it in the upper 8 bits of the 16-bit register
number (which is left unchanged as it passes through the PHY driver).]

This doesn't make sense to me.  Why provide a device ID in the ioctl()
API, and then overwrite it as the request passes through the PHY
driver to the Ethernet driver.  Why are ioctl() MDIO register
read/write requests that are done on an Ethernet interface passed
through the PHY driver?

I've read and re-read the phy.txt file under Documentation, and I've
looked at the fixed_phy source and sources of drivers where it is
used, but I'm still in the dark...

-- 
Grant Edwards               grant.b.edwards        Yow! Somewhere in Tenafly,
                                  at               New Jersey, a chiropractor
                              gmail.com            is viewing "Leave it to
                                                   Beaver"!


^ permalink raw reply

* Re: [PATCH net-next 5/5] tipc: clean out all instances of #if 0'd unused code
From: Paul Gortmaker @ 2010-10-13 17:08 UTC (permalink / raw)
  To: Neil Horman; +Cc: davem, netdev, allan.stephens
In-Reply-To: <20101013162652.GF31379@hmsreliant.think-freely.org>

On 10-10-13 12:26 PM, Neil Horman wrote:
> On Tue, Oct 12, 2010 at 08:25:58PM -0400, Paul Gortmaker wrote:
>> Remove all instances of legacy, or as yet to be implemented code
>> that is currently living within an #if 0 ... #endif block.
>> In the rare instance that some of it be needed in the future,
>> it can still be dragged out of history, but there is no need
>> for it to sit in mainline.
>>
>> Signed-off-by: Paul Gortmaker<paul.gortmaker@windriver.com>
> Acked-by: Neil Horman<nhorman@tuxdriver.com>
> 

Thanks for all the reviews.  As I'd indicated a while back, I
am by far no TIPC expert, so it may take me a bit to digest
it and rework things on 1 --> 4. That will probably put them
into the "for-38" net-next timeframe, as I'm guessing net-next
that is destined for 2.6.37 will close in a day or two.

This cleanup patch (patch #5) doesn't explicitly depend on
the other 4 bearer related patches, so it can be applied
at whatever time is most convenient for Dave.

Paul.

^ permalink raw reply

* Re: [PATCH net-next 5/5] tipc: clean out all instances of #if 0'd unused code
From: Neil Horman @ 2010-10-13 17:23 UTC (permalink / raw)
  To: Paul Gortmaker; +Cc: davem, netdev, allan.stephens
In-Reply-To: <4CB5E79B.4060507@windriver.com>

On Wed, Oct 13, 2010 at 01:08:43PM -0400, Paul Gortmaker wrote:
> On 10-10-13 12:26 PM, Neil Horman wrote:
> > On Tue, Oct 12, 2010 at 08:25:58PM -0400, Paul Gortmaker wrote:
> >> Remove all instances of legacy, or as yet to be implemented code
> >> that is currently living within an #if 0 ... #endif block.
> >> In the rare instance that some of it be needed in the future,
> >> it can still be dragged out of history, but there is no need
> >> for it to sit in mainline.
> >>
> >> Signed-off-by: Paul Gortmaker<paul.gortmaker@windriver.com>
> > Acked-by: Neil Horman<nhorman@tuxdriver.com>
> > 
> 
> Thanks for all the reviews.  As I'd indicated a while back, I
> am by far no TIPC expert, so it may take me a bit to digest
> it and rework things on 1 --> 4. That will probably put them
> into the "for-38" net-next timeframe, as I'm guessing net-next
> that is destined for 2.6.37 will close in a day or two.
> 
No problem,  Most of what I identified I don't think is strictly a tipc related
issue, they're more just correct usage of various primitives (i.e. don't
initalize a work queue every time right before you schedule it), and use
list_add/list_del, instead of just munging data structures onto a list).  Most
looks pretty easy to correct I think.  Allan can probably help you out if tipc
specific knoweldge is needed.

Neil

> This cleanup patch (patch #5) doesn't explicitly depend on
> the other 4 bearer related patches, so it can be applied
> at whatever time is most convenient for Dave.
> 
> Paul.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [PATCH net-next] fib: avoid false sharing on fib_table_hash
From: Eric Dumazet @ 2010-10-13 18:22 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

While doing profile analysis, I found fib_hash_table was sometime in a
cache line shared by a possibly often written kernel structure.

(CONFIG_IP_ROUTE_MULTIPATH || !CONFIG_IPV6_MULTIPLE_TABLES)

It's hard to detect because not easily reproductible.

Make sure we allocate a full cache line to keep this shared in all cpus
caches.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
We probably need a generic kernel function, but it might take time...

 net/ipv4/fib_frontend.c |   11 +++++------
 net/ipv6/ip6_fib.c      |    9 ++++++---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 919f2ad..3df057e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1016,16 +1016,15 @@ static struct notifier_block fib_netdev_notifier = {
 static int __net_init ip_fib_net_init(struct net *net)
 {
 	int err;
-	unsigned int i;
+	size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
+
+	/* Avoid false sharing : Use at least a full cache line */
+	size = max_t(size_t, size, L1_CACHE_BYTES);
 
-	net->ipv4.fib_table_hash = kzalloc(
-			sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
+	net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
 	if (net->ipv4.fib_table_hash == NULL)
 		return -ENOMEM;
 
-	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
-		INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
-
 	err = fib4_rules_init(net);
 	if (err < 0)
 		goto fail;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b6a5859..de38211 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1500,15 +1500,18 @@ static void fib6_gc_timer_cb(unsigned long arg)
 
 static int __net_init fib6_net_init(struct net *net)
 {
+	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+
 	setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
 
 	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
 	if (!net->ipv6.rt6_stats)
 		goto out_timer;
 
-	net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ,
-					   sizeof(*net->ipv6.fib_table_hash),
-					   GFP_KERNEL);
+	/* Avoid false sharing : Use at least a full cache line */
+	size = max_t(size_t, size, L1_CACHE_BYTES);
+
+	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
 	if (!net->ipv6.fib_table_hash)
 		goto out_rt6_stats;
 



^ permalink raw reply related

* Re: Question about PHY-less board and "fixed_phy"
From: David Daney @ 2010-10-13 19:06 UTC (permalink / raw)
  To: Grant Edwards; +Cc: netdev
In-Reply-To: <i94oag$epv$1@dough.gmane.org>

On 10/13/2010 09:55 AM, Grant Edwards wrote:
>
> I'm working with an Atmel ARM9 board (macb driver), that doesn't have
> a PHY.
>
> The MAC is connected to a switch via MII.  That MII link is hard-wired
> to be 100M full-duplex.
>
>> From what I've been able to google, the "fixed_phy" driver is intended
> for this situation.  But from looking at the fixed.c source code it
> appears to provide an emulated MDIO bus.
>
> I don't want an emulated MDIO bus.
>
> I have a real, working MDIO bus.
>
> What I don't have is a PHY.
>
> The switch presents a bunch of logical slave devices (23, IIRC)
> attached to the MDIO bus, and I need use the MDIO bus to access those
> "devices" (mainly from user-space via ioctl() calls).  If I use the
> fixed_phy driver, it appears that it will hide the real MDIO bus and
> replace it with a fake one that's connected to a fake set of PHY
> registers.  That means I won't be able to access the the registers in
> the switch to which my MAC is connected.
>
> The ethernet/phy architecture seems to be based on several assumptions
> that aren't true in my case:
>
>   1) Every MAC is connected to a PHY.

As long as you handle calling netif_carrier_{on,off}() and some of the 
ndo_do_ioctl commands, I don't think you need a PHY.

>
>   2) An MDIO bus is a point-to-point link between the MAC and the PHY.

This I don't think is the case.  See phy_connect() for example.  It 
certianly allows for multiple PHYs per bus.

We have a SOC device (Octeon) that has many PHYs on a single MDIO bus, 
and have a separate MDIO bus driver (mdio-octeon.c) that is shared 
between all the Ethernet devices/drivers.

>
>   3) The MDIO bus belongs to the PHY.
>

??, The mdio bus lock is taken for some operations, but how does it 
'belong to' the PHY?

>   4) It's OK to go out and read arbitrary registers from every device
>      on the MDIO bus when that bus is registered using mdiobus_register().
>

You can set the struct mii_bus phy_mask element to prevent probing of 
specific addresses.



> [Hmm, #4 may be true in my case, but it seems like a dangerous assumption.]
>
> Anyway, I'm confused on how the MAC/PHY architecture is meant to deal
> with the case where there is no PHY, but there are real devices
> attached to a real MDIO bus which needs to be accessed both from the
> Ethernet driver and from userspace using the normal user-space ioctl()
> call.
>
> Do I need to write my own "fixed_phy" driver that presents a virtual
> PHY without putting a fake MDIO bus in place?
>
> How _do_ you present a virtual PHY without faking an MDIO bus?
>
> Do I need _two_ MDIO buses, the real one that is used to communicate
> with the switch chip and a fake one that's used to talk to a fake PHY?
> If so, how do I associate two MDIO buses with a single Ethernet
> interface?
>
> How do you register an mdio bus without having every device on the bus
> accessed?
>
> Can anybody loan me a clue?
>
> Another thing I don't understand is that it looks to me like the
> ioctl() to access devices on the mdio bus is handled by the PHY driver
> when it should be handled by the Ethernet driver: the MDIO bus belongs
> to the MAC, not the PHY.
>
> The PHY driver apparently ignores the device ID specified by the user
> and forces the device ID to that of the PHY, thus cutting of access to
> any other devices on the bus.  [I've worked around that by kludging
> the Ethernet driver's mdio_read/write routines so that I can specify
> the device ID by placing it in the upper 8 bits of the 16-bit register
> number (which is left unchanged as it passes through the PHY driver).]
>
> This doesn't make sense to me.  Why provide a device ID in the ioctl()
> API, and then overwrite it as the request passes through the PHY
> driver to the Ethernet driver.  Why are ioctl() MDIO register
> read/write requests that are done on an Ethernet interface passed
> through the PHY driver?
>
> I've read and re-read the phy.txt file under Documentation, and I've
> looked at the fixed_phy source and sources of drivers where it is
> used, but I'm still in the dark...
>


^ permalink raw reply

* [patch] ns83820: spin_lock_irq() => spin_lock()
From: Dan Carpenter @ 2010-10-13 19:18 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, kernel-janitors, mingo

This is essentially cosmetic.  At this point the IRQs are already
disabled because we called spin_lock_irq(&dev->rx_info.lock).

The real bug here was fixed back in 2006 in 3a10ccebe: "[PATCH] lock
validator: fix ns83820.c irq-flags bug".  Prior to that patch, it was
a "spin_lock_irq is not nestable" type bug.  The 2006 patch changes the
unlock to not re-enable IRQs, which eliminates the potential deadlock.

But this bit was missed.  We should change the lock function as well so
it balances nicely.

Signed-off-by: Dan Carpenter <error27@gmail.com>

diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 3bbd0aa..84134c7 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -772,7 +772,7 @@ static int ns83820_setup_rx(struct net_device *ndev)
 		phy_intr(ndev);
 
 		/* Okay, let it rip */
-		spin_lock_irq(&dev->misc_lock);
+		spin_lock(&dev->misc_lock);
 		dev->IMR_cache |= ISR_PHY;
 		dev->IMR_cache |= ISR_RXRCMP;
 		//dev->IMR_cache |= ISR_RXERR;

^ permalink raw reply related

* [patch] gianfar: fix double lock typo
From: Dan Carpenter @ 2010-10-13 19:19 UTC (permalink / raw)
  To: Sandeep Gopalpet; +Cc: David S. Miller, netdev, kernel-janitors

This should be a _restore() instead of a _save().

Signed-off-by: Dan Carpenter <error27@gmail.com>

diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c
index ae8e5d3..5c566eb 100644
--- a/drivers/net/gianfar_ethtool.c
+++ b/drivers/net/gianfar_ethtool.c
@@ -538,7 +538,7 @@ static int gfar_set_rx_csum(struct net_device *dev, uint32_t data)
 
 		unlock_tx_qs(priv);
 		unlock_rx_qs(priv);
-		local_irq_save(flags);
+		local_irq_restore(flags);
 
 		for (i = 0; i < priv->num_rx_queues; i++)
 			gfar_clean_rx_ring(priv->rx_queue[i],

^ permalink raw reply related

* Re: Question about PHY-less board and "fixed_phy"
From: Grant Edwards @ 2010-10-13 19:21 UTC (permalink / raw)
  To: netdev
In-Reply-To: <4CB6032A.8020908@caviumnetworks.com>

On 2010-10-13, David Daney <ddaney@caviumnetworks.com> wrote:
> On 10/13/2010 09:55 AM, Grant Edwards wrote:
>
>> I'm working with an Atmel ARM9 board (macb driver), that doesn't have
>> a PHY.
>>
>> The MAC is connected to a switch via MII.  That MII link is hard-wired
>> to be 100M full-duplex.
>>
>>> From what I've been able to google, the "fixed_phy" driver is intended
>> for this situation.  But from looking at the fixed.c source code it
>> appears to provide an emulated MDIO bus.
>>
>> I don't want an emulated MDIO bus.
>>
>> I have a real, working MDIO bus.
>>
>> What I don't have is a PHY.
>>
>> The switch presents a bunch of logical slave devices (23, IIRC)
>> attached to the MDIO bus, and I need use the MDIO bus to access those
>> "devices" (mainly from user-space via ioctl() calls).  If I use the
>> fixed_phy driver, it appears that it will hide the real MDIO bus and
>> replace it with a fake one that's connected to a fake set of PHY
>> registers.  That means I won't be able to access the the registers in
>> the switch to which my MAC is connected.
>>
>> The ethernet/phy architecture seems to be based on several assumptions
>> that aren't true in my case:
>>
>>   1) Every MAC is connected to a PHY.
>
> As long as you handle calling netif_carrier_{on,off}() and some of
> the ndo_do_ioctl commands, I don't think you need a PHY.

Thanks.  I'll look into that.

>>   2) An MDIO bus is a point-to-point link between the MAC and the PHY.
>
> This I don't think is the case.  See phy_connect() for example.  It 
> certianly allows for multiple PHYs per bus.
>
> We have a SOC device (Octeon) that has many PHYs on a single MDIO
> bus, and have a separate MDIO bus driver (mdio-octeon.c) that is
> shared between all the Ethernet devices/drivers.

Do you bypass the ioctl code in phy.c that ignores the device id in
the request and instead routes all read/write operations to the PHY's
id?

>>   3) The MDIO bus belongs to the PHY.
>
> ??, The mdio bus lock is taken for some operations, but how does it 
> 'belong to' the PHY?

I said that because the ioctl() to do read/write operations on the
mdio bus is a "phy" ioctl() that is handled by phy.c code (in the
drivers I've looked at), and it overwrites any user-provided device ID
with that of the PHY, thus turning the mdio bus into a point-to-point
link to the PHY whose use is controlled by the PHY driver.  In my mind
I guess that equates to the mdio bus "belonging to" the PHY.

It looks like I need to change the macb driver to handle those ioctl
requests directly instead of having the phy.c code handle them.

>>   4) It's OK to go out and read arbitrary registers from every device
>>      on the MDIO bus when that bus is registered using mdiobus_register().
>
> You can set the struct mii_bus phy_mask element to prevent probing of
> specific addresses.

Ah, I hadn't figured that out yet.

Thanks for the hints!

-- 
Grant Edwards               grant.b.edwards        Yow! I know things about
                                  at               TROY DONAHUE that can't
                              gmail.com            even be PRINTED!!


^ permalink raw reply

* Re: [patch] IPVS: ip_vs_dbg_callid() is only needed for debugging
From: Patrick McHardy @ 2010-10-13 19:23 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov
In-Reply-To: <20101007012325.GA32392@verge.net.au>

Am 07.10.2010 03:23, schrieb Simon Horman:
> ip_vs_dbg_callid() and IP_VS_DEBUG_CALLID() are only needed
> it CONFIG_IP_VS_DEBUG is defined.
> 
> This resolves the following build warning when CONFIG_IP_VS_DEBUG is
> not defined.
> 
> net/netfilter/ipvs/ip_vs_pe_sip.c:11: warning: 'ip_vs_dbg_callid' defined but not used

Applied, thanks Simon.

^ permalink raw reply

* [PATCH] via-velocity: forced 1000 Mbps mode support.
From: Francois Romieu @ 2010-10-13 19:26 UTC (permalink / raw)
  To: David Lv
  Cc: netdev, DavidLv, ShirleyHu, AndersMa, David S. Miller,
	Seguier Regis
In-Reply-To: <AANLkTinn8UQfOoPgf=13jfvD5Ld6p4GhT7jpyYmXKT9X@mail.gmail.com>

Full duplex only. Half duplex 1000 Mbps is not supported.

Signed-off-by: David Lv <DavidLv@viatech.com.cn>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
Tested-by: Seguier Regis <rseguier@e-teleport.net>

---
 David (Lv), your mail agent apparently wrapped some more-than-80-columns lines
 in the patch, thus making it unusable. I fixed it.

 drivers/net/via-velocity.c |   82 ++++++++++++++++++++++++++++++++++++++++---
 drivers/net/via-velocity.h |    5 ++-
 2 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index f534123..b21a3d9 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -312,13 +312,14 @@ VELOCITY_PARAM(flow_control, "Enable flow control ability");

 #define MED_LNK_DEF 0
 #define MED_LNK_MIN 0
-#define MED_LNK_MAX 4
+#define MED_LNK_MAX 5
 /* speed_duplex[] is used for setting the speed and duplex mode of NIC.
    0: indicate autonegotiation for both speed and duplex mode
    1: indicate 100Mbps half duplex mode
    2: indicate 100Mbps full duplex mode
    3: indicate 10Mbps half duplex mode
    4: indicate 10Mbps full duplex mode
+   5: indicate 1000Mbps full duplex mode

    Note:
    if EEPROM have been set to the force mode, this option is ignored
@@ -617,6 +618,9 @@ static u32 velocity_get_opt_media_mode(struct velocity_info *vptr)
 	case SPD_DPX_10_HALF:
 		status = VELOCITY_SPEED_10;
 		break;
+	case SPD_DPX_1000_FULL:
+		status = VELOCITY_SPEED_1000 | VELOCITY_DUPLEX_FULL;
+		break;
 	}
 	vptr->mii_status = status;
 	return status;
@@ -922,6 +926,7 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 		/* enable AUTO-NEGO mode */
 		mii_set_auto_on(vptr);
 	} else {
+		u16 CTRL1000;
 		u16 ANAR;
 		u8 CHIPGCR;

@@ -936,7 +941,11 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 		BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR);

 		CHIPGCR = readb(&regs->CHIPGCR);
-		CHIPGCR &= ~CHIPGCR_FCGMII;
+
+		if (mii_status & VELOCITY_SPEED_1000)
+			CHIPGCR |= CHIPGCR_FCGMII;
+		else
+			CHIPGCR &= ~CHIPGCR_FCGMII;

 		if (mii_status & VELOCITY_DUPLEX_FULL) {
 			CHIPGCR |= CHIPGCR_FCFDX;
@@ -952,7 +961,13 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 				BYTE_REG_BITS_ON(TCR_TB2BDIS, &regs->TCR);
 		}

-		MII_REG_BITS_OFF(ADVERTISE_1000FULL | ADVERTISE_1000HALF, MII_CTRL1000, vptr->mac_regs);
+		velocity_mii_read(vptr->mac_regs, MII_CTRL1000, &CTRL1000);
+		CTRL1000 &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
+		if ((mii_status & VELOCITY_SPEED_1000) &&
+		    (mii_status & VELOCITY_DUPLEX_FULL)) {
+			CTRL1000 |= ADVERTISE_1000FULL;
+		}
+		velocity_mii_write(vptr->mac_regs, MII_CTRL1000, CTRL1000);

 		if (!(mii_status & VELOCITY_DUPLEX_FULL) && (mii_status & VELOCITY_SPEED_10))
 			BYTE_REG_BITS_OFF(TESTCFG_HBDIS, &regs->TESTCFG);
@@ -967,7 +982,7 @@ static int velocity_set_media_mode(struct velocity_info *vptr, u32 mii_status)
 				ANAR |= ADVERTISE_100FULL;
 			else
 				ANAR |= ADVERTISE_100HALF;
-		} else {
+		} else if (mii_status & VELOCITY_SPEED_10) {
 			if (mii_status & VELOCITY_DUPLEX_FULL)
 				ANAR |= ADVERTISE_10FULL;
 			else
@@ -1013,6 +1028,9 @@ static void velocity_print_link_status(struct velocity_info *vptr)
 	} else {
 		VELOCITY_PRT(MSG_LEVEL_INFO, KERN_NOTICE "%s: Link forced", vptr->dev->name);
 		switch (vptr->options.spd_dpx) {
+		case SPD_DPX_1000_FULL:
+			VELOCITY_PRT(MSG_LEVEL_INFO, " speed 1000M bps full duplex\n");
+			break;
 		case SPD_DPX_100_HALF:
 			VELOCITY_PRT(MSG_LEVEL_INFO, " speed 100M bps half duplex\n");
 			break;
@@ -3170,6 +3188,37 @@ static int velocity_get_settings(struct net_device *dev, struct ethtool_cmd *cmd
 			SUPPORTED_100baseT_Full |
 			SUPPORTED_1000baseT_Half |
 			SUPPORTED_1000baseT_Full;
+
+	cmd->advertising = ADVERTISED_TP | ADVERTISED_Autoneg;
+	if (vptr->options.spd_dpx == SPD_DPX_AUTO) {
+		cmd->advertising |=
+			ADVERTISED_10baseT_Half |
+			ADVERTISED_10baseT_Full |
+			ADVERTISED_100baseT_Half |
+			ADVERTISED_100baseT_Full |
+			ADVERTISED_1000baseT_Half |
+			ADVERTISED_1000baseT_Full;
+	} else {
+		switch (vptr->options.spd_dpx) {
+		case SPD_DPX_1000_FULL:
+			cmd->advertising |= ADVERTISED_1000baseT_Full;
+			break;
+		case SPD_DPX_100_HALF:
+			cmd->advertising |= ADVERTISED_100baseT_Half;
+			break;
+		case SPD_DPX_100_FULL:
+			cmd->advertising |= ADVERTISED_100baseT_Full;
+			break;
+		case SPD_DPX_10_HALF:
+			cmd->advertising |= ADVERTISED_10baseT_Half;
+			break;
+		case SPD_DPX_10_FULL:
+			cmd->advertising |= ADVERTISED_10baseT_Full;
+			break;
+		default:
+			break;
+		}
+	}
 	if (status & VELOCITY_SPEED_1000)
 		cmd->speed = SPEED_1000;
 	else if (status & VELOCITY_SPEED_100)
@@ -3200,14 +3249,35 @@ static int velocity_set_settings(struct net_device *dev, struct ethtool_cmd *cmd
 	curr_status &= (~VELOCITY_LINK_FAIL);

 	new_status |= ((cmd->autoneg) ? VELOCITY_AUTONEG_ENABLE : 0);
+	new_status |= ((cmd->speed == SPEED_1000) ? VELOCITY_SPEED_1000 : 0);
 	new_status |= ((cmd->speed == SPEED_100) ? VELOCITY_SPEED_100 : 0);
 	new_status |= ((cmd->speed == SPEED_10) ? VELOCITY_SPEED_10 : 0);
 	new_status |= ((cmd->duplex == DUPLEX_FULL) ? VELOCITY_DUPLEX_FULL : 0);

-	if ((new_status & VELOCITY_AUTONEG_ENABLE) && (new_status != (curr_status | VELOCITY_AUTONEG_ENABLE)))
+	if ((new_status & VELOCITY_AUTONEG_ENABLE) &&
+	    (new_status != (curr_status | VELOCITY_AUTONEG_ENABLE))) {
 		ret = -EINVAL;
-	else
+	} else {
+		enum speed_opt spd_dpx;
+
+		if (new_status & VELOCITY_AUTONEG_ENABLE)
+			spd_dpx = SPD_DPX_AUTO;
+		else if ((new_status & VELOCITY_SPEED_1000) &&
+			 (new_status & VELOCITY_DUPLEX_FULL)) {
+			spd_dpx = SPD_DPX_1000_FULL;
+		} else if (new_status & VELOCITY_SPEED_100)
+			spd_dpx = (new_status & VELOCITY_DUPLEX_FULL) ?
+				SPD_DPX_100_FULL : SPD_DPX_100_HALF;
+		else if (new_status & VELOCITY_SPEED_10)
+			spd_dpx = (new_status & VELOCITY_DUPLEX_FULL) ?
+				SPD_DPX_10_FULL : SPD_DPX_10_HALF;
+		else
+			return -EOPNOTSUPP;
+
+		vptr->options.spd_dpx = spd_dpx;
+
 		velocity_set_media_mode(vptr, new_status);
+	}

 	return ret;
 }
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h
index f7b33ae..df55f6c 100644
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -848,7 +848,7 @@ enum  velocity_owner {
  *	Bits in CHIPGCR register
  */

-#define CHIPGCR_FCGMII      0x80
+#define CHIPGCR_FCGMII      0x80	/* enable GMII mode */
 #define CHIPGCR_FCFDX       0x40
 #define CHIPGCR_FCRESV      0x20
 #define CHIPGCR_FCMODE      0x10
@@ -1390,7 +1390,8 @@ enum speed_opt {
 	SPD_DPX_100_HALF = 1,
 	SPD_DPX_100_FULL = 2,
 	SPD_DPX_10_HALF = 3,
-	SPD_DPX_10_FULL = 4
+	SPD_DPX_10_FULL = 4,
+	SPD_DPX_1000_FULL = 5
 };

 enum velocity_init_type {
--
1.7.2.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox