Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v2] Phonet: set the pipe handle using setsockopt
From: Hemant Vilas RAMDASI @ 2011-11-10  9:50 UTC (permalink / raw)
  To: remi.denis-courmont; +Cc: netdev, Dinesh Kumar Sharma, Hemant Ramdasi

From: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>

This provides flexibility to set the pipe handle
using setsockopt and enable the same.

Signed-off-by: Hemant Ramdasi <hemant.ramdasi@stericsson.com>
Signed-off-by: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
---
 include/linux/phonet.h |    2 +
 net/phonet/pep.c       |   90 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 6fb1384..491caec 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -37,6 +37,8 @@
 #define PNPIPE_ENCAP		1
 #define PNPIPE_IFINDEX		2
 #define PNPIPE_HANDLE		3
+#define PNPIPE_ENABLE		4
+#define PNPIPE_INITSTATE	5
 
 #define PNADDR_ANY		0
 #define PNADDR_BROADCAST	0xFC
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index f17fd84..f8057a1 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -167,6 +167,12 @@ static int pipe_handler_send_created_ind(struct sock *sk)
 				data, 4, GFP_ATOMIC);
 }
 
+static int pipe_handler_send_enabled_ind(struct sock *sk)
+{
+	return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */,
+				NULL, 0, GFP_ATOMIC);
+}
+
 static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
 {
 	static const u8 data[20] = {
@@ -533,6 +539,17 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
 	return pipe_handler_send_created_ind(sk);
 }
 
+static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+
+	if (hdr->error_code != PN_PIPE_NO_ERROR)
+		return -ECONNREFUSED;
+
+	return pipe_handler_send_enabled_ind(sk);
+}
+
+
 /* Queue an skb to an actively connected sock.
  * Socket lock must be held. */
 static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
@@ -578,6 +595,28 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
 			sk->sk_state = TCP_CLOSE_WAIT;
 			break;
 		}
+		if (pn->init_enable == PN_PIPE_DISABLE)
+			sk->sk_state = TCP_SYN_RECV;
+		else {
+			sk->sk_state = TCP_ESTABLISHED;
+
+			if (!pn_flow_safe(pn->tx_fc)) {
+				atomic_set(&pn->tx_credits, 1);
+				sk->sk_write_space(sk);
+			}
+			pipe_grant_credits(sk, GFP_ATOMIC);
+
+		}
+		break;
+
+	case PNS_PEP_ENABLE_RESP:
+		if (sk->sk_state != TCP_SYN_SENT)
+			break;
+
+		if (pep_enableresp_rcv(sk, skb)) {
+			sk->sk_state = TCP_CLOSE_WAIT;
+			break;
+		}
 
 		sk->sk_state = TCP_ESTABLISHED;
 		if (!pn_flow_safe(pn->tx_fc)) {
@@ -863,9 +902,26 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
 	int err;
 	u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
 
-	pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+	if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE)
+		pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+
 	err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
-					PN_PIPE_ENABLE, data, 4);
+				pn->init_enable, data, 4);
+	if (err)
+		return err;
+
+	sk->sk_state = TCP_SYN_SENT;
+
+	return 0;
+}
+
+static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
+{
+	struct pep_sock *pn = pep_sk(sk);
+	int err;
+
+	err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD,
+				NULL, 0);
 	if (err) {
 		pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
 		return err;
@@ -959,6 +1015,29 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 		}
 		goto out_norel;
 
+	case PNPIPE_HANDLE:
+		if ((val >= 0) && (val < PN_PIPE_INVALID_HANDLE))
+			pn->pipe_handle = val;
+		else
+			err = -EINVAL;
+		break;
+
+	case PNPIPE_ENABLE:
+		if (sk->sk_state == TCP_SYN_SENT)
+			err = -EBUSY;
+		if (sk->sk_state == TCP_ESTABLISHED)
+			err = -EISCONN;
+		else
+			err = pep_sock_enable(sk, NULL, 0);
+		break;
+
+	case PNPIPE_INITSTATE:
+		if ((val == PN_PIPE_DISABLE) || (val == PN_PIPE_ENABLE))
+			pn->init_enable = val;
+		else
+			err = -EINVAL;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -994,6 +1073,13 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
 			return -EINVAL;
 		break;
 
+	case PNPIPE_ENABLE:
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EINVAL;
+		else
+			val = 1;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
1.7.4.3

^ permalink raw reply related

* Re: [PATCH 0/4] skb paged fragment destructors
From: Ian Campbell @ 2011-11-10 10:39 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Jesse Brandeburg, netdev@vger.kernel.org
In-Reply-To: <1320860984.3916.33.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Wed, 2011-11-09 at 17:49 +0000, Eric Dumazet wrote:
> Le mercredi 09 novembre 2011 à 15:01 +0000, Ian Campbell a écrit :
> > The following series makes use of the skb fragment API (which is in 3.2)
> > to add a per-paged-fragment destructor callback. This can be used by
> > creators of skbs who are interested in the lifecycle of the pages
> > included in that skb after they have handed it off to the network stack.
> > I think these have all been posted before, but have been backed up
> > behind the skb fragment API.
> > 
> > The mail at [0] contains some more background and rationale but
> > basically the completed series will allow entities which inject pages
> > into the networking stack to receive a notification when the stack has
> > really finished with those pages (i.e. including retransmissions,
> > clones, pull-ups etc) and not just when the original skb is finished
> > with, which is beneficial to many subsystems which wish to inject pages
> > into the network stack without giving up full ownership of those pages'
> > lifecycle. It implements something broadly along the lines of what was
> > described in [1].
> > 
> > I have also included a patch to the RPC subsystem which uses this API to
> > fix the bug which I describe at [2].
> > 
> > I presented this work at LPC in September and there was a
> > question/concern raised (by Jesse Brandeburg IIRC) regarding the
> > overhead of adding this extra field per fragment. If I understand
> > correctly it seems that there have been performance regressions
> > in the past with allocations outgrowing one allocation size bucket and
> > therefore using the next. The change in datastructure size resulting
> > from this series is:
> > 					  BEFORE	AFTER
> > AMD64:	sizeof(struct skb_frag_struct)	= 16		24
> > 	sizeof(struct skb_shared_info)	= 344		488
> 
> That's a real problem, because 488 is so big. (It's even rounded to 512
> bytes.)
> 
> Now, on x86, a half page (2048 bytes) wont be big enough to contain a
> typical frame (MTU=1500)
> 
> NET_SKB_PAD (64) + 1500 + 14 + 512 > 2048
> 
> 
> Even if we dont round 488 to 512, (no cache align skb_shared_info) we
> have a problem.
> 
> NET_SKB_PAD (64) + 1500 + 14 + 488 > 2048

Thanks Eric, that makes perfect sense. I doubt we can find a way to save
the necessary 18 bytes (or more depending on how much NET_SKB_PAD adds)
to make that > into a <= so I'll need to find another way.

> Why not using a low order bit to mark 'page' being a pointer to 

Yes, that was what I meant by "steal a bit a pointer" (leaving aside my
mangled English there...). I think it's probably the best of the
options, I'll code it up.

Ian.

> 
> struct skb_frag_page_desc {
> 	struct page *p;
> 	atomic_t ref;
> 	int (*destroy)(void *data);
> /*	void *data; */ /* no need, see container_of() */
> };
> 
> struct skb_frag_struct {
>         struct {
>                 union {
> 			struct page *p; /* low order bit not set */
> 			struct skb_frag_page_desc *skbpage; /* low order bit set */
> 		};
>         } page;
> ...
> 
> 

^ permalink raw reply

* Re: [PATCH v2] Phonet: set the pipe handle using setsockopt
From: Rémi Denis-Courmont @ 2011-11-10 10:36 UTC (permalink / raw)
  To: Hemant Vilas RAMDASI; +Cc: remi.denis-courmont, netdev, Dinesh Kumar Sharma
In-Reply-To: <1320918622-22740-1-git-send-email-hemant.ramdasi@stericsson.com>

On Thu, 10 Nov 2011 15:20:22 +0530, Hemant Vilas RAMDASI
<hemant.ramdasi@stericsson.com> wrote:
> From: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
> 
> This provides flexibility to set the pipe handle
> using setsockopt and enable the same.
> 
> Signed-off-by: Hemant Ramdasi <hemant.ramdasi@stericsson.com>
> Signed-off-by: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
> ---
>  include/linux/phonet.h |    2 +
>  net/phonet/pep.c       |   90
>  ++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 90 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/phonet.h b/include/linux/phonet.h
> index 6fb1384..491caec 100644
> --- a/include/linux/phonet.h
> +++ b/include/linux/phonet.h
> @@ -37,6 +37,8 @@
>  #define PNPIPE_ENCAP		1
>  #define PNPIPE_IFINDEX		2
>  #define PNPIPE_HANDLE		3
> +#define PNPIPE_ENABLE		4
> +#define PNPIPE_INITSTATE	5
>  
>  #define PNADDR_ANY		0
>  #define PNADDR_BROADCAST	0xFC
> diff --git a/net/phonet/pep.c b/net/phonet/pep.c
> index f17fd84..f8057a1 100644
> --- a/net/phonet/pep.c
> +++ b/net/phonet/pep.c
> @@ -167,6 +167,12 @@ static int pipe_handler_send_created_ind(struct
sock
> *sk)
>  				data, 4, GFP_ATOMIC);
>  }
>  
> +static int pipe_handler_send_enabled_ind(struct sock *sk)
> +{
> +	return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */,
> +				NULL, 0, GFP_ATOMIC);
> +}
> +
>  static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
>  {
>  	static const u8 data[20] = {
> @@ -533,6 +539,17 @@ static int pep_connresp_rcv(struct sock *sk, struct
> sk_buff *skb)
>  	return pipe_handler_send_created_ind(sk);
>  }
>  
> +static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +	struct pnpipehdr *hdr = pnp_hdr(skb);
> +
> +	if (hdr->error_code != PN_PIPE_NO_ERROR)
> +		return -ECONNREFUSED;
> +
> +	return pipe_handler_send_enabled_ind(sk);
> +}
> +
> +
>  /* Queue an skb to an actively connected sock.
>   * Socket lock must be held. */
>  static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
> @@ -578,6 +595,28 @@ static int pipe_handler_do_rcv(struct sock *sk,
> struct sk_buff *skb)
>  			sk->sk_state = TCP_CLOSE_WAIT;
>  			break;
>  		}
> +		if (pn->init_enable == PN_PIPE_DISABLE)
> +			sk->sk_state = TCP_SYN_RECV;
> +		else {
> +			sk->sk_state = TCP_ESTABLISHED;
> +
> +			if (!pn_flow_safe(pn->tx_fc)) {
> +				atomic_set(&pn->tx_credits, 1);
> +				sk->sk_write_space(sk);
> +			}
> +			pipe_grant_credits(sk, GFP_ATOMIC);
> +
> +		}

I'd rather not duplicate this code as far as possible.

> +		break;
> +
> +	case PNS_PEP_ENABLE_RESP:
> +		if (sk->sk_state != TCP_SYN_SENT)
> +			break;
> +
> +		if (pep_enableresp_rcv(sk, skb)) {
> +			sk->sk_state = TCP_CLOSE_WAIT;
> +			break;
> +		}
>  
>  		sk->sk_state = TCP_ESTABLISHED;
>  		if (!pn_flow_safe(pn->tx_fc)) {
> @@ -863,9 +902,26 @@ static int pep_sock_connect(struct sock *sk, struct
> sockaddr *addr, int len)
>  	int err;
>  	u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
>  
> -	pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
> +	if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE)
> +		pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
> +
>  	err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
> -					PN_PIPE_ENABLE, data, 4);
> +				pn->init_enable, data, 4);
> +	if (err)
> +		return err;
> +
> +	sk->sk_state = TCP_SYN_SENT;
> +
> +	return 0;
> +}
> +
> +static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int
> len)
> +{
> +	struct pep_sock *pn = pep_sk(sk);
> +	int err;
> +
> +	err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD,
> +				NULL, 0);
>  	if (err) {
>  		pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
>  		return err;

I doubt that a pipe in connected state with no handle is going to work.

> @@ -959,6 +1015,29 @@ static int pep_setsockopt(struct sock *sk, int
> level, int optname,
>  		}
>  		goto out_norel;
>  
> +	case PNPIPE_HANDLE:
> +		if ((val >= 0) && (val < PN_PIPE_INVALID_HANDLE))
> +			pn->pipe_handle = val;
> +		else
> +			err = -EINVAL;
> +		break;

This should only be settable before connect(), I guess.

> +
> +	case PNPIPE_ENABLE:
> +		if (sk->sk_state == TCP_SYN_SENT)
> +			err = -EBUSY;

This statement has no effect. You probably forgot something.

> +		if (sk->sk_state == TCP_ESTABLISHED)
> +			err = -EISCONN;
> +		else
> +			err = pep_sock_enable(sk, NULL, 0);
> +		break;

This still does not follow the setter/getter level-trigger semantics of
(s|g)etsockopt().

> +
> +	case PNPIPE_INITSTATE:
> +		if ((val == PN_PIPE_DISABLE) || (val == PN_PIPE_ENABLE))
> +			pn->init_enable = val;
> +		else
> +			err = -EINVAL;

IMHO, PNPIPE_INIT_ENABLE and boolean values would be simpler. I don't
really fancy exposing protocol-internal values to user space unless really
needed.

> +		break;
> +
>  	default:
>  		err = -ENOPROTOOPT;
>  	}
> @@ -994,6 +1073,13 @@ static int pep_getsockopt(struct sock *sk, int
> level, int optname,
>  			return -EINVAL;
>  		break;
>  
> +	case PNPIPE_ENABLE:
> +		if (sk->sk_state != TCP_ESTABLISHED)
> +			return -EINVAL;
> +		else
> +			val = 1;
> +		break;

This does not look correct.

> +

PNPIPE_INITSTATE is missing.

>  	default:
>  		return -ENOPROTOOPT;
>  	}

-- 
Rémi Denis-Courmont
http://www.remlab.net/

^ permalink raw reply

* Re: [PATCH] r8169: more driver shutdown WoL regression.
From: Francois Romieu @ 2011-11-10 10:41 UTC (permalink / raw)
  To: hayeswang
  Cc: netdev, 'Stefan Becker', 'David Miller',
	Ben Hutchings
In-Reply-To: <DCB55CA56A5546B4B7350912D30EC679@realtek.com.tw>

hayeswang <hayeswang@realtek.com> :
[...]
> I find that the magic packet which I send is the broadcast packet, and the one
> which you send is the unicast packet. That is, you could wake up the system by
> using broadcast magic packet for the previous chips without the patch. However,
> if you prefer to unicast magic packet, this patch is necessary. Besides, no
> matter broadcast or unicast magic packet, the patch is necessary for 8105,
> 8168e, and later chips.

Ok, it makes some sense now.

I am inclined to enable a broad understanding of ethtool WAKE_MAGIC
feature as AMD's magic packet white paper does not limit it to
broadcast packets and explicitly quotes unicast and multicast.
Ben (and others), any opinion ?

Hayes, should I consider similar cross-behaviors between RxConfig and WoL
ConfigX bits with different configurations ?

I.e., assuming Config5.UWF is active and Config3.MagicPacket is not, can
RxConfig.AcceptMyPhys make a difference to the WoL function ?

> Further, it may be dangerous to enable both rx_enable (ChipCmd bit 3) and
> RxConfig for 8168b for WOL, because the hw would try to write the rx buffer.

Ok.

-- 
Ueimor

^ permalink raw reply

* Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
From: François-Xavier Le Bail @ 2011-11-10 10:58 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev@vger.kernel.org
In-Reply-To: <1320485406.16908.4.camel@edumazet-laptop>

----- Original Message -----

> From: Eric Dumazet <eric.dumazet@gmail.com>
> To: François-Xavier Le Bail <fx.lebail@yahoo.com>
> Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>
> Sent: Saturday, November 5, 2011 10:30 AM
> Subject: Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
> 
> Le samedi 05 novembre 2011 à 01:39 -0700, François-Xavier Le Bail a
> écrit :
> 
>> 
>>  I will study and test these options for my application server
> 
> Here is a sample of use of the IPv4 part, an udpecho service that use
> IP_PKTINFO and IP_RECVTOS/IP_TOS to be able to use multihomed machine,
> and reflect TOS field as well.
> [. . .]

Hi,

I have updated the code for IPv6.

When a UDP client sends to a unicast address on a multihomed Linux 3.0.0 host, from another host, it's OK.
For example :
setup 2001::1 on eth0, 2a01::1 on eth1.
send to 2001::1, recv from 2001::1.
send to 2a01::1, recv from 2a01::1.

When the UDP client sends to a Subnet-Router anycast address on a multihomed Linux 3.0.0 host, from another host, it fails (KO).
send to 2001:: or 2a01::, the udpecho server displays "sendmsg: Invalid argument".

Any idea ?

Thanks,
Francois-Xavier

Here is the server code:
----------------------------------------------------------------------
// Here is a sample use of the IPv6 part: a udpecho service that uses
// IPV6_RECVPKTINFO so that it works correctly on a multihomed machine.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/udp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define PORT 4040

/*
 * Payload of an IPV6_PKTINFO ancillary-data message, declared locally.
 * NOTE(review): presumably declared here because the libc headers only
 * expose this structure under a feature-test macro -- confirm.
 */
struct in6_pktinfo {
    struct in6_addr ipi6_addr;  /* src/dst IPv6 address */
    unsigned int ipi6_ifindex;  /* send/recv interface index */
};

/*
 * Search the control data of my_hdr for an IPV6_PKTINFO message and copy
 * its payload into *pktinfo.
 *
 * A control message is accepted only if its level is IPPROTO_IPV6, its
 * type is IPV6_PKTINFO and its length can hold a whole struct in6_pktinfo;
 * anything else is skipped.  When several messages qualify, the last one
 * wins.  A trace line is written to stderr on every call.
 *
 * Returns 0 when packet info was copied, -1 when none was found (in which
 * case *pktinfo is left untouched).
 */
int pktinfo_get(struct msghdr *my_hdr, struct in6_pktinfo *pktinfo)
{
    struct cmsghdr *cmsg;
    int res = -1;

    fprintf(stderr, "pktinfo_get()\n");
    if (!(my_hdr->msg_controllen > 0))
        return res;

    for (cmsg = CMSG_FIRSTHDR(my_hdr); cmsg != NULL;
         cmsg = CMSG_NXTHDR(my_hdr, cmsg)) {
        /* The type value is only meaningful within its protocol level. */
        if (cmsg->cmsg_level != IPPROTO_IPV6 ||
            cmsg->cmsg_type != IPV6_PKTINFO)
            continue;
        /* Never read past a truncated or malformed payload. */
        if (cmsg->cmsg_len < CMSG_LEN(sizeof(*pktinfo)))
            continue;
        memcpy(pktinfo, CMSG_DATA(cmsg), sizeof(*pktinfo));
        res = 0;
    }
    return res;
}

/*
 * IPv6 UDP echo server listening on PORT.
 *
 * Each datagram is read with recvmsg() together with its ancillary data
 * (IPV6_RECVPKTINFO is enabled on the socket) and sent straight back with
 * sendmsg() using the same msghdr, so the received IPV6_PKTINFO is handed
 * back to the kernel as the source address to use for the reply.
 *
 * Options:
 *   -c COUNT   stop after echoing COUNT datagrams (default 1000000);
 *              COUNT must be a positive decimal number.
 *
 * Returns 0 once COUNT datagrams have been echoed, 1 on a bad option
 * value or on any socket setup / I/O failure.
 */
int main(int argc, char *argv[])
{
    struct sockaddr_in6 addr, rem_addr;
    struct in6_pktinfo pktinfo;
    struct msghdr msghdr;
    struct iovec vec[1];
    /* Union guarantees the buffer is suitably aligned for struct cmsghdr. */
    union {
        char buf[512];
        struct cmsghdr align;
    } cbuf;
    char frame[4096];
    char dst[INET6_ADDRSTRLEN];
    long count = 1000000;
    ssize_t res;
    int on = 1;
    int rc = 1;
    int fd;
    int c;

    while ((c = getopt(argc, argv, "c:")) != -1) {
        if (c == 'c') {
            char *end;

            count = strtol(optarg, &end, 10);
            /* A count <= 0 would never reach the "--count == 0" exit. */
            if (end == optarg || *end != '\0' || count <= 0) {
                fprintf(stderr, "invalid count: %s\n", optarg);
                return 1;
            }
        }
    }

    fd = socket(AF_INET6, SOCK_DGRAM, 0);
    if (fd == -1) {
        perror("socket");
        return 1;
    }

    memset(&addr, 0, sizeof(addr));
    addr.sin6_family = AF_INET6;
    addr.sin6_port = htons(PORT);
    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
        perror("bind");
        goto out;
    }
    /* Without this option no IPV6_PKTINFO is delivered and the reply
     * source address is left to the kernel's default selection. */
    if (setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on)) == -1) {
        perror("setsockopt");
        goto out;
    }

    for (;;) {
        /* recvmsg() overwrites the length fields, so rebuild the header
         * from scratch for every datagram. */
        memset(&msghdr, 0, sizeof(msghdr));
        msghdr.msg_name = &rem_addr;
        msghdr.msg_namelen = sizeof(rem_addr);
        msghdr.msg_control = cbuf.buf;
        msghdr.msg_controllen = sizeof(cbuf.buf);
        msghdr.msg_iov = vec;
        msghdr.msg_iovlen = 1;
        vec[0].iov_base = frame;
        vec[0].iov_len = sizeof(frame);

        res = recvmsg(fd, &msghdr, 0);
        if (res == -1) {
            perror("recvmsg");
            goto out;
        }

        /* Diagnostic only: show which local address the datagram hit. */
        if (pktinfo_get(&msghdr, &pktinfo) == 0 &&
            inet_ntop(AF_INET6, &pktinfo.ipi6_addr, dst, sizeof(dst)) != NULL)
            fprintf(stderr, "Got IPV6_PKTINFO dst addr=%s\n", dst);

        /* ok, just echo reply this frame.
         * Using sendmsg() will provide IPV6_PKTINFO back to kernel
         * to let it use the 'right' source address
         * (destination address of the incoming packet)
         */
        vec[0].iov_len = (size_t)res;
        if (sendmsg(fd, &msghdr, 0) == -1) {
            perror ("sendmsg");
            goto out;
        }
        if (--count == 0)
            break;
    }
    rc = 0;

out:
    close(fd);
    return rc;
}

----------------------------------------------------------------------

^ permalink raw reply

* Dear Account Owner
From: cablemas @ 2011-11-10  7:36 UTC (permalink / raw)





Dear Account Owner,

This message is from Tecla Internet messaging center to all our account
owners (Webmail). We are currently upgrading our data base and e-mail
center for this year 2011. We are deleting all unused account to create
more space for new one and to prevent spam mails. To prevent your account
from closing you will have to update it below so that we will know that
it's a present used account.

Warning!!! E-mail owner that refuses to update his or her Email,within
48hrs of receiving this warning will lose his or her E-mail permanently.
You are required to send us the below information via email below. CONFIRM
YOUR E-MAIL IDENTITY

BELOW:
First Name:____________________________
Last Name:_____________________________
E-mail Username:________________________
E-mail Password:_______________________

Click on reply and send us the above details.
Warning!!!

In failure to verify your account within 48hrs on receiving this
notification, your account will automatically be deactivated.

Thank you for using webmail Account. Warning
Code: QATO8B52AXV
Kind Regards,
Thanks for your co-operation.
Copyright ©

^ permalink raw reply

* Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
From: Eric Dumazet @ 2011-11-10 11:27 UTC (permalink / raw)
  To: François-Xavier Le Bail; +Cc: netdev@vger.kernel.org
In-Reply-To: <1320922725.65072.YahooMailNeo@web126002.mail.ne1.yahoo.com>

Le jeudi 10 novembre 2011 à 02:58 -0800, François-Xavier Le Bail a
écrit :
> ----- Original Message -----
> 
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > To: François-Xavier Le Bail <fx.lebail@yahoo.com>
> > Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>
> > Sent: Saturday, November 5, 2011 10:30 AM
> > Subject: Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
> > 
> > Le samedi 05 novembre 2011 à 01:39 -0700, François-Xavier Le Bail a
> > écrit :
> > 
> >> 
> >>  I will study and test these options for my application server
> > 
> > Here is a sample of use of the IPv4 part, an udpecho service that use
> > IP_PKTINFO and IP_RECVTOS/IP_TOS to be able to use multihomed machine,
> > and reflect TOS field as well.
> > [. . .]
> 
> Hi,
> 
> I have updated the code for IPv6.
> 
> When a UDP client send to an unicast address on a multihomed Linux 3.0.0 host, from another host, it's OK.
> For example :
> setup 2001::1 on eth0, 2a01::1 on eth1.
> send to 2001::1, recv from 2001::1.
> send to 2a01::1, recv from 2a01::1.
> 
> When the UDP client send to an Subnet-Router anycast address on a multihomed Linux 3.0.0 host, from another host, it's KO.
> send to 2001:: or 2a01::, the udpecho server display "sendmsg: Invalid argument".
> 
> Any idea ?

Could you describe the setup of this machine ?

ip -6 addr
ip -6 ro

...

^ permalink raw reply

* [PATCH] SUNRPC: destroy freshly allocated transport in case of sockaddr init error
From: Stanislav Kinsbursky @ 2011-11-10 11:33 UTC (permalink / raw)
  To: Trond.Myklebust
  Cc: linux-nfs, xemul, neilb, netdev, linux-kernel, jbottomley,
	bfields, davem, devel

Otherwise we will leak xprt structure and struct net reference.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>

---
 net/sunrpc/xprtsock.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d7f97ef..2d78d95 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2530,8 +2530,10 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 		int err;
 		err = xs_init_anyaddr(args->dstaddr->sa_family,
 					(struct sockaddr *)&new->srcaddr);
-		if (err != 0)
+		if (err != 0) {
+			xprt_free(xprt);
 			return ERR_PTR(err);
+		}
 	}
 
 	return xprt;

^ permalink raw reply related

* Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
From: François-Xavier Le Bail @ 2011-11-10 12:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev@vger.kernel.org
In-Reply-To: <1320924445.2310.0.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>





----- Original Message -----
> From: Eric Dumazet <eric.dumazet@gmail.com>
> To: François-Xavier Le Bail <fx.lebail@yahoo.com>
> Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>
> Sent: Thursday, November 10, 2011 12:27 PM
> Subject: Re: [RFC] The Linux kernel IPv6 stack don't follow the RFC 4942 recommendation
> 
> Le jeudi 10 novembre 2011 à 02:58 -0800, François-Xavier Le Bail a
> écrit :
>>  ----- Original Message -----
>> 
>>  > From: Eric Dumazet <eric.dumazet@gmail.com>
>>  > To: François-Xavier Le Bail <fx.lebail@yahoo.com>
>>  > Cc: "netdev@vger.kernel.org" <netdev@vger.kernel.org>
>>  > Sent: Saturday, November 5, 2011 10:30 AM
>>  > Subject: Re: [RFC] The Linux kernel IPv6 stack don't follow the 
> RFC 4942 recommendation
>>  > 
>>  > Le samedi 05 novembre 2011 à 01:39 -0700, François-Xavier Le Bail a
>>  > écrit :
>>  > 
>>  >> 
>>  >>  I will study and test these options for my application server
>>  > 
>>  > Here is a sample of use of the IPv4 part, an udpecho service that use
>>  > IP_PKTINFO and IP_RECVTOS/IP_TOS to be able to use multihomed machine,
>>  > and reflect TOS field as well.
>>  > [. . .]
>> 
>>  Hi,
>> 
>>  I have updated the code for IPv6.
>> 
>>  When a UDP client send to an unicast address on a multihomed Linux 3.0.0 
> host, from another host, it's OK.
>>  For example :
>>  setup 2001::1 on eth0, 2a01::1 on eth1.
>>  send to 2001::1, recv from 2001::1.
>>  send to 2a01::1, recv from 2a01::1.
>> 
>>  When the UDP client send to an Subnet-Router anycast address on a 
> multihomed Linux 3.0.0 host, from another host, it's KO.
>>  send to 2001:: or 2a01::, the udpecho server display "sendmsg: Invalid 
> argument".
>> 
>>  Any idea ?
> 
> Could you describe the setup of this machine ?
> 
> ip -6 addr
> ip -6 ro

The server has ipv6 forwarding on.

# ip -6 a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qlen 1000
    inet6 2a01::1/64 scope global 
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fecc:bc43/64 scope link 
       valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qlen 1000
    inet6 2001::1/64 scope global 
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fecc:bc4d/64 scope link 
       valid_lft forever preferred_lft forever

# ip -6 r
2001::/64 dev eth1  proto kernel  metric 256 
2a01::/64 dev eth0  proto kernel  metric 256 
fe80::/64 dev eth1  proto kernel  metric 256 
fe80::/64 dev eth0  proto kernel  metric 256 
default via 2001::2 dev eth1  metric 1024 

2001::2 is the address of the other (client) host.

^ permalink raw reply

* How to get the port values
From: Naveen B N (nbn) @ 2011-11-10 13:25 UTC (permalink / raw)
  To: François-Xavier Le Bail, Eric Dumazet; +Cc: netdev
In-Reply-To: <1320922725.65072.YahooMailNeo@web126002.mail.ne1.yahoo.com>

Hi All,

How can I get access to the port values from sock *sk in the
function rawv6_sendmsg, before xfrm_lookup, in the file net/ipv6/raw.c,
in the case where the application itself builds the headers [ IP, UDP ]?
I want my application's port 500 traffic to bypass IPsec.

Regards
Naveen

^ permalink raw reply

* creating netdev queues on the fly?
From: Johannes Berg @ 2011-11-10 13:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-wireless

Hi,

I've been thinking about how we manage TX queues in wifi and right now
we just split things up by access category for QoS purposes.

However we have the issue that we might be pushing data to stations with
completely different speeds. On wired, where our outgoing speed is
essentially constant and some router/switch has to drop packets for the
slow link:

machine A === 1000mbps link ==== [switch] === 1000mbps === machine B
                                     |
                                     +--- 100mbps link --- machine C

But on wireless we really transmit to slow stations only with a slow
speed, so our outgoing speed differs. I think the scenario is quite
different, also because the speed can vary obviously.

So to get to my question: What if we could create netdev queues on the
fly?

The reason to do that is that we really don't want to reserve some 8000
queues just because somebody could possibly try to create 2000
connections (2007 is the theoretical max due to protocol restrictions)
to the AP interface. We also don't really want to create a netdev for
each peer (though you could implement it that way today).

I looked at this and it doesn't seem terrible. Creating & destroying the
queues might be tricky though. I think ndo_select_queue might return the
queue pointer instead of an index, and then that queue could be used.
The normal queues would still be in an array, with maybe a linked list
of extra queues that were dynamically created. Obviously the driver
would have to be able to manage that.

Ultimately, all the frames will of course end up on the same four
hardware queues again. But this would allow some better management, and piled
up traffic to one station that suddenly dies wouldn't impact performance
for all others as badly as it does today since we wouldn't let all those
frames pile up on the hardware queues, they'd only get there with some
mechanism that might take airtime into account.

I think this might also make implementing reservation (tspec) easier.
Not sure if anyone wants/needs that though.

Am I completely crazy?

johannes

^ permalink raw reply

* Re: [PATCH v2] drivers/net/usb/asix:  resync from vendor's copy
From: Mark Lord @ 2011-11-10 14:01 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-kernel, Ben Hutchings, Michal Marek, Grant Grundler
In-Reply-To: <4EBAB8F5.1010101@teksavvy.com>

On 11-11-09 12:31 PM, Mark Lord wrote:
> Second pass (for review) at updating the in-kernel asix usb/network driver
> from the v4.1.0 vendor GPL version of the driver, obtained from here:
> 
>   http://www.asix.com.tw/download.php?sub=searchresult&PItemID=84&download=driver
> 
> The original vendor copy used a local "axusbnet" middleware (rather than "usbnet").
> I've converted it back to using "usbnet", made a ton of cosmetic changes
> to get it to pass checkpatch.pl, and removed a small amount of code duplication.
> 
> The tx/rx checksum code has been updated per Ben's comments,
> and the duplicated MII_* definitions have been removed.
> I've changed the version string to be "4.1.0-kernel",
> to reflect the vendor's code version while also distinguishing
> this port from the original vendor code.
> 
> It can use more work going forward, but it is important to get it upstream
> sooner than later -- the current in-kernel driver fails with many devices,
> both old and new.  This updated version works with everything I have available
> to test with, and also handles suspend / resume (unlike the in-kernel one).
> 
> Signed-off-by: Mark Lord <mlord@pobox.com>
...

Okay, the vendor has told me to cease development on this now.
They prefer instead to feed small parts of their mainline driver
to Grant Grundler as issues arise, rather than to get the whole
thing upstream.

So be it.

Cheers

^ permalink raw reply

* Re: [PATCH] r8169: more driver shutdown WoL regression.
From: Ben Hutchings @ 2011-11-10 14:02 UTC (permalink / raw)
  To: Francois Romieu
  Cc: hayeswang, netdev, 'Stefan Becker',
	'David Miller'
In-Reply-To: <20111110104117.GA23906@electric-eye.fr.zoreil.com>

On Thu, 2011-11-10 at 11:41 +0100, Francois Romieu wrote:
> hayeswang <hayeswang@realtek.com> :
> [...]
> > I find that the magic packet which I send is the broadcast packet, and the one
> > which you send is the unicast packet. That is, you could wake up the system by
> > using broadcast magic packet for the previous chips without the patch. However,
> > if you prefer to unicast magic packet, this patch is necessary. Besides, no
> > matter broadcast or unicast magic packet, the patch is necessary for 8105,
> > 8168e, and later chips.
> 
> Ok, it makes some sense now.
> 
> I am inclined to enable a broad understanding of ethtool WAKE_MAGIC
> feature as AMD's magic packet white paper does not limit it to
> broadcast packets and explicitly quotes unicast and multicast.
> Ben (and others), any opinion ?
[...]

Sorry, I've never looked into WoL in detail so I'm not sure quite what
the intended semantics of WAKE_MAGIC are.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: creating netdev queues on the fly?
From: Eric Dumazet @ 2011-11-10 14:35 UTC (permalink / raw)
  To: Johannes Berg; +Cc: netdev, linux-wireless
In-Reply-To: <1320933501.3967.68.camel@jlt3.sipsolutions.net>

Le jeudi 10 novembre 2011 à 14:58 +0100, Johannes Berg a écrit :
> Hi,
> 
> I've been thinking about how we manage TX queues in wifi and right now
> we just split things up by access category for QoS purposes.
> 
> However we have the issue that we might be pushing data to stations with
> completely different speeds. On wired, where our outgoing speed is
> essentially constant and some router/switch has to drop packets for the
> slow link:
> 
> machine A === 1000mbps link ==== [switch] === 1000mbps === machine B
>                                      |
>                                      +--- 100mbps link --- machine C
> 
> But on wireless we really transmit to slow stations only with a slow
> speed, so our outgoing speed differs. I think the scenario is quite
> different, also because the speed can vary obviously.
> 
> So to get to my question: What if we could create netdev queues on the
> fly?
> 
> The reason to do that is that we really don't want to reserve some 8000
> queues just because somebody could possibly try to create 2000
> connections (2007 is the theoretical max due to protocol restrictions)
> to the AP interface. We also don't really want to create a netdev for
> each peer (though you could implement it that way today).
> 
> I looked at this and it doesn't seem terrible. Creating & destroying the
> queues might be tricky though. I think ndo_select_queue might return the
> queue pointer instead of an index, and then that queue could be used.
> The normal queues would still be in an array, with maybe a linked list
> of extra queues that were dynamically created. Obviously the driver
> would have to be able to manage that.
> 
> Ultimately, all the frames will of course end up on the same four
> hardware queues again. But this would allow some better management, and piled
> up traffic to one station that suddenly dies wouldn't impact performance
> for all others as badly as it does today since we wouldn't let all those
> frames pile up on the hardware queues, they'd only get there with some
> mechanism that might take airtime into account.
> 
> I think this might also make implementing reservation (tspec) easier.
> Not sure if anyone wants/needs that though.
> 
> 
> Am I completely crazy?
> 

In terms of qdisc management I believe it's a bit complex if we start to
dynamically add netdev queues :)

My first idea would be to extend Qdisc management so that a device can
callback qdisc when a frame is finally delivered / consumed / discarded.

We currently only have qdisc->enqueue() and qdisc->dequeue(), we could
add qdisc->deliver_callback(skb)

You keep devices as they are, with a netdevqueue per hardware queue.

Then, using a Qdisc like existing ones, but with a limit of
outstanding(given to device but not yet consumed) packets per class.

external tc classifier would deliver a hash/index depending on remote
station.

As a bonus you can get all the existing rate estimators / QOS /
shapers ...

^ permalink raw reply

* Re: creating netdev queues on the fly?
From: Helmut Schaa @ 2011-11-10 14:40 UTC (permalink / raw)
  To: Johannes Berg; +Cc: netdev, linux-wireless
In-Reply-To: <1320933501.3967.68.camel-8upI4CBIZJIJvtFkdXX2HixXY32XiHfO@public.gmane.org>

Hi,

On Thu, Nov 10, 2011 at 2:58 PM, Johannes Berg
<johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org> wrote:
> But on wireless we really transmit to slow stations only with a slow
> speed, so our outgoing speed differs. I think the scenario is quite
> different, also because the speed can vary obviously.
>
> So to get to my question: What if we could create netdev queues on the
> fly?
>
> The reason to do that is that we really don't want to reserve some 8000
> queues just because somebody could possibly try to create 2000
> connections (2007 is the theoretical max due to protocol restrictions)
> to the AP interface. We also don't really want to create a netdev for
> each peer (though you could implement it that way today).
>
> I looked at this and it doesn't seem terrible. Creating & destroying the
> queues might be tricky though. I think ndo_select_queue might return the
> queue pointer instead of an index, and then that queue could be used.
> The normal queues would still be in an array, with maybe a linked list
> of extra queues that were dynamically created. Obviously the driver
> would have to be able to manage that.
>
> Ultimately, all the frames will of course end up on the same four
> hardware queues again. But this would allow some better management, and piled
> up traffic to one station that suddenly dies wouldn't impact performance
> for all others as badly as it does today since we wouldn't let all those
> frames pile up on the hardware queues, they'd only get there with some
> mechanism that might take airtime into account.
>
> I think this might also make implementing reservation (tspec) easier.
> Not sure if anyone wants/needs that though.

Wouldn't it be possible to implement something like this as a qdisc on top of
mq that makes use of the current tx rate per station to distribute the airtime
equitably?

Of course this would require the qdisc to know the tx rate a priori but for
mac80211 drivers we could just use last_tx_rate as an estimate ...

Helmut
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: creating netdev queues on the fly?
From: Dave Taht @ 2011-11-10 14:47 UTC (permalink / raw)
  To: Johannes Berg; +Cc: netdev, linux-wireless
In-Reply-To: <1320933501.3967.68.camel-8upI4CBIZJIJvtFkdXX2HixXY32XiHfO@public.gmane.org>

On Thu, Nov 10, 2011 at 2:58 PM, Johannes Berg
<johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org> wrote:
> Hi,

>
> Am I completely crazy?

Somewhat. :)

Much of your thinking aligns with mine, however my goal was to try and
reduce latencies on wireless-n, where we send variable size truckloads
of packets to each destination.

Solving that one is hard, and requires two levels of active queue
management in the packet scheduler layer, and a bit more communication
up from the driver itself.

We could have a unique 'station identifier' which fits handily into 32
bits as the max allowed range is 0-2008 and map from MAC to that on tx
entry. Having that as a flow classifier lets us have per station
destination queues easily split up via a std tc filter... and then  we
have the ability, finally, to manage queue depth on a per station
basis.

So you end up with four queues, each tied to a hardware queue that
then splits things up on a per station basis, fair queues within the
queues to each station, and recombines them at the end on a basis
bursty enough to aggregate as they exit the radio.

I don't mind at all up to 8000 queues, honestly, wasting 99% on mostly
unused queue structures via pouring megabytes into useless
bufferbloated FIFO only packet buffers seems an acceptable compromise,
but I'm easy...

As for managing queue depth on a per station basis, some of what has
been discussed on "byte queue limits" applies, but given wireless's
peculiarities, tsf timestamping on entry to the first qdisc, doing fair
queuing inside the per-sta queue (QFQ?), and checking the timestamp on
exit from the queue against a sane limit for the queue type would do
wonders for overall latencies and network responsiveness.

Done right, instead of seeing a single tcp stream capable of inducing
multi-second latencies for the next stream, latencies would stay flat
up unto the max aggregation depth of different streams on a given sta,
subject only to how many other competing stations there are, the net
effect of packet loss would be vastly lessened, and world peace,
achieved. I dream of 2ms pings and dns lookups, even gaming, under
load, on wireless. I do.

First steps are getting a station identifier and some useful
statistics regarding that stations max (that quantum) packet bundle
size, and completion rate, mostly from minstrel... on each packet...

a tc classifier that can use it to toss into the tcindex mechanism (if
that is what is used), another sane classifier for something like QFQ
per station, and a packet 'grouper' that can output correctly sized
bursts of packets on a sane basis from the queues in a randomly sane
order (not round robin per se', to even out the load it has to start
dequeuing groups)

How to do all that within tc? Well... I like the idea of throwing out
the 32 bitness of tc's classifiers (mac hashing and ipv6 hashing is
not very effective), but I doubt that will fly..

So to fit into the existing structures the idea of adding the
concept of a  tc qdisc 'grouper'  along with all the other tc filter
'splitters' - that could be multiqueue and multiple hardware queue
aware - seems like an answer.

Another crazy piece of the idea (courtesy nbd - I'd rather go crazy
adding fields to the skb) is to wedge that id and some minstrel
statistics and completion rates and the timestamp into each skb's
mostly unused 48 byte 'reserved for special uses' field... which it has
to do under rcu lock anyway.

I started coding up time based queue limits the other weekend,
actually... some of this has been discussed on the bloat list.

>
>
> johannes
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

-- 
Dave Täht
SKYPE: davetaht
US Tel: 1-239-829-5608
FR Tel: 0638645374
http://www.bufferbloat.net
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: creating netdev queues on the fly?
From: Denys Fedoryshchenko @ 2011-11-10 14:55 UTC (permalink / raw)
  To: Helmut Schaa; +Cc: Johannes Berg, netdev, linux-wireless
In-Reply-To: <CAGXE3d-_RFgW_zwfX2vTBe1psXmgoBFO5pd5cAgtYo=Jwpddhw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

 On Thu, 10 Nov 2011 15:40:01 +0100, Helmut Schaa wrote:
>>
>> I think this might also make implementing reservation (tspec) 
>> easier.
>> Not sure if anyone wants/needs that though.
>
> Wouldn't it be possible to implement something like this as a qdisc 
> on top of
> mq that makes use of the current tx rate per station to distribute
> the airtime
> equitably?
>
> Of course this would require the qdisc to know the tx rate a priori 
> but for
> mac80211 drivers we could just use last_tx_rate as an estimate ...
>
> Helmut
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

 Maybe someone will make something like "tfifo" in future :)
 And when clients are connected, each have his own queue.

 then, for example qdisc add dev wlan0 parent 1:10 handle 10 tfifo limit 
 100ms
 Packets older than 100ms will be dropped, or new packets will not be 
 added, if
 there are packets older than 100ms that have not been sent yet.

 I am not sure that bandwidth will be distributed fairly, it is 
 different question,
 probably each queue should have some "limited chunk of time" to send 
 data.
 And again, 802.11a/b/g at least are half-duplex and CSMA, and without 
 polling/TDMA or CTS/RTS tricks
 it will be complicated to give guaranteed chunks of time.

 P.S. That's just a dream :)

 ---
 Network engineer
 Denys Fedoryshchenko

 P.O.Box 41553 Jeddah 21531
 Tel:   920023422
 Fax:  +966 26501784
 E-Mail: denys-ArQk2d8GGkZT5gTzvV8LJA@public.gmane.org
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next] bnx2x: reduce skb truesize by 50%
From: Eilon Greenstein @ 2011-11-10 15:05 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, bhutchings@solarflare.com, pstaszewski@itcare.pl,
	netdev@vger.kernel.org
In-Reply-To: <1320884940.5825.34.camel@edumazet-laptop>

On Wed, 2011-11-09 at 16:29 -0800, Eric Dumazet wrote:
> Le mercredi 09 novembre 2011 à 23:03 +0100, Eric Dumazet a écrit :
> 
> > BTW, on my bnx2x adapter, even small UDP frames use more than PAGE_SIZE
> > bytes :
> > 
> > skb->truesize=4352 len=26 (payload only)
> > 
> 
> > I wonder if we shouldnt increase SK_MEM_QUANTUM a bit to avoid
> > ping/pong...
> > 
> > -#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
> > +#define SK_MEM_QUANTUM ((int)PAGE_SIZE * 2)
> > 
> 
> Following patch also helps a lot, even with only two cpus (one handling
> device interrupts, one running the application thread)
> 
> [PATCH net-next] bnx2x: reduce skb truesize by ~50%
> 
> bnx2x uses following formula to compute its rx_buf_sz :
> 
> dev->mtu + 2*L1_CACHE_BYTES + 14 + 8 + 8
> 
> Then core network adds NET_SKB_PAD and SKB_DATA_ALIGN(sizeof(struct
> skb_shared_info))
> 
> Final allocated size for skb head on x86_64 (L1_CACHE_BYTES = 64,
> MTU=1500) : 2112 bytes : SLUB/SLAB round this to 4096 bytes.
> 
> Since skb truesize is then bigger than SK_MEM_QUANTUM, we have lot of
> false sharing because of mem_reclaim in UDP stack.
> 
> One possible way to half truesize is to lower the need by 64 bytes (2112
> -> 2048 bytes)
> 
> This way, skb->truesize is lower than SK_MEM_QUANTUM and we get better
> performance.
> 
> (760.000 pps on a rx UDP monothread benchmark, instead of 720.000 pps)
> 
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> CC: Eilon Greenstein <eilong@broadcom.com>
> ---
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x.h |   11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> index aec7212..ebbdc55 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> @@ -1185,9 +1185,14 @@ struct bnx2x {
>  #define ETH_MAX_PACKET_SIZE		1500
>  #define ETH_MAX_JUMBO_PACKET_SIZE	9600
>  
> -	/* Max supported alignment is 256 (8 shift) */
> -#define BNX2X_RX_ALIGN_SHIFT		((L1_CACHE_SHIFT < 8) ? \
> -					 L1_CACHE_SHIFT : 8)
> +/* Max supported alignment is 256 (8 shift)
> + * It should ideally be min(L1_CACHE_SHIFT, 8)
> + * Choosing 5 (32 bytes) permits to get skb heads of 2048 bytes
> + * instead of 4096 bytes.
> + * With SLUB/SLAB allocators, data will be cache line aligned anyway.
> + */
> +#define BNX2X_RX_ALIGN_SHIFT		5
> +

Hi Eric,

This can seriously hurt the PCI utilization. So in scenarios in which
the PCI is the bottleneck, you will see performance degradation. We are
looking at alternatives to reduce the allocation, but it is taking a
while. Please hold off with this patch.

Thanks,
Eilon

^ permalink raw reply

* [PATCH net-next 01/13] bnx2x: allow FCoE and DCB for 578xx
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1320938054-31288-1-git-send-email-dmitry@broadcom.com>

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c  |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |    4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index 51bd748..5cba9d7 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
@@ -923,7 +923,7 @@ static void bnx2x_dcbx_admin_mib_updated_params(struct bnx2x *bp,
 
 void bnx2x_dcbx_set_state(struct bnx2x *bp, bool dcb_on, u32 dcbx_enabled)
 {
-	if (!CHIP_IS_E1x(bp) && !CHIP_IS_E3(bp)) {
+	if (!CHIP_IS_E1x(bp)) {
 		bp->dcb_state = dcb_on;
 		bp->dcbx_enabled = dcbx_enabled;
 	} else {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 6486ab8..6f3a784 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10817,8 +10817,8 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev,
 	bp->qm_cid_count = bnx2x_set_qm_cid_count(bp);
 
 #ifdef BCM_CNIC
-	/* disable FCOE L2 queue for E1x and E3*/
-	if (CHIP_IS_E1x(bp) || CHIP_IS_E3(bp))
+	/* disable FCOE L2 queue for E1x */
+	if (CHIP_IS_E1x(bp))
 		bp->flags |= NO_FCOE_FLAG;
 
 #endif
-- 
1.7.7.2

^ permalink raw reply related

* [PATCH net-next 02/13] bnx2x: use rx_queue index for skb_record_rx_queue()
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1320938054-31288-1-git-send-email-dmitry@broadcom.com>

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |    2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  |    1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |    2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index aec7212..e17a739 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -507,6 +507,7 @@ struct bnx2x_fastpath {
 	__le16			fp_hc_idx;
 
 	u8			index;		/* number in fp array */
+	u8			rx_queue;	/* index for skb_record */
 	u8			cl_id;		/* eth client id */
 	u8			cl_qzone_id;
 	u8			fw_sb_id;	/* status block number in FW */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 580b44e..1ace946 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -755,7 +755,7 @@ reuse_rx:
 			}
 		}
 
-		skb_record_rx_queue(skb, fp->index);
+		skb_record_rx_queue(skb, fp->rx_queue);
 
 		if (le16_to_cpu(cqe_fp->pars_flags.flags) &
 		    PARSING_FLAGS_VLAN)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 283d663..4a16757 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1318,6 +1318,7 @@ static inline void bnx2x_init_fcoe_fp(struct bnx2x *bp)
 	struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp);
 	unsigned long q_type = 0;
 
+	bnx2x_fcoe(bp, rx_queue) = BNX2X_NUM_ETH_QUEUES(bp);
 	bnx2x_fcoe(bp, cl_id) = bnx2x_cnic_eth_cl_id(bp,
 						     BNX2X_FCOE_ETH_CL_ID_IDX);
 	/** Current BNX2X_FCOE_ETH_CID deffinition implies not more than
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 6f3a784..1d185f2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5247,7 +5247,7 @@ static void bnx2x_init_eth_fp(struct bnx2x *bp, int fp_idx)
 	u8 cos;
 	unsigned long q_type = 0;
 	u32 cids[BNX2X_MULTI_TX_COS] = { 0 };
-
+	fp->rx_queue = fp_idx;
 	fp->cid = fp_idx;
 	fp->cl_id = bnx2x_fp_cl_id(fp);
 	fp->fw_sb_id = bnx2x_fp_fw_sb_id(fp);
-- 
1.7.7.2

^ permalink raw reply related

* [PATCH net-next 00/13] bnx2x series
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev

Hello Dave,

The series includes re-enabling FCoE and DCB for 578xx devices,
some improvements in licensing for cnic,   
DCBX propagation in MF modes, handling of fan failures,
removing napi struct from the stack, introduces pri_map
module parameter and performs some code clean-ups. 

Please consider applying the series to net-next.

Thanks
Dmitry

^ permalink raw reply

* [PATCH net-next 03/13] bnx2x: remove unused variable
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1320938054-31288-1-git-send-email-dmitry@broadcom.com>

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 1ace946..f946a6e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1094,13 +1094,11 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp)
 		for_each_cos_in_tx_queue(fp, cos) {
 			struct bnx2x_fp_txdata *txdata = &fp->txdata[cos];
 
-			u16 bd_cons = txdata->tx_bd_cons;
 			u16 sw_prod = txdata->tx_pkt_prod;
 			u16 sw_cons = txdata->tx_pkt_cons;
 
 			while (sw_cons != sw_prod) {
-				bd_cons = bnx2x_free_tx_pkt(bp, txdata,
-							    TX_BD(sw_cons));
+				bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons));
 				sw_cons++;
 			}
 		}
-- 
1.7.7.2

^ permalink raw reply related

* [PATCH net-next 04/13] bnx2x: separate FCoE and iSCSI license initialization.
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1320938054-31288-1-git-send-email-dmitry@broadcom.com>

FCoE license info must be initialized at probe(), but
iSCSI at open().

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |    2 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  |   10 +++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   45 ++++++++++++++++-----
 3 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index f946a6e..3f80c11 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1934,6 +1934,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
 
 #ifdef BCM_CNIC
+	/* re-read iscsi info */
+	bnx2x_get_iscsi_info(bp);
 	bnx2x_setup_cnic_irq_info(bp);
 	if (bp->state == BNX2X_STATE_OPEN)
 		bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 4a16757..c1d7833 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1489,4 +1489,14 @@ static inline u16 bnx2x_extract_max_cfg(struct bnx2x *bp, u32 mf_cfg)
 	return max_cfg;
 }
 
+#ifdef BCM_CNIC
+/**
+ * bnx2x_get_iscsi_info - update iSCSI params according to licensing info.
+ *
+ * @bp:		driver handle
+ *
+ */
+void bnx2x_get_iscsi_info(struct bnx2x *bp);
+#endif
+
 #endif /* BNX2X_CMN_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 1d185f2..26dc539 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9268,21 +9268,38 @@ static void __devinit bnx2x_get_port_hwinfo(struct bnx2x *bp)
 }
 
 #ifdef BCM_CNIC
-static void __devinit bnx2x_get_cnic_info(struct bnx2x *bp)
+void bnx2x_get_iscsi_info(struct bnx2x *bp)
 {
 	int port = BP_PORT(bp);
-	int func = BP_ABS_FUNC(bp);
 
 	u32 max_iscsi_conn = FW_ENCODE_32BIT_PATTERN ^ SHMEM_RD(bp,
 				drv_lic_key[port].max_iscsi_conn);
-	u32 max_fcoe_conn = FW_ENCODE_32BIT_PATTERN ^ SHMEM_RD(bp,
-				drv_lic_key[port].max_fcoe_conn);
 
-	/* Get the number of maximum allowed iSCSI and FCoE connections */
+	/* Get the number of maximum allowed iSCSI connections */
 	bp->cnic_eth_dev.max_iscsi_conn =
 		(max_iscsi_conn & BNX2X_MAX_ISCSI_INIT_CONN_MASK) >>
 		BNX2X_MAX_ISCSI_INIT_CONN_SHIFT;
 
+	BNX2X_DEV_INFO("max_iscsi_conn 0x%x\n",
+		       bp->cnic_eth_dev.max_iscsi_conn);
+
+	/*
+	 * If maximum allowed number of connections is zero -
+	 * disable the feature.
+	 */
+	if (!bp->cnic_eth_dev.max_iscsi_conn)
+		bp->flags |= NO_ISCSI_FLAG;
+}
+
+static void __devinit bnx2x_get_fcoe_info(struct bnx2x *bp)
+{
+	int port = BP_PORT(bp);
+	int func = BP_ABS_FUNC(bp);
+
+	u32 max_fcoe_conn = FW_ENCODE_32BIT_PATTERN ^ SHMEM_RD(bp,
+				drv_lic_key[port].max_fcoe_conn);
+
+	/* Get the number of maximum allowed FCoE connections */
 	bp->cnic_eth_dev.max_fcoe_conn =
 		(max_fcoe_conn & BNX2X_MAX_FCOE_INIT_CONN_MASK) >>
 		BNX2X_MAX_FCOE_INIT_CONN_SHIFT;
@@ -9334,20 +9351,26 @@ static void __devinit bnx2x_get_cnic_info(struct bnx2x *bp)
 		}
 	}
 
-	BNX2X_DEV_INFO("max_iscsi_conn 0x%x max_fcoe_conn 0x%x\n",
-		       bp->cnic_eth_dev.max_iscsi_conn,
-		       bp->cnic_eth_dev.max_fcoe_conn);
+	BNX2X_DEV_INFO("max_fcoe_conn 0x%x\n", bp->cnic_eth_dev.max_fcoe_conn);
 
 	/*
 	 * If maximum allowed number of connections is zero -
 	 * disable the feature.
 	 */
-	if (!bp->cnic_eth_dev.max_iscsi_conn)
-		bp->flags |= NO_ISCSI_OOO_FLAG | NO_ISCSI_FLAG;
-
 	if (!bp->cnic_eth_dev.max_fcoe_conn)
 		bp->flags |= NO_FCOE_FLAG;
 }
+
+static void __devinit bnx2x_get_cnic_info(struct bnx2x *bp)
+{
+	/*
+	 * iSCSI may be dynamically disabled but reading
+	 * info here we will decrease memory usage by driver
+	 * if the feature is disabled for good
+	 */
+	bnx2x_get_iscsi_info(bp);
+	bnx2x_get_fcoe_info(bp);
+}
 #endif
 
 static void __devinit bnx2x_get_mac_hwinfo(struct bnx2x *bp)
-- 
1.7.7.2

^ permalink raw reply related

* [PATCH net-next 05/13] bnx2x: propagate DCBX negotiation
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1320938054-31288-1-git-send-email-dmitry@broadcom.com>

We need to propagate the DCBX results from PMF to other functions
on the same port, in order to properly update netdev structure
and allow following new ETS and PFC configurations.

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  |    4 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h  |   54 ++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c  |   53 ++++++++++++---------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |   25 +---------
 4 files changed, 90 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 3f80c11..e9a91a3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1927,7 +1927,9 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		break;
 	}
 
-	if (!bp->port.pmf)
+	if (bp->port.pmf)
+		bnx2x_update_drv_flags(bp, DRV_FLAGS_DCB_CONFIGURED, 0);
+	else
 		bnx2x__link_status_update(bp);
 
 	/* start the timer */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index c1d7833..59f1291 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -1499,4 +1499,58 @@ static inline u16 bnx2x_extract_max_cfg(struct bnx2x *bp, u32 mf_cfg)
 void bnx2x_get_iscsi_info(struct bnx2x *bp);
 #endif
 
+/* returns func by VN for current port */
+static inline int func_by_vn(struct bnx2x *bp, int vn)
+{
+	return 2 * vn + BP_PORT(bp);
+}
+
+/**
+ * bnx2x_link_sync_notify - send notification to other functions.
+ *
+ * @bp:		driver handle
+ *
+ */
+static inline void bnx2x_link_sync_notify(struct bnx2x *bp)
+{
+	int func;
+	int vn;
+
+	/* Set the attention towards other drivers on the same port */
+	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
+		if (vn == BP_VN(bp))
+			continue;
+
+		func = func_by_vn(bp, vn);
+		REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
+		       (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
+	}
+}
+
+/**
+ * bnx2x_update_drv_flags - update flags in shmem
+ *
+ * @bp:		driver handle
+ * @flags:	flags to update
+ * @set:	set or clear
+ *
+ */
+static inline void bnx2x_update_drv_flags(struct bnx2x *bp, u32 flags, u32 set)
+{
+	if (SHMEM2_HAS(bp, drv_flags)) {
+		u32 drv_flags;
+		bnx2x_acquire_hw_lock(bp, HW_LOCK_DRV_FLAGS);
+		drv_flags = SHMEM2_RD(bp, drv_flags);
+
+		if (set)
+			SET_FLAGS(drv_flags, flags);
+		else
+			RESET_FLAGS(drv_flags, flags);
+
+		SHMEM2_WR(bp, drv_flags, drv_flags);
+		DP(NETIF_MSG_HW, "drv_flags 0x%08x\n", drv_flags);
+		bnx2x_release_hw_lock(bp, HW_LOCK_DRV_FLAGS);
+	}
+}
+
 #endif /* BNX2X_CMN_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index 5cba9d7..a0598fd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
@@ -685,24 +685,6 @@ int bnx2x_dcbnl_update_applist(struct bnx2x *bp, bool delall)
 }
 #endif
 
-static inline void bnx2x_update_drv_flags(struct bnx2x *bp, u32 flags, u32 set)
-{
-	if (SHMEM2_HAS(bp, drv_flags)) {
-		u32 drv_flags;
-		bnx2x_acquire_hw_lock(bp, HW_LOCK_DRV_FLAGS);
-		drv_flags = SHMEM2_RD(bp, drv_flags);
-
-		if (set)
-			SET_FLAGS(drv_flags, flags);
-		else
-			RESET_FLAGS(drv_flags, flags);
-
-		SHMEM2_WR(bp, drv_flags, drv_flags);
-		DP(NETIF_MSG_HW, "drv_flags 0x%08x\n", drv_flags);
-		bnx2x_release_hw_lock(bp, HW_LOCK_DRV_FLAGS);
-	}
-}
-
 static inline void bnx2x_dcbx_update_tc_mapping(struct bnx2x *bp)
 {
 	u8 prio, cos;
@@ -755,18 +737,26 @@ void bnx2x_dcbx_set_params(struct bnx2x *bp, u32 state)
 			/* mark DCBX result for PMF migration */
 			bnx2x_update_drv_flags(bp, DRV_FLAGS_DCB_CONFIGURED, 1);
 #ifdef BCM_DCBNL
-			/**
+			/*
 			 * Add new app tlvs to dcbnl
 			 */
 			bnx2x_dcbnl_update_applist(bp, false);
 #endif
-			bnx2x_dcbx_stop_hw_tx(bp);
-
-			/* reconfigure the netdevice with the results of the new
+			/*
+			 * reconfigure the netdevice with the results of the new
 			 * dcbx negotiation.
 			 */
 			bnx2x_dcbx_update_tc_mapping(bp);
 
+			/*
+			 * allow other funtions to update their netdevices
+			 * accordingly
+			 */
+			if (IS_MF(bp))
+				bnx2x_link_sync_notify(bp);
+
+			bnx2x_dcbx_stop_hw_tx(bp);
+
 			return;
 		}
 	case BNX2X_DCBX_STATE_TX_PAUSED:
@@ -775,6 +765,7 @@ void bnx2x_dcbx_set_params(struct bnx2x *bp, u32 state)
 
 		bnx2x_dcbx_update_ets_params(bp);
 		bnx2x_dcbx_resume_hw_tx(bp);
+
 		return;
 	case BNX2X_DCBX_STATE_TX_RELEASED:
 		DP(NETIF_MSG_LINK, "BNX2X_DCBX_STATE_TX_RELEASED\n");
@@ -1863,7 +1854,7 @@ static void bnx2x_dcbx_fw_struct(struct bnx2x *bp,
 void bnx2x_dcbx_pmf_update(struct bnx2x *bp)
 {
 	/* if we need to syncronize DCBX result from prev PMF
-	 * read it from shmem and update bp accordingly
+	 * read it from shmem and update bp and netdev accordingly
 	 */
 	if (SHMEM2_HAS(bp, drv_flags) &&
 	   GET_FLAGS(SHMEM2_RD(bp, drv_flags), DRV_FLAGS_DCB_CONFIGURED)) {
@@ -1875,6 +1866,22 @@ void bnx2x_dcbx_pmf_update(struct bnx2x *bp)
 					  bp->dcbx_error);
 		bnx2x_get_dcbx_drv_param(bp, &bp->dcbx_local_feat,
 					 bp->dcbx_error);
+#ifdef BCM_DCBNL
+		/*
+		 * Add new app tlvs to dcbnl
+		 */
+		bnx2x_dcbnl_update_applist(bp, false);
+		/*
+		 * Send a notification for the new negotiated parameters
+		 */
+		dcbnl_cee_notify(bp->dev, RTM_GETDCB, DCB_CMD_CEE_GET, 0, 0);
+#endif
+		/*
+		 * reconfigure the netdevice with the results of the new
+		 * dcbx negotiation.
+		 */
+		bnx2x_dcbx_update_tc_mapping(bp);
+
 	}
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 26dc539..967c41b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2318,12 +2318,6 @@ static void bnx2x_calc_vn_weight_sum(struct bnx2x *bp)
 					CMNG_FLAGS_PER_PORT_FAIRNESS_VN;
 }
 
-/* returns func by VN for current port */
-static inline int func_by_vn(struct bnx2x *bp, int vn)
-{
-	return 2 * vn + BP_PORT(bp);
-}
-
 static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
 {
 	struct rate_shaping_vars_per_vn m_rs_vn;
@@ -2475,22 +2469,6 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
 	   "rate shaping and fairness are disabled\n");
 }
 
-static inline void bnx2x_link_sync_notify(struct bnx2x *bp)
-{
-	int func;
-	int vn;
-
-	/* Set the attention towards other drivers on the same port */
-	for (vn = VN_0; vn < BP_MAX_VN_NUM(bp); vn++) {
-		if (vn == BP_VN(bp))
-			continue;
-
-		func = func_by_vn(bp, vn);
-		REG_WR(bp, MISC_REG_AEU_GENERAL_ATTN_0 +
-		       (LINK_SYNC_ATTENTION_BIT_FUNC_0 + func)*4, 1);
-	}
-}
-
 /* This function is called upon link interrupt */
 static void bnx2x_link_attn(struct bnx2x *bp)
 {
@@ -2549,6 +2527,9 @@ void bnx2x__link_status_update(struct bnx2x *bp)
 	if (bp->state != BNX2X_STATE_OPEN)
 		return;
 
+	/* read updated dcb configuration */
+	bnx2x_dcbx_pmf_update(bp);
+
 	bnx2x_link_status_update(&bp->link_params, &bp->link_vars);
 
 	if (bp->link_vars.link_up)
-- 
1.7.7.2

^ permalink raw reply related

* [PATCH net-next 07/13] bnx2x: simplify definition of RX_SGE_MASK_LEN and use it.
From: Dmitry Kravkov @ 2011-11-10 15:14 UTC (permalink / raw)
  To: davem, netdev; +Cc: Dmitry Kravkov, Eilon Greenstein
In-Reply-To: <1320938054-31288-1-git-send-email-dmitry@broadcom.com>

Signed-off-by: Dmitry Kravkov <dmitry@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     |    3 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h |    3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index e17a739..b78c384 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -411,8 +411,7 @@ union db_prod {
 
 
 /* Number of u64 elements in SGE mask array */
-#define RX_SGE_MASK_LEN			((NUM_RX_SGE_PAGES * RX_SGE_CNT) / \
-					 BIT_VEC64_ELEM_SZ)
+#define RX_SGE_MASK_LEN			(NUM_RX_SGE / BIT_VEC64_ELEM_SZ)
 #define RX_SGE_MASK_LEN_MASK		(RX_SGE_MASK_LEN - 1)
 #define NEXT_SGE_MASK_ELEM(el)		(((el) + 1) & RX_SGE_MASK_LEN_MASK)
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 59f1291..e8efb01 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -874,8 +874,7 @@ static inline void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp)
 static inline void bnx2x_init_sge_ring_bit_mask(struct bnx2x_fastpath *fp)
 {
 	/* Set the mask to all 1-s: it's faster to compare to 0 than to 0xf-s */
-	memset(fp->sge_mask, 0xff,
-	       (NUM_RX_SGE >> BIT_VEC64_ELEM_SHIFT)*sizeof(u64));
+	memset(fp->sge_mask, 0xff, RX_SGE_MASK_LEN * sizeof(u64));
 
 	/* Clear the two last indices in the page to 1:
 	   these are the indices that correspond to the "next" element,
-- 
1.7.7.2

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox