Netdev List
 help / color / mirror / Atom feed
* Re: net-2.6.24 build broken (allyesconfig)
From: Johannes Berg @ 2007-08-27 12:52 UTC (permalink / raw)
  To: Ilpo Järvinen; +Cc: John W. Linville, David S. Miller, Netdev
In-Reply-To: <Pine.LNX.4.64.0708271522400.16245@kivilampi-30.cs.helsinki.fi>

On Mon, 2007-08-27 at 15:32 +0300, Ilpo Järvinen wrote:

> drivers/net/82596.c:1618:1: error: unterminated argument list invoking 
> macro "DEB"

> Hmm, I would guess that "[NET]: Introduce MAC_FMT/MAC_ARG" broken it, 
> though didn't verify it.
> 
> The fix is left as an exercise of the reader (i.e., the solution wasn't 
> too obvious for me :-) )...

Yup, my fault, sorry about that.

From: Johannes Berg <johannes@sipsolutions.net>
Subject: fix MAC_FMT/MAC_ARG in 82596.c

This fixes a typo in commit f98d4ca4986fec.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>

---
 drivers/net/82596.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- netdev-2.6.orig/drivers/net/82596.c	2007-08-27 14:48:19.674646075 +0200
+++ netdev-2.6/drivers/net/82596.c	2007-08-27 14:48:21.674646075 +0200
@@ -1562,7 +1562,7 @@ static void set_multicast_list(struct ne
 			memcpy(cp, dmi->dmi_addr, 6);
 			if (i596_debug > 1)
 				DEB(DEB_MULTI,printk(KERN_INFO "%s: Adding address " MAC_FMT "\n",
-						dev->name, MAC_ARG(cp));
+						dev->name, MAC_ARG(cp)));
 		}
 		i596_add_cmd(dev, &cmd->cmd);
 	}



^ permalink raw reply

* pktgen Multiqueue support [PATCH]
From: Robert Olsson @ 2007-08-27 14:55 UTC (permalink / raw)
  To: David Miller; +Cc: Robert.Olsson, netdev



Hello,

Below some pktgen support to send into different TX queues.
This can of course be feed into input queues on other machines

Cheers
					--ro



Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>


diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a0db4d1..18601af 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -186,6 +186,7 @@
 #define F_SVID_RND    (1<<10)	/* Random SVLAN ID */
 #define F_FLOW_SEQ    (1<<11)	/* Sequential flows */
 #define F_IPSEC_ON    (1<<12)	/* ipsec on for flows */
+#define F_QUEUE_MAP_RND (1<<13)	/* queue map Random */
 
 /* Thread control flag bits */
 #define T_TERMINATE   (1<<0)
@@ -328,6 +329,7 @@ struct pktgen_dev {
 	__be32 cur_daddr;
 	__u16 cur_udp_dst;
 	__u16 cur_udp_src;
+	__u16 cur_queue_map;
 	__u32 cur_pkt_size;
 
 	__u8 hh[14];
@@ -355,6 +357,10 @@ struct pktgen_dev {
 	unsigned lflow;		/* Flow length  (config) */
 	unsigned nflows;	/* accumulated flows (stats) */
 	unsigned curfl;		/* current sequenced flow (state)*/
+
+	u16 queue_map_min;
+	u16 queue_map_max;
+
 #ifdef CONFIG_XFRM
 	__u8	ipsmode;		/* IPSEC mode (config) */
 	__u8	ipsproto;		/* IPSEC type (config) */
@@ -611,6 +617,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "     flows: %u flowlen: %u\n", pkt_dev->cflows,
 		   pkt_dev->lflow);
 
+	seq_printf(seq,
+		   "     queue_map_min: %u  queue_map_max: %u\n",
+		   pkt_dev->queue_map_min,
+		   pkt_dev->queue_map_max);
+
 	if (pkt_dev->flags & F_IPV6) {
 		char b1[128], b2[128], b3[128];
 		fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr);
@@ -707,6 +718,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->flags & F_MPLS_RND)
 		seq_printf(seq,  "MPLS_RND  ");
 
+	if (pkt_dev->flags & F_QUEUE_MAP_RND)
+		seq_printf(seq,  "QUEUE_MAP_RND  ");
+
 	if (pkt_dev->cflows) {
 		if (pkt_dev->flags & F_FLOW_SEQ)
 			seq_printf(seq,  "FLOW_SEQ  "); /*in sequence flows*/
@@ -762,6 +776,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "     cur_udp_dst: %d  cur_udp_src: %d\n",
 		   pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
 
+	seq_printf(seq, "     cur_queue_map: %u\n", pkt_dev->cur_queue_map);
+
 	seq_printf(seq, "     flows: %u\n", pkt_dev->nflows);
 
 	if (pkt_dev->result[0])
@@ -1213,6 +1229,11 @@ static ssize_t pktgen_if_write(struct file *file,
 		else if (strcmp(f, "FLOW_SEQ") == 0)
 			pkt_dev->flags |= F_FLOW_SEQ;
 
+		else if (strcmp(f, "QUEUE_MAP_RND") == 0)
+			pkt_dev->flags |= F_QUEUE_MAP_RND;
+
+		else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
+			pkt_dev->flags &= ~F_QUEUE_MAP_RND;
 #ifdef CONFIG_XFRM
 		else if (strcmp(f, "IPSEC") == 0)
 			pkt_dev->flags |= F_IPSEC_ON;
@@ -1517,6 +1538,28 @@ static ssize_t pktgen_if_write(struct file *file,
 		return count;
 	}
 
+	if (!strcmp(name, "queue_map_min")) {
+		len = num_arg(&user_buffer[i], 5, &value);
+		if (len < 0) {
+			return len;
+		}
+		i += len;
+		pkt_dev->queue_map_min = value;
+		sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min);
+		return count;
+	}
+
+	if (!strcmp(name, "queue_map_max")) {
+		len = num_arg(&user_buffer[i], 5, &value);
+		if (len < 0) {
+			return len;
+		}
+		i += len;
+		pkt_dev->queue_map_max = value;
+		sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max);
+		return count;
+	}
+
 	if (!strcmp(name, "mpls")) {
 		unsigned n, offset;
 		len = get_labels(&user_buffer[i], pkt_dev);
@@ -2378,6 +2421,20 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		pkt_dev->cur_pkt_size = t;
 	}
 
+	if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+		__u16 t;
+		if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+			t = random32() %
+				(pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
+				+ pkt_dev->queue_map_min;
+		} else {
+			t = pkt_dev->cur_queue_map + 1;
+			if (t > pkt_dev->queue_map_max)
+				t = pkt_dev->queue_map_min;
+		}
+		pkt_dev->cur_queue_map = t;
+	}
+
 	pkt_dev->flows[flow].count++;
 }
 
@@ -2548,6 +2605,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->network_header = skb->tail;
 	skb->transport_header = skb->network_header + sizeof(struct iphdr);
 	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+	skb->queue_mapping = pkt_dev->cur_queue_map;
 
 	iph = ip_hdr(skb);
 	udph = udp_hdr(skb);
@@ -2889,6 +2947,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->network_header = skb->tail;
 	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
 	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+	skb->queue_mapping = pkt_dev->cur_queue_map;
 
 	iph = ipv6_hdr(skb);
 	udph = udp_hdr(skb);

^ permalink raw reply related

* [PATCH 1/2] ehea: propagate physical port state
From: Jan-Bernd Themann @ 2007-08-27 15:06 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: netdev, Christoph Raisch, Jan-Bernd Themann, linux-kernel,
	linux-ppc, Marcus Eder, Thomas Klein, Stefan Roscher

Introduces a module parameter to decide whether the physical
port link state is propagated to the network stack or not.
It makes sense not to take the physical port state into account
on machines with more logical partitions that communicate
with each other. This is always possible no matter what the physical
port state is. Thus eHEA can be considered as a switch there.

Signed-off-by: Jan-Bernd Themann <themann@de.ibm.com>

---
 drivers/net/ehea/ehea.h      |    5 ++++-
 drivers/net/ehea/ehea_main.c |   14 +++++++++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index d67f97b..8d58be5 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -39,7 +39,7 @@
 #include <asm/io.h>
 
 #define DRV_NAME	"ehea"
-#define DRV_VERSION	"EHEA_0073"
+#define DRV_VERSION	"EHEA_0074"
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
@@ -402,6 +402,8 @@ struct ehea_mc_list {
 
 #define EHEA_PORT_UP 1
 #define EHEA_PORT_DOWN 0
+#define EHEA_PHY_LINK_UP 1
+#define EHEA_PHY_LINK_DOWN 0
 #define EHEA_MAX_PORT_RES 16
 struct ehea_port {
 	struct ehea_adapter *adapter;	 /* adapter that owns this port */
@@ -427,6 +429,7 @@ struct ehea_port {
 	u32 msg_enable;
 	u32 sig_comp_iv;
 	u32 state;
+	u8 phy_link;
 	u8 full_duplex;
 	u8 autoneg;
 	u8 num_def_qps;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index db57474..1e9fd6f 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -53,17 +53,21 @@ static int rq3_entries = EHEA_DEF_ENTRIES_RQ3;
 static int sq_entries = EHEA_DEF_ENTRIES_SQ;
 static int use_mcs = 0;
 static int num_tx_qps = EHEA_NUM_TX_QP;
+static int prop_carrier_state = 0;
 
 module_param(msg_level, int, 0);
 module_param(rq1_entries, int, 0);
 module_param(rq2_entries, int, 0);
 module_param(rq3_entries, int, 0);
 module_param(sq_entries, int, 0);
+module_param(prop_carrier_state, int, 0);
 module_param(use_mcs, int, 0);
 module_param(num_tx_qps, int, 0);
 
 MODULE_PARM_DESC(num_tx_qps, "Number of TX-QPS");
 MODULE_PARM_DESC(msg_level, "msg_level");
+MODULE_PARM_DESC(prop_carrier_state, "Propagate carrier state of physical "
+		 "port to stack. 1:yes, 0:no.  Default = 0 ");
 MODULE_PARM_DESC(rq3_entries, "Number of entries for Receive Queue 3 "
 		 "[2^x - 1], x = [6..14]. Default = "
 		 __MODULE_STRING(EHEA_DEF_ENTRIES_RQ3) ")");
@@ -814,7 +818,9 @@ int ehea_set_portspeed(struct ehea_port *port, u32 port_speed)
 			ehea_error("Failed setting port speed");
 		}
 	}
-	netif_carrier_on(port->netdev);
+	if (!prop_carrier_state || (port->phy_link == EHEA_PHY_LINK_UP))
+		netif_carrier_on(port->netdev);
+
 	kfree(cb4);
 out:
 	return ret;
@@ -869,13 +875,19 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe)
 			}
 
 		if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PORT_UP, eqe)) {
+			port->phy_link = EHEA_PHY_LINK_UP;
 			if (netif_msg_link(port))
 				ehea_info("%s: Physical port up",
 					  port->netdev->name);
+			if (prop_carrier_state)
+				netif_carrier_on(port->netdev);
 		} else {
+			port->phy_link = EHEA_PHY_LINK_DOWN;
 			if (netif_msg_link(port))
 				ehea_info("%s: Physical port down",
 					  port->netdev->name);
+			if (prop_carrier_state)
+				netif_carrier_off(port->netdev);
 		}
 
 		if (EHEA_BMASK_GET(NEQE_EXTSWITCH_PRIMARY, eqe))
-- 
1.5.2


^ permalink raw reply related

* [PATCH 2/2] ehea: fix last_rx update
From: Jan-Bernd Themann @ 2007-08-27 15:07 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: netdev, Christoph Raisch, Jan-Bernd Themann, linux-kernel,
	linux-ppc, Marcus Eder, Thomas Klein, Stefan Roscher

Update last_rx in registered device struct instead of
in the dummy device.

Signed-off-by: Jan-Bernd Themann <themann@de.ibm.com>

---
 drivers/net/ehea/ehea_main.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 1e9fd6f..717b129 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -471,7 +471,7 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
 			else
 				netif_receive_skb(skb);
 
-			dev->last_rx = jiffies;
+			port->netdev->last_rx = jiffies;
 		} else {
 			pr->p_stats.poll_receive_errors++;
 			port_reset = ehea_treat_poll_error(pr, rq, cqe,
-- 
1.5.2


^ permalink raw reply related

* Re: [PATCH net-2.6.24] introduce MAC_FMT/MAC_ARG
From: Joe Perches @ 2007-08-27 15:44 UTC (permalink / raw)
  To: Johannes Berg; +Cc: David S. Miller, netdev
In-Reply-To: <1188212049.6756.18.camel@johannes.berg>

On Mon, 2007-08-27 at 12:54 +0200, Johannes Berg wrote:
> Thanks for this patch though, I'd have done it otherwise.

I had it, it was just a s/EUI48/MAC/ and copy/paste thing.

There are also the arch, drivers/[^net], and net directories
that have a few of these.

The patch also added the missing ")" in drivers/net/82596.c

> I was rereading your original conversion and noticed that it is now
> trivial to make the kernel smaller like you originally wanted by doing
> something like this
> 
> -- define this function somewhere --
> print_mac(u8 *mac, char *buf)
> {
> 	sprintf(buf, "%02x:...", mac[0], mac[1], ...);
> }
> EXPORT_SYMBOL(print_mac)
> 
> -- change macros to --
> #define MAC_FMT "%s"
> #define MAC_ARG(a) ({char __buf[18]; print_mac(a, buf); __buf})
> 
> I'm not sure we'd want that, but at the time you said it made the kernel
> significantly smaller and I doubt there's a performance problem with it
> (who prints mac addresses regularly?)

The reduction is ~.1% in an allyesconfig.
The compound statement to hide the automatic works well.
The function call is noise compared to the printk.

cheers, Joe


^ permalink raw reply

* Re: Problem with semantics?
From: Michael Kerrisk @ 2007-08-27 15:46 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Shay Goikhman, netdev, davem
In-Reply-To: <p737inzne6i.fsf@bingen.suse.de>

Hi Andi,

Andi Kleen wrote:
> Shay Goikhman <GOIKHMAN@il.ibm.com> writes:
> 
>> Dear Linux maintainers,
>>
>>  I'm doing :
>>
>>       setsockopt(s,  SO_RCVTIMEO, t1 );                  // set time-out
>> t1 on socket while block receiving on it
>>       select(,,, &fd_set_including(s), .., &errs, t2);      // block till
>> receive or time-out  t 2 jointly on a set of sockets
>>
>> Apparently, I could no find reference on the coupled behavior of the two
>> above statements in Linux documentation.
>> As I understand the blocking semantics, I would expect  that  if t1<t2 ,
>> select should return after t1 with the descriptor 's' in 'errs' if 's' does
>> not become readable in the t1 interval.
>>
>> It is not so in life -- select ignores t1 altogether.
>>
>> Do you have some enlightening knowledge on the matter?
> 
> RCVTIMEO only applies to recvmsg et.al., similar to SNDTIMEO only
> apply to sendmsg etc. But select/poll only report events, they
> do not actually send or receive by themselves.
> 
> Michael, perhaps you can clarify that in the manpages

I added the following to sockets.7:

              Timeouts have
              effect   for  socket  I/O  calls  (read(2),  recv(2),
              recvfrom(2),    recvmsg(2),    write(2),     send(2),
              sendto(2),  sendmsg(2));  timeouts have no effect for
              select(2), poll(2), epoll_wait(2), etc.

The change will be in man-pages-2.65.

Thanks for your note.

Cheers,

Michael

-- 
Michael Kerrisk
maintainer of Linux man pages Sections 2, 3, 4, 5, and 7

Want to help with man page maintenance?  Grab the latest tarball at
http://www.kernel.org/pub/linux/docs/manpages/
read the HOWTOHELP file and grep the source files for 'FIXME'.

^ permalink raw reply

* Re: RFC: issues concerning the next NAPI interface
From: James Chapman @ 2007-08-27 15:51 UTC (permalink / raw)
  To: David Miller
  Cc: shemminger, ossthema, akepner, netdev, raisch, themann,
	linux-kernel, linuxppc-dev, meder, tklein, stefan.roscher
In-Reply-To: <20070826.185815.93042514.davem@davemloft.net>

David Miller wrote:
> From: James Chapman <jchapman@katalix.com>
> Date: Sun, 26 Aug 2007 20:36:20 +0100
> 
>> David Miller wrote:
>>> From: James Chapman <jchapman@katalix.com>
>>> Date: Fri, 24 Aug 2007 18:16:45 +0100
>>>
>>>> Does hardware interrupt mitigation really interact well with NAPI?
>>> It interacts quite excellently.
>> If NAPI disables interrupts and keeps them disabled while there are more 
>> packets arriving or more transmits being completed, why do hardware 
>> interrupt mitigation / coalescing features of the network silicon help?
> 
> Because if your packet rate is low enough such that the cpu can
> process the interrupt fast enough and thus only one packet gets
> processed per NAPI poll, the cost of going into and out of NAPI mode
> dominates the packet processing costs.

In the second half of my previous reply (which seems to have been 
deleted), I suggest a way to avoid this problem without using hardware 
interrupt mitigation / coalescing. Original text is quoted below.

 >> I've seen the same and I'm suggesting that the NAPI driver keeps
 >> itself in polled mode for N polls or M jiffies after it sees
 >> workdone=0. This has always worked for me in packet forwarding
 >> scenarios to maximize packets/sec and minimize latency.

To implement this, there's no need for timers, hrtimers or generic NAPI 
support that others have suggested. A driver's poll() would set an 
internal flag and record the current jiffies value when finding 
workdone=0 rather than doing an immediate napi_complete(). Early in 
poll() it would test this flag and if set, do a low-cost test to see if 
it had any work to do. If no work, it would check the saved jiffies 
value and do the napi_complete() only if no work has been done for a 
configurable number of jiffies. This keeps interrupts disabled longer at 
the expense of many more calls to poll() where no work is done. So 
critical to this scheme is modifying the driver's poll() to fastpath the 
case of having no work to do while waiting for its local jiffy count to 
expire.

Here's an untested patch for tg3 that illustrates the idea.

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 710dccc..59e151b 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3473,6 +3473,24 @@ static int tg3_poll(struct napi_struct *napi,
  	struct tg3_hw_status *sblk = tp->hw_status;
  	int work_done = 0;

+	/* fastpath having no work while we're holding ourself in
+	 * polled mode
+	 */
+	if ((tp->exit_poll_time) && (!tg3_has_work(tp))) {
+		if (time_after(jiffies, tp->exit_poll_time)) {
+			tp->exit_poll_time = 0;
+			/* tell net stack and NIC we're done */
+			netif_rx_complete(netdev, napi);
+			tg3_restart_ints(tp);
+		}
+		return 0;
+	}
+
+	/* if we get here, there might be work to do so disable the
+	 * poll hold fastpath above
+	 */
+	tp->exit_poll_time = 0;
+
  	/* handle link change and other phy events */
  	if (!(tp->tg3_flags &
  	      (TG3_FLAG_USE_LINKCHG_REG |
@@ -3511,11 +3529,11 @@ static int tg3_poll(struct napi_struct *napi,
  	} else
  		sblk->status &= ~SD_STATUS_UPDATED;

-	/* if no more work, tell net stack and NIC we're done */
-	if (!tg3_has_work(tp)) {
-		netif_rx_complete(netdev, napi);
-		tg3_restart_ints(tp);
-	}
+	/* if no more work, set the time in jiffies when we should
+	 * exit polled mode
+	 */
+	if (!tg3_has_work(tp))
+		tp->exit_poll_time = jiffies + 2;

  	return work_done;
  }
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index a6a23bb..a0d24d3 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2163,6 +2163,7 @@ struct tg3 {
  	u32				last_tag;

  	u32				msg_enable;
+	unsigned long			exit_poll_time;

  	/* begin "tx thread" cacheline section */
  	void				(*write32_tx_mbox) (struct tg3 *, u32,


-- 
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development


^ permalink raw reply related

* Re: RFC: issues concerning the next NAPI interface
From: Jan-Bernd Themann @ 2007-08-27 16:02 UTC (permalink / raw)
  To: James Chapman
  Cc: David Miller, shemminger, akepner, netdev, raisch, themann,
	linux-kernel, linuxppc-dev, meder, tklein, stefan.roscher
In-Reply-To: <46D2F301.7050105@katalix.com>

On Monday 27 August 2007 17:51, James Chapman wrote:

> In the second half of my previous reply (which seems to have been 
> deleted), I suggest a way to avoid this problem without using hardware 
> interrupt mitigation / coalescing. Original text is quoted below.
> 
>  >> I've seen the same and I'm suggesting that the NAPI driver keeps
>  >> itself in polled mode for N polls or M jiffies after it sees
>  >> workdone=0. This has always worked for me in packet forwarding
>  >> scenarios to maximize packets/sec and minimize latency.
> 
> To implement this, there's no need for timers, hrtimers or generic NAPI 
> support that others have suggested. A driver's poll() would set an 
> internal flag and record the current jiffies value when finding 
> workdone=0 rather than doing an immediate napi_complete(). Early in 
> poll() it would test this flag and if set, do a low-cost test to see if 
> it had any work to do. If no work, it would check the saved jiffies 
> value and do the napi_complete() only if no work has been done for a 
> configurable number of jiffies. This keeps interrupts disabled longer at 
> the expense of many more calls to poll() where no work is done. So 
> critical to this scheme is modifying the driver's poll() to fastpath the 
> case of having no work to do while waiting for its local jiffy count to 
> expire.
> 

The problem I see with this approach is that the time that passes between
two jiffies might be too long for 10G ethernet adapters. (I tried to implement
a timer based approach with usual timers and the result was a disaster).
HW interrupts / or HP timer avoid the jiffy problem as they activate softIRQs
as soon as you call netif_rx_schedule. 

^ permalink raw reply

* Re: [PATCH] isdn capi driver broken on 64 bit.
From: Stephen Hemminger @ 2007-08-27 16:16 UTC (permalink / raw)
  To: Karsten Keil; +Cc: isdn4linux, netdev
In-Reply-To: <20070827110226.GA15362@pingi.kke.suse.de>

On Mon, 27 Aug 2007 13:02:26 +0200
Karsten Keil <kkeil@suse.de> wrote:

> On Fri, Aug 24, 2007 at 11:08:11AM -0700, Stephen Hemminger wrote:
> > The following driver API is broken on any architecture with 64 bit addresses.
> > because of cast that loses high bits.
> > 
> > Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
> > 
> > 
> > --- a/drivers/isdn/capi/capidrv.c	2007-06-25 09:03:12.000000000 -0700
> > +++ b/drivers/isdn/capi/capidrv.c	2007-08-24 11:06:46.000000000 -0700
> > @@ -1855,6 +1855,9 @@ static int if_sendbuf(int id, int channe
> >  		return 0;
> >  	}
> >  	datahandle = nccip->datahandle;
> > +
> > +	/* This won't work on 64 bit! */
> > +	BUILD_BUG_ON(sizeof(skb->data) > sizeof(u32));
> >  	capi_fill_DATA_B3_REQ(&sendcmsg, global.ap.applid, card->msgid++,
> >  			      nccip->ncci,	/* adr */
> >  			      (u32) skb->data,	/* Data */
> 
> 
> NACK.
> 
> It is not a BUG.
> 
> This is OK, since this field must have a value and on 32 it has the correct
> one) On 64 bit this field is ignored (but also need a value, random data is
> bad as well).

If you are using it as a transaction ID, then you should generate one.
There is no guarantee that two skb's won't have the same 32 bit data value
on 64 bit.
-- 
Stephen Hemminger <shemminger@linux-foundation.org>

^ permalink raw reply

* Re: net/ipv4/fib_trie.c - compile error (Re: 2.6.23-rc3-mm1)
From: Paul E. McKenney @ 2007-08-27 16:23 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: Adrian Bunk, Gabriel C, Andrew Morton, linux-kernel, netdev
In-Reply-To: <20070827063635.GB1703@ff.dom.local>

On Mon, Aug 27, 2007 at 08:36:35AM +0200, Jarek Poplawski wrote:
> On 22-08-2007 19:03, Paul E. McKenney wrote:
> > On Wed, Aug 22, 2007 at 05:41:11PM +0200, Adrian Bunk wrote:
> >> On Wed, Aug 22, 2007 at 05:30:13PM +0200, Gabriel C wrote:
> >>> Got it with a randconfig ( http://194.231.229.228/kernel/mm/2.6.23-rc3-mm1/r/randconfig-8 )
> >>>
> >>> ...
> >>>
> >>> net/ipv4/fib_trie.c: In function 'trie_rebalance':
> >>> net/ipv4/fib_trie.c:969: error: lvalue required as unary '&' operand
> >>> net/ipv4/fib_trie.c:971: error: lvalue required as unary '&' operand
> >>> net/ipv4/fib_trie.c:977: error: lvalue required as unary '&' operand
> >>> net/ipv4/fib_trie.c:980: error: lvalue required as unary '&' operand
> >>> ...
> >> Side effect of the git-net removal, temporarily removing 
> >> immunize-rcu_dereference-against-crazy-compiler-writers.patch should 
> >> work around it.
> > 
> > Alternatively, the following one-line patch to net/ipv4/fib_trie.c could
> > be used.
> > 
> > Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> > ---
> > 
> >  fib_trie.c |    2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff -urpNa -X dontdiff linux-2.6.23-rc3-mm1/net/ipv4/fib_trie.c linux-2.6.23-rc3-mm1.compile/net/ipv4/fib_trie.c
> > --- linux-2.6.23-rc3-mm1/net/ipv4/fib_trie.c	2007-08-22 09:20:33.000000000 -0700
> > +++ linux-2.6.23-rc3-mm1.compile/net/ipv4/fib_trie.c	2007-08-22 09:47:33.000000000 -0700
> > @@ -94,7 +94,7 @@ typedef unsigned int t_key;
> >  #define T_LEAF  1
> >  #define NODE_TYPE_MASK	0x1UL
> >  #define NODE_PARENT(node) \
> > -	((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
> > +	((struct tnode *)(rcu_dereference((node)->parent) & ~NODE_TYPE_MASK))
> ...
> 
> After first reading of this thread I've had an impression it's about
> compiler's behavior, but now it seems to me this patch is not an
> alternative, but a 'must be' and only proper way of calling
> rcu_dereference (with a variable instead of an expression)? Am I
> right?

Yes, rcu_dereference() does indeed need to be invoked on a lvalue.

							Thanx, Paul

^ permalink raw reply

* Remove softirq scheduling from pktgen [PATCH]
From: Robert Olsson @ 2007-08-27 16:57 UTC (permalink / raw)
  To: David Miller; +Cc: Robert.Olsson, netdev



Hello, It's not a job for pktgen.

Cheers
					--ro


Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>


diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 18601af..975e887 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,7 +164,7 @@
 #include <asm/div64.h>		/* do_div */
 #include <asm/timex.h>
 
-#define VERSION  "pktgen v2.68: Packet Generator for packet performance testing.\n"
+#define VERSION  "pktgen v2.69: Packet Generator for packet performance testing.\n"
 
 /* The buckets are exponential in 'width' */
 #define LAT_BUCKETS_MAX 32
@@ -381,7 +381,6 @@ struct pktgen_thread {
 	struct list_head th_list;
 	struct task_struct *tsk;
 	char result[512];
-	u32 max_before_softirq;	/* We'll call do_softirq to prevent starvation. */
 
 	/* Field for thread to receive "posted" events terminate, stop ifs etc. */
 
@@ -1752,9 +1751,6 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
 
 	BUG_ON(!t);
 
-	seq_printf(seq, "Name: %s  max_before_softirq: %d\n",
-		   t->tsk->comm, t->max_before_softirq);
-
 	seq_printf(seq, "Running: ");
 
 	if_lock(t);
@@ -1787,7 +1783,6 @@ static ssize_t pktgen_thread_write(struct file *file,
 	int i = 0, max, len, ret;
 	char name[40];
 	char *pg_result;
-	unsigned long value = 0;
 
 	if (count < 1) {
 		//      sprintf(pg_result, "Wrong command format");
@@ -1861,12 +1856,8 @@ static ssize_t pktgen_thread_write(struct file *file,
 	}
 
 	if (!strcmp(name, "max_before_softirq")) {
-		len = num_arg(&user_buffer[i], 10, &value);
-		mutex_lock(&pktgen_thread_lock);
-		t->max_before_softirq = value;
-		mutex_unlock(&pktgen_thread_lock);
+		sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use");
 		ret = count;
-		sprintf(pg_result, "OK: max_before_softirq=%lu", value);
 		goto out;
 	}
 
@@ -2145,7 +2136,6 @@ static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us)
 		if (spin_until_us - now > jiffies_to_usecs(1) + 1)
 			schedule_timeout_interruptible(1);
 		else if (spin_until_us - now > 100) {
-			do_softirq();
 			if (!pkt_dev->running)
 				return;
 			if (need_resched())
@@ -3515,8 +3505,6 @@ static int pktgen_thread_worker(void *arg)
 	struct pktgen_thread *t = arg;
 	struct pktgen_dev *pkt_dev = NULL;
 	int cpu = t->cpu;
-	u32 max_before_softirq;
-	u32 tx_since_softirq = 0;
 
 	BUG_ON(smp_processor_id() != cpu);
 
@@ -3526,8 +3514,6 @@ static int pktgen_thread_worker(void *arg)
 
 	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid);
 
-	max_before_softirq = t->max_before_softirq;
-
 	set_current_state(TASK_INTERRUPTIBLE);
 
 	set_freezable();
@@ -3546,24 +3532,9 @@ static int pktgen_thread_worker(void *arg)
 
 		__set_current_state(TASK_RUNNING);
 
-		if (pkt_dev) {
-
+		if (pkt_dev) 
 			pktgen_xmit(pkt_dev);
 
-			/*
-			 * We like to stay RUNNING but must also give
-			 * others fair share.
-			 */
-
-			tx_since_softirq += pkt_dev->last_ok;
-
-			if (tx_since_softirq > max_before_softirq) {
-				if (local_softirq_pending())
-					do_softirq();
-				tx_since_softirq = 0;
-			}
-		}
-
 		if (t->control & T_STOP) {
 			pktgen_stop(t);
 			t->control &= ~(T_STOP);

^ permalink raw reply related

* Re: RFC: issues concerning the next NAPI interface
From: James Chapman @ 2007-08-27 17:05 UTC (permalink / raw)
  To: Jan-Bernd Themann
  Cc: David Miller, shemminger, akepner, netdev, raisch, themann,
	linux-kernel, linuxppc-dev, meder, tklein, stefan.roscher
In-Reply-To: <200708271802.48691.ossthema@de.ibm.com>

Jan-Bernd Themann wrote:
> On Monday 27 August 2007 17:51, James Chapman wrote:
> 
>> In the second half of my previous reply (which seems to have been 
>> deleted), I suggest a way to avoid this problem without using hardware 
>> interrupt mitigation / coalescing. Original text is quoted below.
>>
>>  >> I've seen the same and I'm suggesting that the NAPI driver keeps
>>  >> itself in polled mode for N polls or M jiffies after it sees
>>  >> workdone=0. This has always worked for me in packet forwarding
>>  >> scenarios to maximize packets/sec and minimize latency.
>>
>> To implement this, there's no need for timers, hrtimers or generic NAPI 
>> support that others have suggested. A driver's poll() would set an 
>> internal flag and record the current jiffies value when finding 
>> workdone=0 rather than doing an immediate napi_complete(). Early in 
>> poll() it would test this flag and if set, do a low-cost test to see if 
>> it had any work to do. If no work, it would check the saved jiffies 
>> value and do the napi_complete() only if no work has been done for a 
>> configurable number of jiffies. This keeps interrupts disabled longer at 
>> the expense of many more calls to poll() where no work is done. So 
>> critical to this scheme is modifying the driver's poll() to fastpath the 
>> case of having no work to do while waiting for its local jiffy count to 
>> expire.
>>
> 
> The problem I see with this approach is that the time that passes between
> two jiffies might be too long for 10G ethernet adapters. 

Why would staying in polled mode for 2 jiffies be too long in the 10G 
case? I don't see why 10G makes any difference. Your poll() would be 
called as fast as your CPU allows during those 2 jiffies (it would 
actually be between 1 and 2 jiffies in practice). It is therefore 
critical that the driver's poll() implementation is as efficient as 
possible for the "no work" case to minimize the overhead of the extra 
poll() calls. Your poll might be called thousands of times in 1-2 
jiffies with nothing to do...

> (I tried to implement
> a timer based approach with usual timers and the result was a disaster).
> HW interrupts / or HP timer avoid the jiffy problem as they activate softIRQs
> as soon as you call netif_rx_schedule. 

My scheme doesn't use timers to do netif_rx_schedule() because the 
device stays in polled mode for 1-2 jiffies _after_ it detects it has no 
more work. So the device remains scheduled, processing packets as usual. 
The device deschedules itself and re-enables its interrupts only when it 
has a period of 1-2 jiffies of doing no work.

BTW, I chose 2 jiffies in the example patch just to keep the patch 
simple. It might be more for systems with large HZ or those that want to 
be even more aggressive at staying in polled mode. I envisage it being 
another parameter that can be tweaked using ethtool if people see a 
benefit of this scheme.

-- 
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development


^ permalink raw reply

* [PATCH net-2.6.24] e100: fix driver init lockup on e100_up()
From: James Chapman @ 2007-08-27 17:06 UTC (permalink / raw)
  To: netdev; +Cc: e1000-devel

Recent NAPI changes require that napi_enable() is always matched with
a napi_disable(). This patch makes sure that this invariant holds for
e100. It also moves the netif_napi_add() call until after private
pointers have been intialized, though this might only be significant
for cases where netpoll is being used.

Signed-off-by: James Chapman <jchapman@katalix.com>

diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index e25f5ec..48996a4 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2575,11 +2575,12 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
 	nic = netdev_priv(netdev);
-	netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
 	nic->netdev = netdev;
 	nic->pdev = pdev;
 	nic->msg_enable = (1 << debug) - 1;
 	pci_set_drvdata(pdev, netdev);
+	netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
+	napi_disable(&nic->napi);
 
 	if((err = pci_enable_device(pdev))) {
 		DPRINTK(PROBE, ERR, "Cannot enable PCI device, aborting.\n");

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply related

* Re: [E1000-devel] [PATCH net-2.6.24] e100: fix driver init lockup on e100_up()
From: Kok, Auke @ 2007-08-27 17:09 UTC (permalink / raw)
  To: James Chapman; +Cc: netdev, e1000-devel
In-Reply-To: <200708271706.l7RH6wT0024567@quickie.katalix.com>

James Chapman wrote:
> Recent NAPI changes require that napi_enable() is always matched with
> a napi_disable(). This patch makes sure that this invariant holds for
> e100. It also moves the netif_napi_add() call until after private
> pointers have been intialized, though this might only be significant
> for cases where netpoll is being used.
> 
> Signed-off-by: James Chapman <jchapman@katalix.com>
> 
> diff --git a/drivers/net/e100.c b/drivers/net/e100.c
> index e25f5ec..48996a4 100644
> --- a/drivers/net/e100.c
> +++ b/drivers/net/e100.c
> @@ -2575,11 +2575,12 @@ static int __devinit e100_probe(struct pci_dev *pdev,
>  	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
>  
>  	nic = netdev_priv(netdev);
> -	netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
>  	nic->netdev = netdev;
>  	nic->pdev = pdev;
>  	nic->msg_enable = (1 << debug) - 1;
>  	pci_set_drvdata(pdev, netdev);
> +	netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
> +	napi_disable(&nic->napi);

Just wondering, could we even reverse this order? IOW disable NAPI first, then 
add it ?

Otherwise this sounds OK to me.

Auke

^ permalink raw reply

* Re: Remove softirq scheduling from pktgen [PATCH]
From: Christoph Hellwig @ 2007-08-27 17:18 UTC (permalink / raw)
  To: Robert Olsson; +Cc: David Miller, netdev
In-Reply-To: <18131.623.945573.425948@robur.slu.se>

On Mon, Aug 27, 2007 at 06:57:19PM +0200, Robert Olsson wrote:
> 
> 
> Hello, It's not a job for pktgen.

Please also kill the do_softirq export while you're at it.


^ permalink raw reply

* [patch 1/7] qeth: ungrouping a device must not be interruptible
From: Ursula Braun @ 2007-08-27 17:27 UTC (permalink / raw)
  To: jgarzik, netdev, linux-s390; +Cc: frank.blaschka
In-Reply-To: <20070827172731.370533000@linux.vnet.ibm.com>

[-- Attachment #1: 703-qeth-ungroup.diff --]
[-- Type: text/plain, Size: 1494 bytes --]

From: Ursula Braun <braunu@de.ibm.com>

Problem:
A recovery thread must not be active when device is removed.
In qeth_remove_device() an interruptible wait operation is used
to wait until a qeth recovery thread is finished. If a user really
interrupts the ungroup operation of a qeth device while a recovery
is running, cio and qeth are out of sync (device already removed
from cio, but kept in qeth). A following module unload of qeth
results in a kernel OOPS here.

Solution:
Do not allow interruption of ungroup operation to guarantee
finishing of a potentially running qeth recovery thread.

Signed-off-by: Ursula Braun <braunu@de.ibm.com>
---

 drivers/s390/net/qeth_main.c |    5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

Index: linux-2.6-uschi/drivers/s390/net/qeth_main.c
===================================================================
--- linux-2.6-uschi.orig/drivers/s390/net/qeth_main.c
+++ linux-2.6-uschi/drivers/s390/net/qeth_main.c
@@ -561,7 +561,7 @@ qeth_set_offline(struct ccwgroup_device 
 }
 
 static int
-qeth_wait_for_threads(struct qeth_card *card, unsigned long threads);
+qeth_threads_running(struct qeth_card *card, unsigned long threads);
 
 
 static void
@@ -576,8 +576,7 @@ qeth_remove_device(struct ccwgroup_devic
 	if (!card)
 		return;
 
-	if (qeth_wait_for_threads(card, 0xffffffff))
-		return;
+	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
 
 	if (cgdev->state == CCWGROUP_ONLINE){
 		card->use_hard_stop = 1;

-- 

^ permalink raw reply

* [patch 2/7] qeth: enforce a rate limit for inbound scatter gather messages
From: Ursula Braun @ 2007-08-27 17:27 UTC (permalink / raw)
  To: jgarzik, netdev, linux-s390; +Cc: frank.blaschka
In-Reply-To: <20070827172731.370533000@linux.vnet.ibm.com>

[-- Attachment #1: 704-qeth-rate-limit.diff --]
[-- Type: text/plain, Size: 1369 bytes --]

From: Frank Blaschka <frank.blaschka@de.ibm.com>

under memory pressure scatter gather mode switching messages must be
rate limited.

Signed-off-by: Frank Blaschka <frank.blaschka@de.ibm.com>
Signed-off-by: Ursula Braun <braunu@de.ibm.com>
---

 drivers/s390/net/qeth_main.c |   13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

Index: linux-2.6-uschi/drivers/s390/net/qeth_main.c
===================================================================
--- linux-2.6-uschi.orig/drivers/s390/net/qeth_main.c
+++ linux-2.6-uschi/drivers/s390/net/qeth_main.c
@@ -2803,13 +2803,16 @@ qeth_queue_input_buffer(struct qeth_card
 		if (newcount < count) {
 			/* we are in memory shortage so we switch back to
 			   traditional skb allocation and drop packages */
-			if (atomic_cmpxchg(&card->force_alloc_skb, 0, 1))
-				printk(KERN_WARNING
-					"qeth: switch to alloc skb\n");
+			if (!atomic_read(&card->force_alloc_skb) &&
+			    net_ratelimit())
+				PRINT_WARN("Switch to alloc skb\n");
+			atomic_set(&card->force_alloc_skb, 3);
 			count = newcount;
 		} else {
-			if (atomic_cmpxchg(&card->force_alloc_skb, 1, 0))
-				printk(KERN_WARNING "qeth: switch to sg\n");
+			if ((atomic_read(&card->force_alloc_skb) == 1) &&
+			    net_ratelimit())
+				PRINT_WARN("Switch to sg\n");
+			atomic_add_unless(&card->force_alloc_skb, -1, 0);
 		}
 
 		/*

-- 

^ permalink raw reply

* [patch 3/7] qeth: dont return the return values of void functions.
From: Ursula Braun @ 2007-08-27 17:27 UTC (permalink / raw)
  To: jgarzik, netdev, linux-s390; +Cc: frank.blaschka, Heiko Carstens
In-Reply-To: <20070827172731.370533000@linux.vnet.ibm.com>

[-- Attachment #1: 705-qeth-return.diff --]
[-- Type: text/plain, Size: 1619 bytes --]

From: Heiko Carstens <heiko.carstens@de.ibm.com>

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Ursula Braun <braunu@de.ibm.com>
---

 drivers/s390/net/qeth.h     |    4 ++--
 drivers/s390/net/qeth_sys.c |    8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

Index: linux-2.6-uschi/drivers/s390/net/qeth.h
===================================================================
--- linux-2.6-uschi.orig/drivers/s390/net/qeth.h
+++ linux-2.6-uschi/drivers/s390/net/qeth.h
@@ -1178,9 +1178,9 @@ qeth_ipaddr_to_string(enum qeth_prot_ver
 		      char *buf)
 {
 	if (proto == QETH_PROT_IPV4)
-		return qeth_ipaddr4_to_string(addr, buf);
+		qeth_ipaddr4_to_string(addr, buf);
 	else if (proto == QETH_PROT_IPV6)
-		return qeth_ipaddr6_to_string(addr, buf);
+		qeth_ipaddr6_to_string(addr, buf);
 }
 
 static inline int
Index: linux-2.6-uschi/drivers/s390/net/qeth_sys.c
===================================================================
--- linux-2.6-uschi.orig/drivers/s390/net/qeth_sys.c
+++ linux-2.6-uschi/drivers/s390/net/qeth_sys.c
@@ -1760,10 +1760,10 @@ qeth_remove_device_attributes(struct dev
 {
 	struct qeth_card *card = dev->driver_data;
 
-	if (card->info.type == QETH_CARD_TYPE_OSN)
-		return sysfs_remove_group(&dev->kobj,
-					  &qeth_osn_device_attr_group);
-
+	if (card->info.type == QETH_CARD_TYPE_OSN) {
+		sysfs_remove_group(&dev->kobj, &qeth_osn_device_attr_group);
+		return;
+	}
 	sysfs_remove_group(&dev->kobj, &qeth_device_attr_group);
 	sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group);
 	sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group);

-- 

^ permalink raw reply

* Re: [PATCH] fix e100 rx path on ARM (was [PATCH] e100 rx: or s and el bits)
From: Kok, Auke @ 2007-08-27 17:34 UTC (permalink / raw)
  To: Milton Miller, David Acker
  Cc: Kok, Auke, e1000-devel, netdev, Jesse Brandeburg, Scott Feldman,
	John Ronciak, Jeff Kirsher, Jeff Garzik
In-Reply-To: <f2baf1c4eb6d0eff237e6f252ce671c7@bga.com>

Milton Miller wrote:
> On Jun 5, 2007, at 8:34 AM, David Acker wrote:

David, Milton,

This was the last communication on-topic for the proposed changes to fix e100 on 
ARM. We're holding our breath here waiting for more, and would love to hear that 
this issue and fixes hasn't died off.

Thanks,

Auke



>> Milton Miller wrote:
>>> On Jun 1, 2007, at 3:45 PM, David Acker wrote:
>>>> Ok, I took a stab at coding and testing these ideas.  Below is a 
>>>> patch against 2.6.22-rc3.
>>>> Let me know what you think.
>>> I think you got most of the ideas.   As Auke noted, your coding style 
>>> is showing again.   And your mailer again munged whitespace (fixed by 
>>> s/^<space><space>/<space>/ s/^$/<space>/).
>> Sorry about the coding style.  I instinctively followed what was there 
>> instead of kernel coding convention.  I will look into how whitespace 
>> is getting screwed up.
> 
> I have to watch my coding style too (I like to indent the closing 
> brace).
> 
> At least the white space damage seems to be reversable.  More than I 
> can say for this mailer.
> 
>>>> Find a buffer that is complete with rx->el not set and rx->s0 set.
>>>>     It appears that hardware can read the rfd's el-bit, then 
>>>> software can clear the rfd el-bit and set the rfd size, and then 
>>>> hardware can come in and read the size.
>>> Yes, since the size is after the EL flag in the descriptor, this can 
>>> happen since the pci read is not atomic.
>>>> I am reading the status back, although I don't think that I have to 
>>>> in this instance.
>>> Actually, you are reading it when the rfd still has EL set.  Since 
>>> the cpu will never encounter that case, the if condition is never 
>>> satisfied.
>> In my tests, every time I found a completed rfd with the el-bit set, 
>> the receiver was in the out of resources state.
> 
> Yes, if the EL was set, it would be a real hard race to find the 
> completed packet with EL but not RNR.   I was trying to refer to where 
> you find a completed packet and then check for EL in the RFD.  That is 
> what I was claiming can not be observed by the cpu (unless the card 
> writes the EL bit back, and not just the status u16).
> 
> If the unless ... above is true, then please put a comment that the 
> device can write RFD->EL back to 1 if we raced.
> 
> 
>>> How about creating a state unknown, for when we think we should check 
>>> the device if its running.
>>> If we are in this state and then encounter a received packet without 
>>> s0 set, we can set it back
>>> to running.   We set it when we rx a packet with s0 set.
>>> We then move both io_status reads to the caller.
>> I can look into that as I clean this up.
>>
>>
>>>> I am testing a version of this code patched against 2.6.18.4 on my 
>>>> PXA 255 based system.  I will let you all know how it goes.
>> The testing I did so far did well.  I will try to get some more going 
>> tonight, hopefully on a cleaned up patch.
> 
> Good to hear our expectiations match reality.
> 
>>
>>> I'm assuming this is why the cleanup of the receiver start to always 
>>> start on rx_to_clean got dropped again. :-)
>> Yep.  I will get that in the next patch.
> 
> Ok.
> 
>>> Also, I would like a few sentences in the Driver Operation section IV 
>>> Receive big comment.  Something like
>>> In order to keep updates to the RFD link field from colliding with 
>>> hardware writes to mark packets complete, we use the feature that 
>>> hardware will not write to a size 0 descriptor and mark the previous 
>>> packet as end-of-list (EL).   After updating the link, we remove EL 
>>> and only then restore the size such that hardware may use the 
>>> previous-to-end RFD.
>>> at the end of the first paragraph, and insert software before "no 
>>> locking is required" in the second.
>> Sounds good to me.
>>
>> I will see if I can get into a cleaned up patch today and get it out 
>> by tomorrow.  Thanks for dealing with me...I have been around kernel 
>> code for awhile but posting official patches to linux is new to me.
>> -Ack
> 
> I've just learned by watching the lists over the last several years.  
> Well, and actually writing the odd patch here and there.
> 
> It occurs to me that I have been focusing on the code and not the 
> changelog.   I'll send a seperate reply on that thread shortly.
> 
> One more thing I'll state here ... as per the perfect patch guidelines, 
> it is preferred that the meta-discussion about the patch and its 
> history go after the change log, seperated from it by a line of "--- " 
> so that the patch application scripts can just extract the email 
> subject as the title and through the firsst line of --- as the commit 
> log.  (This saves some manual editing).
> 
> [1] http://kernelnewbies.org/UpstreamMerge
> 
> milton
> 
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply

* Re: Remove softirq scheduling from pktgen [PATCH]
From: Robert Olsson @ 2007-08-27 18:22 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Robert Olsson, David Miller, netdev
In-Reply-To: <20070827171846.GA2264@infradead.org>


Christoph Hellwig writes:

 > > Hello, It's not a job for pktgen.
 > 
 > Please also kill the do_softirq export while you're at it.


 Right seems like pktgen luckily was the only user.

 Cheers
					--ro


Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>


diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0f546dd..dbbdcd7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -271,8 +271,6 @@ asmlinkage void do_softirq(void)
 	local_irq_restore(flags);
 }
 
-EXPORT_SYMBOL(do_softirq);
-
 #endif
 
 /*

^ permalink raw reply related

* Re: [PATCH] fix e100 rx path on ARM (was [PATCH] e100 rx: or s and el bits)
From: David Acker @ 2007-08-27 18:32 UTC (permalink / raw)
  To: Kok, Auke
  Cc: e1000-devel, netdev, Jesse Brandeburg, Milton Miller,
	Scott Feldman, John Ronciak, Jeff Kirsher, Jeff Garzik
In-Reply-To: <46D30B0D.5020605@intel.com>

Kok, Auke wrote:
> Milton Miller wrote:
>> On Jun 5, 2007, at 8:34 AM, David Acker wrote:
> 
> David, Milton,
> 
> This was the last communication on-topic for the proposed changes to fix 
> e100 on ARM. We're holding our breath here waiting for more, and would 
> love to hear that this issue and fixes hasn't died off.
> 
> Thanks,
> 
> Auke

I am sorry folks, this is my fault.  I got pulled onto a fire on one of 
our other products.  I have only recently come back to working on our 
product that uses the e100 on ARM.  Based on my current time available 
to finish cleaning up this patch, I should have a new version available 
by the end of this week.
-Ack


> 
> 
> 
>>> Milton Miller wrote:
>>>> On Jun 1, 2007, at 3:45 PM, David Acker wrote:
>>>>> Ok, I took a stab at coding and testing these ideas.  Below is a 
>>>>> patch against 2.6.22-rc3.
>>>>> Let me know what you think.
>>>> I think you got most of the ideas.   As Auke noted, your coding 
>>>> style is showing again.   And your mailer again munged whitespace 
>>>> (fixed by s/^<space><space>/<space>/ s/^$/<space>/).
>>> Sorry about the coding style.  I instinctively followed what was 
>>> there instead of kernel coding convention.  I will look into how 
>>> whitespace is getting screwed up.
>>
>> I have to watch my coding style too (I like to indent the closing brace).
>>
>> At least the white space damage seems to be reversable.  More than I 
>> can say for this mailer.
>>
>>>>> Find a buffer that is complete with rx->el not set and rx->s0 set.
>>>>>     It appears that hardware can read the rfd's el-bit, then 
>>>>> software can clear the rfd el-bit and set the rfd size, and then 
>>>>> hardware can come in and read the size.
>>>> Yes, since the size is after the EL flag in the descriptor, this can 
>>>> happen since the pci read is not atomic.
>>>>> I am reading the status back, although I don't think that I have to 
>>>>> in this instance.
>>>> Actually, you are reading it when the rfd still has EL set.  Since 
>>>> the cpu will never encounter that case, the if condition is never 
>>>> satisfied.
>>> In my tests, every time I found a completed rfd with the el-bit set, 
>>> the receiver was in the out of resources state.
>>
>> Yes, if the EL was set, it would be a real hard race to find the 
>> completed packet with EL but not RNR.   I was trying to refer to where 
>> you find a completed packet and then check for EL in the RFD.  That is 
>> what I was claiming can not be observed by the cpu (unless the card 
>> writes the EL bit back, and not just the status u16).
>>
>> If the unless ... above is true, then please put a comment that the 
>> device can write RFD->EL back to 1 if we raced.
>>
>>
>>>> How about creating a state unknown, for when we think we should 
>>>> check the device if its running.
>>>> If we are in this state and then encounter a received packet without 
>>>> s0 set, we can set it back
>>>> to running.   We set it when we rx a packet with s0 set.
>>>> We then move both io_status reads to the caller.
>>> I can look into that as I clean this up.
>>>
>>>
>>>>> I am testing a version of this code patched against 2.6.18.4 on my 
>>>>> PXA 255 based system.  I will let you all know how it goes.
>>> The testing I did so far did well.  I will try to get some more going 
>>> tonight, hopefully on a cleaned up patch.
>>
>> Good to hear our expectiations match reality.
>>
>>>
>>>> I'm assuming this is why the cleanup of the receiver start to always 
>>>> start on rx_to_clean got dropped again. :-)
>>> Yep.  I will get that in the next patch.
>>
>> Ok.
>>
>>>> Also, I would like a few sentences in the Driver Operation section 
>>>> IV Receive big comment.  Something like
>>>> In order to keep updates to the RFD link field from colliding with 
>>>> hardware writes to mark packets complete, we use the feature that 
>>>> hardware will not write to a size 0 descriptor and mark the previous 
>>>> packet as end-of-list (EL).   After updating the link, we remove EL 
>>>> and only then restore the size such that hardware may use the 
>>>> previous-to-end RFD.
>>>> at the end of the first paragraph, and insert software before "no 
>>>> locking is required" in the second.
>>> Sounds good to me.
>>>
>>> I will see if I can get into a cleaned up patch today and get it out 
>>> by tomorrow.  Thanks for dealing with me...I have been around kernel 
>>> code for awhile but posting official patches to linux is new to me.
>>> -Ack
>>
>> I've just learned by watching the lists over the last several years.  
>> Well, and actually writing the odd patch here and there.
>>
>> It occurs to me that I have been focusing on the code and not the 
>> changelog.   I'll send a seperate reply on that thread shortly.
>>
>> One more thing I'll state here ... as per the perfect patch 
>> guidelines, it is preferred that the meta-discussion about the patch 
>> and its history go after the change log, seperated from it by a line 
>> of "--- " so that the patch application scripts can just extract the 
>> email subject as the title and through the firsst line of --- as the 
>> commit log.  (This saves some manual editing).
>>
>> [1] http://kernelnewbies.org/UpstreamMerge
>>
>> milton
>>
>> -
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply

* Re: RFC: issues concerning the next NAPI interface
From: David Miller @ 2007-08-27 20:37 UTC (permalink / raw)
  To: ossthema
  Cc: jchapman, shemminger, akepner, netdev, raisch, themann,
	linux-kernel, linuxppc-dev, meder, tklein, stefan.roscher
In-Reply-To: <200708271147.01890.ossthema@de.ibm.com>

From: Jan-Bernd Themann <ossthema@de.ibm.com>
Date: Mon, 27 Aug 2007 11:47:01 +0200

> So the question is simply: Do we want drivers that need (benefit
> from) a timer based polling support to implement their own timers
> each, or should there be a generic support?

I'm trying to figure out how an hrtimer implementation would
even work.

Would you start the timer from the chip interrupt handler?  If so,
that's taking two steps backwards as you've already taken all of the
overhead of running the interrupt handler.

^ permalink raw reply

* Re: [PATCH net-2.6.24] introduce MAC_FMT/MAC_ARG
From: David Miller @ 2007-08-27 20:41 UTC (permalink / raw)
  To: johannes; +Cc: joe, netdev
In-Reply-To: <1188212049.6756.18.camel@johannes.berg>

From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 27 Aug 2007 12:54:09 +0200

> -- change macros to --
> #define MAC_FMT "%s"
> #define MAC_ARG(a) ({char __buf[18]; print_mac(a, buf); __buf})
> 
> I'm not sure we'd want that, but at the time you said it made the kernel
> significantly smaller and I doubt there's a performance problem with it
> (who prints mac addresses regularly?)

I don't think this works.

The scope of the __buf[18] array is inside of that MAC_ARG()
expression, which will be fully evaluated before constructing
the argument to printk().

Therefore printk() will be passed what is essentially a stale stack
pointer.

You'd need something like a "MAC_BUF buf;" all the callers need
to declare, and a new "buf" argument to MAC_ARG().

If this was the goal, there are better approches to this, how
about just calling:

	print_mac(dev->dev_addr);

Sure, we'll have to split up printk() calls, but in the end it's
likely still smaller and better.  And I think it's much cleaner
than this macro stuff.


^ permalink raw reply

* Re: RFC: issues concerning the next NAPI interface
From: David Miller @ 2007-08-27 21:02 UTC (permalink / raw)
  To: jchapman
  Cc: tklein, themann, stefan.roscher, netdev, linux-kernel, raisch,
	linuxppc-dev, akepner, meder, ossthema, shemminger
In-Reply-To: <46D2F301.7050105@katalix.com>

From: James Chapman <jchapman@katalix.com>
Date: Mon, 27 Aug 2007 16:51:29 +0100

> To implement this, there's no need for timers, hrtimers or generic NAPI 
> support that others have suggested. A driver's poll() would set an 
> internal flag and record the current jiffies value when finding 
> workdone=0 rather than doing an immediate napi_complete(). Early in 
> poll() it would test this flag and if set, do a low-cost test to see if 
> it had any work to do. If no work, it would check the saved jiffies 
> value and do the napi_complete() only if no work has been done for a 
> configurable number of jiffies. This keeps interrupts disabled longer at 
> the expense of many more calls to poll() where no work is done. So 
> critical to this scheme is modifying the driver's poll() to fastpath the 
> case of having no work to do while waiting for its local jiffy count to 
> expire.
> 
> Here's an untested patch for tg3 that illustrates the idea.

It's only going to work with hrtimers, these interfaces can
process at least 100,000 per jiffies tick.

And the hrtimer granularity is going to need to be significantly low,
and futhermore you're adding a guaranteed extra interrupt (for the
hrtimer firing) in these cases where we're exactly trying to avoid is
more interrupts.

If you can make it work, fine, but it's going to need to be at a
minimum disabled when the hrtimer granularity is not sufficient.

But there are huger fish to fry for you I think.  Talk to your
platform maintainers and ask for an interface for obtaining
a flat static distribution of interrupts to cpus in order to
support multiqueue NAPI better.

In your previous postings you made arguments saying that the
automatic placement of interrupts to cpus made everything
bunch of to a single cpu and you wanted to propagate the
NAPI work to other cpu's software interrupts from there.

That logic is bogus, because it merely proves that the hardware
interrupt distribution is broken.  If it's a bad cpu to run
software interrupts on, it's also a bad cpu to run hardware
interrupts on.

^ permalink raw reply

* Re: [E1000-devel] [PATCH net-2.6.24] e100: fix driver init lockup on e100_up()
From: James Chapman @ 2007-08-27 21:03 UTC (permalink / raw)
  To: Kok, Auke, David S. Miller; +Cc: netdev, e1000-devel
In-Reply-To: <46D3055F.5060201@intel.com>

Kok, Auke wrote:
> James Chapman wrote:
>> Recent NAPI changes require that napi_enable() is always matched with
>> a napi_disable(). This patch makes sure that this invariant holds for
>> e100. It also moves the netif_napi_add() call until after private
>> pointers have been intialized, though this might only be significant
>> for cases where netpoll is being used.
>>
>> Signed-off-by: James Chapman <jchapman@katalix.com>
>>
>> diff --git a/drivers/net/e100.c b/drivers/net/e100.c
>> index e25f5ec..48996a4 100644
>> --- a/drivers/net/e100.c
>> +++ b/drivers/net/e100.c
>> @@ -2575,11 +2575,12 @@ static int __devinit e100_probe(struct pci_dev 
>> *pdev,
>>      strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
>>  
>>      nic = netdev_priv(netdev);
>> -    netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
>>      nic->netdev = netdev;
>>      nic->pdev = pdev;
>>      nic->msg_enable = (1 << debug) - 1;
>>      pci_set_drvdata(pdev, netdev);
>> +    netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
>> +    napi_disable(&nic->napi);
> 
> Just wondering, could we even reverse this order? IOW disable NAPI 
> first, then add it ?

I think the order shouldn't matter. DaveM?

> Otherwise this sounds OK to me.

-- 
James Chapman
Katalix Systems Ltd
http://www.katalix.com
Catalysts for your Embedded Linux software development


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox