[PATCH] proportional share accept()

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] proportional share accept()
@ 2004-02-25 16:43 Vivek Kashyap
  2004-02-25 17:21 ` Stephen Hemminger
  2004-02-26  2:35 ` James Morris
  0 siblings, 2 replies; 15+ messages in thread
From: Vivek Kashyap @ 2004-02-25 16:43 UTC (permalink / raw)
  To: netdev


Attached is a patch for an application to selectively provide a 
faster response and connection rate to favoured clients. 

The single accept queue in the socket is modified to be multiple
queues with a weight assigned to each queue. The accept()ance of 
connections is scheduled in proportion of the weights assigned to the
queues.

The incoming connection requests are associated with queues using
iptables MARK target rules(using the client's src/dest ip address).
The MARK used is the index of the relevant queue. The proportions on
the queues can be modified by the application to suit the
requirements using a socket option.

The results of an example run are on http://ckrm.sourceforge.net.

Thanks
	Vivek

-----------------------------------------------------------------------------

diff -urN linux-2.6.3_old/include/linux/sysctl.h linux-2.6.3/include/linux/sysctl.h
--- linux-2.6.3_old/include/linux/sysctl.h	2004-02-17 19:58:10.000000000 -0800
+++ linux-2.6.3/include/linux/sysctl.h	2004-02-23 
17:49:49.000000000 -0800
@@ -312,6 +312,7 @@
 	NET_TCP_LOW_LATENCY=93,
 	NET_IPV4_IPFRAG_SECRET_INTERVAL=94,
 	NET_TCP_WESTWOOD=95,
+	NET_TCP_ACCEPTQ_SHARE=96,
 };
 
 enum {
diff -urN linux-2.6.3_old/include/linux/tcp.h linux-2.6.3/include/linux/tcp.h
--- linux-2.6.3_old/include/linux/tcp.h	2004-02-17 19:57:52.000000000 -0800
+++ linux-2.6.3/include/linux/tcp.h	2004-02-24 15:34:35.000000000 -0800
@@ -20,6 +20,8 @@
 #include <linux/types.h>
 #include <asm/byteorder.h>
 
+#define NUM_ACCEPT_QUEUES	8
+
 struct tcphdr {
 	__u16	source;
 	__u16	dest;
@@ -127,6 +129,7 @@
 #define TCP_WINDOW_CLAMP	10	/* Bound advertised window */
 #define TCP_INFO		11	/* Information about this connection. */
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
+#define TCP_ACCEPTQ_SHARE	13	/* Set accept queue share */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -185,6 +188,13 @@
 	__u32	tcpi_reordering;
 };
 
+struct tcp_acceptq_info {
+	unsigned char acceptq_shares;
+	unsigned long acceptq_wait_time;
+	unsigned int acceptq_count;
+	unsigned int acceptq_qcount;
+};
+
 #ifdef __KERNEL__
 
 #include <linux/config.h>
@@ -362,7 +372,6 @@
 
 	/* FIFO of established children */
 	struct open_request	*accept_queue;
-	struct open_request	*accept_queue_tail;
 
 	int			write_pending;	/* A write to socket waits to start. */
 
@@ -388,6 +397,20 @@
                 __u32    rtt;
                 __u32    rtt_min;          /* minimum observed RTT */
         } westwood;
+
+	char 		acceptq_max_class;
+	unsigned long	acceptq_share_clock;
+	struct {
+		struct open_request     *aq_head;
+		struct open_request     *aq_tail;
+		unsigned int		 aq_weight;
+		unsigned int 		 aq_finish_ticket;
+		unsigned int             aq_wait_time;
+		unsigned int             aq_count;
+		unsigned int             aq_qcount;
+		unsigned int             aq_backlog;
+	} acceptq[NUM_ACCEPT_QUEUES];
+
 };
 
 /* WARNING: don't change the layout of the members in tcp_sock! */
diff -urN linux-2.6.3_old/include/net/tcp.h linux-2.6.3/include/net/tcp.h
--- linux-2.6.3_old/include/net/tcp.h	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3/include/net/tcp.h	2004-02-24 15:23:18.000000000 -0800
@@ -580,6 +580,7 @@
 extern int sysctl_tcp_frto;
 extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_westwood;
+extern int sysctl_tcp_acceptq_share[NUM_ACCEPT_QUEUES];
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
@@ -639,6 +640,9 @@
 		struct tcp_v6_open_req v6_req;
 #endif
 	} af;
+	unsigned int acceptq_start_ticket;
+	unsigned long acceptq_time_stamp;
+	int	      acceptq_class;
 };
 
 /* SLAB cache for open requests. */
@@ -1688,43 +1692,28 @@
 	return tcp_win_from_space(sk->sk_rcvbuf); 
 }
 
-static inline void tcp_acceptq_removed(struct sock *sk)
+static inline void tcp_acceptq_removed(struct sock *sk, int class)
 {
-	sk->sk_ack_backlog--;
+	tcp_sk(sk)->acceptq[class].aq_backlog--;
 }
 
-static inline void tcp_acceptq_added(struct sock *sk)
+static inline void tcp_acceptq_added(struct sock *sk, int class)
 {
-	sk->sk_ack_backlog++;
+	tcp_sk(sk)->acceptq[class].aq_backlog++;
 }
 
-static inline int tcp_acceptq_is_full(struct sock *sk)
+static inline int tcp_acceptq_is_full(struct sock *sk, int class)
 {
-	return sk->sk_ack_backlog > sk->sk_max_ack_backlog;
+	return tcp_sk(sk)->acceptq[class].aq_backlog >
+		sk->sk_max_ack_backlog;
 }
 
-static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
-					 struct sock *child)
-{
-	struct tcp_opt *tp = tcp_sk(sk);
-
-	req->sk = child;
-	tcp_acceptq_added(sk);
-
-	if (!tp->accept_queue_tail) {
-		tp->accept_queue = req;
-	} else {
-		tp->accept_queue_tail->dl_next = req;
-	}
-	tp->accept_queue_tail = req;
-	req->dl_next = NULL;
-}
 
 struct tcp_listen_opt
 {
 	u8			max_qlen_log;	/* log_2 of maximal queued SYNs */
 	int			qlen;
-	int			qlen_young;
+	int			qlen_young[NUM_ACCEPT_QUEUES];
 	int			clock_hand;
 	u32			hash_rnd;
 	struct open_request	*syn_table[TCP_SYNQ_HSIZE];
@@ -1738,16 +1727,16 @@
 	if (--lopt->qlen == 0)
 		tcp_delete_keepalive_timer(sk);
 	if (req->retrans == 0)
-		lopt->qlen_young--;
+		lopt->qlen_young[req->acceptq_class]--;
 }
 
-static inline void tcp_synq_added(struct sock *sk)
+static inline void tcp_synq_added(struct sock *sk, struct open_request *req)
 {
 	struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
 
 	if (lopt->qlen++ == 0)
 		tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
-	lopt->qlen_young++;
+	lopt->qlen_young[req->acceptq_class]++;
 }
 
 static inline int tcp_synq_len(struct sock *sk)
@@ -1755,9 +1744,9 @@
 	return tcp_sk(sk)->listen_opt->qlen;
 }
 
-static inline int tcp_synq_young(struct sock *sk)
+static inline int tcp_synq_young(struct sock *sk, int class)
 {
-	return tcp_sk(sk)->listen_opt->qlen_young;
+	return tcp_sk(sk)->listen_opt->qlen_young[class];
 }
 
 static inline int tcp_synq_is_full(struct sock *sk)
@@ -1796,6 +1785,12 @@
 	req->acked = 0;
 	req->ecn_ok = 0;
 	req->rmt_port = skb->h.th->source;
+	req->acceptq_start_ticket = 0;
+	req->acceptq_time_stamp = 0;
+	if (tp->acceptq_max_class)
+		req->acceptq_class = (skb->nfmark <= 0) ? 0 :
+			((skb->nfmark > NUM_ACCEPT_QUEUES) ?
+			 0: skb->nfmark);
 }
 
 #define TCP_MEM_QUANTUM	((int)PAGE_SIZE)
diff -urN linux-2.6.3_old/net/ipv4/sysctl_net_ipv4.c linux-2.6.3/net/ipv4/sysctl_net_ipv4.c
--- linux-2.6.3_old/net/ipv4/sysctl_net_ipv4.c	2004-02-17 19:58:50.000000000 -0800
+++ linux-2.6.3/net/ipv4/sysctl_net_ipv4.c	2004-02-22 07:46:27.000000000 -0800
@@ -592,6 +592,14 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+        {
+                .ctl_name       = NET_TCP_ACCEPTQ_SHARE,
+                .procname       = "tcp_acceptq_share",
+                .data           = &sysctl_tcp_acceptq_share,
+                .maxlen         = sizeof(sysctl_tcp_acceptq_share),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec
+	        },
 	{ .ctl_name = 0 }
 };
 
diff -urN linux-2.6.3_old/net/ipv4/tcp.c linux-2.6.3/net/ipv4/tcp.c
--- linux-2.6.3_old/net/ipv4/tcp.c	2004-02-17 19:57:21.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp.c	2004-02-25 00:25:52.000000000 -0800
@@ -280,6 +280,12 @@
 int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
 int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
 
+/*
+ *  By default all fall in class 0 with all time allocated to the given
+ *  class
+ */
+int sysctl_tcp_acceptq_share[NUM_ACCEPT_QUEUES] = { 100 };
+
 atomic_t tcp_memory_allocated;	/* Current allocated memory. */
 atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */
 
@@ -534,13 +540,34 @@
 
 int tcp_listen_start(struct sock *sk)
 {
+	int i, j = 0;
 	struct inet_opt *inet = inet_sk(sk);
 	struct tcp_opt *tp = tcp_sk(sk);
 	struct tcp_listen_opt *lopt;
 
 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
-	tp->accept_queue = tp->accept_queue_tail = NULL;
+	tp->accept_queue = NULL;
+	tp->acceptq_max_class = 0;
+
+	for (i=0; i < NUM_ACCEPT_QUEUES; i++) {
+		tp->acceptq[i].aq_tail = NULL;
+		tp->acceptq[i].aq_head = NULL;
+		j += sysctl_tcp_acceptq_share[i];
+		if (j > 100)    /* ignore other values */
+			tp->acceptq[i].aq_weight = 0;
+		else { 
+			if (sysctl_tcp_acceptq_share[i]) { 
+				tp->acceptq[i].aq_weight = 
+					1000/sysctl_tcp_acceptq_share[i]; 
+				tp->acceptq_max_class = i; 
+			} 
+		} 
+		tp->acceptq[i].aq_wait_time = 0; 
+		tp->acceptq[i].aq_qcount = 0; 
+		tp->acceptq[i].aq_count = 0; 
+	}
+
 	tp->syn_wait_lock = RW_LOCK_UNLOCKED;
 	tcp_delack_init(tp);
 
@@ -600,7 +627,7 @@
 	write_lock_bh(&tp->syn_wait_lock);
 	tp->listen_opt = NULL;
 	write_unlock_bh(&tp->syn_wait_lock);
-	tp->accept_queue = tp->accept_queue_tail = NULL;
+	tp->accept_queue = NULL;
 
 	if (lopt->qlen) {
 		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
@@ -646,7 +673,7 @@
 		local_bh_enable();
 		sock_put(child);
 
-		tcp_acceptq_removed(sk);
+		tcp_acceptq_removed(sk, req->acceptq_class);
 		tcp_openreq_fastfree(req);
 	}
 	BUG_TRAP(!sk->sk_ack_backlog);
@@ -2221,6 +2248,62 @@
 }
 
 /*
+ *  This function will queue a new request into the accept queue.
+ */
+void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
+			struct sock *child)
+{
+	struct tcp_opt *tp = tcp_sk(sk);
+	int class = req->acceptq_class;
+	int prev_class;
+
+	req->sk = child;
+	tcp_acceptq_added(sk,class);
+
+	if (!tp->acceptq[class].aq_weight) 
+		class = 0;
+
+	tp->acceptq[class].aq_qcount++;
+
+	if (!tp->acceptq[class].aq_tail) {
+		if (tp->acceptq[class].aq_finish_ticket<tp->acceptq_share_clock)
+		       	req->acceptq_start_ticket = tp->acceptq_share_clock;
+		else
+			req->acceptq_start_ticket = 
+					tp->acceptq[class].aq_finish_ticket;
+		tp->acceptq[class].aq_finish_ticket =req->acceptq_start_ticket +
+			tp->acceptq[class].aq_weight;
+
+		tp->acceptq[class].aq_head = req;
+		tp->acceptq[class].aq_tail = req;
+
+		prev_class = class - 1;
+		while (prev_class >= 0) {
+			if (tp->acceptq[prev_class].aq_tail)
+				break;
+			prev_class--;
+		}
+		if (prev_class >= 0) {
+			req->dl_next = 
+				tp->acceptq[prev_class].aq_tail->dl_next;
+			tp->acceptq[prev_class].aq_tail->dl_next = req;
+		} else {
+				req->dl_next = tp->accept_queue;
+				tp->accept_queue = req;
+			}
+	}
+	else {
+		req->acceptq_start_ticket = tp->acceptq[class].aq_finish_ticket;
+		tp->acceptq[class].aq_finish_ticket += 
+					tp->acceptq[class].aq_weight;
+		req->dl_next = tp->acceptq[class].aq_tail->dl_next;
+		tp->acceptq[class].aq_tail->dl_next = req;
+		tp->acceptq[class].aq_tail = req;
+	}
+	req->acceptq_time_stamp = jiffies;
+}
+
+/*
  *	This will accept the next outstanding connection.
  */
 
@@ -2230,6 +2313,8 @@
 	struct open_request *req;
 	struct sock *newsk;
 	int error;
+	int prev_class, i, first, min_class;
+	unsigned int min_st;
 
 	lock_sock(sk);
 
@@ -2244,6 +2329,10 @@
 	if (!tp->accept_queue) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
+		tp->acceptq_share_clock = 0;
+		for (i=0; i < NUM_ACCEPT_QUEUES; i++)
+			tp->acceptq[i].aq_finish_ticket = 0;
+
 		/* If this is a non blocking socket don't sleep */
 		error = -EAGAIN;
 		if (!timeo)
@@ -2254,12 +2343,44 @@
 			goto out;
 	}
 
-	req = tp->accept_queue;
-	if ((tp->accept_queue = req->dl_next) == NULL)
-		tp->accept_queue_tail = NULL;
+	first = 1;
+	for( i =0; i <= tp->acceptq_max_class; i++) {
+		if ((req = tp->acceptq[i].aq_head)) {
+			if (first) {
+				min_st = req->acceptq_start_ticket;
+				min_class = i;
+				first = 0;
+			}
+			else 
+				if (req->acceptq_start_ticket < min_st) {
+					min_st = req->acceptq_start_ticket;
+					min_class = i;
+				}
+		}
+	}
+
+        req = tp->acceptq[min_class].aq_head;
+	tp->acceptq[min_class].aq_count++;
+	tp->acceptq[min_class].aq_qcount--;
+	tp->acceptq_share_clock = req->acceptq_start_ticket;
+	tp->acceptq[min_class].aq_wait_time+=(jiffies- req->acceptq_time_stamp);
+
+	for (prev_class= min_class-1 ; prev_class >=0; prev_class--)
+		if (tp->acceptq[prev_class].aq_tail)
+			break;
+	if (prev_class>=0)
+		tp->acceptq[prev_class].aq_tail->dl_next = req->dl_next; 
+	else 
+		tp->accept_queue = req->dl_next;
+
+	if (req == tp->acceptq[min_class].aq_tail) 
+		tp->acceptq[min_class].aq_head = 
+			tp->acceptq[min_class].aq_tail = NULL;
+	else
+		tp->acceptq[min_class].aq_head = req->dl_next;
 
  	newsk = req->sk;
-	tcp_acceptq_removed(sk);
+	tcp_acceptq_removed(sk, req->acceptq_class);
 	tcp_openreq_fastfree(req);
 	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
 	release_sock(sk);
@@ -2429,6 +2550,34 @@
 			}
 		}
 		break;
+		
+	case TCP_ACCEPTQ_SHARE:
+		{
+			char share_wt[NUM_ACCEPT_QUEUES];
+			int i, j = 0;
+
+			if (copy_from_user(share_wt,optval, optlen)) {
+				err = -EFAULT;
+				break;
+			}
+			for (i=0; i < NUM_ACCEPT_QUEUES; i++) 
+				j += share_wt[i];
+
+			if (!j || j > 100 )
+				err = -EINVAL;
+			else {
+				for (i=0; i < NUM_ACCEPT_QUEUES; i++) {
+				    if (share_wt[i]) {
+					tp->acceptq_max_class = i;
+					tp->acceptq[i].aq_weight = 
+							1000/ share_wt[i];
+					}
+					else
+						tp->acceptq[i].aq_weight = 0;
+				}
+			}
+		}
+		break;
 
 	default:
 		err = -ENOPROTOOPT;
@@ -2555,6 +2704,34 @@
 	case TCP_QUICKACK:
 		val = !tp->ack.pingpong;
 		break;
+
+	case TCP_ACCEPTQ_SHARE: {
+		struct tcp_acceptq_info tinfo[NUM_ACCEPT_QUEUES];
+		int i;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+		bzero(tinfo, sizeof(tinfo));
+		for(i=0; i < NUM_ACCEPT_QUEUES; i++) {
+			tinfo[i].acceptq_wait_time = 
+				tp->acceptq[i].aq_wait_time/(HZ/USER_HZ);
+			if (tp->acceptq[i].aq_weight)
+				tinfo[i].acceptq_shares = 
+					1000/ tp->acceptq[i].aq_weight;
+			else
+				tinfo[i].acceptq_shares = 0; 
+			tinfo[i].acceptq_qcount = tp->acceptq[i].aq_qcount;
+			tinfo[i].acceptq_count = tp->acceptq[i].aq_count;
+		}
+		len = min_t(unsigned int, len, sizeof(tinfo));
+		if (put_user(len, optlen)) 
+			return -EFAULT;
+			
+		if (copy_to_user(optval, (char *)tinfo, len))
+			return -EFAULT;
+		
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	};
diff -urN linux-2.6.3_old/net/ipv4/tcp_ipv4.c linux-2.6.3/net/ipv4/tcp_ipv4.c
--- linux-2.6.3_old/net/ipv4/tcp_ipv4.c	2004-02-17 19:57:22.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp_ipv4.c	2004-02-23 17:59:38.000000000 -0800
@@ -916,7 +916,7 @@
 	lopt->syn_table[h] = req;
 	write_unlock(&tp->syn_wait_lock);
 
-	tcp_synq_added(sk);
+	tcp_synq_added(sk, req);
 }
 
 
@@ -1413,6 +1413,7 @@
 	__u32 daddr = skb->nh.iph->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
+	int class = 0;
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1437,12 +1438,17 @@
 		goto drop;
 	}
 
+	if (tcp_sk(sk)->acceptq_max_class)
+		class = (skb->nfmark <= 0) ? 0 :
+			((skb->nfmark > NUM_ACCEPT_QUEUES) ? NUM_ACCEPT_QUEUES:
+			 skb->nfmark);
+
 	/* Accept backlog is full. If we have already queued enough
 	 * of warm entries in syn queue, drop request. It is better than
 	 * clogging syn queue with openreqs with exponentially increasing
 	 * timeout.
 	 */
-	if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+	if (tcp_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
 		goto drop;
 
 	req = tcp_openreq_alloc();
@@ -1567,7 +1573,7 @@
 	struct tcp_opt *newtp;
 	struct sock *newsk;
 
-	if (tcp_acceptq_is_full(sk))
+	if (tcp_acceptq_is_full(sk, req->acceptq_class))
 		goto exit_overflow;
 
 	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
diff -urN linux-2.6.3_old/net/ipv4/tcp_minisocks.c linux-2.6.3/net/ipv4/tcp_minisocks.c
--- linux-2.6.3_old/net/ipv4/tcp_minisocks.c	2004-02-17 19:58:56.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp_minisocks.c	2004-02-24 15:38:58.000000000 -0800
@@ -779,7 +779,9 @@
 		newtp->num_sacks = 0;
 		newtp->urg_data = 0;
 		newtp->listen_opt = NULL;
-		newtp->accept_queue = newtp->accept_queue_tail = NULL;
+		newtp->accept_queue = NULL;
+		memset(newtp->acceptq, 0,sizeof(newtp->acceptq));
+		newtp->acceptq_share_clock = newtp->acceptq_max_class = 0;
 		/* Deinitialize syn_wait_lock to trap illegal accesses. */
 		memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
 
diff -urN linux-2.6.3_old/net/ipv4/tcp_timer.c linux-2.6.3/net/ipv4/tcp_timer.c
--- linux-2.6.3_old/net/ipv4/tcp_timer.c	2004-02-17 19:59:28.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp_timer.c	2004-02-24 19:50:51.000000000 -0800
@@ -498,7 +498,12 @@
 	 * ones are about to clog our table.
 	 */
 	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
-		int young = (lopt->qlen_young<<1);
+		int young = 0;
+	       
+		for(i=0; i < NUM_ACCEPT_QUEUES; i++) 
+			young += lopt->qlen_young[i];
+		
+		young <<= 1;
 
 		while (thresh > 2) {
 			if (lopt->qlen < young)
@@ -524,9 +529,8 @@
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
-						lopt->qlen_young--;
-					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
-						    TCP_RTO_MAX);
+			         		lopt->qlen_young[req->acceptq_class]--;
+					timeo = min((TCP_TIMEOUT_INIT << req->retrans), TCP_RTO_MAX);
 					req->expires = now + timeo;
 					reqp = &req->dl_next;
 					continue;
@@ -538,7 +542,7 @@
 				write_unlock(&tp->syn_wait_lock);
 				lopt->qlen--;
 				if (req->retrans == 0)
-					lopt->qlen_young--;
+					lopt->qlen_young[req->acceptq_class]--;
 				tcp_openreq_free(req);
 				continue;
 			}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 16:43 [PATCH] proportional share accept() Vivek Kashyap
@ 2004-02-25 17:21 ` Stephen Hemminger
  2004-02-25 18:12   ` David S. Miller
       [not found]   ` <Pine.WNT.4.44.0402251101300.1572-100000@w-vkashyap95.des.sequent.com>
  2004-02-26  2:35 ` James Morris
  1 sibling, 2 replies; 15+ messages in thread
From: Stephen Hemminger @ 2004-02-25 17:21 UTC (permalink / raw)
  To: Vivek Kashyap; +Cc: netdev

Just a cursory question, but would it be possible to just use iptables
to classify the packets and assign them to different IP addresses.
The the application could manage the priorities by opening two different
sockets (bound to different IP addresses) and do the scheduling there?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 17:21 ` Stephen Hemminger
@ 2004-02-25 18:12   ` David S. Miller
  2004-02-25 19:07     ` kashyapv
       [not found]   ` <Pine.WNT.4.44.0402251101300.1572-100000@w-vkashyap95.des.sequent.com>
  1 sibling, 1 reply; 15+ messages in thread
From: David S. Miller @ 2004-02-25 18:12 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: kashyapv, netdev

On Wed, 25 Feb 2004 09:21:16 -0800
Stephen Hemminger <shemminger@osdl.org> wrote:

> Just a cursory question, but would it be possible to just use iptables
> to classify the packets and assign them to different IP addresses.
> The the application could manage the priorities by opening two different
> sockets (bound to different IP addresses) and do the scheduling there?

That's how I would attempt to implement this.  I really don't like this
idea of putting a pseudo-classifier in the accept queue logic, and an
ugly GLOBAL one at that, if anything it ought to be per-socket but even
that I don't like.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 18:12   ` David S. Miller
@ 2004-02-25 19:07     ` kashyapv
  2004-02-25 19:17       ` David S. Miller
  0 siblings, 1 reply; 15+ messages in thread
From: kashyapv @ 2004-02-25 19:07 UTC (permalink / raw)
  To: David S. Miller; +Cc: Stephen Hemminger, netdev

It is a per socket scheduler not a global one. The control of the proportions
is with the socket/application. The sysctl can be removed - even then it only 
initialises the socket's proportions individually. The default is to use
only one queue.

A user space solution forces existing  applications to be modified. 
Additionally, it is not as efficient - If I understand the suggestion 
correctly the incoming request and data will have to be mangled both while 
coming in and going out if the client expects the reply from the same 
address it sent the conenction request to. 

Also, an in-kernel solution allows administrative control and tuning 
without affecting the applications at all. The administrator can as per 
the policy (which may change over time) modify the proportions using another
interface. Otherwise, each application must provide a way to manage/modify 
its scheduler.

Vivek

On Wed, 25 Feb 2004, David S. Miller wrote:

> On Wed, 25 Feb 2004 09:21:16 -0800
> Stephen Hemminger <shemminger@osdl.org> wrote:
> 
> > Just a cursory question, but would it be possible to just use iptables
> > to classify the packets and assign them to different IP addresses.
> > The the application could manage the priorities by opening two different
> > sockets (bound to different IP addresses) and do the scheduling there?
> 
> That's how I would attempt to implement this.  I really don't like this
> idea of putting a pseudo-classifier in the accept queue logic, and an
> ugly GLOBAL one at that, if anything it ought to be per-socket but even
> that I don't like.
> 
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 19:07     ` kashyapv
@ 2004-02-25 19:17       ` David S. Miller
  2004-02-25 19:30         ` kashyapv
  0 siblings, 1 reply; 15+ messages in thread
From: David S. Miller @ 2004-02-25 19:17 UTC (permalink / raw)
  To: kashyapv; +Cc: shemminger, netdev

On Wed, 25 Feb 2004 11:07:48 -0800 (PST)
kashyapv <kashyapv@us.ibm.com> wrote:

> Also, an in-kernel solution allows administrative control and tuning 
> without affecting the applications at all. The administrator can as per 
> the policy (which may change over time) modify the proportions using another
> interface. Otherwise, each application must provide a way to manage/modify 
> its scheduler.

Since all of the classification we're suggesting is via the kernel, the
administrator has the same kinds of controls and it is also without any
application modifications.

We're saying, to classify packets so that they get prioritized however you
would have prioritized things in the accept queue (ie. mark SYN packets
with address X as having priority Y).  The let the packet scheduler or
netfilter take care of the rest.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 19:17       ` David S. Miller
@ 2004-02-25 19:30         ` kashyapv
  2004-02-26 22:08           ` Ronghua Zhang
  0 siblings, 1 reply; 15+ messages in thread
From: kashyapv @ 2004-02-25 19:30 UTC (permalink / raw)
  To: David S. Miller; +Cc: shemminger, netdev

On Wed, 25 Feb 2004, David S. Miller wrote:

> On Wed, 25 Feb 2004 11:07:48 -0800 (PST)
> kashyapv <kashyapv@us.ibm.com> wrote:
> 
> > Also, an in-kernel solution allows administrative control and tuning 
> > without affecting the applications at all. The administrator can as per 
> > the policy (which may change over time) modify the proportions using another
> > interface. Otherwise, each application must provide a way to manage/modify 
> > its scheduler.
> 
> Since all of the classification we're suggesting is via the kernel, the
> administrator has the same kinds of controls and it is also without any
> application modifications.

How do you change the scheduler's proportions? Not the classification
itself which is controlled using iptables.

> 
> We're saying, to classify packets so that they get prioritized however you
> would have prioritized things in the accept queue (ie. mark SYN packets
> with address X as having priority Y).  The let the packet scheduler or
> netfilter take care of the rest.

In the in-kernel accept queues the netfilter MARKs it and the packet
is queued to the relevant accept queue. Where and how is the packet getting
queued to a differnt socket (address mangling?)? 

> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
       [not found]   ` <Pine.WNT.4.44.0402251101300.1572-100000@w-vkashyap95.des.sequent.com>
@ 2004-02-25 20:03     ` Stephen Hemminger
  2004-02-25 22:31       ` kashyapv
  0 siblings, 1 reply; 15+ messages in thread
From: Stephen Hemminger @ 2004-02-25 20:03 UTC (permalink / raw)
  To: Vivek Kashyap; +Cc: netdev, David S. Miller

On Wed, 25 Feb 2004 11:04:13 -0800 (Pacific Standard Time)
Vivek Kashyap <kashyapv@us.ibm.com> wrote:

> Stephen,
> 
> I've not understood your suggestion. How would you implement the solution?

I assume the goal is to provide some high level of resource control over network utilization
in a system. The accept proposal is part of more general problem of how to virtualize
the network infrastructure and services to provide different QoS based on a set of
parameters.

Your assumption that server application is blind to all this. But the kind of applications
that would need this resource control are already pretty smart (databases and web servers).
So wouldn't you want the application to be involved.  Imagine a database;  wouldn't you want
connections that come in with high priority to be scheduled in the database with high priority?

Your solution seems like having an express line to get into Disneyland, but still
ending up waiting for all the rides...

It just seems to add more complexity, complexity isn't always bad; just something
that should be avoided if possible.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 20:03     ` Stephen Hemminger
@ 2004-02-25 22:31       ` kashyapv
  0 siblings, 0 replies; 15+ messages in thread
From: kashyapv @ 2004-02-25 22:31 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, David S. Miller

On Wed, 25 Feb 2004, Stephen Hemminger wrote:

> On Wed, 25 Feb 2004 11:04:13 -0800 (Pacific Standard Time)
> Vivek Kashyap <kashyapv@us.ibm.com> wrote:
> 
> > Stephen,
> > 
> > I've not understood your suggestion. How would you implement the solution?
> 
> I assume the goal is to provide some high level of resource control over network utilization
> in a system. The accept proposal is part of more general problem of how to virtualize
> the network infrastructure and services to provide different QoS based on a set of
> parameters.
> 
> Your assumption that server application is blind to all this. But the kind of applications

Not at all.  The  'seeing' application has it much easier if it can
set the prioritisation in the accept queue using this proposal.  Makes it
easier for the application to do what it does well rather than worry about
another feature  that rightfully needs to be a  service (in the kernel). 

The 'blind' applcation can be affected in the desired way 
without modification using the in-kernel solution. 

> that would need this resource control are already pretty smart (databases and web servers).
> So wouldn't you want the application to be involved.  Imagine a database;  wouldn't you want
> connections that come in with high priority to be scheduled in the database with high priority?

And that is what is being done quite efficiently as per the proposal. The
higher priority request is picked more favourably than the lower-priority 
requests in accordance with the proportion set.  The connections from 
addresses in range X can be classified as 'bronze' - 1, Y as silver - 2, 
and  Z as gold -3. The weights are assigned/modified per socket. 

Now if the application is 'smart' and knows what to do, it can use the 
socket option to set/reset its shares.  Alternatively, the admin keeps 
track of the traffic and response needs and accordingly tunes the proportions.
The in-kernel solution is quite generic -- the change is only to the 
accept logic.

In the case you suggest  it is per application. How does the application
become 'smart' is not clear. Mangling packets to queue to different
ip addresses, if that is how the solution will work, is not a good idea. 
Also, it is a lot better to solve it in place rather than in every application
that needs it.

Vivek

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 16:43 [PATCH] proportional share accept() Vivek Kashyap
  2004-02-25 17:21 ` Stephen Hemminger
@ 2004-02-26  2:35 ` James Morris
  1 sibling, 0 replies; 15+ messages in thread
From: James Morris @ 2004-02-26  2:35 UTC (permalink / raw)
  To: Vivek Kashyap; +Cc: netdev

On Wed, 25 Feb 2004, Vivek Kashyap wrote:

>  static inline int tcp_synq_is_full(struct sock *sk)
> @@ -1796,6 +1785,12 @@
>  	req->acked = 0;
>  	req->ecn_ok = 0;
>  	req->rmt_port = skb->h.th->source;
> +	req->acceptq_start_ticket = 0;
> +	req->acceptq_time_stamp = 0;
> +	if (tp->acceptq_max_class)
> +		req->acceptq_class = (skb->nfmark <= 0) ? 0 :
> +			((skb->nfmark > NUM_ACCEPT_QUEUES) ?
> +			 0: skb->nfmark);
>  }

Have you tried compiling this without CONFIG_NETFILTER?


- James
-- 
James Morris
<jmorris@redhat.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-25 19:30         ` kashyapv
@ 2004-02-26 22:08           ` Ronghua Zhang
  2004-02-27 22:13             ` David S. Miller
  2004-03-01  6:05             ` David S. Miller
  0 siblings, 2 replies; 15+ messages in thread
From: Ronghua Zhang @ 2004-02-26 22:08 UTC (permalink / raw)
  To: kashyapv; +Cc: David S. Miller, shemminger, netdev

Here is another option. We can either start multiple instances of the
applications on different ports, or modify the application to listen to
several ports. All these ports are not the well-known service ports, and
not visible to the clients. Instead, the kernel creates a 'steathy' listen
socket on the well-known service port. After the connection is
established, based on some classification rules, the open_request
structure is moved from the accept queue of the 'steathy' socket to
the the accept queue of one socket the application is listening to.

This procedure is transparent to the client and the application. The
classification can be implemented as a module, and only a few places in
the kernel need to be touched.

I have a prototype implementation. You can check out
www.cs.virginia.edu/~rz5b/research/kernel-qos.htm for details.

Ronghua

 On Wed, 25 Feb 2004, kashyapv wrote:

> On Wed, 25 Feb 2004, David S. Miller wrote:
>
> > On Wed, 25 Feb 2004 11:07:48 -0800 (PST)
> > kashyapv <kashyapv@us.ibm.com> wrote:
> >
> > > Also, an in-kernel solution allows administrative control and tuning
> > > without affecting the applications at all. The administrator can as per
> > > the policy (which may change over time) modify the proportions using another
> > > interface. Otherwise, each application must provide a way to manage/modify
> > > its scheduler.
> >
> > Since all of the classification we're suggesting is via the kernel, the
> > administrator has the same kinds of controls and it is also without any
> > application modifications.
>
> How do you change the scheduler's proportions? Not the classification
> itself which is controlled using iptables.
>
> >
> > We're saying, to classify packets so that they get prioritized however you
> > would have prioritized things in the accept queue (ie. mark SYN packets
> > with address X as having priority Y).  The let the packet scheduler or
> > netfilter take care of the rest.
>
> In the in-kernel accept queues the netfilter MARKs it and the packet
> is queued to the relevant accept queue. Where and how is the packet getting
> queued to a differnt socket (address mangling?)?
>
> >
> >
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-26 22:08           ` Ronghua Zhang
@ 2004-02-27 22:13             ` David S. Miller
  2004-03-04 20:51               ` Vivek Kashyap
  2004-03-01  6:05             ` David S. Miller
  1 sibling, 1 reply; 15+ messages in thread
From: David S. Miller @ 2004-02-27 22:13 UTC (permalink / raw)
  To: Ronghua Zhang; +Cc: kashyapv, shemminger, netdev

On Thu, 26 Feb 2004 17:08:23 -0500 (EST)
Ronghua Zhang <rz5b@cs.virginia.edu> wrote:

> I have a prototype implementation. You can check out
> www.cs.virginia.edu/~rz5b/research/kernel-qos.htm for details.

Let me think about this, I'm going to need some time as I have some
higher-prio stuff to look at right now.

I'm sort of willing to entertain the idea of some kind of scheme like
this new idea seems to be.

We'll see...

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-26 22:08           ` Ronghua Zhang
  2004-02-27 22:13             ` David S. Miller
@ 2004-03-01  6:05             ` David S. Miller
  2004-03-01 10:11               ` Vivek Kashyap
  1 sibling, 1 reply; 15+ messages in thread
From: David S. Miller @ 2004-03-01  6:05 UTC (permalink / raw)
  To: Ronghua Zhang; +Cc: kashyapv, shemminger, netdev

On Thu, 26 Feb 2004 17:08:23 -0500 (EST)
Ronghua Zhang <rz5b@cs.virginia.edu> wrote:

> www.cs.virginia.edu/~rz5b/research/kernel-qos.htm

I looked at this, it's basically netfilter TCP port rewriting
which knows about per-socket quotas and limits.

You could write this as a 10 line netfilter module, with zero
modifications to any of the core net/ipv4 TCP code at all.
And that's how I'd like to see something like this done.

Otherwise it looks like a nice way of doing this.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-03-01  6:05             ` David S. Miller
@ 2004-03-01 10:11               ` Vivek Kashyap
  0 siblings, 0 replies; 15+ messages in thread
From: Vivek Kashyap @ 2004-03-01 10:11 UTC (permalink / raw)
  To: David S. Miller; +Cc: Ronghua Zhang, shemminger, netdev

On Sun, 29 Feb 2004, David S. Miller wrote:

> On Thu, 26 Feb 2004 17:08:23 -0500 (EST)
> Ronghua Zhang <rz5b@cs.virginia.edu> wrote:
>
> > www.cs.virginia.edu/~rz5b/research/kernel-qos.htm
>
> I looked at this, it's basically netfilter TCP port rewriting
> which knows about per-socket quotas and limits.
>
> You could write this as a 10 line netfilter module, with zero
> modifications to any of the core net/ipv4 TCP code at all.
> And that's how I'd like to see something like this done.


Multiple sockets can be forwarded the requests either at an alternative port
or address using netfilter. However one has to now mangle the data packets
too since the client is still talking to the advertised port (and
address).

I've modified the earlier post to remove the sysctl and removed the use of
STFQ/virtual clock and instead used 'weighted round robin' in
tcp_accept().  The solution is very much per socket.

The default is to have only one queue that is active. If the shares (simple
ratios) are set then the packets are accept()ed in a WRR fashion.  Since
all work is done in context of a single socket no additional locks have to
be held. If the shares are unset then the processing reverts to single
queue mode.

The per queue backlog is to avoid lower priority requests from filling up
the synq and thereby blocking the higher priority requests. Also the
iptables' MARK target is quite lightweight.

Vivek


-----------------------------------------------------------------------------

diff -urN linux-2.6.3_old/include/linux/tcp.h linux-2.6.3/include/linux/tcp.h
--- linux-2.6.3_old/include/linux/tcp.h	2004-02-17 19:57:52.000000000 -0800
+++ linux-2.6.3/include/linux/tcp.h	2004-02-29 22:01:11.000000000 -0800
@@ -128,6 +128,10 @@
 #define TCP_INFO		11	/* Information about this connection. */
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */

+#ifdef CONFIG_ACCEPT_QUEUES
+#define TCP_ACCEPTQ_SHARE	13	/* Set accept queue share */
+#endif
+
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
 #define TCPI_OPT_WSCALE		4
@@ -185,6 +189,18 @@
 	__u32	tcpi_reordering;
 };

+#ifdef CONFIG_ACCEPT_QUEUES
+
+#define NUM_ACCEPT_QUEUES	8 	/* Must be power of 2 */
+
+struct tcp_acceptq_info {
+	unsigned char acceptq_shares;
+	unsigned long acceptq_wait_time;
+	unsigned int acceptq_qcount;
+	unsigned int acceptq_count;
+};
+#endif
+
 #ifdef __KERNEL__

 #include <linux/config.h>
@@ -362,8 +378,9 @@

 	/* FIFO of established children */
 	struct open_request	*accept_queue;
-	struct open_request	*accept_queue_tail;
-
+#ifndef CONFIG_ACCEPT_QUEUES
+	struct open_request     *accept_queue_tail;
+#endif
 	int			write_pending;	/* A write to socket waits to start. */

 	unsigned int		keepalive_time;	  /* time before keep alive takes place */
@@ -388,6 +405,22 @@
                 __u32    rtt;
                 __u32    rtt_min;          /* minimum observed RTT */
         } westwood;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+	/* move to listen opt... */
+	char		class_index;
+	struct {
+		struct open_request     *aq_head;
+		struct open_request     *aq_tail;
+		unsigned int		 aq_cnt;
+		unsigned int		 aq_ratio;
+		unsigned int             aq_count;
+		unsigned int             aq_qcount;
+		unsigned int             aq_backlog;
+		unsigned int             aq_wait_time;
+		int			 aq_valid;
+	} acceptq[NUM_ACCEPT_QUEUES];
+#endif
 };

 /* WARNING: don't change the layout of the members in tcp_sock! */
diff -urN linux-2.6.3_old/include/net/tcp.h linux-2.6.3/include/net/tcp.h
--- linux-2.6.3_old/include/net/tcp.h	2004-02-17 19:57:16.000000000 -0800
+++ linux-2.6.3/include/net/tcp.h	2004-02-29 21:32:44.000000000 -0800
@@ -639,6 +639,10 @@
 		struct tcp_v6_open_req v6_req;
 #endif
 	} af;
+#ifdef CONFIG_ACCEPT_QUEUES
+	unsigned long acceptq_time_stamp;
+	int	      acceptq_class;
+#endif
 };

 /* SLAB cache for open requests. */
@@ -1688,6 +1692,69 @@
 	return tcp_win_from_space(sk->sk_rcvbuf);
 }

+#ifdef CONFIG_ACCEPT_QUEUES
+static inline void tcp_acceptq_removed(struct sock *sk, int class)
+{
+	tcp_sk(sk)->acceptq[class].aq_backlog--;
+}
+
+static inline void tcp_acceptq_added(struct sock *sk, int class)
+{
+	tcp_sk(sk)->acceptq[class].aq_backlog++;
+}
+
+static inline int tcp_acceptq_is_full(struct sock *sk, int class)
+{
+	return tcp_sk(sk)->acceptq[class].aq_backlog >
+		sk->sk_max_ack_backlog;
+}
+
+static inline void tcp_set_acceptq(struct tcp_opt *tp, struct open_request *req)
+{
+	int class = req->acceptq_class;
+	int prev_class;
+
+	if (!tp->acceptq[class].aq_ratio) {
+		req->acceptq_class = 0;
+		class = 0;
+	}
+
+	tp->acceptq[class].aq_qcount++;
+	req->acceptq_time_stamp = jiffies;
+
+	if (tp->acceptq[class].aq_tail) {
+		req->dl_next = tp->acceptq[class].aq_tail->dl_next;
+		tp->acceptq[class].aq_tail->dl_next = req;
+		tp->acceptq[class].aq_tail = req;
+	} else { /* if first request in the class */
+		tp->acceptq[class].aq_head = req;
+		tp->acceptq[class].aq_tail = req;
+
+		prev_class = class - 1;
+		while (prev_class >= 0) {
+			if (tp->acceptq[prev_class].aq_tail)
+				break;
+			prev_class--;
+		}
+		if (prev_class < 0) {
+			req->dl_next = tp->accept_queue;
+			tp->accept_queue = req;
+		}
+		else {
+			req->dl_next = tp->acceptq[prev_class].aq_tail->dl_next;
+			tp->acceptq[prev_class].aq_tail->dl_next = req;
+		}
+	}
+}
+static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req,
+					 struct sock *child)
+{
+	tcp_set_acceptq(tcp_sk(sk),req);
+	req->sk = child;
+	tcp_acceptq_added(sk,req->acceptq_class);
+}
+
+#else
 static inline void tcp_acceptq_removed(struct sock *sk)
 {
 	sk->sk_ack_backlog--;
@@ -1720,16 +1787,55 @@
 	req->dl_next = NULL;
 }

+#endif
+
 struct tcp_listen_opt
 {
 	u8			max_qlen_log;	/* log_2 of maximal queued SYNs */
 	int			qlen;
+#ifdef CONFIG_ACCEPT_QUEUES
+	int			qlen_young[NUM_ACCEPT_QUEUES];
+#else
 	int			qlen_young;
+#endif
 	int			clock_hand;
 	u32			hash_rnd;
 	struct open_request	*syn_table[TCP_SYNQ_HSIZE];
 };

+#ifdef CONFIG_ACCEPT_QUEUES
+static inline void
+tcp_synq_removed(struct sock *sk, struct open_request *req)
+{
+	struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
+
+	if (--lopt->qlen == 0)
+		tcp_delete_keepalive_timer(sk);
+	if (req->retrans == 0)
+		lopt->qlen_young[req->acceptq_class]--;
+}
+
+static inline void tcp_synq_added(struct sock *sk, struct open_request *req)
+{
+	struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt;
+
+	if (lopt->qlen++ == 0)
+		tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
+	lopt->qlen_young[req->acceptq_class]++;
+}
+
+static inline int tcp_synq_len(struct sock *sk)
+{
+	return tcp_sk(sk)->listen_opt->qlen;
+}
+
+static inline int tcp_synq_young(struct sock *sk, int class)
+{
+	return tcp_sk(sk)->listen_opt->qlen_young[class];
+}
+
+#else
+
 static inline void
 tcp_synq_removed(struct sock *sk, struct open_request *req)
 {
@@ -1759,6 +1865,7 @@
 {
 	return tcp_sk(sk)->listen_opt->qlen_young;
 }
+#endif

 static inline int tcp_synq_is_full(struct sock *sk)
 {
diff -urN linux-2.6.3_old/net/ipv4/Kconfig linux-2.6.3/net/ipv4/Kconfig
--- linux-2.6.3_old/net/ipv4/Kconfig	2004-02-17 19:59:05.000000000 -0800
+++ linux-2.6.3/net/ipv4/Kconfig	2004-02-29 13:41:58.000000000 -0800
@@ -379,5 +379,28 @@

 	  If unsure, say Y.

+config ACCEPT_QUEUES
+	bool "IP: TCP Multiple accept queues support"
+	depends on INET && NETFILTER
+	---help---
+	  Support multiple accept queues per listening socket. If you say Y
+	  here, multiple accept queues will be configured per listening
+	  socket.
+
+	  Each queue is mapped to a priority class. Incoming connection
+	  requests can be classified (see iptables(8), MARK target), depending
+	  on the packet's src/dest address or other parameters, into one of
+	  the priority classes. The requests are then queued to the relevant
+	  accept queue.
+
+	  Each of the queues can be assigned a weight. The accept()ance
+	  of packets is then scheduled in accordance with the weight
+	  assigned to the priority class.
+
+	  Be sure to enable "Network packet filtering" if you wish
+	  to use this feature.
+
+	  If unsure, say N.
+
 source "net/ipv4/ipvs/Kconfig"

diff -urN linux-2.6.3_old/net/ipv4/tcp.c linux-2.6.3/net/ipv4/tcp.c
--- linux-2.6.3_old/net/ipv4/tcp.c	2004-02-17 19:57:21.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp.c	2004-03-01 00:47:47.000000000 -0800
@@ -534,13 +534,34 @@

 int tcp_listen_start(struct sock *sk)
 {
+#ifdef CONFIG_ACCEPT_QUEUES
+	int i = 0;
+#endif
 	struct inet_opt *inet = inet_sk(sk);
 	struct tcp_opt *tp = tcp_sk(sk);
 	struct tcp_listen_opt *lopt;

 	sk->sk_max_ack_backlog = 0;
 	sk->sk_ack_backlog = 0;
-	tp->accept_queue = tp->accept_queue_tail = NULL;
+	tp->accept_queue = NULL;
+#ifdef CONFIG_ACCEPT_QUEUES
+	tp->class_index = 0;
+	for (i=0; i < NUM_ACCEPT_QUEUES; i++) {
+		tp->acceptq[i].aq_tail = NULL;
+		tp->acceptq[i].aq_head = NULL;
+		tp->acceptq[i].aq_wait_time = 0;
+		tp->acceptq[i].aq_qcount = 0;
+		tp->acceptq[i].aq_count = 0;
+		if (i == 0) {
+			tp->acceptq[i].aq_valid = 1;
+			tp->acceptq[i].aq_ratio = 1;
+		}
+		else {
+			tp->acceptq[i].aq_valid = 0;
+			tp->acceptq[i].aq_ratio = 0;
+		}
+	}
+#endif
 	tp->syn_wait_lock = RW_LOCK_UNLOCKED;
 	tcp_delack_init(tp);

@@ -600,7 +621,13 @@
 	write_lock_bh(&tp->syn_wait_lock);
 	tp->listen_opt = NULL;
 	write_unlock_bh(&tp->syn_wait_lock);
-	tp->accept_queue = tp->accept_queue_tail = NULL;
+#ifdef CONFIG_ACCEPT_QUEUES
+	for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
+		tp->acceptq[i].aq_head = tp->acceptq[i].aq_tail = NULL;
+#else
+	tp->accept_queue_tail = NULL;
+#endif
+	tp->accept_queue = NULL;

 	if (lopt->qlen) {
 		for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
@@ -646,7 +673,11 @@
 		local_bh_enable();
 		sock_put(child);

+#ifdef CONFIG_ACCEPT_QUEUES
+		tcp_acceptq_removed(sk, req->acceptq_class);
+#else
 		tcp_acceptq_removed(sk);
+#endif
 		tcp_openreq_fastfree(req);
 	}
 	BUG_TRAP(!sk->sk_ack_backlog);
@@ -2230,6 +2261,10 @@
 	struct open_request *req;
 	struct sock *newsk;
 	int error;
+#ifdef CONFIG_ACCEPT_QUEUES
+	int prev_class = 0;
+	int first;
+#endif

 	lock_sock(sk);

@@ -2243,7 +2278,6 @@
 	/* Find already established connection */
 	if (!tp->accept_queue) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
-
 		/* If this is a non blocking socket don't sleep */
 		error = -EAGAIN;
 		if (!timeo)
@@ -2254,12 +2288,46 @@
 			goto out;
 	}

+#ifndef CONFIG_ACCEPT_QUEUES
 	req = tp->accept_queue;
 	if ((tp->accept_queue = req->dl_next) == NULL)
 		tp->accept_queue_tail = NULL;

- 	newsk = req->sk;
 	tcp_acceptq_removed(sk);
+#else
+	first = tp->class_index;
+	/* We should always have  request queued here. The accept_queue
+	 * is already checked for NULL above.
+	 */
+	while(!tp->acceptq[first].aq_head) {
+		tp->acceptq[first].aq_cnt = 0;
+		first = ++first & ~NUM_ACCEPT_QUEUES;
+	}
+        req = tp->acceptq[first].aq_head;
+	tp->acceptq[first].aq_qcount--;
+	tp->acceptq[first].aq_count++;
+	tp->acceptq[first].aq_wait_time+=(jiffies - req->acceptq_time_stamp);
+
+	for (prev_class= first-1 ; prev_class >=0; prev_class--)
+		if (tp->acceptq[prev_class].aq_tail)
+			break;
+	if (prev_class>=0)
+		tp->acceptq[prev_class].aq_tail->dl_next = req->dl_next;
+	else
+		tp->accept_queue = req->dl_next;
+
+	if (req == tp->acceptq[first].aq_tail)
+		tp->acceptq[first].aq_head = tp->acceptq[first].aq_tail = NULL;
+	else
+		tp->acceptq[first].aq_head = req->dl_next;
+
+	if((++(tp->acceptq[first].aq_cnt)) >= tp->acceptq[first].aq_ratio){
+		tp->acceptq[first].aq_cnt = 0;
+		tp->class_index = ++first & ~NUM_ACCEPT_QUEUES;
+	}
+	tcp_acceptq_removed(sk, req->acceptq_class);
+#endif
+ 	newsk = req->sk;
 	tcp_openreq_fastfree(req);
 	BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
 	release_sock(sk);
@@ -2429,6 +2497,49 @@
 			}
 		}
 		break;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+	case TCP_ACCEPTQ_SHARE:
+		{
+			char share_wt[NUM_ACCEPT_QUEUES];
+			int i,j;
+
+			if (sk->sk_state != TCP_LISTEN)
+				return -EOPNOTSUPP;
+
+			if (copy_from_user(share_wt,optval, optlen)) {
+				err = -EFAULT;
+				break;
+			}
+			j = 0;
+			for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+				if (share_wt[i]) {
+					if (!j)
+						j = share_wt[i];
+					else if (share_wt[i] < j) {
+						j = share_wt[i];
+					}
+					tp->acceptq[i].aq_valid = 1;
+				}
+				else
+					tp->acceptq[i].aq_valid = 0;
+
+			}
+			if (j == 0) {
+				/* Class 0 is always valid. If nothing is
+				 * specified set class 0 as 1.
+				 */
+				share_wt[0] = 1;
+				tp->acceptq[0].aq_valid = 1;
+				j = 1;
+			}
+			for (i=0; i < NUM_ACCEPT_QUEUES; i++)  {
+				tp->acceptq[i].aq_ratio = share_wt[i]/j;
+				tp->acceptq[i].aq_cnt = 0;
+			}
+		}
+		break;
+#endif

 	default:
 		err = -ENOPROTOOPT;
@@ -2555,6 +2666,41 @@
 	case TCP_QUICKACK:
 		val = !tp->ack.pingpong;
 		break;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+	case TCP_ACCEPTQ_SHARE: {
+		struct tcp_acceptq_info tinfo[NUM_ACCEPT_QUEUES];
+		int i;
+
+		if (sk->sk_state != TCP_LISTEN)
+			return -EOPNOTSUPP;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		memset(tinfo, 0, sizeof(tinfo));
+
+		for(i=0; i < NUM_ACCEPT_QUEUES; i++) {
+			tinfo[i].acceptq_wait_time =
+				tp->acceptq[i].aq_wait_time/(HZ/USER_HZ);
+			tinfo[i].acceptq_qcount = tp->acceptq[i].aq_qcount;
+			tinfo[i].acceptq_count = tp->acceptq[i].aq_count;
+			if (tp->acceptq[i].aq_valid)
+				tinfo[i].acceptq_shares=tp->acceptq[i].aq_ratio;
+			else
+				tinfo[i].acceptq_shares = 0;
+		}
+
+		len = min_t(unsigned int, len, sizeof(tinfo));
+		if (put_user(len, optlen))
+			return -EFAULT;
+
+		if (copy_to_user(optval, (char *)tinfo, len))
+			return -EFAULT;
+
+		return 0;
+	}
+#endif
 	default:
 		return -ENOPROTOOPT;
 	};
diff -urN linux-2.6.3_old/net/ipv4/tcp_ipv4.c linux-2.6.3/net/ipv4/tcp_ipv4.c
--- linux-2.6.3_old/net/ipv4/tcp_ipv4.c	2004-02-17 19:57:22.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp_ipv4.c	2004-02-29 23:59:09.000000000 -0800
@@ -916,7 +916,11 @@
 	lopt->syn_table[h] = req;
 	write_unlock(&tp->syn_wait_lock);

+#ifdef CONFIG_ACCEPT_QUEUES
+	tcp_synq_added(sk, req);
+#else
 	tcp_synq_added(sk);
+#endif
 }


@@ -1413,6 +1417,9 @@
 	__u32 daddr = skb->nh.iph->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
+#ifdef CONFIG_ACCEPT_QUEUES
+	int class = 0;
+#endif
 #ifdef CONFIG_SYN_COOKIES
 	int want_cookie = 0;
 #else
@@ -1437,12 +1444,32 @@
 		goto drop;
 	}

+#ifdef CONFIG_ACCEPT_QUEUES
+	class = (skb->nfmark <= 0) ? 0 :
+		((skb->nfmark > NUM_ACCEPT_QUEUES) ? NUM_ACCEPT_QUEUES:
+		 skb->nfmark);
+	/*
+	 * Accept only if the class has shares set or if the default class
+	 * i.e. class 0 has shares
+	 */
+	if (!(tcp_sk(sk)->acceptq[class].aq_valid)) {
+		if (tcp_sk(sk)->acceptq[0].aq_valid)
+			class = 0;
+		else
+			goto drop;
+	}
+#endif
+
 	/* Accept backlog is full. If we have already queued enough
 	 * of warm entries in syn queue, drop request. It is better than
 	 * clogging syn queue with openreqs with exponentially increasing
 	 * timeout.
 	 */
+#ifdef CONFIG_ACCEPT_QUEUES
+	if (tcp_acceptq_is_full(sk, class) && tcp_synq_young(sk, class) > 1)
+#else
 	if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
+#endif
 		goto drop;

 	req = tcp_openreq_alloc();
@@ -1472,7 +1499,10 @@
 	tp.tstamp_ok = tp.saw_tstamp;

 	tcp_openreq_init(req, &tp, skb);
-
+#ifdef CONFIG_ACCEPT_QUEUES
+	req->acceptq_class = class;
+	req->acceptq_time_stamp = jiffies;
+#endif
 	req->af.v4_req.loc_addr = daddr;
 	req->af.v4_req.rmt_addr = saddr;
 	req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
@@ -1567,7 +1597,11 @@
 	struct tcp_opt *newtp;
 	struct sock *newsk;

+#ifdef CONFIG_ACCEPT_QUEUES
+	if (tcp_acceptq_is_full(sk, req->acceptq_class))
+#else
 	if (tcp_acceptq_is_full(sk))
+#endif
 		goto exit_overflow;

 	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
diff -urN linux-2.6.3_old/net/ipv4/tcp_minisocks.c linux-2.6.3/net/ipv4/tcp_minisocks.c
--- linux-2.6.3_old/net/ipv4/tcp_minisocks.c	2004-02-17 19:58:56.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp_minisocks.c	2004-02-29 21:49:34.000000000 -0800
@@ -779,7 +779,14 @@
 		newtp->num_sacks = 0;
 		newtp->urg_data = 0;
 		newtp->listen_opt = NULL;
+#ifdef CONFIG_ACCEPT_QUEUES
+		newtp->accept_queue = NULL;
+		memset(newtp->acceptq, 0,sizeof(newtp->acceptq));
+		newtp->class_index = 0;
+
+#else
 		newtp->accept_queue = newtp->accept_queue_tail = NULL;
+#endif
 		/* Deinitialize syn_wait_lock to trap illegal accesses. */
 		memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));

diff -urN linux-2.6.3_old/net/ipv4/tcp_timer.c linux-2.6.3/net/ipv4/tcp_timer.c
--- linux-2.6.3_old/net/ipv4/tcp_timer.c	2004-02-17 19:59:28.000000000 -0800
+++ linux-2.6.3/net/ipv4/tcp_timer.c	2004-02-27 17:38:55.000000000 -0800
@@ -498,7 +498,16 @@
 	 * ones are about to clog our table.
 	 */
 	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+#ifdef CONFIG_ACCEPT_QUEUES
+		int young = 0;
+
+		for(i=0; i < NUM_ACCEPT_QUEUES; i++)
+			young += lopt->qlen_young[i];
+
+		young <<= 1;
+#else
 		int young = (lopt->qlen_young<<1);
+#endif

 		while (thresh > 2) {
 			if (lopt->qlen < young)
@@ -524,9 +533,12 @@
 					unsigned long timeo;

 					if (req->retrans++ == 0)
-						lopt->qlen_young--;
-					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
-						    TCP_RTO_MAX);
+#ifdef CONFIG_ACCEPT_QUEUES
+			         		lopt->qlen_young[req->acceptq_class]--;
+#else
+			         		lopt->qlen_young--;
+#endif
+					timeo = min((TCP_TIMEOUT_INIT << req->retrans), TCP_RTO_MAX);
 					req->expires = now + timeo;
 					reqp = &req->dl_next;
 					continue;
@@ -538,7 +550,11 @@
 				write_unlock(&tp->syn_wait_lock);
 				lopt->qlen--;
 				if (req->retrans == 0)
-					lopt->qlen_young--;
+#ifdef CONFIG_ACCEPT_QUEUES
+			         		lopt->qlen_young[req->acceptq_class]--;
+#else
+			         		lopt->qlen_young--;
+#endif
 				tcp_openreq_free(req);
 				continue;
 			}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-02-27 22:13             ` David S. Miller
@ 2004-03-04 20:51               ` Vivek Kashyap
  2004-03-04 21:52                 ` Ronghua Zhang
  0 siblings, 1 reply; 15+ messages in thread
From: Vivek Kashyap @ 2004-03-04 20:51 UTC (permalink / raw)
  To: David S. Miller; +Cc: Ronghua Zhang, netdev

The following results show the efficacy of the patch for proportional share 
accept queues.

I utilised Apache with no changes in configuration. Then used httperf to 
on three different machines. Did not change any parameters 
on the server or the client machines. Ran httperf to get the default test.html
page repeatedly: 
/httperf --server xx.xx.xx.xx --port 80 --uri /test.html --rate 2000 \
	--num-conn 100000 --num-call 1 --timeout 5

Three clients simultaneously bombarded the server. When the shares patch is
not used the three clients get about equal service. 

With the proportional shares patch the favoured client gets a proportional 
number of its requests services in much shorter time.

With port redirection the results are much poorer. The conntrack module is
randomly dropping some of the requests too since its hash table gets full. The
application will further require modification to provide proportional shares 
which is much more work.

1. The results without utilising the shares are:
 
Total: connections 41993 requests 37195 replies 36951 test-duration 52.984 s
Reply rate [replies/s]: min 646.1 avg 723.0 max 946.5 stddev 95.1 (10 samples)
Reply time [ms]: response 151.3 transfer 0.0

Total: connections 41972 requests 37132 replies 37081 test-duration 52.996 s
Reply rate [replies/s]: min 674.7 avg 724.9 max 805.3 stddev 35.1 (10 samples)
Reply time [ms]: response 95.0 transfer 0.0

Total: connections 47171 requests 43037 replies 42991 test-duration 53.122 s
Reply rate [replies/s]: min 777.2 avg 844.2 max 895.1 stddev 39.3 (10 samples)
Reply time [ms]: response 92.1 transfer 0.0

2. With proportional share patch:

 The shares are were assigned in the ratio: 1:3:6. The results are listed
in order:

Total: connections 21566 requests 14695 replies 14251 test-duration 52.947 s
Reply rate [replies/s]: min 208.0 avg 268.0 max 730.0 stddev 162.4 (10 samples)
Reply time [ms]: response 518.3 transfer 0.0

Total: connections 41016 requests 34110 replies 34060 test-duration 52.981 s
Reply rate [replies/s]: min 632.9 avg 667.9 max 707.5 stddev 20.1 (10 samples)
Reply time [ms]: response 204.4 transfer 0.0

Total: connections 71264 requests 67089 replies 67008 test-duration 53.003 s
Reply rate [replies/s]: min 1214.3 avg 1323.1 max 1366.7 stddev 44.2 (10 samples)
Reply time [ms]: response 110.5 transfer 0.0


3. Port redirect test

As a test case then modified httpd.conf to listen on three ports 8081/82/83.
Used iptables REDIRECT target to forward the packets to these ports. The 
clients still communicate with port 80. As expected the results are not as 
good.

Total: connections 33104 requests 27385 replies 26845 test-duration 54.992 s
Reply rate [replies/s]: min 325.8 avg 534.9 max 868.5 stddev 205.0 (10 samples)
Reply time [ms]: response 300.1 transfer 0.0
                                                                                
Total: connections 28022 requests 21536 replies 21391 test-duration 54.994 s
Reply rate [replies/s]: min 2.0 avg 427.8 max 686.8 stddev 211.7 (10 samples)
Reply time [ms]: response 309.8 transfer 0.0

Total: connections 27452 requests 21045 replies 20907 test-duration 54.992 s
Reply rate [replies/s]: min 1.4 avg 418.1 max 632.7 stddev 208.2 (10 samples)
Reply time [ms]: response 310.0 transfer 0.0

Vivek

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] proportional share accept()
  2004-03-04 20:51               ` Vivek Kashyap
@ 2004-03-04 21:52                 ` Ronghua Zhang
  0 siblings, 0 replies; 15+ messages in thread
From: Ronghua Zhang @ 2004-03-04 21:52 UTC (permalink / raw)
  To: Vivek Kashyap; +Cc: David S. Miller, netdev

This is not surprising, since using iptable means every packet, inbound or
outbound, has to be changed.

My scheme does not have this overhead either, for it only involves moving
open_request among the accept queues.

Ronghua

On Thu, 4 Mar 2004, Vivek Kashyap wrote:

> The following results show the efficacy of the patch for proportional share
> accept queues.
>
> I utilised Apache with no changes in configuration. Then used httperf to
> on three different machines. Did not change any parameters
> on the server or the client machines. Ran httperf to get the default test.html
> page repeatedly:
> /httperf --server xx.xx.xx.xx --port 80 --uri /test.html --rate 2000 \
> 	--num-conn 100000 --num-call 1 --timeout 5
>
> Three clients simultaneously bombarded the server. When the shares patch is
> not used the three clients get about equal service.
>
> With the proportional shares patch the favoured client gets a proportional
> number of its requests services in much shorter time.
>
> With port redirection the results are much poorer. The conntrack module is
> randomly dropping some of the requests too since its hash table gets full. The
> application will further require modification to provide proportional shares
> which is much more work.
>
> 1. The results without utilising the shares are:
>
> Total: connections 41993 requests 37195 replies 36951 test-duration 52.984 s
> Reply rate [replies/s]: min 646.1 avg 723.0 max 946.5 stddev 95.1 (10 samples)
> Reply time [ms]: response 151.3 transfer 0.0
>
> Total: connections 41972 requests 37132 replies 37081 test-duration 52.996 s
> Reply rate [replies/s]: min 674.7 avg 724.9 max 805.3 stddev 35.1 (10 samples)
> Reply time [ms]: response 95.0 transfer 0.0
>
> Total: connections 47171 requests 43037 replies 42991 test-duration 53.122 s
> Reply rate [replies/s]: min 777.2 avg 844.2 max 895.1 stddev 39.3 (10 samples)
> Reply time [ms]: response 92.1 transfer 0.0
>
> 2. With proportional share patch:
>
>  The shares are were assigned in the ratio: 1:3:6. The results are listed
> in order:
>
> Total: connections 21566 requests 14695 replies 14251 test-duration 52.947 s
> Reply rate [replies/s]: min 208.0 avg 268.0 max 730.0 stddev 162.4 (10 samples)
> Reply time [ms]: response 518.3 transfer 0.0
>
> Total: connections 41016 requests 34110 replies 34060 test-duration 52.981 s
> Reply rate [replies/s]: min 632.9 avg 667.9 max 707.5 stddev 20.1 (10 samples)
> Reply time [ms]: response 204.4 transfer 0.0
>
> Total: connections 71264 requests 67089 replies 67008 test-duration 53.003 s
> Reply rate [replies/s]: min 1214.3 avg 1323.1 max 1366.7 stddev 44.2 (10 samples)
> Reply time [ms]: response 110.5 transfer 0.0
>
>
> 3. Port redirect test
>
> As a test case then modified httpd.conf to listen on three ports 8081/82/83.
> Used iptables REDIRECT target to forward the packets to these ports. The
> clients still communicate with port 80. As expected the results are not as
> good.
>
> Total: connections 33104 requests 27385 replies 26845 test-duration 54.992 s
> Reply rate [replies/s]: min 325.8 avg 534.9 max 868.5 stddev 205.0 (10 samples)
> Reply time [ms]: response 300.1 transfer 0.0
>
> Total: connections 28022 requests 21536 replies 21391 test-duration 54.994 s
> Reply rate [replies/s]: min 2.0 avg 427.8 max 686.8 stddev 211.7 (10 samples)
> Reply time [ms]: response 309.8 transfer 0.0
>
> Total: connections 27452 requests 21045 replies 20907 test-duration 54.992 s
> Reply rate [replies/s]: min 1.4 avg 418.1 max 632.7 stddev 208.2 (10 samples)
> Reply time [ms]: response 310.0 transfer 0.0
>
> Vivek
>
>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2004-03-04 21:52 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-02-25 16:43 [PATCH] proportional share accept() Vivek Kashyap
2004-02-25 17:21 ` Stephen Hemminger
2004-02-25 18:12   ` David S. Miller
2004-02-25 19:07     ` kashyapv
2004-02-25 19:17       ` David S. Miller
2004-02-25 19:30         ` kashyapv
2004-02-26 22:08           ` Ronghua Zhang
2004-02-27 22:13             ` David S. Miller
2004-03-04 20:51               ` Vivek Kashyap
2004-03-04 21:52                 ` Ronghua Zhang
2004-03-01  6:05             ` David S. Miller
2004-03-01 10:11               ` Vivek Kashyap
     [not found]   ` <Pine.WNT.4.44.0402251101300.1572-100000@w-vkashyap95.des.sequent.com>
2004-02-25 20:03     ` Stephen Hemminger
2004-02-25 22:31       ` kashyapv
2004-02-26  2:35 ` James Morris

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).