[PATCH 2/5]: DCCP Recalc on non-loss intervals

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 2/5]: DCCP Recalc on non-loss intervals
@ 2006-12-20  2:45 Ian McDonald
  2006-12-21 13:20 ` Gerrit Renker
                   ` (17 more replies)
  0 siblings, 18 replies; 19+ messages in thread
From: Ian McDonald @ 2006-12-20  2:45 UTC (permalink / raw)
  To: dccp

This works out when to recalculate the loss rate due to significant
non-loss interval. We do this at time of recalulating loss rate to save
having to do it every packet. Instead we just check if it's time to
recalculate. It looked like there was an early attempt to do this but it
was quite wrong in its implementation.

This is making the code conform to RFC 3448, section 5.4

I've added some extra debugging which is useful in testing as well. In
loss_interval.c I've just used dccp_pr_debug. I tried to use ccid3_pr_debug
but ran into problems with circular references.

This patch applies on top of Gerrit's patches.

If possible I'd like this to go into 2.6.21 as it's been reported broken by
about 4 or 5 people.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
---
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index e7eeeeb..a74f905 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -36,9 +36,9 @@
 #include "../ccid.h"
 #include "../dccp.h"
 #include "lib/packet_history.h"
-#include "lib/loss_interval.h"
 #include "lib/tfrc.h"
 #include "ccid3.h"
+#include "lib/loss_interval.h"
 
 static struct dccp_tx_hist *ccid3_tx_hist;
 static struct dccp_rx_hist *ccid3_rx_hist;
@@ -384,6 +384,9 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
 	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
 	packet->dccphtx_sent   = 1;
 	hctx->ccid3hctx_idle   = 0;
+
+	ccid3_pr_debug("seqno = %llu, rtt = %u\n",
+	   (long long unsigned)packet->dccphtx_seqno, packet->dccphtx_rtt);
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -810,7 +813,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
  *
  * returns estimated loss interval in usecs */
 
-static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk, struct sk_buff *skb)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
@@ -901,13 +904,31 @@ found:
 	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
 		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
 
-	if (p = 0)
+	if (p = 0) {
+		DCCP_WARN("p=0, fval = %llu\n", fval);
 		return ~0;
-	else
-		return 1000000 / p;
+	} else {
+		u32 new_interval = 1000000 / p;
+		u64 recalc_interval;
+
+		if (new_interval < 150)
+			recalc_interval = new_interval + 1;
+		else
+			recalc_interval = (new_interval *
+			(DCCP_RECALC_LOSS_FACTOR+1)) / DCCP_RECALC_LOSS_FACTOR;
+		hcrx->ccid3hcrx_seq_recalc_loss = dccp_hdr_seq(skb);
+		add48(&hcrx->ccid3hcrx_seq_recalc_loss, recalc_interval);
+		ccid3_pr_debug("%s(%p), interval=%u, recalc_interval=%llu, "
+		   "seqno=%llu recalc_loss=%llu\n", dccp_role(sk), sk,
+		   new_interval, recalc_interval, dccp_hdr_seq(skb),
+		   hcrx->ccid3hcrx_seq_recalc_loss);
+
+		return new_interval;
+	}
 }
 
-static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+static void ccid3_hc_rx_update_li(struct sock *sk, struct sk_buff *skb,
+                                  u64 seq_loss, u8 win_loss)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_li_hist_entry *head;
@@ -920,7 +941,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 
 		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
 		   struct dccp_li_hist_entry, dccplih_node);
-		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk, skb);
 	} else {
 		struct dccp_li_hist_entry *entry;
 		struct list_head *tail;
@@ -951,10 +972,12 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 		entry->dccplih_seqno = seq_loss;
 		entry->dccplih_interval = seq_temp;
 		entry->dccplih_win_count = win_loss;
+		ccid3_pr_debug("adding node seqno=%llu, interval=%u\n",
+		   (u64)entry->dccplih_seqno, entry->dccplih_interval);
 	}
 }
 
-static int ccid3_hc_rx_detect_loss(struct sock *sk,
+static int ccid3_hc_rx_detect_loss(struct sock *sk, struct sk_buff *skb,
                                     struct dccp_rx_hist_entry *packet)
 {
 	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
@@ -979,7 +1002,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
 	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
 	   > TFRC_RECV_NUM_LATE_LOSS) {
 		loss = 1;
-		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
+		ccid3_hc_rx_update_li(sk, skb, hcrx->ccid3hcrx_seqno_nonloss,
 		   hcrx->ccid3hcrx_ccval_nonloss);
 		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
 		dccp_inc_seqno(&tmp_seqno);
@@ -1067,7 +1090,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	loss = ccid3_hc_rx_detect_loss(sk, packet);
+	loss = ccid3_hc_rx_detect_loss(sk, skb, packet);
 
 	if (DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK)
 		return;
@@ -1088,6 +1111,15 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		if (loss)
 			break;
 
+		if (hcrx->ccid3hcrx_seq_recalc_loss &&
+		 after48(dccp_hdr_seq(skb), hcrx->ccid3hcrx_seq_recalc_loss)) {
+			ccid3_pr_debug("%s(%p, state=%s) checking loss "
+			   "recalc skb=%llu, recalc_loss = %llu\n",
+			   dccp_role(sk), sk, dccp_state_name(sk->sk_state),
+			   dccp_hdr_seq(skb),hcrx->ccid3hcrx_seq_recalc_loss);
+			break;
+		}
+
 		dccp_timestamp(sk, &now);
 		if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
 		     (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
@@ -1100,15 +1132,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		return;
 	}
 
-	/* Dealing with packet loss */
-	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
+	/* Dealing with loss intervals*/
+	if (loss)
+		ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
 		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
 
 	p_prev = hcrx->ccid3hcrx_p;
 	
 	/* Calculate loss event rate */
 	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
-		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
+		u32 i_mean = dccp_li_hist_calc_i_mean(hcrx, skb);
 
 		/* Scaling up by 1000000 as fixed decimal */
 		if (i_mean != 0)
@@ -1116,7 +1149,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	} else
 		DCCP_BUG("empty loss history");
 
-	if (hcrx->ccid3hcrx_p > p_prev) {
+	if (hcrx->ccid3hcrx_p != p_prev) {
+		ccid3_pr_debug("p_prev = %u, hcrx_p = %u\n", p_prev,
+		   hcrx->ccid3hcrx_p);
 		ccid3_hc_rx_send_feedback(sk);
 		return;
 	}
@@ -1135,6 +1170,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
 	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
 	hcrx->ccid3hcrx_s   = 0;
 	hcrx->ccid3hcrx_rtt = 0;
+	hcrx->ccid3hcrx_seqno_nonloss = 0;
 	return 0;
 }
 
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 2b18db4..99182bf 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -158,6 +158,7 @@ enum ccid3_hc_rx_states {
  *  @ccid3hcrx_s  -  Received packet size in bytes
  *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
  *  @ccid3hcrx_elapsed_time  -  Time since packet reception
+ *  @ccid3hcrx_seq_recalc_loss - recalc loss due to nonloss (RFC 3448, 5.4)
  */
 struct ccid3_hc_rx_sock {
 	struct tfrc_rx_info		ccid3hcrx_tfrc;
@@ -176,6 +177,7 @@ struct ccid3_hc_rx_sock {
 	u16				ccid3hcrx_s;
 	u32				ccid3hcrx_pinv;
 	u32				ccid3hcrx_elapsed_time;
+	u64				ccid3hcrx_seq_recalc_loss;
 };
 
 static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 372d7e7..cd03ae0 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <net/sock.h>
 #include "../../dccp.h"
+#include "../ccid3.h"
 #include "loss_interval.h"
 
 struct dccp_li_hist *dccp_li_hist_new(const char *name)
@@ -81,31 +82,106 @@ static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
 	4, 4, 4, 4, 3, 2, 1, 1,
 };
 
-u32 dccp_li_hist_calc_i_mean(struct list_head *list)
+/* This code implements the part of section 5.4 of RFC3448 which says we should
+ * recalculate the average loss interval if we have a sufficient long loss
+ * interval.
+ *
+ * Given that i_mean = weighted average of loss then we can say that
+ * 4*n + 4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6] + *  y*(4*i[0] + 4*i[i] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])
+ *
+ * where y is a factor so that we don't check as soon as we hit average
+ * interval and waste CPU cycles needlessly. n is the non-loss interval
+ * and i[x] are the loss intervals. We don't need to divide by the sum
+ * of the weighting as these cancel out on each side.
+ *
+ * We can solve for this equation for n which yields:
+ * n + *  ((4*i[0] + 4*i[1] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])/4) -
+ *  (y*(4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6])/4) */
+
+static u64 dccp_li_hist_recalc_recalcloss(struct sk_buff *skb,
+                                        u32 i_tot0, u32 i_tot1)
+{
+	u64 recalcloss_seq = dccp_hdr_seq(skb);
+
+	dccp_pr_debug("recalcloss_seq=%llu\n", recalcloss_seq);
+
+	if (!i_tot0) {
+		DCCP_WARN("i_tot0 = 0\n");
+		return 0;
+	}
+
+	/* We don't adjust if > 1 million as will get overflow
+	 * in calculations and not so serious anyway */
+	if (i_tot0 > 1000000) {
+		DCCP_WARN("i_mean > 100000\n");
+		return 0;
+	}
+
+	i_tot0 /= 4;
+	i_tot1 = (i_tot1 * (DCCP_RECALC_LOSS_FACTOR+1))/
+	   (DCCP_RECALC_LOSS_FACTOR*4);
+
+	if (i_tot1 >= i_tot0) {
+		dccp_pr_debug("only incrementing by 1\n");
+		add48(&recalcloss_seq, 1);
+	} else
+		add48(&recalcloss_seq,(u64)(i_tot0 - i_tot1));
+
+	dccp_pr_debug("recalcloss_seq=%llu, i_tot0=%u, i_tot1=%u\n",
+	   recalcloss_seq, i_tot0, i_tot1);
+
+	return recalcloss_seq;
+}
+
+u32 dccp_li_hist_calc_i_mean(struct ccid3_hc_rx_sock *hcrx, struct sk_buff *skb)
 {
 	struct dccp_li_hist_entry *li_entry, *li_next;
-	int i = 0;
+	unsigned int i = 0;
 	u32 i_tot;
 	u32 i_tot0 = 0;
 	u32 i_tot1 = 0;
 	u32 w_tot  = 0;
+	u64 non_loss = 0;
+	u32 i_mean;
+	struct list_head *list = &hcrx->ccid3hcrx_li_hist;
 
 	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
 		if (li_entry->dccplih_interval != ~0U) {
 			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
 			w_tot  += dccp_li_hist_w[i];
-			if (i != 0)
-				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
-		}
-
+			if (i != 7)
+				i_tot1 += li_entry->dccplih_interval
+				   * dccp_li_hist_w[i + 1];
+			if (i = 0) {
+				non_loss = dccp_hdr_seq(skb);
+				sub48(&non_loss, li_entry->dccplih_seqno);
+			}
+			dccp_pr_debug("i=%d, interval=%u, seqno=%llu, "
+			  "i_tot0=%u, i_tot1=%u, w_tot=%u\n", i,
+			  li_entry->dccplih_interval,
+			  (u64)li_entry->dccplih_seqno,i_tot0, i_tot1, w_tot);
+		} else
+			dccp_pr_debug("empty loss interval\n");
 
 		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
 			break;
 	}
 
-	if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
+	if (i != DCCP_LI_HIST_IVAL_F_LENGTH) {
+		DCCP_BUG("Length of loss interval list is %u\n", i);
 		return 0;
+	}
 
+	hcrx->ccid3hcrx_seq_recalc_loss +	   dccp_li_hist_recalc_recalcloss(skb, i_tot0, i_tot1);
+
+	i_tot1 += non_loss * dccp_li_hist_w[0];
+	if (i_tot1 > i_tot0)
+		dccp_pr_debug("i_mean recalculateed due to high nonloss "
+		   " interval seqno=%llu nonloss=%llu i_tot0=%u, i_tot1=%u\n",
+		   dccp_hdr_seq(skb), non_loss, i_tot0, i_tot1);
 	i_tot = max(i_tot0, i_tot1);
 
 	if (!w_tot) {
@@ -113,7 +189,10 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 		return 1;
 	}
 
-	return i_tot / w_tot;
+	i_mean = i_tot / w_tot;
+	dccp_pr_debug("i_mean=%u\n", i_mean);
+
+	return i_mean;
 }
 
 EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
@@ -131,7 +210,7 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
 			DCCP_BUG("loss interval list entry is NULL");
 			return 0;
 		}
-		entry->dccplih_interval = ~0;
+		entry->dccplih_interval = ~0U;
 		list_add(&entry->dccplih_node, list);
 	}
 
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index eb25701..6fed81b 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -50,7 +50,8 @@ static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
 extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
 			       struct list_head *list);
 
-extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
+extern u32 dccp_li_hist_calc_i_mean(struct ccid3_hc_rx_sock *hcrx,
+   struct sk_buff *skb);
 
 extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
    struct list_head *list, const u64 seq_loss, const u8 win_loss);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index dd0a986..546ca70 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -80,6 +80,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 
 #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
 
+/* Value as a reciprocal to check for change in loss through long
+ * non-loss intervals. If you change this you must recheck existing
+ * code for overflow possibilities */
+#define DCCP_RECALC_LOSS_FACTOR 100
+
 /* sysctl variables for DCCP */
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
@ 2006-12-21 13:20 ` Gerrit Renker
  2006-12-21 22:48 ` Ian McDonald
                   ` (16 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Gerrit Renker @ 2006-12-21 13:20 UTC (permalink / raw)
  To: dccp

|  This works out when to recalculate the loss rate due to significant
|  non-loss interval. We do this at time of recalulating loss rate to save
|  having to do it every packet. Instead we just check if it's time to
|  recalculate. It looked like there was an early attempt to do this but it
|  was quite wrong in its implementation.
|  
|  This is making the code conform to RFC 3448, section 5.4
I think there is a big misunderstanding here and the existing code, as far as I can see,
already conforms to [RFC 3448, 5.4].

I got alerted when I re-read the following comment:
 /* This code implements the part of section 5.4 of RFC3448 which says we should
  * recalculate the average loss interval if we have a sufficient long loss
  * interval.
There is no such statement in RFC 3448, 5.4. The most close to this is 
  "When calculating the average loss interval we need to decide whether
   to include the interval since the most recent packet loss event.  We
   only do this if it is sufficiently large to increase the average loss
   interval."
If you mean this, then the patch is not necessary. The reason is that this statement
refers to I_tot0 versus I_tot1. The statement 

  I_tot = max(I_tot0, I_tot1);

takes care of using the `interval since the most recent packet loss event' only if 
it is sufficiently large. It is not explicit to see, but is implicitly contained in the
way the two weighted averages are compared against each other: it boils down to
comparing I_0 against the oldest 5 loss intervals. 

[ For a more detailed explanation, cf. the MSc thesis by Widmer, I also have a set of
  notes on this case ]

Hence the recalculation is taken care of already, by the way I_tot0/1 are used.

I have had some more time looking at the patch, and it seems a performance-booster, I don't
understand why it is there, and I can find no support in the RFCs for it. As far as I can see,
it is non-standard.

Can you please tell me what the intention is - I think that there may be a better solution
to recalculating p when, for a longer time, there has been no loss. I'd be more than willing to
help with this, but I would like to avoid using non-standard solutions.

|  +		if (new_interval < 150)
|  +			recalc_interval = new_interval + 1;
|  +		else
|  +			recalc_interval = (new_interval *
|  +			(DCCP_RECALC_LOSS_FACTOR+1)) / DCCP_RECALC_LOSS_FACTOR;
If DCCP_RECALC_LOSS_FACTOR is a constant,
 * why is  150 not? and 
 * why was 150 chosen here?

|  -u32 dccp_li_hist_calc_i_mean(struct list_head *list)
|  +/* This code implements the part of section 5.4 of RFC3448 which says we should
|  + * recalculate the average loss interval if we have a sufficient long loss
|  + * interval.
Please see above.

|  + *
|  + * Given that i_mean = weighted average of loss then we can say that
|  + * 4*n + 4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6] |  + *  y*(4*i[0] + 4*i[i] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])
                      ^
                      i[1]
|  + * 
|  + * where y is a factor so that we don't check as soon as we hit average
|  + * interval and waste CPU cycles needlessly. n is the non-loss interval
|  + * and i[x] are the loss intervals. We don't need to divide by the sum
|  + * of the weighting as these cancel out on each side.
|  + *
|  + * We can solve for this equation for n which yields:
|  + * n |  + *  ((4*i[0] + 4*i[1] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])/4) -
|  + *  (y*(4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6])/4) */
|  +
I can not at the moment say what this does - is your intention to compare I_tot0 and I_tot1?
It seems this is something new, and it would make sense to communicate the idea to the 3448bis
authors (and discuss this with them), maybe this is a new solution and has merit. 

What I know of 3448/3448bis however, is that the code presented therein resulted
from several years of intensive ns-2 testing as well as practical experiments. 
With that, I think 3448/3448bis should be given preference when it comes to implementing.

|  --- a/net/dccp/dccp.h
|  +++ b/net/dccp/dccp.h
|  @@ -80,6 +80,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
|   
|   #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
|   
|  +/* Value as a reciprocal to check for change in loss through long
|  + * non-loss intervals. If you change this you must recheck existing
|  + * code for overflow possibilities */
|  +#define DCCP_RECALC_LOSS_FACTOR 100
|  +
I have a quibble with this one: it is a CCID3 variable (not used in the global DCCP code),
it would be better to rename it into CCID3_RECALC_LOSS_FACTOR and put it into ccid3.h

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
  2006-12-21 13:20 ` Gerrit Renker
@ 2006-12-21 22:48 ` Ian McDonald
  2007-01-03 12:56 ` Gerrit Renker
                   ` (15 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Ian McDonald @ 2006-12-21 22:48 UTC (permalink / raw)
  To: dccp

Gerrit,

Comments inline.

On 12/22/06, Gerrit Renker <gerrit@erg.abdn.ac.uk> wrote:
> |  This works out when to recalculate the loss rate due to significant
> |  non-loss interval. We do this at time of recalulating loss rate to save
> |  having to do it every packet. Instead we just check if it's time to
> |  recalculate. It looked like there was an early attempt to do this but it
> |  was quite wrong in its implementation.
> |
> |  This is making the code conform to RFC 3448, section 5.4
> I think there is a big misunderstanding here and the existing code, as far as I can see,
> already conforms to [RFC 3448, 5.4].
>
> I got alerted when I re-read the following comment:
>  /* This code implements the part of section 5.4 of RFC3448 which says we should
>   * recalculate the average loss interval if we have a sufficient long loss
>   * interval.
> There is no such statement in RFC 3448, 5.4. The most close to this is
>   "When calculating the average loss interval we need to decide whether
>    to include the interval since the most recent packet loss event.  We
>    only do this if it is sufficiently large to increase the average loss
>    interval."

The RFC is a little confusing but I think I have carried out it's
intention correctly. I'll quote verbatim here the relevant parts:

   When calculating the average loss interval we need to decide whether
   to include the interval since the most recent packet loss event.  We
   only do this if it is sufficiently large to increase the average loss
   interval.

   Thus if the most recent loss intervals are I_0 to I_n, with I_0 being
   the interval since the most recent loss event, then we calculate the
   average loss interval I_mean as:

Notice here the "interval since the most recent packet loss event".
This implies (but would help if explicit) that this is an interval
with no loss. We only use if it would increase the average.

Backing up my implications is that we have intervals from I_0 to I_n
which is actually n+1 intervals so therefore I read I_0 to be the
interval of non-loss. Does this help? Once you see that it all falls
into place.

> If you mean this, then the patch is not necessary. The reason is that this statement
> refers to I_tot0 versus I_tot1. The statement
>
>   I_tot = max(I_tot0, I_tot1);
>
> takes care of using the `interval since the most recent packet loss event' only if
> it is sufficiently large. It is not explicit to see, but is implicitly contained in the
> way the two weighted averages are compared against each other: it boils down to
> comparing I_0 against the oldest 5 loss intervals.
>
> [ For a more detailed explanation, cf. the MSc thesis by Widmer, I also have a set of
>   notes on this case ]
>
I didn't mean that but someone had tried to implement it and had done
it incorrectly. The intent as I read is not to compare the calculation
with and without the most recent loss interval but to compare with and
without the non-loss interval.

The implementation in the kernel, which was wrong logic, also had
wrong implementation! The code was taken over from FreeBSD (relicensed
under GPL) and we actually didn't analyse properly and were excluding
the wrong item from the loop as linked lists ran the opposite
direction in FreeBSD to Linux in this case!!

> Hence the recalculation is taken care of already, by the way I_tot0/1 are used.
>
> I have had some more time looking at the patch, and it seems a performance-booster, I don't
> understand why it is there, and I can find no support in the RFCs for it. As far as I can see,
> it is non-standard.

I believe it is standard as per earlier. It does boost performance but
correctly so. Search the archives for Burak and you will see what it
solves. It is particularly applicable to links like satellite with
fixed bandwidth. Think about say a 128 Kbits link and you get a loss
when you are at 40 Kbits per second. If you get no more loss then you
can never achieve your link speed as i_mean is not recalculated.

>
> Can you please tell me what the intention is - I think that there may be a better solution
> to recalculating p when, for a longer time, there has been no loss. I'd be more than willing to
> help with this, but I would like to avoid using non-standard solutions.
>
See above.

>
> |  +            if (new_interval < 150)
> |  +                    recalc_interval = new_interval + 1;
> |  +            else
> |  +                    recalc_interval = (new_interval *
> |  +                    (DCCP_RECALC_LOSS_FACTOR+1)) / DCCP_RECALC_LOSS_FACTOR;
> If DCCP_RECALC_LOSS_FACTOR is a constant,
>  * why is  150 not? and
>  * why was 150 chosen here?
>
I should put this in a constant and document. Basically if you do the
maths and check the rounding then anything less than 150 will give you
a result of 0 but we want a minimum of 1. This just saves doing a
multiply and a divide and my guess is that a compare is a relatively
cheap operation to save on the following two operations. Saying this I
haven't profiled it.

> |  + * Given that i_mean = weighted average of loss then we can say that
> |  + * 4*n + 4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6] > |  + *  y*(4*i[0] + 4*i[i] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])
>                       ^
>                       i[1]
> |  + *
> |  + * where y is a factor so that we don't check as soon as we hit average
> |  + * interval and waste CPU cycles needlessly. n is the non-loss interval
> |  + * and i[x] are the loss intervals. We don't need to divide by the sum
> |  + * of the weighting as these cancel out on each side.
> |  + *
> |  + * We can solve for this equation for n which yields:
> |  + * n > |  + *  ((4*i[0] + 4*i[1] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])/4) -
> |  + *  (y*(4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6])/4) */
> |  +
> I can not at the moment say what this does - is your intention to compare I_tot0 and I_tot1?
> It seems this is something new, and it would make sense to communicate the idea to the 3448bis
> authors (and discuss this with them), maybe this is a new solution and has merit.
>
All this is is a method of not having to call calc_i_mean for EVERY
packet which would be very expensive on CPU. It is new but is a very
simple mathematical solution to detect when the change is big enough,
so once again you do a compare on each packet rather than a whole lot
of multiplies/divides. It isn't going beyond the RFC really at all -
just a clever implementation I like to think :-)

> What I know of 3448/3448bis however, is that the code presented therein resulted
> from several years of intensive ns-2 testing as well as practical experiments.
> With that, I think 3448/3448bis should be given preference when it comes to implementing.
>

Agree and believe I have done so. I have tested myself in quite a few
different cases. This took me quite a large number of hours to work
this patch out.
>
> |  --- a/net/dccp/dccp.h
> |  +++ b/net/dccp/dccp.h
> |  @@ -80,6 +80,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
> |
> |   #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
> |
> |  +/* Value as a reciprocal to check for change in loss through long
> |  + * non-loss intervals. If you change this you must recheck existing
> |  + * code for overflow possibilities */
> |  +#define DCCP_RECALC_LOSS_FACTOR 100
> |  +
> I have a quibble with this one: it is a CCID3 variable (not used in the global DCCP code),
> it would be better to rename it into CCID3_RECALC_LOSS_FACTOR and put it into ccid3.h
>
No problem. Will change the location.

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://imcdnzl.blogspot.com
WAND Network Research Group

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
  2006-12-21 13:20 ` Gerrit Renker
  2006-12-21 22:48 ` Ian McDonald
@ 2007-01-03 12:56 ` Gerrit Renker
  2007-01-03 22:22 ` Ian McDonald
                   ` (14 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Gerrit Renker @ 2007-01-03 12:56 UTC (permalink / raw)
  To: dccp

Hi Ian,

I appreciate the work you have put into this patch and have no doubt that
it may have cost a lot of time, but the problem here is one of perspective.

Let's separate the issues:

|  The RFC is a little confusing but I think I have carried out it's
|  intention correctly. I'll quote verbatim here the relevant parts:
|  
|     When calculating the average loss interval we need to decide whether
|     to include the interval since the most recent packet loss event.  We
|     only do this if it is sufficiently large to increase the average loss
|     interval.
|  
|     Thus if the most recent loss intervals are I_0 to I_n, with I_0 being
|     the interval since the most recent loss event, then we calculate the
|     average loss interval I_mean as:
|  
|  Notice here the "interval since the most recent packet loss event".
|  This implies (but would help if explicit) that this is an interval
|  with no loss. We only use if it would increase the average.
|  
|  Backing up my implications is that we have intervals from I_0 to I_n
|  which is actually n+1 intervals so therefore I read I_0 to be the
|  interval of non-loss. Does this help? Once you see that it all falls
|  into place.
|  
|  > If you mean this, then the patch is not necessary. The reason is that this statement
|  > refers to I_tot0 versus I_tot1. The statement
|  >
|  >   I_tot = max(I_tot0, I_tot1);
|  >
|  > takes care of using the `interval since the most recent packet loss event' only if
|  > it is sufficiently large. It is not explicit to see, but is implicitly contained in the
|  > way the two weighted averages are compared against each other: it boils down to
|  > comparing I_0 against the oldest 5 loss intervals.
|  >
|  > [ For a more detailed explanation, cf. the MSc thesis by Widmer, I also have a set of
|  >   notes on this case ]
|  >
|  I didn't mean that but someone had tried to implement it and had done
|  it incorrectly. The intent as I read is not to compare the calculation
|  with and without the most recent loss interval but to compare with and
|  without the non-loss interval.
 1) see previous posting - I think we are still having a misunderstanding here and
    I do think that what you cite above is already covered by the condition 
    I_tot = max(I_tot0, I_tot1)". That would mean, as soon as we implement [RFC 3448, 5.4],
    we have this sorted out (and this method has been tested for 7 years).

|  The implementation in the kernel, which was wrong logic, also had
|  wrong implementation! The code was taken over from FreeBSD (relicensed
|  under GPL) and we actually didn't analyse properly and were excluding
|  the wrong item from the loop as linked lists ran the opposite
|  direction in FreeBSD to Linux in this case!!
 2) if you think this has not been implemented correctly, then can you please point out
    > where <. The situation at the moment is: you are saying "X has been implemented
    wrongly, so I use Y to solve the problem". Y however has not much in common with X, so
    we are left with a broken method X and a non-standard method Y. 

|  > As far as I can see, it is non-standard.
|  
|  I believe it is standard as per earlier. It does boost performance but
|  correctly so. Search the archives for Burak and you will see what it
|  solves. It is particularly applicable to links like satellite with
|  fixed bandwidth. Think about say a 128 Kbits link and you get a loss
|  when you are at 40 Kbits per second. If you get no more loss then you
|  can never achieve your link speed as i_mean is not recalculated.
 3) I have a suggestion: you seem to be sure of the merits of this solution and report on 
    test results. My suggestion would be to offer using this method via a configuration
    option (same as per the nofeedback timer threshold). You could put detailed advice into
    the kernel configuration menu, and we would have the best of both worlds - a standards-
    compliant TFRC implementation and a configurable Ian's nifty solution. Would that settle
    things?


|  All this is is a method of not having to call calc_i_mean for EVERY
|  packet which would be very expensive on CPU. It is new but is a very
 4) This code is only invoked when ccid3_hc_rx_detect_loss() returns true, hence it is not
    invoked for every packet. 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (2 preceding siblings ...)
  2007-01-03 12:56 ` Gerrit Renker
@ 2007-01-03 22:22 ` Ian McDonald
  2007-01-04 23:25 ` Eddie Kohler
                   ` (13 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Ian McDonald @ 2007-01-03 22:22 UTC (permalink / raw)
  To: dccp

Gerrit,

Comments inline

On 1/4/07, Gerrit Renker <gerrit@erg.abdn.ac.uk> wrote:
>  1) see previous posting - I think we are still having a misunderstanding here and
>     I do think that what you cite above is already covered by the condition
>     I_tot = max(I_tot0, I_tot1)". That would mean, as soon as we implement [RFC 3448, 5.4],
>     we have this sorted out (and this method has been tested for 7 years).
>

I agree we have a misunderstanding here with each other. I would like
to see the comments from Eddie and others. We both think we're right!!
I'm not prepared to move yet on my position but could still be
persuaded.

Probably the easiest way to illustrate this is:
- Fixed 128 kbits second link.
- Loss occurs when we are only up to 32 kbits per second
- No loss then occurs again or for a very long time
- How do we then use the full bandwidth?

This is what I think the RFC is saying.

> |  The implementation in the kernel, which was wrong logic, also had
> |  wrong implementation! The code was taken over from FreeBSD (relicensed
> |  under GPL) and we actually didn't analyse properly and were excluding
> |  the wrong item from the loop as linked lists ran the opposite
> |  direction in FreeBSD to Linux in this case!!
>  2) if you think this has not been implemented correctly, then can you please point out
>     > where <. The situation at the moment is: you are saying "X has been implemented
>     wrongly, so I use Y to solve the problem". Y however has not much in common with X, so
>     we are left with a broken method X and a non-standard method Y.
>
That is this part of the patch:

u32 dccp_li_hist_calc_i_mean(struct ccid3_hc_rx_sock *hcrx, struct sk_buff *skb)
 {

 	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
 		if (li_entry->dccplih_interval != ~0U) {
 			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
 			w_tot  += dccp_li_hist_w[i];
-			if (i != 0)
-				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
-		}
-
+			if (i != 7)
+				i_tot1 += li_entry->dccplih_interval
+				   * dccp_li_hist_w[i + 1];
+

If you want me to split that into a separate patch I'm fine with that
as I think you can see that is quite separate from the rest.


>  3) I have a suggestion: you seem to be sure of the merits of this solution and report on
>     test results. My suggestion would be to offer using this method via a configuration
>     option (same as per the nofeedback timer threshold). You could put detailed advice into
>     the kernel configuration menu, and we would have the best of both worlds - a standards-
>     compliant TFRC implementation and a configurable Ian's nifty solution. Would that settle
>     things?
>
I'll wait to see the feedback from others first before heading down
that path. Maybe I can even convince you yet! But if we can't resolve
that would be good.

>
> |  All this is is a method of not having to call calc_i_mean for EVERY
> |  packet which would be very expensive on CPU. It is new but is a very
>  4) This code is only invoked when ccid3_hc_rx_detect_loss() returns true, hence it is not
>     invoked for every packet.
>

What I'm referring to here is this part of the patch:
@@ -955,6 +958,15 @@ static void ccid3_hc_rx_packet_recv(struct sock
*sk, struct sk_buff *skb)
 		if (loss)
 			break;

+		if (hcrx->ccid3hcrx_seq_recalc_loss &&
+		 after48(dccp_hdr_seq(skb), hcrx->ccid3hcrx_seq_recalc_loss)) {

If we didn't calculate ccid3hcrx_seq_recalc_loss I would have to run
the code to detect whether we had a long non-loss interval EVERY time
we receive a packet which would use lots of CPU cycles.

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://imcdnzl.blogspot.com
WAND Network Research Group

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (3 preceding siblings ...)
  2007-01-03 22:22 ` Ian McDonald
@ 2007-01-04 23:25 ` Eddie Kohler
  2007-01-04 23:31 ` Ian McDonald
                   ` (12 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Eddie Kohler @ 2007-01-04 23:25 UTC (permalink / raw)
  To: dccp

Guys,

I can't follow this code 100%, but here is an analysis of what I can 
understand.  Summary: Ian is correct (I think).

 From Ian's patch, it appears that the OLD code DID NOT include the most 
recent loss interval (i.e., the incomplete loss interval, the one that has no 
losses) in its calculation of i_tot1.  Ian has added the following line to do 
this:

 > +	i_tot1 += non_loss * dccp_li_hist_w[0];

This is correct.  The RFC requires that one of the i_tots include the 
incomplete loss interval.  So this part of the patch is required for RFC 
compliance.

I am assuming however that the incomplete loss interval is NOT included in the 
'hcrx->ccid3hcrx_li_hist' list.  If that list DOES include the incomplete loss 
interval, then the code would need to be different.

I don't quite get why one needs dccp_li_hist_recalc_recalcloss.  One could do 
probably do that simpler, and maybe Ian can explain his reasoning.  Why is it 
necessary at all?  But RFC3448 does require that one use the incomplete loss 
interval in the i_tot calculations.

One nit.  If you are following RFC terminology i_tot0 should include non_loss, 
but in the code i_tot1 does.  The code seems correct except for the switching 
of i_tot0 and i_tot1.

Eddie



Ian McDonald wrote:
> This works out when to recalculate the loss rate due to significant
> non-loss interval. We do this at time of recalulating loss rate to save
> having to do it every packet. Instead we just check if it's time to
> recalculate. It looked like there was an early attempt to do this but it
> was quite wrong in its implementation.
> 
> This is making the code conform to RFC 3448, section 5.4
> 
> I've added some extra debugging which is useful in testing as well. In
> loss_interval.c I've just used dccp_pr_debug. I tried to use ccid3_pr_debug
> but ran into problems with circular references.
> 
> This patch applies on top of Gerrit's patches.
> 
> If possible I'd like this to go into 2.6.21 as it's been reported broken by
> about 4 or 5 people.
> 
> Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
> ---
> diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
> index e7eeeeb..a74f905 100644
> --- a/net/dccp/ccids/ccid3.c
> +++ b/net/dccp/ccids/ccid3.c
> @@ -36,9 +36,9 @@
>  #include "../ccid.h"
>  #include "../dccp.h"
>  #include "lib/packet_history.h"
> -#include "lib/loss_interval.h"
>  #include "lib/tfrc.h"
>  #include "ccid3.h"
> +#include "lib/loss_interval.h"
>  
>  static struct dccp_tx_hist *ccid3_tx_hist;
>  static struct dccp_rx_hist *ccid3_rx_hist;
> @@ -384,6 +384,9 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
>  	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
>  	packet->dccphtx_sent   = 1;
>  	hctx->ccid3hctx_idle   = 0;
> +
> +	ccid3_pr_debug("seqno = %llu, rtt = %u\n",
> +	   (long long unsigned)packet->dccphtx_seqno, packet->dccphtx_rtt);
>  }
>  
>  static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
> @@ -810,7 +813,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
>   *
>   * returns estimated loss interval in usecs */
>  
> -static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
> +static u32 ccid3_hc_rx_calc_first_li(struct sock *sk, struct sk_buff *skb)
>  {
>  	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
>  	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
> @@ -901,13 +904,31 @@ found:
>  	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
>  		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
>  
> -	if (p = 0)
> +	if (p = 0) {
> +		DCCP_WARN("p=0, fval = %llu\n", fval);
>  		return ~0;
> -	else
> -		return 1000000 / p;
> +	} else {
> +		u32 new_interval = 1000000 / p;
> +		u64 recalc_interval;
> +
> +		if (new_interval < 150)
> +			recalc_interval = new_interval + 1;
> +		else
> +			recalc_interval = (new_interval *
> +			(DCCP_RECALC_LOSS_FACTOR+1)) / DCCP_RECALC_LOSS_FACTOR;
> +		hcrx->ccid3hcrx_seq_recalc_loss = dccp_hdr_seq(skb);
> +		add48(&hcrx->ccid3hcrx_seq_recalc_loss, recalc_interval);
> +		ccid3_pr_debug("%s(%p), interval=%u, recalc_interval=%llu, "
> +		   "seqno=%llu recalc_loss=%llu\n", dccp_role(sk), sk,
> +		   new_interval, recalc_interval, dccp_hdr_seq(skb),
> +		   hcrx->ccid3hcrx_seq_recalc_loss);
> +
> +		return new_interval;
> +	}
>  }
>  
> -static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
> +static void ccid3_hc_rx_update_li(struct sock *sk, struct sk_buff *skb,
> +                                  u64 seq_loss, u8 win_loss)
>  {
>  	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
>  	struct dccp_li_hist_entry *head;
> @@ -920,7 +941,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
>  
>  		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
>  		   struct dccp_li_hist_entry, dccplih_node);
> -		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
> +		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk, skb);
>  	} else {
>  		struct dccp_li_hist_entry *entry;
>  		struct list_head *tail;
> @@ -951,10 +972,12 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
>  		entry->dccplih_seqno = seq_loss;
>  		entry->dccplih_interval = seq_temp;
>  		entry->dccplih_win_count = win_loss;
> +		ccid3_pr_debug("adding node seqno=%llu, interval=%u\n",
> +		   (u64)entry->dccplih_seqno, entry->dccplih_interval);
>  	}
>  }
>  
> -static int ccid3_hc_rx_detect_loss(struct sock *sk,
> +static int ccid3_hc_rx_detect_loss(struct sock *sk, struct sk_buff *skb,
>                                      struct dccp_rx_hist_entry *packet)
>  {
>  	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
> @@ -979,7 +1002,7 @@ static int ccid3_hc_rx_detect_loss(struct sock *sk,
>  	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
>  	   > TFRC_RECV_NUM_LATE_LOSS) {
>  		loss = 1;
> -		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
> +		ccid3_hc_rx_update_li(sk, skb, hcrx->ccid3hcrx_seqno_nonloss,
>  		   hcrx->ccid3hcrx_ccval_nonloss);
>  		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
>  		dccp_inc_seqno(&tmp_seqno);
> @@ -1067,7 +1090,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
>  		return;
>  	}
>  
> -	loss = ccid3_hc_rx_detect_loss(sk, packet);
> +	loss = ccid3_hc_rx_detect_loss(sk, skb, packet);
>  
>  	if (DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK)
>  		return;
> @@ -1088,6 +1111,15 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
>  		if (loss)
>  			break;
>  
> +		if (hcrx->ccid3hcrx_seq_recalc_loss &&
> +		 after48(dccp_hdr_seq(skb), hcrx->ccid3hcrx_seq_recalc_loss)) {
> +			ccid3_pr_debug("%s(%p, state=%s) checking loss "
> +			   "recalc skb=%llu, recalc_loss = %llu\n",
> +			   dccp_role(sk), sk, dccp_state_name(sk->sk_state),
> +			   dccp_hdr_seq(skb),hcrx->ccid3hcrx_seq_recalc_loss);
> +			break;
> +		}
> +
>  		dccp_timestamp(sk, &now);
>  		if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) -
>  		     (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) {
> @@ -1100,15 +1132,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
>  		return;
>  	}
>  
> -	/* Dealing with packet loss */
> -	ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
> +	/* Dealing with loss intervals*/
> +	if (loss)
> +		ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n",
>  		       dccp_role(sk), sk, dccp_state_name(sk->sk_state));
>  
>  	p_prev = hcrx->ccid3hcrx_p;
>  	
>  	/* Calculate loss event rate */
>  	if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
> -		u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
> +		u32 i_mean = dccp_li_hist_calc_i_mean(hcrx, skb);
>  
>  		/* Scaling up by 1000000 as fixed decimal */
>  		if (i_mean != 0)
> @@ -1116,7 +1149,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
>  	} else
>  		DCCP_BUG("empty loss history");
>  
> -	if (hcrx->ccid3hcrx_p > p_prev) {
> +	if (hcrx->ccid3hcrx_p != p_prev) {
> +		ccid3_pr_debug("p_prev = %u, hcrx_p = %u\n", p_prev,
> +		   hcrx->ccid3hcrx_p);
>  		ccid3_hc_rx_send_feedback(sk);
>  		return;
>  	}
> @@ -1135,6 +1170,7 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
>  	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
>  	hcrx->ccid3hcrx_s   = 0;
>  	hcrx->ccid3hcrx_rtt = 0;
> +	hcrx->ccid3hcrx_seqno_nonloss = 0;
>  	return 0;
>  }
>  
> diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
> index 2b18db4..99182bf 100644
> --- a/net/dccp/ccids/ccid3.h
> +++ b/net/dccp/ccids/ccid3.h
> @@ -158,6 +158,7 @@ enum ccid3_hc_rx_states {
>   *  @ccid3hcrx_s  -  Received packet size in bytes
>   *  @ccid3hcrx_pinv  -  Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
>   *  @ccid3hcrx_elapsed_time  -  Time since packet reception
> + *  @ccid3hcrx_seq_recalc_loss - recalc loss due to nonloss (RFC 3448, 5.4)
>   */
>  struct ccid3_hc_rx_sock {
>  	struct tfrc_rx_info		ccid3hcrx_tfrc;
> @@ -176,6 +177,7 @@ struct ccid3_hc_rx_sock {
>  	u16				ccid3hcrx_s;
>  	u32				ccid3hcrx_pinv;
>  	u32				ccid3hcrx_elapsed_time;
> +	u64				ccid3hcrx_seq_recalc_loss;
>  };
>  
>  static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
> diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
> index 372d7e7..cd03ae0 100644
> --- a/net/dccp/ccids/lib/loss_interval.c
> +++ b/net/dccp/ccids/lib/loss_interval.c
> @@ -14,6 +14,7 @@
>  #include <linux/module.h>
>  #include <net/sock.h>
>  #include "../../dccp.h"
> +#include "../ccid3.h"
>  #include "loss_interval.h"
>  
>  struct dccp_li_hist *dccp_li_hist_new(const char *name)
> @@ -81,31 +82,106 @@ static const int dccp_li_hist_w[DCCP_LI_HIST_IVAL_F_LENGTH] = {
>  	4, 4, 4, 4, 3, 2, 1, 1,
>  };
>  
> -u32 dccp_li_hist_calc_i_mean(struct list_head *list)
> +/* This code implements the part of section 5.4 of RFC3448 which says we should
> + * recalculate the average loss interval if we have a sufficient long loss
> + * interval.
> + *
> + * Given that i_mean = weighted average of loss then we can say that
> + * 4*n + 4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6] > + *  y*(4*i[0] + 4*i[i] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])
> + *
> + * where y is a factor so that we don't check as soon as we hit average
> + * interval and waste CPU cycles needlessly. n is the non-loss interval
> + * and i[x] are the loss intervals. We don't need to divide by the sum
> + * of the weighting as these cancel out on each side.
> + *
> + * We can solve for this equation for n which yields:
> + * n > + *  ((4*i[0] + 4*i[1] + 4*i[2] + 4*i[3] + 3*i[4]+ 2*i[5] + i[6] + i[7])/4) -
> + *  (y*(4*i[0] + 4*i[1] + 4*i[2] + 3*i[3] + 2*i[4] + i[5] + i[6])/4) */
> +
> +static u64 dccp_li_hist_recalc_recalcloss(struct sk_buff *skb,
> +                                        u32 i_tot0, u32 i_tot1)
> +{
> +	u64 recalcloss_seq = dccp_hdr_seq(skb);
> +
> +	dccp_pr_debug("recalcloss_seq=%llu\n", recalcloss_seq);
> +
> +	if (!i_tot0) {
> +		DCCP_WARN("i_tot0 = 0\n");
> +		return 0;
> +	}
> +
> +	/* We don't adjust if > 1 million as will get overflow
> +	 * in calculations and not so serious anyway */
> +	if (i_tot0 > 1000000) {
> +		DCCP_WARN("i_mean > 100000\n");
> +		return 0;
> +	}
> +
> +	i_tot0 /= 4;
> +	i_tot1 = (i_tot1 * (DCCP_RECALC_LOSS_FACTOR+1))/
> +	   (DCCP_RECALC_LOSS_FACTOR*4);
> +
> +	if (i_tot1 >= i_tot0) {
> +		dccp_pr_debug("only incrementing by 1\n");
> +		add48(&recalcloss_seq, 1);
> +	} else
> +		add48(&recalcloss_seq,(u64)(i_tot0 - i_tot1));
> +
> +	dccp_pr_debug("recalcloss_seq=%llu, i_tot0=%u, i_tot1=%u\n",
> +	   recalcloss_seq, i_tot0, i_tot1);
> +
> +	return recalcloss_seq;
> +}
> +
> +u32 dccp_li_hist_calc_i_mean(struct ccid3_hc_rx_sock *hcrx, struct sk_buff *skb)
>  {
>  	struct dccp_li_hist_entry *li_entry, *li_next;
> -	int i = 0;
> +	unsigned int i = 0;
>  	u32 i_tot;
>  	u32 i_tot0 = 0;
>  	u32 i_tot1 = 0;
>  	u32 w_tot  = 0;
> +	u64 non_loss = 0;
> +	u32 i_mean;
> +	struct list_head *list = &hcrx->ccid3hcrx_li_hist;
>  
>  	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
>  		if (li_entry->dccplih_interval != ~0U) {
>  			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
>  			w_tot  += dccp_li_hist_w[i];
> -			if (i != 0)
> -				i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1];
> -		}
> -
> +			if (i != 7)
> +				i_tot1 += li_entry->dccplih_interval
> +				   * dccp_li_hist_w[i + 1];
> +			if (i = 0) {
> +				non_loss = dccp_hdr_seq(skb);
> +				sub48(&non_loss, li_entry->dccplih_seqno);
> +			}
> +			dccp_pr_debug("i=%d, interval=%u, seqno=%llu, "
> +			  "i_tot0=%u, i_tot1=%u, w_tot=%u\n", i,
> +			  li_entry->dccplih_interval,
> +			  (u64)li_entry->dccplih_seqno,i_tot0, i_tot1, w_tot);
> +		} else
> +			dccp_pr_debug("empty loss interval\n");
>  
>  		if (++i > DCCP_LI_HIST_IVAL_F_LENGTH)
>  			break;
>  	}
>  
> -	if (i != DCCP_LI_HIST_IVAL_F_LENGTH)
> +	if (i != DCCP_LI_HIST_IVAL_F_LENGTH) {
> +		DCCP_BUG("Length of loss interval list is %u\n", i);
>  		return 0;
> +	}
>  
> +	hcrx->ccid3hcrx_seq_recalc_loss > +	   dccp_li_hist_recalc_recalcloss(skb, i_tot0, i_tot1);
> +
> +	i_tot1 += non_loss * dccp_li_hist_w[0];
> +	if (i_tot1 > i_tot0)
> +		dccp_pr_debug("i_mean recalculateed due to high nonloss "
> +		   " interval seqno=%llu nonloss=%llu i_tot0=%u, i_tot1=%u\n",
> +		   dccp_hdr_seq(skb), non_loss, i_tot0, i_tot1);
>  	i_tot = max(i_tot0, i_tot1);
>  
>  	if (!w_tot) {
> @@ -113,7 +189,10 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
>  		return 1;
>  	}
>  
> -	return i_tot / w_tot;
> +	i_mean = i_tot / w_tot;
> +	dccp_pr_debug("i_mean=%u\n", i_mean);
> +
> +	return i_mean;
>  }
>  
>  EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
> @@ -131,7 +210,7 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
>  			DCCP_BUG("loss interval list entry is NULL");
>  			return 0;
>  		}
> -		entry->dccplih_interval = ~0;
> +		entry->dccplih_interval = ~0U;
>  		list_add(&entry->dccplih_node, list);
>  	}
>  
> diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
> index eb25701..6fed81b 100644
> --- a/net/dccp/ccids/lib/loss_interval.h
> +++ b/net/dccp/ccids/lib/loss_interval.h
> @@ -50,7 +50,8 @@ static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
>  extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
>  			       struct list_head *list);
>  
> -extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
> +extern u32 dccp_li_hist_calc_i_mean(struct ccid3_hc_rx_sock *hcrx,
> +   struct sk_buff *skb);
>  
>  extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
>     struct list_head *list, const u64 seq_loss, const u8 win_loss);
> diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
> index dd0a986..546ca70 100644
> --- a/net/dccp/dccp.h
> +++ b/net/dccp/dccp.h
> @@ -80,6 +80,11 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
>  
>  #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
>  
> +/* Value as a reciprocal to check for change in loss through long
> + * non-loss intervals. If you change this you must recheck existing
> + * code for overflow possibilities */
> +#define DCCP_RECALC_LOSS_FACTOR 100
> +
>  /* sysctl variables for DCCP */
>  extern int  sysctl_dccp_request_retries;
>  extern int  sysctl_dccp_retries1;
> -
> To unsubscribe from this list: send the line "unsubscribe dccp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (4 preceding siblings ...)
  2007-01-04 23:25 ` Eddie Kohler
@ 2007-01-04 23:31 ` Ian McDonald
  2007-01-04 23:45 ` [dccp] " Eddie Kohler
                   ` (11 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Ian McDonald @ 2007-01-04 23:31 UTC (permalink / raw)
  To: dccp

On 1/5/07, Eddie Kohler <kohler@cs.ucla.edu> wrote:
> I am assuming however that the incomplete loss interval is NOT included in the
> 'hcrx->ccid3hcrx_li_hist' list.  If that list DOES include the incomplete loss
> interval, then the code would need to be different.
>
Correct.

> I don't quite get why one needs dccp_li_hist_recalc_recalcloss.  One could do
> probably do that simpler, and maybe Ian can explain his reasoning.  Why is it
> necessary at all?  But RFC3448 does require that one use the incomplete loss
> interval in the i_tot calculations.
>
The reason for this is if you are recalculating i_mean based on non
loss you should check after every packet received. However this
involves quite a lot of calculations on linked lists which are CPU
intensive and also stall other processes potentially with locks being
taken. So what I've done is looked at how many packets of non loss
would be required to alter i_mean. This is then added to the current
sequence number and stored in hist_recalc_recalcloss. I then just do a
simple comparison on every packet to see if we've met this high water
mark.

> One nit.  If you are following RFC terminology i_tot0 should include non_loss,
> but in the code i_tot1 does.  The code seems correct except for the switching
> of i_tot0 and i_tot1.
>
I'll look into that when I get a chance.

Thanks Eddie,

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://imcdnzl.blogspot.com
WAND Network Research Group

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dccp] Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (5 preceding siblings ...)
  2007-01-04 23:31 ` Ian McDonald
@ 2007-01-04 23:45 ` Eddie Kohler
  2007-01-04 23:58 ` Ian McDonald
                   ` (10 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Eddie Kohler @ 2007-01-04 23:45 UTC (permalink / raw)
  To: dccp

> The reason for this is if you are recalculating i_mean based on non
> loss you should check after every packet received. However this
> involves quite a lot of calculations on linked lists which are CPU
> intensive and also stall other processes potentially with locks being
> taken. So what I've done is looked at how many packets of non loss
> would be required to alter i_mean. This is then added to the current
> sequence number and stored in hist_recalc_recalcloss. I then just do a
> simple comparison on every packet to see if we've met this high water
> mark.

I guess there's a minor 4-byte space tradeoff here, but it would seem simpler 
just to store i_tot0 and i_tot1 as variables in the ccid3 structure.  Then on 
every consecutive non-lost packet you simply increment i_tot1 by 4 and 
recalculate i_mean.

Eddie

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dccp] Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (6 preceding siblings ...)
  2007-01-04 23:45 ` [dccp] " Eddie Kohler
@ 2007-01-04 23:58 ` Ian McDonald
  2007-01-05  0:10 ` Eddie Kohler
                   ` (9 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Ian McDonald @ 2007-01-04 23:58 UTC (permalink / raw)
  To: dccp

On 1/5/07, Eddie Kohler <kohler@cs.ucla.edu> wrote:
> > The reason for this is if you are recalculating i_mean based on non
> > loss you should check after every packet received. However this
> > involves quite a lot of calculations on linked lists which are CPU
> > intensive and also stall other processes potentially with locks being
> > taken. So what I've done is looked at how many packets of non loss
> > would be required to alter i_mean. This is then added to the current
> > sequence number and stored in hist_recalc_recalcloss. I then just do a
> > simple comparison on every packet to see if we've met this high water
> > mark.
>
> I guess there's a minor 4-byte space tradeoff here, but it would seem simpler
> just to store i_tot0 and i_tot1 as variables in the ccid3 structure.  Then on
> every consecutive non-lost packet you simply increment i_tot1 by 4 and
> recalculate i_mean.
>
> Eddie
>
Don't you still need to iterate through each element of the list then
and also do some divisions? This becomes expensive quickly if done for
every packet. With my code for example if you are running at 1% loss
then you will only recalculate roughly every 100 packets and only then
if you haven't had a loss.

I would like to change the loss interval linked list to a fixed size 8
element array as we're not changing the size of the linked list at any
time so it is very inefficient (probably a text book case of when not
to use a linked list!!)

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://imcdnzl.blogspot.com
WAND Network Research Group

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dccp] Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (7 preceding siblings ...)
  2007-01-04 23:58 ` Ian McDonald
@ 2007-01-05  0:10 ` Eddie Kohler
  2007-01-05  0:24 ` Ian McDonald
                   ` (8 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Eddie Kohler @ 2007-01-05  0:10 UTC (permalink / raw)
  To: dccp

You shouldn't need to iterate through the list, since i_mean is just 
i_tot/w_tot, and w_tot is a constant.  You do need to divide, though.

If it makes no difference to you I'd recommend going with the simpler version 
-- the logic in dccp_li_hist_recalc_recalcloss is difficult to follow; I 
wouldn't want to be on the hook for its correctness ;)

Also, the weights in dccp_li_hist_w appear to be wrong.  They should be 5, 5, 
5, 5, 4, 3, 2, 1, not 4,4,4,4,3,2,1,1, according to rfc3448.

Eddie


Ian McDonald wrote:
> Don't you still need to iterate through each element of the list then
> and also do some divisions? This becomes expensive quickly if done for
> every packet. With my code for example if you are running at 1% loss
> then you will only recalculate roughly every 100 packets and only then
> if you haven't had a loss.
> 
> I would like to change the loss interval linked list to a fixed size 8
> element array as we're not changing the size of the linked list at any
> time so it is very inefficient (probably a text book case of when not
> to use a linked list!!)
> 
> Ian

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dccp] Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (8 preceding siblings ...)
  2007-01-05  0:10 ` Eddie Kohler
@ 2007-01-05  0:24 ` Ian McDonald
  2007-01-05  0:34 ` Eddie Kohler
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Ian McDonald @ 2007-01-05  0:24 UTC (permalink / raw)
  To: dccp

On 1/5/07, Eddie Kohler <kohler@cs.ucla.edu> wrote:
> You shouldn't need to iterate through the list, since i_mean is just
> i_tot/w_tot, and w_tot is a constant.  You do need to divide, though.
>
> If it makes no difference to you I'd recommend going with the simpler version
> -- the logic in dccp_li_hist_recalc_recalcloss is difficult to follow; I
> wouldn't want to be on the hook for its correctness ;)
>

I'll have a look at it later on, but don't have much free time at
present due to family responsibilities. I've done quite a lot of
testing and believe to be correct, and have included a lot of the
thinking in the comments. The thing is that it doesn't actually
calculate i_mean itself so that same base calculation is used.

> Also, the weights in dccp_li_hist_w appear to be wrong.  They should be 5, 5,
> 5, 5, 4, 3, 2, 1, not 4,4,4,4,3,2,1,1, according to rfc3448.
>
I don't believe so. We've done this as per section 8 of RFC3448 and
divide by 4 rather than 5.

-- 
Web: http://wand.net.nz/~iam4
Blog: http://imcdnzl.blogspot.com
WAND Network Research Group

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dccp] Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (9 preceding siblings ...)
  2007-01-05  0:24 ` Ian McDonald
@ 2007-01-05  0:34 ` Eddie Kohler
  2007-01-05 14:47 ` Gerrit Renker
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Eddie Kohler @ 2007-01-05  0:34 UTC (permalink / raw)
  To: dccp

>> Also, the weights in dccp_li_hist_w appear to be wrong.  They should 
>> be 5, 5,
>> 5, 5, 4, 3, 2, 1, not 4,4,4,4,3,2,1,1, according to rfc3448.
>>
> I don't believe so. We've done this as per section 8 of RFC3448 and
> divide by 4 rather than 5.
> 

Ah yes, never mind.
Eddie

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (10 preceding siblings ...)
  2007-01-05  0:34 ` Eddie Kohler
@ 2007-01-05 14:47 ` Gerrit Renker
  2007-01-05 16:00 ` Eddie Kohler
                   ` (5 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Gerrit Renker @ 2007-01-05 14:47 UTC (permalink / raw)
  To: dccp

Hi Ian,

sorry for the misunderstandings. I do think that you raise important issues. However, the problem
I had with the patch is that it is actually _two_ patches:

	(1) One is fix to make the loss interval calculation compatible with RFC 3448, section 5.4
            Here I couldn't agree with you more and this is a priority issue. However, as I sat down
            and went through your patch, I noted that there are other problems, too. I will post a
            write-up of what I found. Now I see that Eddie has also confirmed that there are problems
            with this code, so we should follow this up.

	(2) The other is a novel method of recalculating the loss rate when, after some first loss,
            there has been a longer period of non-loss (did I get this right?). This is not tackled
            by an RFC so far, hence is experimental.

The fact that there are two patches in one made it difficult to reply and caused the misunderstandings.
It would have been easier to follow if the patch had been split into two; sorry I was not able to follow
the format as it was. 

Now, for (1) we have a bona-fide bug fix and this should be given priority over other issues. With regard
to (2), I say the following: if you can prove or show that using the technique which is embedded into your
algorithm leads to performance improvements in cases such as the following:

|  Probably the easiest way to illustrate this is:
|  - Fixed 128 kbits second link.
|  - Loss occurs when we are only up to 32 kbits per second
|  - No loss then occurs again or for a very long time
|  - How do we then use the full bandwidth?

then you have tackled an original problem - too precious to just code away as a patch. As far as I know,
this problem has _not been tackled_ neither by any RFC (the closest I can find is RFC 4342, 5.1) nor an Internet
Draft. Ergo, you would have the material for writing an Internet draft which, if the technique indeed helps
performance, would be a valuable contribution to the CCID 3 infrastructure.

|  This is what I think the RFC is saying.
See above :-)

|  >  3) I have a suggestion: you seem to be sure of the merits of this solution and report on
|  >     test results. My suggestion would be to offer using this method via a configuration
|  >     option (same as per the nofeedback timer threshold). You could put detailed advice into
|  >     the kernel configuration menu, and we would have the best of both worlds - a standards-
|  >     compliant TFRC implementation and a configurable Ian's nifty solution. Would that settle
|  >     things?
|  >
|  I'll wait to see the feedback from others first before heading down
|  that path. Maybe I can even convince you yet! But if we can't resolve
|  that would be good.
I can reformulate this now with regard to (2): the technique is novel and so far not documented
in any RFC/Internet Draft. It would be a valuable addition, but for testing it would be better to have
it as a configuration option so that people can play with it and note the differences between using it
and not using it. Given that most of it is already in the dccp_li_hist_recalcloss function, it does
not seem too hard to encapsulate the algorithm into its own separate function. 

Given that you are busy at the moment and that I am bogged down with trying to sort out TX/RX history
locking issues, can we put this on top of the todo list? With regard to the bug fix, I will summarize
in separate posting.

Gerrit

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (11 preceding siblings ...)
  2007-01-05 14:47 ` Gerrit Renker
@ 2007-01-05 16:00 ` Eddie Kohler
  2007-01-05 16:31 ` Gerrit Renker
                   ` (4 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Eddie Kohler @ 2007-01-05 16:00 UTC (permalink / raw)
  To: dccp

Gerrit,

> Now, for (1) we have a bona-fide bug fix and this should be given priority over other issues. With regard
> to (2), I say the following: if you can prove or show that using the technique which is embedded into your
> algorithm leads to performance improvements in cases such as the following:
> 
> |  Probably the easiest way to illustrate this is:
> |  - Fixed 128 kbits second link.
> |  - Loss occurs when we are only up to 32 kbits per second
> |  - No loss then occurs again or for a very long time
> |  - How do we then use the full bandwidth?
> 
> then you have tackled an original problem - too precious to just code away as a patch. As far as I know,
> this problem has _not been tackled_ neither by any RFC (the closest I can find is RFC 4342, 5.1) nor an Internet
> Draft. Ergo, you would have the material for writing an Internet draft which, if the technique indeed helps
> performance, would be a valuable contribution to the CCID 3 infrastructure.

Um, you misunderstand.  The problem that Ian's patch addresses seems 
entirely within the purview of RFC 3448/4342.  He is not trying to 
innovate.  Recalculating the loss rate after a longer period of non-loss 
is NOT EXPERIMENTAL it is REQUIRED.  The loss event rate is NOT 
calculated ONLY when losses are detected.  It must be calculated on 
every feedback packet.  I don't have time at the moment to look up 
chapter and verse, maybe later.

Ian's implementation strategy is unconventional, it's true, but he is 
attempting to implement RFC-compliant behavior, no more, no less.  He 
might have a bug, but his code is strictly better and more RFC compliant 
than what exists.

I think (for what it's worth) his patch should be accepted as is, and 
maybe he, or someone, will simplify the recalc_recalcloss part later.

Eddie



>  
> |  This is what I think the RFC is saying.
> See above :-)
> 
> 
> 
> |  >  3) I have a suggestion: you seem to be sure of the merits of this solution and report on
> |  >     test results. My suggestion would be to offer using this method via a configuration
> |  >     option (same as per the nofeedback timer threshold). You could put detailed advice into
> |  >     the kernel configuration menu, and we would have the best of both worlds - a standards-
> |  >     compliant TFRC implementation and a configurable Ian's nifty solution. Would that settle
> |  >     things?
> |  >
> |  I'll wait to see the feedback from others first before heading down
> |  that path. Maybe I can even convince you yet! But if we can't resolve
> |  that would be good.
> I can reformulate this now with regard to (2): the technique is novel and so far not documented
> in any RFC/Internet Draft. It would be a valuable addition, but for testing it would be better to have
> it as a configuration option so that people can play with it and note the differences between using it
> and not using it. Given that most of it is already in the dccp_li_hist_recalcloss function, it does
> not seem too hard to encapsulate the algorithm into its own separate function. 
> 
> Given that you are busy at the moment and that I am bogged down with trying to sort out TX/RX history
> locking issues, can we put this on top of the todo list? With regard to the bug fix, I will summarize
> in separate posting.
> 
> Gerrit
> -
> To unsubscribe from this list: send the line "unsubscribe dccp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (12 preceding siblings ...)
  2007-01-05 16:00 ` Eddie Kohler
@ 2007-01-05 16:31 ` Gerrit Renker
  2007-01-05 16:36 ` [dccp] " Gerrit Renker
                   ` (3 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Gerrit Renker @ 2007-01-05 16:31 UTC (permalink / raw)
  To: dccp

|  Um, you misunderstand.  The problem that Ian's patch addresses seems 
|  entirely within the purview of RFC 3448/4342.  He is not trying to 
|  innovate.  Recalculating the loss rate after a longer period of non-loss 
|  is NOT EXPERIMENTAL it is REQUIRED.  The loss event rate is NOT 
|  calculated ONLY when losses are detected.  It must be calculated on 
|  every feedback packet.  I don't have time at the moment to look up 
|  chapter and verse, maybe later.
Please take another look at the patch: what you are saying is true but applies to the _sender_, but Ian's
code is entirely within the _receiver_. We are not at 4342 yet, at the moment the receiver calculates
the loss event rate and then contacts the receiver.

Can you please look up chapter and verse -- I have read all related documents and can only identify
section 5.4 of RFC 3448. With regard to this, we are in agreement: this needs to be audited to be conform.

But with regard to recalculating the loss rate after a longer period of non-loss: 

 => either there is indeed a reference for this in the current RFCs (it is not [RFC 3448, 5.4] and not the
     corresponding section in 3448bis) but then neither Ian nor I have found this

 => or there is no such reference and then we should clearly separate the issues into (a) making sure that
     the code complies to [RFC 3448, 5.4] and (b) what do we do with the "recalculating the loss rate after
     a longer period of non-loss" ?

|  I think (for what it's worth) his patch should be accepted as is, and 
|  maybe he, or someone, will simplify the recalc_recalcloss part later.
I agree with the points you raised, but disagree with adding an experimental part. The reason is that
the CCID 3 module is still seriously broken:

	* from the output of dccp_probe one can see that the entire system is oscillating and not
          reaching a stable fixed point; even under ideal, non-loss conditions
        * the behaviour using tools such as iperf is unpredictable
        * performance is very poor: sometimes, on a good day one gets 50% of the bandwidth, but at other
          test runs the speed goes down to only a couple of Mbits/sec (and this with a loss rate of 0)

Therefore my suggestion is for the moment to only put in changes which are documented in the RFCs, fix
all these bugs, and add everything else once the basic problems have been sorted out.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [dccp] Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (13 preceding siblings ...)
  2007-01-05 16:31 ` Gerrit Renker
@ 2007-01-05 16:36 ` Gerrit Renker
  2007-01-05 21:38 ` Eddie Kohler
                   ` (2 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Gerrit Renker @ 2007-01-05 16:36 UTC (permalink / raw)
  To: dccp

|  I would like to change the loss interval linked list to a fixed size 8
|  element array as we're not changing the size of the linked list at any
|  time so it is very inefficient (probably a text book case of when not
|  to use a linked list!!)
Absolutely agree - I was scratching my head about the complexity of the code
and with an array we would have simpler code. 

Witness for instance, dccp_li_hist_interval_new() -- it allocates a fixed list of
length 8, the corresponding (static) array allocation is a one-liner. 

In ccid3_hc_rx_update_li(), there is a complex allocation when the list has not
just been created:

	* first list_add() is called to enqueue at the head of the list
	* and then list_del() is called to remove the oldest loss interval

This is a very complex way of managing a ring buffer. Adding to this that we are
using ~0U to identify empty loss intervals, we would probably much better off
with an array; but this array would have to be managed in the same way as a ring
buffer (but that is not difficult, also textbook stuff)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (14 preceding siblings ...)
  2007-01-05 16:36 ` [dccp] " Gerrit Renker
@ 2007-01-05 21:38 ` Eddie Kohler
  2007-01-08 15:05 ` Gerrit Renker
  2007-01-10  1:39 ` Ian McDonald
  17 siblings, 0 replies; 19+ messages in thread
From: Eddie Kohler @ 2007-01-05 21:38 UTC (permalink / raw)
  To: dccp

Gerrit

Chapter and verse, all from RFC 3448.

5.4: "When calculating the average loss interval we need to decide whether
    to include the interval since the most recent packet loss event.  We
    only do this if it is sufficiently large to increase the average loss
    interval."

    => If the time since the last loss interval will increase the average 
loss interval, then it should be included.
    => Note that the time since the last loss interval will increase on every 
successful packet receipt.
    => Therefore we may need to recalculate the average loss interval after 
every successful packet receipt (or, at the sender, on every feedback packet).

5.4: "I_0 being the interval since the most recent loss event"

    => I_0 is the time since the last loss interval.

5.5: "As described in Section 5.4 the average loss interval is calculated
    using the n previous loss intervals I_1, ..., I_n, and the interval
    I_0 that represents the number of packets received since the last
    loss event."

    => thus I_0 means the same thing in 5.4 and 5.5

5.5: "I_0 [is] the number of packets received since the last loss event"

    => yet another way to say the same thing

5.5: "Note that with each new packet arrival, I_0 will increase further"

    => YET ANOTHER way to say the same thing.

In summary
- I_0 is the interval since the most recent loss event
- I_0 increases with every packet received
- The average loss interval calculation depends on I_0
- The average loss interval should be recalculated whenever I_0 changes
- I_0 changes with every packet received
- What more can I say here?????
- All of this average loss interval calculation applies equally to sender and 
receiver; it is in section 5, a general section on "calculating loss event 
rate"; and as the intro to section 5 says, "Loss rate measurement is performed 
at the receiver"

Ian's patch is NOT experimental; Ian is attempting to implement standard 
compliant behavior; Ian's change is documented in the RFCs; etc.; etc.; etc.

Can we put this to bed?

If you still don't understand, please be clearer about why not.

Eddie




Gerrit Renker wrote:
> |  Um, you misunderstand.  The problem that Ian's patch addresses seems 
> |  entirely within the purview of RFC 3448/4342.  He is not trying to 
> |  innovate.  Recalculating the loss rate after a longer period of non-loss 
> |  is NOT EXPERIMENTAL it is REQUIRED.  The loss event rate is NOT 
> |  calculated ONLY when losses are detected.  It must be calculated on 
> |  every feedback packet.  I don't have time at the moment to look up 
> |  chapter and verse, maybe later.
> Please take another look at the patch: what you are saying is true but applies to the _sender_, but Ian's
> code is entirely within the _receiver_. We are not at 4342 yet, at the moment the receiver calculates
> the loss event rate and then contacts the receiver.
> 
> Can you please look up chapter and verse -- I have read all related documents and can only identify
> section 5.4 of RFC 3448. With regard to this, we are in agreement: this needs to be audited to be conform.
> 
> But with regard to recalculating the loss rate after a longer period of non-loss: 
> 
>  => either there is indeed a reference for this in the current RFCs (it is not [RFC 3448, 5.4] and not the
>      corresponding section in 3448bis) but then neither Ian nor I have found this
> 
>  => or there is no such reference and then we should clearly separate the issues into (a) making sure that
>      the code complies to [RFC 3448, 5.4] and (b) what do we do with the "recalculating the loss rate after
>      a longer period of non-loss" ?
> 
> 
> |  I think (for what it's worth) his patch should be accepted as is, and 
> |  maybe he, or someone, will simplify the recalc_recalcloss part later.
> I agree with the points you raised, but disagree with adding an experimental part. The reason is that
> the CCID 3 module is still seriously broken:
> 
> 	* from the output of dccp_probe one can see that the entire system is oscillating and not
>           reaching a stable fixed point; even under ideal, non-loss conditions
>         * the behaviour using tools such as iperf is unpredictable
>         * performance is very poor: sometimes, on a good day one gets 50% of the bandwidth, but at other
>           test runs the speed goes down to only a couple of Mbits/sec (and this with a loss rate of 0)
> 
> Therefore my suggestion is for the moment to only put in changes which are documented in the RFCs, fix
> all these bugs, and add everything else once the basic problems have been sorted out.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (15 preceding siblings ...)
  2007-01-05 21:38 ` Eddie Kohler
@ 2007-01-08 15:05 ` Gerrit Renker
  2007-01-10  1:39 ` Ian McDonald
  17 siblings, 0 replies; 19+ messages in thread
From: Gerrit Renker @ 2007-01-08 15:05 UTC (permalink / raw)
  To: dccp

Hi Eddie

thanks for the clarification. I think this makes clear what Ian's recalculate-loss optimization
does. I am copying this to dccp@ietf as well since a related question was posed there.

What made the analysis so confusing is that [RFC 3448, 5.4] was used as reference, such as in 
the following commment.

    /* This code implements the part of section 5.4 of RFC3448 which says we should
     * recalculate the average loss interval if we have a sufficient long loss
     * interval.

It is now, with the below, however clear that the relevant section is not 5.4, but rather 
section 6.1 - whichstates that the receiver has to recompute p on reception of each data packet.
And it seems to be the point of Ian's patch to relax this requirement somehow. 

Hence you are right, it is not a new technique but rather an attempt to optimise the implementation.

Thanks again for the references.

Gerrit
|  Chapter and verse, all from RFC 3448.
|  
|  5.4: "When calculating the average loss interval we need to decide whether
|      to include the interval since the most recent packet loss event.  We
|      only do this if it is sufficiently large to increase the average loss
|      interval."
|  
|      => If the time since the last loss interval will increase the average 
|  loss interval, then it should be included.
|      => Note that the time since the last loss interval will increase on every 
|  successful packet receipt.
|      => Therefore we may need to recalculate the average loss interval after 
|  every successful packet receipt (or, at the sender, on every feedback packet).
|  
|  5.4: "I_0 being the interval since the most recent loss event"
|  
|      => I_0 is the time since the last loss interval.
|  
|  5.5: "As described in Section 5.4 the average loss interval is calculated
|      using the n previous loss intervals I_1, ..., I_n, and the interval
|      I_0 that represents the number of packets received since the last
|      loss event."
|  
|      => thus I_0 means the same thing in 5.4 and 5.5
|  
|  5.5: "I_0 [is] the number of packets received since the last loss event"
|  
|      => yet another way to say the same thing
|  
|  5.5: "Note that with each new packet arrival, I_0 will increase further"
|  
|      => YET ANOTHER way to say the same thing.
|  
|  In summary
|  - I_0 is the interval since the most recent loss event
|  - I_0 increases with every packet received
|  - The average loss interval calculation depends on I_0
|  - The average loss interval should be recalculated whenever I_0 changes
|  - I_0 changes with every packet received
|  - What more can I say here?????
|  - All of this average loss interval calculation applies equally to sender and 
|  receiver; it is in section 5, a general section on "calculating loss event 
|  rate"; and as the intro to section 5 says, "Loss rate measurement is performed 
|  at the receiver"
|  
|  Ian's patch is NOT experimental; Ian is attempting to implement standard 
|  compliant behavior; Ian's change is documented in the RFCs; etc.; etc.; etc.
|  
|  Can we put this to bed?
|  
|  If you still don't understand, please be clearer about why not.
|  
|  Eddie
|  
|  
|  
|  
|  Gerrit Renker wrote:
|  > |  Um, you misunderstand.  The problem that Ian's patch addresses seems 
|  > |  entirely within the purview of RFC 3448/4342.  He is not trying to 
|  > |  innovate.  Recalculating the loss rate after a longer period of non-loss 
|  > |  is NOT EXPERIMENTAL it is REQUIRED.  The loss event rate is NOT 
|  > |  calculated ONLY when losses are detected.  It must be calculated on 
|  > |  every feedback packet.  I don't have time at the moment to look up 
|  > |  chapter and verse, maybe later.
|  > Please take another look at the patch: what you are saying is true but applies to the _sender_, but Ian's
|  > code is entirely within the _receiver_. We are not at 4342 yet, at the moment the receiver calculates
|  > the loss event rate and then contacts the receiver.
|  > 
|  > Can you please look up chapter and verse -- I have read all related documents and can only identify
|  > section 5.4 of RFC 3448. With regard to this, we are in agreement: this needs to be audited to be conform.
|  > 
|  > But with regard to recalculating the loss rate after a longer period of non-loss: 
|  > 
|  >  => either there is indeed a reference for this in the current RFCs (it is not [RFC 3448, 5.4] and not the
|  >      corresponding section in 3448bis) but then neither Ian nor I have found this
|  > 
|  >  => or there is no such reference and then we should clearly separate the issues into (a) making sure that
|  >      the code complies to [RFC 3448, 5.4] and (b) what do we do with the "recalculating the loss rate after
|  >      a longer period of non-loss" ?
|  > 
|  > 
|  > |  I think (for what it's worth) his patch should be accepted as is, and 
|  > |  maybe he, or someone, will simplify the recalc_recalcloss part later.
|  > I agree with the points you raised, but disagree with adding an experimental part. The reason is that
|  > the CCID 3 module is still seriously broken:
|  > 
|  > 	* from the output of dccp_probe one can see that the entire system is oscillating and not
|  >           reaching a stable fixed point; even under ideal, non-loss conditions
|  >         * the behaviour using tools such as iperf is unpredictable
|  >         * performance is very poor: sometimes, on a good day one gets 50% of the bandwidth, but at other
|  >           test runs the speed goes down to only a couple of Mbits/sec (and this with a loss rate of 0)
|  > 
|  > Therefore my suggestion is for the moment to only put in changes which are documented in the RFCs, fix
|  > all these bugs, and add everything else once the basic problems have been sorted out.
|  
|  

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 2/5]: DCCP Recalc on non-loss intervals
  2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
                   ` (16 preceding siblings ...)
  2007-01-08 15:05 ` Gerrit Renker
@ 2007-01-10  1:39 ` Ian McDonald
  17 siblings, 0 replies; 19+ messages in thread
From: Ian McDonald @ 2007-01-10  1:39 UTC (permalink / raw)
  To: dccp

> One nit.  If you are following RFC terminology i_tot0 should include non_loss,
> but in the code i_tot1 does.  The code seems correct except for the switching
> of i_tot0 and i_tot1.
>
> Eddie

You're right here and I think this is contributing to much of the
confusion as I read through Gerrit's most recent emails.

I'm reworking this patch at present and adding quite a few comments.

Ian
-- 
Web: http://wand.net.nz/~iam4
Blog: http://iansblog.jandi.co.nz
WAND Network Research Group

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2007-01-10  1:39 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-12-20  2:45 [PATCH 2/5]: DCCP Recalc on non-loss intervals Ian McDonald
2006-12-21 13:20 ` Gerrit Renker
2006-12-21 22:48 ` Ian McDonald
2007-01-03 12:56 ` Gerrit Renker
2007-01-03 22:22 ` Ian McDonald
2007-01-04 23:25 ` Eddie Kohler
2007-01-04 23:31 ` Ian McDonald
2007-01-04 23:45 ` [dccp] " Eddie Kohler
2007-01-04 23:58 ` Ian McDonald
2007-01-05  0:10 ` Eddie Kohler
2007-01-05  0:24 ` Ian McDonald
2007-01-05  0:34 ` Eddie Kohler
2007-01-05 14:47 ` Gerrit Renker
2007-01-05 16:00 ` Eddie Kohler
2007-01-05 16:31 ` Gerrit Renker
2007-01-05 16:36 ` [dccp] " Gerrit Renker
2007-01-05 21:38 ` Eddie Kohler
2007-01-08 15:05 ` Gerrit Renker
2007-01-10  1:39 ` Ian McDonald

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.