grub-devel.gnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] tcp: add window scaling and RTTM support
@ 2016-02-01 18:30 Josef Bacik
  2016-02-13 15:40 ` Andrei Borzenkov
  0 siblings, 1 reply; 4+ messages in thread
From: Josef Bacik @ 2016-02-01 18:30 UTC (permalink / raw)
  To: grub-devel, kernel-team, arvidjaar; +Cc: Josef Bacik

Sometimes we have to provision boxes across regions, such as California to
Sweden.  The http server has a 10 minute timeout, so if we can't get our 250mb
image transferred fast enough our provisioning fails, which is not ideal.  So
add tcp window scaling on open connections and set the window size to 1mb.  With
this change we're able to get higher sustained transfers between regions and can
transfer our image in well below 10 minutes.  Without this patch we'd time out
every time halfway through the transfer.

RTTM is needed in order to make window scaling work well under heavy congestion
or packet loss.  In most cases grub could recover with just window scaling
enabled, but on some machines the congestion would be so high that it would
never recover and would time out.

I've made the window size configurable with the grub env variable
"net_tcp_window_size".  By default this is set to 1mb but can be configured to
whatever a user wants, and we will calculate the appropriate window size and
scale settings.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
v2->v3:
-Addressed all of Andrei's comments.
-Fixed the tcp_window_size option to set the global window size and scale so we
 don't calculate every time we open a new connection.
-Fixed how we check the options on receive.

 grub-core/net/net.c |   1 +
 grub-core/net/tcp.c | 186 +++++++++++++++++++++++++++++++++++++++++++++++++---
 include/grub/net.h  |   2 +
 3 files changed, 181 insertions(+), 8 deletions(-)

diff --git a/grub-core/net/net.c b/grub-core/net/net.c
index 21a4e94..16e985b 100644
--- a/grub-core/net/net.c
+++ b/grub-core/net/net.c
@@ -1751,6 +1751,7 @@ GRUB_MOD_INIT(net)
 				       "", N_("list network cards"));
   cmd_lsaddr = grub_register_command ("net_ls_addr", grub_cmd_listaddrs,
 				       "", N_("list network addresses"));
+  grub_net_tcp_init ();
   grub_bootp_init ();
   grub_dns_init ();
 
diff --git a/grub-core/net/tcp.c b/grub-core/net/tcp.c
index 5da8b11..5dcc888 100644
--- a/grub-core/net/tcp.c
+++ b/grub-core/net/tcp.c
@@ -22,6 +22,7 @@
 #include <grub/net/netbuff.h>
 #include <grub/time.h>
 #include <grub/priority_queue.h>
+#include <grub/env.h>
 
 #define TCP_SYN_RETRANSMISSION_TIMEOUT GRUB_NET_INTERVAL
 #define TCP_SYN_RETRANSMISSION_COUNT GRUB_NET_TRIES
@@ -61,10 +62,12 @@ struct grub_net_tcp_socket
   int they_reseted;
   int i_reseted;
   int i_stall;
+  int timestamp_supported;
   grub_uint32_t my_start_seq;
   grub_uint32_t my_cur_seq;
   grub_uint32_t their_start_seq;
   grub_uint32_t their_cur_seq;
+  grub_uint32_t cur_tsecr;
   grub_uint16_t my_window;
   struct unacked *unack_first;
   struct unacked *unack_last;
@@ -106,6 +109,31 @@ struct tcphdr
   grub_uint16_t urgent;
 } GRUB_PACKED;
 
+enum
+  {
+    TCP_SCALE_OPT = 3,
+    TCP_TIMESTAMP_OPT = 8,
+  };
+
+struct tcp_opt_hdr
+{
+  grub_uint8_t kind;
+  grub_uint8_t length;
+} GRUB_PACKED;
+
+struct tcp_scale_opt
+{
+  struct tcp_opt_hdr opt;
+  grub_uint8_t scale;
+} GRUB_PACKED;
+
+struct tcp_timestamp_opt
+{
+  struct tcp_opt_hdr opt;
+  grub_uint32_t tsval;
+  grub_uint32_t tsecr;
+} GRUB_PACKED;
+
 struct tcp_pseudohdr
 {
   grub_uint32_t src;
@@ -126,6 +154,9 @@ struct tcp6_pseudohdr
 
 static struct grub_net_tcp_socket *tcp_sockets;
 static struct grub_net_tcp_listen *tcp_listens;
+static char *grub_net_tcp_window_size;
+static grub_uint32_t tcp_window_size;
+static grub_uint8_t tcp_window_scale;
 
 #define FOR_TCP_SOCKETS(var) FOR_LIST_ELEMENTS (var, tcp_sockets)
 #define FOR_TCP_LISTENS(var) FOR_LIST_ELEMENTS (var, tcp_listens)
@@ -299,9 +330,16 @@ ack_real (grub_net_tcp_socket_t sock, int res)
 {
   struct grub_net_buff *nb_ack;
   struct tcphdr *tcph_ack;
+  grub_size_t headersize;
   grub_err_t err;
 
-  nb_ack = grub_netbuff_alloc (sizeof (*tcph_ack) + 128);
+  if (sock->timestamp_supported)
+    headersize = ALIGN_UP (sizeof (*tcph_ack) +
+			   sizeof (struct tcp_timestamp_opt), 4);
+  else
+    headersize = ALIGN_UP (sizeof (*tcph_ack), 4);
+
+  nb_ack = grub_netbuff_alloc (headersize + 128);
   if (!nb_ack)
     return;
   err = grub_netbuff_reserve (nb_ack, 128);
@@ -313,7 +351,7 @@ ack_real (grub_net_tcp_socket_t sock, int res)
       return;
     }
 
-  err = grub_netbuff_put (nb_ack, sizeof (*tcph_ack));
+  err = grub_netbuff_put (nb_ack, headersize);
   if (err)
     {
       grub_netbuff_free (nb_ack);
@@ -322,22 +360,35 @@ ack_real (grub_net_tcp_socket_t sock, int res)
       return;
     }
   tcph_ack = (void *) nb_ack->data;
+  grub_memset (tcph_ack, 0, headersize);
   if (res)
     {
       tcph_ack->ack = grub_cpu_to_be32_compile_time (0);
-      tcph_ack->flags = grub_cpu_to_be16_compile_time ((5 << 12) | TCP_RST);
+      tcph_ack->flags = grub_cpu_to_be16 ((headersize << 10) | TCP_RST);
       tcph_ack->window = grub_cpu_to_be16_compile_time (0);
     }
   else
     {
       tcph_ack->ack = grub_cpu_to_be32 (sock->their_cur_seq);
-      tcph_ack->flags = grub_cpu_to_be16_compile_time ((5 << 12) | TCP_ACK);
+      /* See comment in grub_net_tcp_open for how this magic works. */
+      tcph_ack->flags = grub_cpu_to_be16 ((headersize << 10) | TCP_ACK);
       tcph_ack->window = !sock->i_stall ? grub_cpu_to_be16 (sock->my_window)
 	: 0;
     }
   tcph_ack->urgent = 0;
   tcph_ack->src = grub_cpu_to_be16 (sock->in_port);
   tcph_ack->dst = grub_cpu_to_be16 (sock->out_port);
+  if (sock->timestamp_supported)
+    {
+      struct tcp_timestamp_opt *timestamp;
+
+      timestamp = (struct tcp_timestamp_opt *)(tcph_ack + 1);
+      timestamp->opt.kind = TCP_TIMESTAMP_OPT;
+      timestamp->opt.length = sizeof (struct tcp_timestamp_opt);
+      timestamp->tsval = grub_cpu_to_be32 (grub_get_time_ms ());
+      timestamp->tsecr = grub_cpu_to_be32 (sock->cur_tsecr);
+    }
+
   err = tcp_send (nb_ack, sock);
   if (err)
     {
@@ -556,9 +607,12 @@ grub_net_tcp_open (char *server,
   static grub_uint16_t in_port = 21550;
   struct grub_net_buff *nb;
   struct tcphdr *tcph;
+  struct tcp_scale_opt *scale;
+  struct tcp_timestamp_opt *timestamp;
   int i;
   grub_uint8_t *nbd;
   grub_net_link_level_address_t ll_target_addr;
+  grub_size_t headersize;
 
   err = grub_net_resolve_address (server, &addr);
   if (err)
@@ -593,7 +647,9 @@ grub_net_tcp_open (char *server,
   socket->fin_hook = fin_hook;
   socket->hook_data = hook_data;
 
-  nb = grub_netbuff_alloc (sizeof (*tcph) + 128);
+  headersize = ALIGN_UP (sizeof (*tcph) + sizeof (*scale) +
+			 sizeof (*timestamp), 4);
+  nb = grub_netbuff_alloc (headersize + 128);
   if (!nb)
     return NULL;
   err = grub_netbuff_reserve (nb, 128);
@@ -603,7 +659,7 @@ grub_net_tcp_open (char *server,
       return NULL;
     }
 
-  err = grub_netbuff_put (nb, sizeof (*tcph));
+  err = grub_netbuff_put (nb, headersize);
   if (err)
     {
       grub_netbuff_free (nb);
@@ -617,17 +673,33 @@ grub_net_tcp_open (char *server,
     }
 
   tcph = (void *) nb->data;
+  grub_memset(tcph, 0, headersize);
   socket->my_start_seq = grub_get_time_ms ();
   socket->my_cur_seq = socket->my_start_seq + 1;
-  socket->my_window = 8192;
+  socket->my_window = tcp_window_size;
   tcph->seqnr = grub_cpu_to_be32 (socket->my_start_seq);
   tcph->ack = grub_cpu_to_be32_compile_time (0);
-  tcph->flags = grub_cpu_to_be16_compile_time ((5 << 12) | TCP_SYN);
+  /* The top 4 bits of flags indicate how many words long the header is, and
+     since headersize is in bytes we can just shif up 10 to get the right number
+     of words in headersize, it's equivalent to ((headersize >> 2) << 12). */
+  tcph->flags = grub_cpu_to_be16 ((headersize << 10) | TCP_SYN);
   tcph->window = grub_cpu_to_be16 (socket->my_window);
   tcph->urgent = 0;
   tcph->src = grub_cpu_to_be16 (socket->in_port);
   tcph->dst = grub_cpu_to_be16 (socket->out_port);
   tcph->checksum = 0;
+
+  scale = (struct tcp_scale_opt *)(tcph + 1);
+  scale->opt.kind = TCP_SCALE_OPT;
+  scale->opt.length = sizeof (struct tcp_scale_opt);
+  scale->scale = tcp_window_scale;
+
+  timestamp = (struct tcp_timestamp_opt *)(scale + 1);
+  timestamp->opt.kind = TCP_TIMESTAMP_OPT;
+  timestamp->opt.length = sizeof (struct tcp_timestamp_opt);
+  timestamp->tsval = grub_cpu_to_be32 (grub_get_time_ms ());
+  timestamp->tsecr = 0;
+
   tcph->checksum = grub_net_ip_transport_checksum (nb, GRUB_NET_IP_TCP,
 						   &socket->inf->address,
 						   &socket->out_nla);
@@ -745,6 +817,7 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
 {
   struct tcphdr *tcph;
   grub_net_tcp_socket_t sock;
+  grub_uint32_t tsecr = 0;
   grub_err_t err;
 
   /* Ignore broadcast.  */
@@ -771,6 +844,38 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
       return GRUB_ERR_NONE;
     }
 
+  /* If the packet is large enough to have the timestamp opt then lets look for
+     the tsecr value. */
+  if ((grub_be_to_cpu16 (tcph->flags >> 12) * sizeof (grub_uint32_t)) >=
+      ALIGN_UP (sizeof (struct tcphdr) + sizeof (struct tcp_timestamp_opt), 4))
+    {
+      struct tcp_opt_hdr *opt;
+      grub_size_t remaining = nb->tail - nb->data;
+
+      opt = (struct tcp_opt_hdr *)(tcph + 1);
+      while (remaining > 0)
+	{
+	  grub_uint8_t len = 1;
+	  if (opt->kind == 8 || opt->kind == 0)
+	    break;
+	  if (opt->kind > 1)
+	    len = opt->length;
+	  if (len > remaining)
+	    len = remaining;
+	  remaining -= len;
+	  opt = (struct tcp_opt_hdr *)((grub_uint8_t *)opt + len);
+	}
+
+      /* Ok we definitely have the timestamp option. */
+      if (opt->kind == 8)
+	{
+	  struct tcp_timestamp_opt *timestamp;
+
+	  timestamp = (struct tcp_timestamp_opt *)opt;
+	  tsecr = grub_be_to_cpu32 (timestamp->tsval);
+	}
+    }
+
   FOR_TCP_SOCKETS (sock)
   {
     if (!(grub_be_to_cpu16 (tcph->dst) == sock->in_port
@@ -805,6 +910,9 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
 	sock->their_start_seq = grub_be_to_cpu32 (tcph->seqnr);
 	sock->their_cur_seq = sock->their_start_seq + 1;
 	sock->established = 1;
+	sock->timestamp_supported = 0;
+	if (tsecr)
+	  sock->timestamp_supported = 1;
       }
 
     if (grub_be_to_cpu16 (tcph->flags) & TCP_RST)
@@ -906,6 +1014,8 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
 	      return err;
 	    }
 
+	  /* We only update the tsecr when we advance the window. */
+	  sock->cur_tsecr = tsecr;
 	  sock->their_cur_seq += (nb_top->tail - nb_top->data);
 	  if (grub_be_to_cpu16 (tcph->flags) & TCP_FIN)
 	    {
@@ -999,3 +1109,63 @@ grub_net_tcp_unstall (grub_net_tcp_socket_t sock)
   sock->i_stall = 0;
   ack (sock);
 }
+
+static const char *
+window_get_env (struct grub_env_var *var __attribute__ ((unused)),
+		const char *val __attribute__ ((unused)))
+{
+  return grub_net_tcp_window_size;
+}
+
+static char *
+window_set_env (struct grub_env_var *var __attribute__ ((unused)),
+		const char *val)
+{
+  grub_uint32_t ret;
+
+  if (val == NULL)
+    return NULL;
+
+  grub_error_push ();
+  ret = (grub_uint32_t) grub_strtoul (val, 0, 0);
+  if (grub_errno != GRUB_ERR_NONE)
+    {
+      grub_printf ("Invalid number for window size '%s'.\n", val);
+      grub_errno = GRUB_ERR_NONE;
+      grub_error_pop ();
+      return NULL;
+    }
+  grub_error_pop ();
+
+  /* A window size greater than 1gib is invalid. */
+  if (ret > 1024 * 1024 * 1024)
+    {
+      grub_printf ("TCP window size must be <= 1gib.\n");
+      return NULL;
+    }
+  grub_net_tcp_window_size = grub_strdup (val);
+  tcp_window_size = ret;
+  tcp_window_scale = 0;
+
+  /* The window size is only 16 bits long, so we have to scale it down to fit in
+     the header and calculate the scale along the way. */
+  while (tcp_window_size > 65535)
+    {
+      tcp_window_size >>= 1;
+      tcp_window_scale += 1;
+    }
+
+  return grub_net_tcp_window_size;
+}
+
+/* We set the default window size to 1mib. */
+#define DEFAULT_TCP_WINDOW_SIZE "1048576"
+
+void
+grub_net_tcp_init (void)
+{
+  grub_register_variable_hook ("net_tcp_window_size", window_get_env,
+			       window_set_env);
+  grub_env_export ("net_tcp_window_size");
+  grub_env_set ("net_tcp_window_size", DEFAULT_TCP_WINDOW_SIZE);
+}
diff --git a/include/grub/net.h b/include/grub/net.h
index 4571b72..fa3d286 100644
--- a/include/grub/net.h
+++ b/include/grub/net.h
@@ -551,6 +551,8 @@ grub_net_add_dns_server (const struct grub_net_network_level_address *s);
 void
 grub_net_remove_dns_server (const struct grub_net_network_level_address *s);
 
+void
+grub_net_tcp_init (void);
 
 extern char *grub_net_default_server;
 
-- 
1.8.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] tcp: add window scaling and RTTM support
  2016-02-01 18:30 [PATCH v3] tcp: add window scaling and RTTM support Josef Bacik
@ 2016-02-13 15:40 ` Andrei Borzenkov
  2016-02-18 19:20   ` Josef Bacik
  0 siblings, 1 reply; 4+ messages in thread
From: Andrei Borzenkov @ 2016-02-13 15:40 UTC (permalink / raw)
  To: Josef Bacik, grub-devel, kernel-team

01.02.2016 21:30, Josef Bacik пишет:
...

> @@ -745,6 +817,7 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>  {
>    struct tcphdr *tcph;
>    grub_net_tcp_socket_t sock;
> +  grub_uint32_t tsecr = 0;
>    grub_err_t err;
>  
>    /* Ignore broadcast.  */
> @@ -771,6 +844,38 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>        return GRUB_ERR_NONE;
>      }
>  

Still no proper boundary check.

> +  /* If the packet is large enough to have the timestamp opt then lets look for
> +     the tsecr value. */
> +  if ((grub_be_to_cpu16 (tcph->flags >> 12) * sizeof (grub_uint32_t)) >=
> +      ALIGN_UP (sizeof (struct tcphdr) + sizeof (struct tcp_timestamp_opt), 4))
> +    {
> +      struct tcp_opt_hdr *opt;
> +      grub_size_t remaining = nb->tail - nb->data;
> +
> +      opt = (struct tcp_opt_hdr *)(tcph + 1);
> +      while (remaining > 0)
> +	{
> +	  grub_uint8_t len = 1;
> +	  if (opt->kind == 8 || opt->kind == 0)
> +	    break;
> +	  if (opt->kind > 1)

Here (we only can ensure opt->kind here)

> +	    len = opt->length;
> +	  if (len > remaining)
> +	    len = remaining;
> +	  remaining -= len;
> +	  opt = (struct tcp_opt_hdr *)((grub_uint8_t *)opt + len);
> +	}
> +
> +      /* Ok we definitely have the timestamp option. */
> +      if (opt->kind == 8)
> +	{
> +	  struct tcp_timestamp_opt *timestamp;
> +
> +	  timestamp = (struct tcp_timestamp_opt *)opt;
> +	  tsecr = grub_be_to_cpu32 (timestamp->tsval);

And of course here (there is no length check at all when we break out of
loop).

> +	}
> +    }
> +
>    FOR_TCP_SOCKETS (sock)
>    {
>      if (!(grub_be_to_cpu16 (tcph->dst) == sock->in_port
> @@ -805,6 +910,9 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>  	sock->their_start_seq = grub_be_to_cpu32 (tcph->seqnr);
>  	sock->their_cur_seq = sock->their_start_seq + 1;
>  	sock->established = 1;
> +	sock->timestamp_supported = 0;
> +	if (tsecr)
> +	  sock->timestamp_supported = 1;
>        }
>  
>      if (grub_be_to_cpu16 (tcph->flags) & TCP_RST)
> @@ -906,6 +1014,8 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>  	      return err;
>  	    }
>  
> +	  /* We only update the tsecr when we advance the window. */
> +	  sock->cur_tsecr = tsecr;

You intended to check that tsecr >= cur_tsecr.

>  	  sock->their_cur_seq += (nb_top->tail - nb_top->data);
>  	  if (grub_be_to_cpu16 (tcph->flags) & TCP_FIN)
>  	    {
> @@ -999,3 +1109,63 @@ grub_net_tcp_unstall (grub_net_tcp_socket_t sock)
>    sock->i_stall = 0;
>    ack (sock);
>  }
> +
> +static const char *
> +window_get_env (struct grub_env_var *var __attribute__ ((unused)),
> +		const char *val __attribute__ ((unused)))
> +{
> +  return grub_net_tcp_window_size;
> +}
> +

Oh, that's really redundant as is, string is stored as variable value
anyway, no need to duplicate it, just return from window_set_env.

> +static char *
> +window_set_env (struct grub_env_var *var __attribute__ ((unused)),
> +		const char *val)
> +{
> +  grub_uint32_t ret;
> +
> +  if (val == NULL)
> +    return NULL;
> +
> +  grub_error_push ();
> +  ret = (grub_uint32_t) grub_strtoul (val, 0, 0);
> +  if (grub_errno != GRUB_ERR_NONE)
> +    {
> +      grub_printf ("Invalid number for window size '%s'.\n", val);
> +      grub_errno = GRUB_ERR_NONE;
> +      grub_error_pop ();

You just lost proper error return from grub_strtoul. Just return NULL
here without any push/pop and error from strtoul will be correctly
propagated and printed when set command completes.

> +      return NULL;
> +    }
> +  grub_error_pop ();
> +
> +  /* A window size greater than 1gib is invalid. */
> +  if (ret > 1024 * 1024 * 1024)
> +    {
> +      grub_printf ("TCP window size must be <= 1gib.\n");

Please use grub_error to set grub_errno and error string;
GRUB_ERR_BAD_ARGUMENT looks suitable here.

> +      return NULL;
> +    }
> +  grub_net_tcp_window_size = grub_strdup (val);
> +  tcp_window_size = ret;
> +  tcp_window_scale = 0;
> +
> +  /* The window size is only 16 bits long, so we have to scale it down to fit in
> +     the header and calculate the scale along the way. */
> +  while (tcp_window_size > 65535)
> +    {
> +      tcp_window_size >>= 1;
> +      tcp_window_scale += 1;
> +    }
> +
> +  return grub_net_tcp_window_size;
> +}
> +
> +/* We set the default window size to 1mib. */
> +#define DEFAULT_TCP_WINDOW_SIZE "1048576"

Well ... I'm still unsure we should do it by default.

> +
> +void
> +grub_net_tcp_init (void)
> +{
> +  grub_register_variable_hook ("net_tcp_window_size", window_get_env,
> +			       window_set_env);
> +  grub_env_export ("net_tcp_window_size");
> +  grub_env_set ("net_tcp_window_size", DEFAULT_TCP_WINDOW_SIZE);
> +}
> diff --git a/include/grub/net.h b/include/grub/net.h
> index 4571b72..fa3d286 100644
> --- a/include/grub/net.h
> +++ b/include/grub/net.h
> @@ -551,6 +551,8 @@ grub_net_add_dns_server (const struct grub_net_network_level_address *s);
>  void
>  grub_net_remove_dns_server (const struct grub_net_network_level_address *s);
>  
> +void
> +grub_net_tcp_init (void);
>  
>  extern char *grub_net_default_server;
>  
> 



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] tcp: add window scaling and RTTM support
  2016-02-13 15:40 ` Andrei Borzenkov
@ 2016-02-18 19:20   ` Josef Bacik
  2016-02-22  8:00     ` Andrei Borzenkov
  0 siblings, 1 reply; 4+ messages in thread
From: Josef Bacik @ 2016-02-18 19:20 UTC (permalink / raw)
  To: Andrei Borzenkov, grub-devel, kernel-team

On 02/13/2016 10:40 AM, Andrei Borzenkov wrote:
> 01.02.2016 21:30, Josef Bacik пишет:
> ...
>
>> @@ -745,6 +817,7 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>>   {
>>     struct tcphdr *tcph;
>>     grub_net_tcp_socket_t sock;
>> +  grub_uint32_t tsecr = 0;
>>     grub_err_t err;
>>
>>     /* Ignore broadcast.  */
>> @@ -771,6 +844,38 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>>         return GRUB_ERR_NONE;
>>       }
>>
>
> Still no proper boundary check.
>
>> +  /* If the packet is large enough to have the timestamp opt then lets look for
>> +     the tsecr value. */
>> +  if ((grub_be_to_cpu16 (tcph->flags >> 12) * sizeof (grub_uint32_t)) >=
>> +      ALIGN_UP (sizeof (struct tcphdr) + sizeof (struct tcp_timestamp_opt), 4))
>> +    {
>> +      struct tcp_opt_hdr *opt;
>> +      grub_size_t remaining = nb->tail - nb->data;
>> +
>> +      opt = (struct tcp_opt_hdr *)(tcph + 1);
>> +      while (remaining > 0)
>> +	{
>> +	  grub_uint8_t len = 1;
>> +	  if (opt->kind == 8 || opt->kind == 0)
>> +	    break;
>> +	  if (opt->kind > 1)
>
> Here (we only can ensure opt->kind here)
>
>> +	    len = opt->length;
>> +	  if (len > remaining)
>> +	    len = remaining;
>> +	  remaining -= len;
>> +	  opt = (struct tcp_opt_hdr *)((grub_uint8_t *)opt + len);
>> +	}
>> +
>> +      /* Ok we definitely have the timestamp option. */
>> +      if (opt->kind == 8)
>> +	{
>> +	  struct tcp_timestamp_opt *timestamp;
>> +
>> +	  timestamp = (struct tcp_timestamp_opt *)opt;
>> +	  tsecr = grub_be_to_cpu32 (timestamp->tsval);
>
> And of course here (there is no length check at all when we break out of
> loop).
>
>> +	}
>> +    }
>> +
>>     FOR_TCP_SOCKETS (sock)
>>     {
>>       if (!(grub_be_to_cpu16 (tcph->dst) == sock->in_port
>> @@ -805,6 +910,9 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>>   	sock->their_start_seq = grub_be_to_cpu32 (tcph->seqnr);
>>   	sock->their_cur_seq = sock->their_start_seq + 1;
>>   	sock->established = 1;
>> +	sock->timestamp_supported = 0;
>> +	if (tsecr)
>> +	  sock->timestamp_supported = 1;
>>         }
>>
>>       if (grub_be_to_cpu16 (tcph->flags) & TCP_RST)
>> @@ -906,6 +1014,8 @@ grub_net_recv_tcp_packet (struct grub_net_buff *nb,
>>   	      return err;
>>   	    }
>>
>> +	  /* We only update the tsecr when we advance the window. */
>> +	  sock->cur_tsecr = tsecr;
>
> You intended to check that tsecr >= cur_tsecr.
>
>>   	  sock->their_cur_seq += (nb_top->tail - nb_top->data);
>>   	  if (grub_be_to_cpu16 (tcph->flags) & TCP_FIN)
>>   	    {
>> @@ -999,3 +1109,63 @@ grub_net_tcp_unstall (grub_net_tcp_socket_t sock)
>>     sock->i_stall = 0;
>>     ack (sock);
>>   }
>> +
>> +static const char *
>> +window_get_env (struct grub_env_var *var __attribute__ ((unused)),
>> +		const char *val __attribute__ ((unused)))
>> +{
>> +  return grub_net_tcp_window_size;
>> +}
>> +
>
> Oh, that's really redundant as is, string is stored as variable value
> anyway, no need to duplicate it, just return from window_set_env.
>
>> +static char *
>> +window_set_env (struct grub_env_var *var __attribute__ ((unused)),
>> +		const char *val)
>> +{
>> +  grub_uint32_t ret;
>> +
>> +  if (val == NULL)
>> +    return NULL;
>> +
>> +  grub_error_push ();
>> +  ret = (grub_uint32_t) grub_strtoul (val, 0, 0);
>> +  if (grub_errno != GRUB_ERR_NONE)
>> +    {
>> +      grub_printf ("Invalid number for window size '%s'.\n", val);
>> +      grub_errno = GRUB_ERR_NONE;
>> +      grub_error_pop ();
>
> You just lost proper error return from grub_strtoul. Just return NULL
> here without any push/pop and error from strtoul will be correctly
> propagated and printed when set command completes.
>
>> +      return NULL;
>> +    }
>> +  grub_error_pop ();
>> +
>> +  /* A window size greater than 1gib is invalid. */
>> +  if (ret > 1024 * 1024 * 1024)
>> +    {
>> +      grub_printf ("TCP window size must be <= 1gib.\n");
>
> Please use grub_error to set grub_errno and error string;
> GRUB_ERR_BAD_ARGUMENT looks suitable here.
>
>> +      return NULL;
>> +    }
>> +  grub_net_tcp_window_size = grub_strdup (val);
>> +  tcp_window_size = ret;
>> +  tcp_window_scale = 0;
>> +
>> +  /* The window size is only 16 bits long, so we have to scale it down to fit in
>> +     the header and calculate the scale along the way. */
>> +  while (tcp_window_size > 65535)
>> +    {
>> +      tcp_window_size >>= 1;
>> +      tcp_window_scale += 1;
>> +    }
>> +
>> +  return grub_net_tcp_window_size;
>> +}
>> +
>> +/* We set the default window size to 1mib. */
>> +#define DEFAULT_TCP_WINDOW_SIZE "1048576"
>
> Well ... I'm still unsure we should do it by default.

Sorry I just now noticed you replied.  I'll fix up the rest of the 
stuff.  And apparently we're the only ones doing netbooting so I feel 
like we get to pick the default ;).  Thanks,

Josef



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] tcp: add window scaling and RTTM support
  2016-02-18 19:20   ` Josef Bacik
@ 2016-02-22  8:00     ` Andrei Borzenkov
  0 siblings, 0 replies; 4+ messages in thread
From: Andrei Borzenkov @ 2016-02-22  8:00 UTC (permalink / raw)
  To: Josef Bacik, grub-devel, kernel-team

18.02.2016 22:20, Josef Bacik пишет:
>>> +/* We set the default window size to 1mib. */
>>> +#define DEFAULT_TCP_WINDOW_SIZE "1048576"
>>
>> Well ... I'm still unsure we should do it by default.
> 
> Sorry I just now noticed you replied.  I'll fix up the rest of the
> stuff.  And apparently we're the only ones doing netbooting so I feel
> like we get to pick the default ;).

Not really. Netbooting is used by quite a few people; but not everyone is doing
it across continents :)


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-02-22  8:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-01 18:30 [PATCH v3] tcp: add window scaling and RTTM support Josef Bacik
2016-02-13 15:40 ` Andrei Borzenkov
2016-02-18 19:20   ` Josef Bacik
2016-02-22  8:00     ` Andrei Borzenkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).