* [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port
@ 2010-02-15 22:00 Octavian Purdila
2010-02-15 22:00 ` [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code Octavian Purdila
` (3 more replies)
0 siblings, 4 replies; 38+ messages in thread
From: Octavian Purdila @ 2010-02-15 22:00 UTC (permalink / raw)
To: David Miller
Cc: Octavian Purdila, Linux Kernel Network Developers,
Linux Kernel Developers, Amerigo Wang
This iteration makes the bitmap dynamically allocated since it is
quite big (8192 bytes) and adding that much in BSS may still,
apparently, cause problems on some architectures.
Octavian Purdila (3):
sysctl: refactor integer handling proc code
sysctl: add proc_dobitmap
net: reserve ports for applications using fixed port numbers
Documentation/networking/ip-sysctl.txt | 12 +
drivers/infiniband/core/cma.c | 7 +-
include/linux/sysctl.h | 2 +
include/net/ip.h | 6 +
kernel/sysctl.c | 374 +++++++++++++++++++-------------
net/ipv4/af_inet.c | 8 +-
net/ipv4/inet_connection_sock.c | 6 +
net/ipv4/inet_hashtables.c | 2 +
net/ipv4/sysctl_net_ipv4.c | 17 ++
net/ipv4/udp.c | 3 +-
net/sctp/socket.c | 2 +
11 files changed, 282 insertions(+), 157 deletions(-)
^ permalink raw reply [flat|nested] 38+ messages in thread* [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-15 22:00 [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Octavian Purdila @ 2010-02-15 22:00 ` Octavian Purdila 2010-02-16 8:41 ` Cong Wang 2010-02-15 22:00 ` [net-next PATCH v4 2/3] sysctl: add proc_dobitmap Octavian Purdila ` (2 subsequent siblings) 3 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-15 22:00 UTC (permalink / raw) To: David Miller Cc: Octavian Purdila, Linux Kernel Network Developers, Linux Kernel Developers, WANG Cong, Eric W. Biederman As we are about to add another integer handling proc function a little bit of cleanup is in order: add a few helper functions to improve code readability and decrease code duplication. In the process a bug is fixed as well: if the user specifies a number with more then 20 digits it will be interpreted as two integers (e.g. 10000...13 will be interpreted as 100.... and 13). Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> Cc: WANG Cong <amwang@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> --- kernel/sysctl.c | 298 +++++++++++++++++++++++++++---------------------------- 1 files changed, 144 insertions(+), 154 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8a68b24..b0f9618 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2039,8 +2039,98 @@ int proc_dostring(struct ctl_table *table, int write, buffer, lenp, ppos); } +static int proc_skip_wspace(char __user **buf, size_t *size) +{ + char c; + + while (*size) { + if (get_user(c, *buf)) + return -EFAULT; + if (!isspace(c)) + break; + (*size)--; (*buf)++; + } + + return 0; +} + +#define TMPBUFLEN 22 +static int proc_get_next_ulong(char __user **buf, size_t *size, + unsigned long *val, bool *neg) +{ + int len; + char *p, tmp[TMPBUFLEN]; + int err; + + err = proc_skip_wspace(buf, size); + if (err) + return err; + if (!*size) + return -EINVAL; + + len = *size; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + + if (copy_from_user(tmp, *buf, len)) + return -EFAULT; + + tmp[len] = 0; + p = tmp; + if (*p == '-' && *size > 1) { + *neg = 1; + p++; + } else + *neg = 0; + if (*p < '0' || *p > '9') + return -EINVAL; + + *val = simple_strtoul(p, &p, 0); + + len = p - tmp; + if (((len < *size) && *p && !isspace(*p)) || + /* We don't know if the next char is whitespace thus we may accept + * invalid integers (e.g. 1234...a) or two integers instead of one + * (e.g. 123...1). So lets not allow such large numbers. */ + len == TMPBUFLEN - 1) + return -EINVAL; -static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, + *buf += len; *size -= len; + + return 0; +} + +static int proc_put_ulong(char __user **buf, size_t *size, unsigned long val, + bool neg, bool first) +{ + int len; + char tmp[TMPBUFLEN], *p = tmp; + + if (!first) + *p++ = '\t'; + sprintf(p, "%s%lu", neg ? "-" : "", val); + len = strlen(tmp); + if (len > *size) + len = *size; + if (copy_to_user(*buf, tmp, len)) + return -EFAULT; + *size -= len; + *buf += len; + return 0; +} +#undef TMPBUFLEN + +static int proc_put_newline(char __user **buf, size_t *size) +{ + if (*size) { + if (put_user('\n', *buf)) + return -EFAULT; + (*size)--, (*buf)++; + } + return 0; +} + +static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2049,7 +2139,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, } else { int val = *valp; if (val < 0) { - *negp = -1; + *negp = 1; *lvalp = (unsigned long)-val; } else { *negp = 0; @@ -2060,19 +2150,15 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, } static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, - int write, void __user *buffer, + int write, void __user *_buffer, size_t *lenp, loff_t *ppos, - int (*conv)(int *negp, unsigned long *lvalp, int *valp, + int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { -#define TMPBUFLEN 21 - int *i, vleft, first = 1, neg; - unsigned long lval; - size_t left, len; - - char buf[TMPBUFLEN], *p; - char __user *s = buffer; + int *i, vleft, first = 1, err = 0; + size_t left; + char __user *buffer = (char __user *) _buffer; if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { @@ -2088,88 +2174,39 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, conv = do_proc_dointvec_conv; for (; left && vleft--; i++, first=0) { - if (write) { - while (left) { - char c; - if (get_user(c, s)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - s++; - } - if (!left) - break; - neg = 0; - len = left; - if (len > sizeof(buf) - 1) - len = sizeof(buf) - 1; - if (copy_from_user(buf, s, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p == '-' && left > 1) { - neg = 1; - p++; - } - if (*p < '0' || *p > '9') - break; - - lval = simple_strtoul(p, &p, 0); + unsigned long lval; + bool neg; - len = p-buf; - if ((len < left) && *p && !isspace(*p)) + if (write) { + err = proc_get_next_ulong(&buffer, &left, &lval, &neg); + if (err) break; - s += len; - left -= len; - if (conv(&neg, &lval, i, 1, data)) break; } else { - p = buf; - if (!first) - *p++ = '\t'; - if (conv(&neg, &lval, i, 0, data)) break; - - sprintf(p, "%s%lu", neg ? "-" : "", lval); - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(s, buf, len)) - return -EFAULT; - left -= len; - s += len; - } - } - - if (!write && !first && left) { - if(put_user('\n', s)) - return -EFAULT; - left--, s++; - } - if (write) { - while (left) { - char c; - if (get_user(c, s++)) - return -EFAULT; - if (!isspace(c)) + err = proc_put_ulong(&buffer, &left, lval, neg, first); + if (err) break; - left--; } } - if (write && first) - return -EINVAL; + + if (!write && !first && left && !err) + err = proc_put_newline(&buffer, &left); + if (write && !err) + err = proc_skip_wspace(&buffer, &left); + if (err == -EFAULT /* do we really need to check for -EFAULT? */ || + (write && first)) + return err ? : -EINVAL; *lenp -= left; *ppos += *lenp; return 0; -#undef TMPBUFLEN } static int do_proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, - int (*conv)(int *negp, unsigned long *lvalp, int *valp, + int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) { @@ -2237,8 +2274,8 @@ struct do_proc_dointvec_minmax_conv_param { int *max; }; -static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, - int *valp, +static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, + int *valp, int write, void *data) { struct do_proc_dointvec_minmax_conv_param *param = data; @@ -2251,7 +2288,7 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, } else { int val = *valp; if (val < 0) { - *negp = -1; + *negp = 1; *lvalp = (unsigned long)-val; } else { *negp = 0; @@ -2289,17 +2326,15 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, } static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, - void __user *buffer, + void __user *_buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { -#define TMPBUFLEN 21 - unsigned long *i, *min, *max, val; - int vleft, first=1, neg; - size_t len, left; - char buf[TMPBUFLEN], *p; - char __user *s = buffer; + unsigned long *i, *min, *max; + int vleft, first = 1, err = 0; + size_t left; + char __user *buffer = (char __user *) _buffer; if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { @@ -2314,82 +2349,37 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int left = *lenp; for (; left && vleft--; i++, min++, max++, first=0) { + unsigned long val; + if (write) { - while (left) { - char c; - if (get_user(c, s)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - s++; - } - if (!left) - break; - neg = 0; - len = left; - if (len > TMPBUFLEN-1) - len = TMPBUFLEN-1; - if (copy_from_user(buf, s, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p == '-' && left > 1) { - neg = 1; - p++; - } - if (*p < '0' || *p > '9') - break; - val = simple_strtoul(p, &p, 0) * convmul / convdiv ; - len = p-buf; - if ((len < left) && *p && !isspace(*p)) + bool neg; + + err = proc_get_next_ulong(&buffer, &left, &val, &neg); + if (err) break; if (neg) - val = -val; - s += len; - left -= len; - - if(neg) continue; if ((min && val < *min) || (max && val > *max)) continue; *i = val; } else { - p = buf; - if (!first) - *p++ = '\t'; - sprintf(p, "%lu", convdiv * (*i) / convmul); - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(s, buf, len)) - return -EFAULT; - left -= len; - s += len; - } - } - - if (!write && !first && left) { - if(put_user('\n', s)) - return -EFAULT; - left--, s++; - } - if (write) { - while (left) { - char c; - if (get_user(c, s++)) - return -EFAULT; - if (!isspace(c)) + val = convdiv * (*i) / convmul; + err = proc_put_ulong(&buffer, &left, val, 0, first); + if (err) break; - left--; } } - if (write && first) - return -EINVAL; + + if (!write && !first && left && !err) + err = proc_put_newline(&buffer, &left); + if (write && !err) + err = proc_skip_wspace(&buffer, &left); + if (err == -EFAULT /* do we really need to check for -EFAULT? */ || + (write && first)) + return err ? : -EINVAL; *lenp -= left; *ppos += *lenp; return 0; -#undef TMPBUFLEN } static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, @@ -2450,7 +2440,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, } -static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2462,7 +2452,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, int val = *valp; unsigned long lval; if (val < 0) { - *negp = -1; + *negp = 1; lval = (unsigned long)-val; } else { *negp = 0; @@ -2473,7 +2463,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, return 0; } -static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2485,7 +2475,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, int val = *valp; unsigned long lval; if (val < 0) { - *negp = -1; + *negp = 1; lval = (unsigned long)-val; } else { *negp = 0; @@ -2496,7 +2486,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, return 0; } -static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, +static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, int *valp, int write, void *data) { @@ -2506,7 +2496,7 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, int val = *valp; unsigned long lval; if (val < 0) { - *negp = -1; + *negp = 1; lval = (unsigned long)-val; } else { *negp = 0; -- 1.5.6.5 ^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-15 22:00 ` [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code Octavian Purdila @ 2010-02-16 8:41 ` Cong Wang 2010-02-16 10:48 ` Octavian Purdila 2010-02-16 11:41 ` Octavian Purdila 0 siblings, 2 replies; 38+ messages in thread From: Cong Wang @ 2010-02-16 8:41 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman Octavian Purdila wrote: > As we are about to add another integer handling proc function a little > bit of cleanup is in order: add a few helper functions to improve code > readability and decrease code duplication. > > In the process a bug is fixed as well: if the user specifies a number > with more then 20 digits it will be interpreted as two integers > (e.g. 10000...13 will be interpreted as 100.... and 13). > > Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> > Cc: WANG Cong <amwang@redhat.com> > Cc: Eric W. Biederman <ebiederm@xmission.com> Some comments below. > --- > kernel/sysctl.c | 298 +++++++++++++++++++++++++++---------------------------- > 1 files changed, 144 insertions(+), 154 deletions(-) > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 8a68b24..b0f9618 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -2039,8 +2039,98 @@ int proc_dostring(struct ctl_table *table, int write, > buffer, lenp, ppos); > } > > +static int proc_skip_wspace(char __user **buf, size_t *size) > +{ > + char c; > + > + while (*size) { > + if (get_user(c, *buf)) > + return -EFAULT; > + if (!isspace(c)) > + break; > + (*size)--; (*buf)++; > + } > + > + return 0; > +} In lib/string.c we have skip_spaces(), I think we can use it here instead of inventing another one. > + > +#define TMPBUFLEN 22 > +static int proc_get_next_ulong(char __user **buf, size_t *size, > + unsigned long *val, bool *neg) > +{ > + int len; > + char *p, tmp[TMPBUFLEN]; > + int err; > + > + err = proc_skip_wspace(buf, size); > + if (err) > + return err; > + if (!*size) > + return -EINVAL; > + > + len = *size; > + if (len > TMPBUFLEN-1) > + len = TMPBUFLEN-1; > + > + if (copy_from_user(tmp, *buf, len)) > + return -EFAULT; > + > + tmp[len] = 0; > + p = tmp; > + if (*p == '-' && *size > 1) { > + *neg = 1; > + p++; > + } else > + *neg = 0; > + if (*p < '0' || *p > '9') > + return -EINVAL; isdigit(). > + > + *val = simple_strtoul(p, &p, 0); > + > + len = p - tmp; > + if (((len < *size) && *p && !isspace(*p)) || > + /* We don't know if the next char is whitespace thus we may accept > + * invalid integers (e.g. 1234...a) or two integers instead of one > + * (e.g. 123...1). So lets not allow such large numbers. */ > + len == TMPBUFLEN - 1) > + return -EINVAL; > > -static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, > + *buf += len; *size -= len; > + > + return 0; > +} > + > +static int proc_put_ulong(char __user **buf, size_t *size, unsigned long val, > + bool neg, bool first) > +{ > + int len; > + char tmp[TMPBUFLEN], *p = tmp; > + > + if (!first) > + *p++ = '\t'; > + sprintf(p, "%s%lu", neg ? "-" : "", val); > + len = strlen(tmp); > + if (len > *size) > + len = *size; > + if (copy_to_user(*buf, tmp, len)) > + return -EFAULT; > + *size -= len; > + *buf += len; > + return 0; > +} > +#undef TMPBUFLEN > + > +static int proc_put_newline(char __user **buf, size_t *size) > +{ > + if (*size) { > + if (put_user('\n', *buf)) > + return -EFAULT; > + (*size)--, (*buf)++; > + } > + return 0; > +} > + > +static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, > int *valp, > int write, void *data) > { > @@ -2049,7 +2139,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, > } else { > int val = *valp; > if (val < 0) { > - *negp = -1; > + *negp = 1; > *lvalp = (unsigned long)-val; > } else { > *negp = 0; > @@ -2060,19 +2150,15 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, > } > > static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, > - int write, void __user *buffer, > + int write, void __user *_buffer, > size_t *lenp, loff_t *ppos, > - int (*conv)(int *negp, unsigned long *lvalp, int *valp, > + int (*conv)(bool *negp, unsigned long *lvalp, int *valp, > int write, void *data), > void *data) > { > -#define TMPBUFLEN 21 > - int *i, vleft, first = 1, neg; > - unsigned long lval; > - size_t left, len; > - > - char buf[TMPBUFLEN], *p; > - char __user *s = buffer; > + int *i, vleft, first = 1, err = 0; > + size_t left; > + char __user *buffer = (char __user *) _buffer; > > if (!tbl_data || !table->maxlen || !*lenp || > (*ppos && !write)) { > @@ -2088,88 +2174,39 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, > conv = do_proc_dointvec_conv; > > for (; left && vleft--; i++, first=0) { > - if (write) { > - while (left) { > - char c; > - if (get_user(c, s)) > - return -EFAULT; > - if (!isspace(c)) > - break; > - left--; > - s++; > - } > - if (!left) > - break; > - neg = 0; > - len = left; > - if (len > sizeof(buf) - 1) > - len = sizeof(buf) - 1; > - if (copy_from_user(buf, s, len)) > - return -EFAULT; > - buf[len] = 0; > - p = buf; > - if (*p == '-' && left > 1) { > - neg = 1; > - p++; > - } > - if (*p < '0' || *p > '9') > - break; > - > - lval = simple_strtoul(p, &p, 0); > + unsigned long lval; > + bool neg; > > - len = p-buf; > - if ((len < left) && *p && !isspace(*p)) > + if (write) { > + err = proc_get_next_ulong(&buffer, &left, &lval, &neg); > + if (err) > break; > - s += len; > - left -= len; > - > if (conv(&neg, &lval, i, 1, data)) > break; > } else { > - p = buf; > - if (!first) > - *p++ = '\t'; > - > if (conv(&neg, &lval, i, 0, data)) > break; > - > - sprintf(p, "%s%lu", neg ? "-" : "", lval); > - len = strlen(buf); > - if (len > left) > - len = left; > - if(copy_to_user(s, buf, len)) > - return -EFAULT; > - left -= len; > - s += len; > - } > - } > - > - if (!write && !first && left) { > - if(put_user('\n', s)) > - return -EFAULT; > - left--, s++; > - } > - if (write) { > - while (left) { > - char c; > - if (get_user(c, s++)) > - return -EFAULT; > - if (!isspace(c)) > + err = proc_put_ulong(&buffer, &left, lval, neg, first); > + if (err) > break; > - left--; > } > } > - if (write && first) > - return -EINVAL; > + > + if (!write && !first && left && !err) > + err = proc_put_newline(&buffer, &left); > + if (write && !err) > + err = proc_skip_wspace(&buffer, &left); > + if (err == -EFAULT /* do we really need to check for -EFAULT? */ || > + (write && first)) > + return err ? : -EINVAL; The logic here seems messy, adding one or two goto's may help? > *lenp -= left; > *ppos += *lenp; > return 0; > -#undef TMPBUFLEN > } > The rest looks fine. Thanks! ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 8:41 ` Cong Wang @ 2010-02-16 10:48 ` Octavian Purdila 2010-02-16 13:08 ` Cong Wang 2010-02-18 3:58 ` Octavian Purdila 2010-02-16 11:41 ` Octavian Purdila 1 sibling, 2 replies; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 10:48 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman On Tuesday 16 February 2010 10:41:07 you wrote: > > + > > + if (!write && !first && left && !err) > > + err = proc_put_newline(&buffer, &left); > > + if (write && !err) > > + err = proc_skip_wspace(&buffer, &left); > > + if (err == -EFAULT /* do we really need to check for -EFAULT? */ || > > + (write && first)) > > + return err ? : -EINVAL; > > The logic here seems messy, adding one or two goto's may help? > OK, I'll give it a try. What about the EFAULT check, is that really required? ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 10:48 ` Octavian Purdila @ 2010-02-16 13:08 ` Cong Wang 2010-02-16 14:00 ` Octavian Purdila 2010-02-18 3:58 ` Octavian Purdila 1 sibling, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-16 13:08 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman Octavian Purdila wrote: > On Tuesday 16 February 2010 10:41:07 you wrote: > >>> + >>> + if (!write && !first && left && !err) >>> + err = proc_put_newline(&buffer, &left); >>> + if (write && !err) >>> + err = proc_skip_wspace(&buffer, &left); >>> + if (err == -EFAULT /* do we really need to check for -EFAULT? */ || >>> + (write && first)) >>> + return err ? : -EINVAL; >> The logic here seems messy, adding one or two goto's may help? >> > > OK, I'll give it a try. > > What about the EFAULT check, is that really required? I think so, it means to keep the errno to user-space when it is EFAULT, right? This seems reasonable. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 13:08 ` Cong Wang @ 2010-02-16 14:00 ` Octavian Purdila 2010-02-17 16:31 ` Cong Wang 0 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 14:00 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman On Tuesday 16 February 2010 15:08:23 you wrote: > Octavian Purdila wrote: > > On Tuesday 16 February 2010 10:41:07 you wrote: > >>> + > >>> + if (!write && !first && left && !err) > >>> + err = proc_put_newline(&buffer, &left); > >>> + if (write && !err) > >>> + err = proc_skip_wspace(&buffer, &left); > >>> + if (err == -EFAULT /* do we really need to check for -EFAULT? */ > >>> || + (write && first)) > >>> + return err ? : -EINVAL; > >> > >> The logic here seems messy, adding one or two goto's may help? > > > > OK, I'll give it a try. > > > > What about the EFAULT check, is that really required? > > I think so, it means to keep the errno to user-space when it is EFAULT, > right? This seems reasonable. > The problem I see is that this way we don't actually acknowledge some of the set values, e.g. say that we have buffer="1 2 3" and length = 100. Although we do accept values 1, 2 and 3 we don't acknowledge that to the user (as we would do for, say "1 2 3 4a"), but return -EFAULT. I think it would be better to skip this check. That means that the user will get the ack for the 1, 2 and 3 values and next time it continues the write it will get -EFAULT. This will of course change the userspace ABI, albeit in a minor way, and it is not clear to me if doing this is allowed (even if this new approach would be the correct one). ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 14:00 ` Octavian Purdila @ 2010-02-17 16:31 ` Cong Wang 2010-02-17 21:09 ` Octavian Purdila 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-17 16:31 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman Octavian Purdila wrote: > On Tuesday 16 February 2010 15:08:23 you wrote: >> Octavian Purdila wrote: >>> On Tuesday 16 February 2010 10:41:07 you wrote: >>>>> + >>>>> + if (!write && !first && left && !err) >>>>> + err = proc_put_newline(&buffer, &left); >>>>> + if (write && !err) >>>>> + err = proc_skip_wspace(&buffer, &left); >>>>> + if (err == -EFAULT /* do we really need to check for -EFAULT? */ >>>>> || + (write && first)) >>>>> + return err ? : -EINVAL; >>>> The logic here seems messy, adding one or two goto's may help? >>> OK, I'll give it a try. >>> >>> What about the EFAULT check, is that really required? >> I think so, it means to keep the errno to user-space when it is EFAULT, >> right? This seems reasonable. >> > > The problem I see is that this way we don't actually acknowledge some of the > set values, e.g. say that we have buffer="1 2 3" and length = 100. Although we > do accept values 1, 2 and 3 we don't acknowledge that to the user (as we would > do for, say "1 2 3 4a"), but return -EFAULT. > > I think it would be better to skip this check. That means that the user will > get the ack for the 1, 2 and 3 values and next time it continues the write it > will get -EFAULT. > > This will of course change the userspace ABI, albeit in a minor way, and it is > not clear to me if doing this is allowed (even if this new approach would be > the correct one). > I think the right behavior is accept "1 2 3" and return the number of bytes that we accept. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-17 16:31 ` Cong Wang @ 2010-02-17 21:09 ` Octavian Purdila 0 siblings, 0 replies; 38+ messages in thread From: Octavian Purdila @ 2010-02-17 21:09 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman On Wednesday 17 February 2010 15:31:45 you wrote: > >>> > >>> What about the EFAULT check, is that really required? > >> > >> I think so, it means to keep the errno to user-space when it is EFAULT, > >> right? This seems reasonable. > > > > The problem I see is that this way we don't actually acknowledge some of > > the set values, e.g. say that we have buffer="1 2 3" and length = 100. > > Although we do accept values 1, 2 and 3 we don't acknowledge that to the > > user (as we would do for, say "1 2 3 4a"), but return -EFAULT. > > > > I think it would be better to skip this check. That means that the user > > will get the ack for the 1, 2 and 3 values and next time it continues the > > write it will get -EFAULT. > > > > This will of course change the userspace ABI, albeit in a minor way, and > > it is not clear to me if doing this is allowed (even if this new approach > > would be the correct one). > > I think the right behavior is accept "1 2 3" and return the number of > bytes that we accept. > OK, it seems nobody is complaining about this corner case ABI change. I will remove the EFAULT check then. This will also help with making the code clearer, I hope. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 10:48 ` Octavian Purdila 2010-02-16 13:08 ` Cong Wang @ 2010-02-18 3:58 ` Octavian Purdila 1 sibling, 0 replies; 38+ messages in thread From: Octavian Purdila @ 2010-02-18 3:58 UTC (permalink / raw) To: Cong Wang, Linux Kernel Network Developers Cc: Linux Kernel Developers, Eric W. Biederman, David Miller On Tuesday 16 February 2010 09:48:56 you wrote: > On Tuesday 16 February 2010 10:41:07 you wrote: > > > + > > > + if (!write && !first && left && !err) > > > + err = proc_put_newline(&buffer, &left); > > > + if (write && !err) > > > + err = proc_skip_wspace(&buffer, &left); > > > + if (err == -EFAULT /* do we really need to check for -EFAULT? */ > > > || + (write && first)) > > > + return err ? : -EINVAL; > > > > The logic here seems messy, adding one or two goto's may help? > > OK, I'll give it a try. > After a couple of tries which didn't make it clearer, I've given up. Maybe its not clear, but is not too terribly messy IMO. I'll rather spend time on getting the new list range format done. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 8:41 ` Cong Wang 2010-02-16 10:48 ` Octavian Purdila @ 2010-02-16 11:41 ` Octavian Purdila 2010-02-16 13:09 ` Cong Wang 1 sibling, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 11:41 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman On Tuesday 16 February 2010 10:41:07 you wrote: > > +static int proc_skip_wspace(char __user **buf, size_t *size) > > +{ > > + char c; > > + > > + while (*size) { > > + if (get_user(c, *buf)) > > + return -EFAULT; > > + if (!isspace(c)) > > + break; > > + (*size)--; (*buf)++; > > + } > > + > > + return 0; > > +} > > In lib/string.c we have skip_spaces(), I think we can use it > here instead of inventing another one. > I'm afraid we can't, skip_spaces does not accept userspace buffers. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 11:41 ` Octavian Purdila @ 2010-02-16 13:09 ` Cong Wang 2010-02-16 13:44 ` Octavian Purdila 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-16 13:09 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman Octavian Purdila wrote: > On Tuesday 16 February 2010 10:41:07 you wrote: > >>> +static int proc_skip_wspace(char __user **buf, size_t *size) >>> +{ >>> + char c; >>> + >>> + while (*size) { >>> + if (get_user(c, *buf)) >>> + return -EFAULT; >>> + if (!isspace(c)) >>> + break; >>> + (*size)--; (*buf)++; >>> + } >>> + >>> + return 0; >>> +} >> In lib/string.c we have skip_spaces(), I think we can use it >> here instead of inventing another one. >> > > I'm afraid we can't, skip_spaces does not accept userspace buffers. Well, you need to use copy_from_user() before call it. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 13:09 ` Cong Wang @ 2010-02-16 13:44 ` Octavian Purdila 2010-02-17 16:21 ` Cong Wang 0 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 13:44 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman On Tuesday 16 February 2010 15:09:51 you wrote: > Octavian Purdila wrote: > > On Tuesday 16 February 2010 10:41:07 you wrote: > >>> +static int proc_skip_wspace(char __user **buf, size_t *size) > >>> +{ > >>> + char c; > >>> + > >>> + while (*size) { > >>> + if (get_user(c, *buf)) > >>> + return -EFAULT; > >>> + if (!isspace(c)) > >>> + break; > >>> + (*size)--; (*buf)++; > >>> + } > >>> + > >>> + return 0; > >>> +} > >> > >> In lib/string.c we have skip_spaces(), I think we can use it > >> here instead of inventing another one. > > > > I'm afraid we can't, skip_spaces does not accept userspace buffers. > > Well, you need to use copy_from_user() before call it. > And how much would you copy? You need to either use a stack buffer and do a loop copy or you would need to copy the whole userspace buffer which means we need to allocate a kernel buffer. I think its much cleaner the way is currently done. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-16 13:44 ` Octavian Purdila @ 2010-02-17 16:21 ` Cong Wang 2010-02-17 16:33 ` Eric W. Biederman 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-17 16:21 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman Octavian Purdila wrote: > On Tuesday 16 February 2010 15:09:51 you wrote: >> Octavian Purdila wrote: >>> On Tuesday 16 February 2010 10:41:07 you wrote: >>>>> +static int proc_skip_wspace(char __user **buf, size_t *size) >>>>> +{ >>>>> + char c; >>>>> + >>>>> + while (*size) { >>>>> + if (get_user(c, *buf)) >>>>> + return -EFAULT; >>>>> + if (!isspace(c)) >>>>> + break; >>>>> + (*size)--; (*buf)++; >>>>> + } >>>>> + >>>>> + return 0; >>>>> +} >>>> In lib/string.c we have skip_spaces(), I think we can use it >>>> here instead of inventing another one. >>> I'm afraid we can't, skip_spaces does not accept userspace buffers. >> Well, you need to use copy_from_user() before call it. >> > > And how much would you copy? You need to either use a stack buffer and do a > loop copy or you would need to copy the whole userspace buffer which means we > need to allocate a kernel buffer. I think its much cleaner the way is currently > done. Yeah, maybe just a personal preference. :-/ ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-17 16:21 ` Cong Wang @ 2010-02-17 16:33 ` Eric W. Biederman 2010-02-18 4:25 ` Octavian Purdila 0 siblings, 1 reply; 38+ messages in thread From: Eric W. Biederman @ 2010-02-17 16:33 UTC (permalink / raw) To: Cong Wang Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers Cong Wang <amwang@redhat.com> writes: > Octavian Purdila wrote: >> On Tuesday 16 February 2010 15:09:51 you wrote: >>> Octavian Purdila wrote: >>>> On Tuesday 16 February 2010 10:41:07 you wrote: >>>>>> +static int proc_skip_wspace(char __user **buf, size_t *size) >>>>>> +{ >>>>>> + char c; >>>>>> + >>>>>> + while (*size) { >>>>>> + if (get_user(c, *buf)) >>>>>> + return -EFAULT; >>>>>> + if (!isspace(c)) >>>>>> + break; >>>>>> + (*size)--; (*buf)++; >>>>>> + } >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>> In lib/string.c we have skip_spaces(), I think we can use it >>>>> here instead of inventing another one. >>>> I'm afraid we can't, skip_spaces does not accept userspace buffers. >>> Well, you need to use copy_from_user() before call it. >>> >> >> And how much would you copy? You need to either use a stack buffer and do a >> loop copy or you would need to copy the whole userspace buffer which means we >> need to allocate a kernel buffer. I think its much cleaner the way is >> currently done. > > Yeah, maybe just a personal preference. :-/ There can be valid security reasons for copying all of the data before processing it. Semantically if we an guarantee that we either have processed the entire buffer or failed the entire buffer and no changes have occurred in the kernel that seems like a much easier semantic to work with in user space. Eric ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code 2010-02-17 16:33 ` Eric W. Biederman @ 2010-02-18 4:25 ` Octavian Purdila 0 siblings, 0 replies; 38+ messages in thread From: Octavian Purdila @ 2010-02-18 4:25 UTC (permalink / raw) To: Eric W. Biederman Cc: Cong Wang, David Miller, Linux Kernel Network Developers, Linux Kernel Developers On Wednesday 17 February 2010 15:33:03 you wrote: > Cong Wang <amwang@redhat.com> writes: > > Octavian Purdila wrote: > >> On Tuesday 16 February 2010 15:09:51 you wrote: > >>> Octavian Purdila wrote: > >>>> On Tuesday 16 February 2010 10:41:07 you wrote: > >>>>>> +static int proc_skip_wspace(char __user **buf, size_t *size) > >>>>>> +{ > >>>>>> + char c; > >>>>>> + > >>>>>> + while (*size) { > >>>>>> + if (get_user(c, *buf)) > >>>>>> + return -EFAULT; > >>>>>> + if (!isspace(c)) > >>>>>> + break; > >>>>>> + (*size)--; (*buf)++; > >>>>>> + } > >>>>>> + > >>>>>> + return 0; > >>>>>> +} > >>>>> > >>>>> In lib/string.c we have skip_spaces(), I think we can use it > >>>>> here instead of inventing another one. > >>>> > >>>> I'm afraid we can't, skip_spaces does not accept userspace buffers. > >>> > >>> Well, you need to use copy_from_user() before call it. > >> > >> And how much would you copy? You need to either use a stack buffer and > >> do a loop copy or you would need to copy the whole userspace buffer > >> which means we need to allocate a kernel buffer. I think its much > >> cleaner the way is currently done. > > > > Yeah, maybe just a personal preference. :-/ > > There can be valid security reasons for copying all of the data before > processing it. > How so? I only know about security issues when you copy & process the same data more than one time. And all existing code I've looked over (proc_dointvec, proc_dostring, bitmap_parse_user) does not copy all of the data. In fact the code in question here is just existing code moved to its own function. There must be a reason for doing that, as copying whole buffer seems like a much simple implementation? (my guess is to avoid an extra allocation) > Semantically if we an guarantee that we either have processed the > entire buffer or failed the entire buffer and no changes have occurred > in the kernel that seems like a much easier semantic to work with in > user space. > OK, but this is not how various proc routines currently handles it. For example proc_dointvec won't undo the changes for previous items when we get an error and I think for good reasons as I don't see a clean and generic way to do the undo stuff. ^ permalink raw reply [flat|nested] 38+ messages in thread
* [net-next PATCH v4 2/3] sysctl: add proc_dobitmap 2010-02-15 22:00 [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Octavian Purdila 2010-02-15 22:00 ` [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code Octavian Purdila @ 2010-02-15 22:00 ` Octavian Purdila 2010-02-16 9:12 ` Cong Wang 2010-02-15 22:00 ` [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila 2010-02-16 17:25 ` [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Eric W. Biederman 3 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-15 22:00 UTC (permalink / raw) To: David Miller Cc: Octavian Purdila, Linux Kernel Network Developers, Linux Kernel Developers, WANG Cong, Eric W. Biederman The new function can be used to update bitmaps via /proc. Bits can be set by writing positive values in the file and cleared by writing negative values (e.g. 0 2 will set bits 1 and 3, -0 -2 will clear them). Reading will show only the set bits. Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> Cc: WANG Cong <amwang@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> --- include/linux/sysctl.h | 2 + kernel/sysctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 0 deletions(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 9f236cd..ba89bf2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -985,6 +985,8 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void __user *, size_t *, loff_t *); +extern int proc_dobitmap(struct ctl_table *, int, + void __user *, size_t *, loff_t *); /* * Register a set of sysctl names by calling register_sysctl_table diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b0f9618..b8959f4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2596,6 +2596,82 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, return 0; } +/** + * proc_dobitmap - read/write from/to a bitmap + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * @ppos: the current position in the file + * + * The bitmap is stored at table->data and the bitmap length (in bits) + * in table->maxlen. Reading from the proc file will show the set bits. + * Writing positive values sets the bits, negative values clears them + * (e.g. 0 2 sets the first and 3rd bit, -0 -2 clears them). + * + * Returns 0 on success. + */ +int proc_dobitmap(struct ctl_table *table, int write, + void __user *_buffer, size_t *lenp, loff_t *ppos) +{ + bool first = 1; + unsigned long *bitmap = (unsigned long *) table->data; + unsigned long bitmap_len = table->maxlen; + int left = *lenp, err = 0; + char __user *buffer = (char __user *) _buffer; + + if (!bitmap_len || !left || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + while (left) { + unsigned long val; + bool neg; + + err = proc_get_next_ulong(&buffer, &left, &val, &neg); + if (err) + break; + if (val >= bitmap_len) { + err = -EINVAL; + break; + } + if (neg) + clear_bit(val, bitmap); + else + set_bit(val, bitmap); + first = 0; + } + if (!err) + err = proc_skip_wspace(&buffer, &left); + } else { + unsigned long bit = 0; + + while (left) { + bit = find_next_bit(bitmap, bitmap_len, bit); + if (bit >= bitmap_len) + break; + err = proc_put_ulong(&buffer, &left, bit, 0, first); + if (err) + break; + first = 0; bit++; + } + if (!err) + err = proc_put_newline(&buffer, &left); + } + + if (first && write && !err) + err = -EINVAL; + if (err == -EFAULT /* do we really need to check for -EFAULT? */ || + (write && first)) + return err ? : -EINVAL; + *lenp -= left; + *ppos += *lenp; + return 0; +} + #else /* CONFIG_PROC_FS */ int proc_dostring(struct ctl_table *table, int write, -- 1.5.6.5 ^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 2/3] sysctl: add proc_dobitmap 2010-02-15 22:00 ` [net-next PATCH v4 2/3] sysctl: add proc_dobitmap Octavian Purdila @ 2010-02-16 9:12 ` Cong Wang 0 siblings, 0 replies; 38+ messages in thread From: Cong Wang @ 2010-02-16 9:12 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric W. Biederman Octavian Purdila wrote: > The new function can be used to update bitmaps via /proc. Bits can be > set by writing positive values in the file and cleared by writing > negative values (e.g. 0 2 will set bits 1 and 3, -0 -2 will clear > them). Reading will show only the set bits. > > Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> > Cc: WANG Cong <amwang@redhat.com> > Cc: Eric W. Biederman <ebiederm@xmission.com> > --- > include/linux/sysctl.h | 2 + > kernel/sysctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 78 insertions(+), 0 deletions(-) > > diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h > index 9f236cd..ba89bf2 100644 > --- a/include/linux/sysctl.h > +++ b/include/linux/sysctl.h > @@ -985,6 +985,8 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int, > void __user *, size_t *, loff_t *); > extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, > void __user *, size_t *, loff_t *); > +extern int proc_dobitmap(struct ctl_table *, int, > + void __user *, size_t *, loff_t *); > > /* > * Register a set of sysctl names by calling register_sysctl_table > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index b0f9618..b8959f4 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -2596,6 +2596,82 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, > return 0; > } > > +/** > + * proc_dobitmap - read/write from/to a bitmap > + * @table: the sysctl table > + * @write: %TRUE if this is a write to the sysctl file > + * @buffer: the user buffer > + * @lenp: the size of the user buffer > + * @ppos: file position > + * @ppos: the current position in the file Duplicated. I hope Eric can also comment on these two patches. ^ permalink raw reply [flat|nested] 38+ messages in thread
* [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-15 22:00 [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Octavian Purdila 2010-02-15 22:00 ` [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code Octavian Purdila 2010-02-15 22:00 ` [net-next PATCH v4 2/3] sysctl: add proc_dobitmap Octavian Purdila @ 2010-02-15 22:00 ` Octavian Purdila 2010-02-16 9:37 ` Cong Wang 2010-02-16 17:25 ` [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Eric W. Biederman 3 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-15 22:00 UTC (permalink / raw) To: David Miller Cc: Octavian Purdila, Linux Kernel Network Developers, Linux Kernel Developers, WANG Cong, Neil Horman, Eric Dumazet This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports (bitmap type) which allows users to reserve ports for third-party applications. The reserved ports will not be used by automatic port assignments (e.g. when calling connect() or bind() with port number 0). Explicit port allocation behavior is unchanged. Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> Signed-off-by: WANG Cong <amwang@redhat.com> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: Eric Dumazet <eric.dumazet@gmail.com> --- Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ drivers/infiniband/core/cma.c | 7 ++++++- include/net/ip.h | 6 ++++++ net/ipv4/af_inet.c | 8 +++++++- net/ipv4/inet_connection_sock.c | 6 ++++++ net/ipv4/inet_hashtables.c | 2 ++ net/ipv4/sysctl_net_ipv4.c | 17 +++++++++++++++++ net/ipv4/udp.c | 3 ++- net/sctp/socket.c | 2 ++ 9 files changed, 60 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2dc7a1d..23be7a4 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -564,6 +564,18 @@ ip_local_port_range - 2 INTEGERS (i.e. by default) range 1024-4999 is enough to issue up to 2000 connections per second to systems supporting timestamps. +ip_local_reserved_ports - BITMAP of 65536 ports + Specify the ports which are reserved for known third-party + applications. These ports will not be used by automatic port assignments + (e.g. when calling connect() or bind() with port number 0). Explicit + port allocation behavior is unchanged. + + Reserving ports is done by writing positive numbers in this proc entry, + clearing them is done by writing negative numbers (e.g. 8080 reserves + port number, -8080 makes it available for automatic assignment again). + + Default: Empty + ip_nonlocal_bind - BOOLEAN If set, allows processes to bind() to non-local IP addresses, which can be quite useful - but may break some applications. diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index cc9b594..8248fc6 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1979,6 +1979,8 @@ retry: /* FIXME: add proper port randomization per like inet_csk_get_port */ do { ret = idr_get_new_above(ps, bind_list, next_port, &port); + if (inet_is_reserved_local_port(port)) + ret = -EAGAIN; } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); if (ret) @@ -2997,10 +2999,13 @@ static int __init cma_init(void) { int ret, low, high, remaining; - get_random_bytes(&next_port, sizeof next_port); inet_get_local_port_range(&low, &high); +again: + get_random_bytes(&next_port, sizeof next_port); remaining = (high - low) + 1; next_port = ((unsigned int) next_port % remaining) + low; + if (inet_is_reserved_local_port(next_port)) + goto again; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) diff --git a/include/net/ip.h b/include/net/ip.h index fb63371..2e24256 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -184,6 +184,12 @@ extern struct local_ports { } sysctl_local_ports; extern void inet_get_local_port_range(int *low, int *high); +extern unsigned long *sysctl_local_reserved_ports; +static inline int inet_is_reserved_local_port(int port) +{ + return test_bit(port, sysctl_local_reserved_ports); +} + extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7d12c6a..06810b0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1546,9 +1546,13 @@ static int __init inet_init(void) BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); + if (!sysctl_local_reserved_ports) + goto out; + rc = proto_register(&tcp_prot, 1); if (rc) - goto out; + goto out_free_reserved_ports; rc = proto_register(&udp_prot, 1); if (rc) @@ -1647,6 +1651,8 @@ out_unregister_udp_proto: proto_unregister(&udp_prot); out_unregister_tcp_proto: proto_unregister(&tcp_prot); +out_free_reserved_ports: + kfree(sysctl_local_reserved_ports); goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429..1acb462 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = { .range = { 32768, 61000 }, }; +unsigned long *sysctl_local_reserved_ports; +EXPORT_SYMBOL(sysctl_local_reserved_ports); + void inet_get_local_port_range(int *low, int *high) { unsigned seq; @@ -108,6 +111,8 @@ again: smallest_size = -1; do { + if (inet_is_reserved_local_port(rover)) + goto next_nolock; head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); @@ -130,6 +135,7 @@ again: break; next: spin_unlock(&head->lock); + next_nolock: if (++rover > high) rover = low; } while (--remaining > 0); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2b79377..d3e160a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; + if (inet_is_reserved_local_port(port)) + continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; spin_lock(&head->lock); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712c..ce3597a 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -298,6 +298,13 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = ipv4_local_port_range, }, + { + .procname = "ip_local_reserved_ports", + .data = NULL, /* initialized in sysctl_ipv4_init */ + .maxlen = 65536, + .mode = 0644, + .proc_handler = proc_dobitmap, + }, #ifdef CONFIG_IP_MULTICAST { .procname = "igmp_max_memberships", @@ -721,6 +728,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { static __init int sysctl_ipv4_init(void) { struct ctl_table_header *hdr; + struct ctl_table *i; + + for (i = ipv4_table; i->procname; i++) { + if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { + i->data = sysctl_local_reserved_ports; + break; + } + } + if (!i->procname[0]) + return -EINVAL; hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); if (hdr == NULL) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 608a544..bfd0a6a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -232,7 +232,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, */ do { if (low <= snum && snum <= high && - !test_bit(snum >> udptable->log, bitmap)) + !test_bit(snum >> udptable->log, bitmap) && + !inet_is_reserved_local_port(snum)) goto found; snum += rand; } while (snum != first); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f6d1e59..1f839d0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) rover++; if ((rover < low) || (rover > high)) rover = low; + if (inet_is_reserved_local_port(rover)) + continue; index = sctp_phashfn(rover); head = &sctp_port_hashtable[index]; sctp_spin_lock(&head->lock); -- 1.5.6.5 ^ permalink raw reply related [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-15 22:00 ` [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila @ 2010-02-16 9:37 ` Cong Wang 2010-02-16 11:06 ` Octavian Purdila 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-16 9:37 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman, Eric Dumazet Octavian Purdila wrote: > This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports > (bitmap type) which allows users to reserve ports for third-party > applications. > > The reserved ports will not be used by automatic port assignments > (e.g. when calling connect() or bind() with port number 0). Explicit > port allocation behavior is unchanged. > > Signed-off-by: Octavian Purdila <opurdila@ixiacom.com> > Signed-off-by: WANG Cong <amwang@redhat.com> > Cc: Neil Horman <nhorman@tuxdriver.com> > Cc: Eric Dumazet <eric.dumazet@gmail.com> > --- > Documentation/networking/ip-sysctl.txt | 12 ++++++++++++ > drivers/infiniband/core/cma.c | 7 ++++++- > include/net/ip.h | 6 ++++++ > net/ipv4/af_inet.c | 8 +++++++- > net/ipv4/inet_connection_sock.c | 6 ++++++ > net/ipv4/inet_hashtables.c | 2 ++ > net/ipv4/sysctl_net_ipv4.c | 17 +++++++++++++++++ > net/ipv4/udp.c | 3 ++- > net/sctp/socket.c | 2 ++ > 9 files changed, 60 insertions(+), 3 deletions(-) > > diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt > index 2dc7a1d..23be7a4 100644 > --- a/Documentation/networking/ip-sysctl.txt > +++ b/Documentation/networking/ip-sysctl.txt > @@ -564,6 +564,18 @@ ip_local_port_range - 2 INTEGERS > (i.e. by default) range 1024-4999 is enough to issue up to > 2000 connections per second to systems supporting timestamps. > > +ip_local_reserved_ports - BITMAP of 65536 ports > + Specify the ports which are reserved for known third-party > + applications. These ports will not be used by automatic port assignments > + (e.g. when calling connect() or bind() with port number 0). Explicit > + port allocation behavior is unchanged. > + > + Reserving ports is done by writing positive numbers in this proc entry, > + clearing them is done by writing negative numbers (e.g. 8080 reserves > + port number, -8080 makes it available for automatic assignment again). > + > + Default: Empty > + > ip_nonlocal_bind - BOOLEAN > If set, allows processes to bind() to non-local IP addresses, > which can be quite useful - but may break some applications. > diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c > index cc9b594..8248fc6 100644 > --- a/drivers/infiniband/core/cma.c > +++ b/drivers/infiniband/core/cma.c > @@ -1979,6 +1979,8 @@ retry: > /* FIXME: add proper port randomization per like inet_csk_get_port */ > do { > ret = idr_get_new_above(ps, bind_list, next_port, &port); > + if (inet_is_reserved_local_port(port)) > + ret = -EAGAIN; > } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); > > if (ret) > @@ -2997,10 +2999,13 @@ static int __init cma_init(void) > { > int ret, low, high, remaining; > > - get_random_bytes(&next_port, sizeof next_port); > inet_get_local_port_range(&low, &high); > +again: > + get_random_bytes(&next_port, sizeof next_port); > remaining = (high - low) + 1; > next_port = ((unsigned int) next_port % remaining) + low; > + if (inet_is_reserved_local_port(next_port)) > + goto again; > > cma_wq = create_singlethread_workqueue("rdma_cm"); > if (!cma_wq) > diff --git a/include/net/ip.h b/include/net/ip.h > index fb63371..2e24256 100644 > --- a/include/net/ip.h > +++ b/include/net/ip.h > @@ -184,6 +184,12 @@ extern struct local_ports { > } sysctl_local_ports; > extern void inet_get_local_port_range(int *low, int *high); > > +extern unsigned long *sysctl_local_reserved_ports; > +static inline int inet_is_reserved_local_port(int port) > +{ > + return test_bit(port, sysctl_local_reserved_ports); > +} > + > extern int sysctl_ip_default_ttl; > extern int sysctl_ip_nonlocal_bind; > > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c > index 7d12c6a..06810b0 100644 > --- a/net/ipv4/af_inet.c > +++ b/net/ipv4/af_inet.c > @@ -1546,9 +1546,13 @@ static int __init inet_init(void) > > BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); > > + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); > + if (!sysctl_local_reserved_ports) > + goto out; > + I think we should also consider the ports in ip_local_port_range, since we can only reserve the ports in that range. > rc = proto_register(&tcp_prot, 1); > if (rc) > - goto out; > + goto out_free_reserved_ports; > > rc = proto_register(&udp_prot, 1); > if (rc) > @@ -1647,6 +1651,8 @@ out_unregister_udp_proto: > proto_unregister(&udp_prot); > out_unregister_tcp_proto: > proto_unregister(&tcp_prot); > +out_free_reserved_ports: > + kfree(sysctl_local_reserved_ports); > goto out; > } > > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > index 8da6429..1acb462 100644 > --- a/net/ipv4/inet_connection_sock.c > +++ b/net/ipv4/inet_connection_sock.c > @@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = { > .range = { 32768, 61000 }, > }; > > +unsigned long *sysctl_local_reserved_ports; > +EXPORT_SYMBOL(sysctl_local_reserved_ports); > + > void inet_get_local_port_range(int *low, int *high) > { > unsigned seq; > @@ -108,6 +111,8 @@ again: > > smallest_size = -1; > do { > + if (inet_is_reserved_local_port(rover)) > + goto next_nolock; > head = &hashinfo->bhash[inet_bhashfn(net, rover, > hashinfo->bhash_size)]; > spin_lock(&head->lock); > @@ -130,6 +135,7 @@ again: > break; > next: > spin_unlock(&head->lock); > + next_nolock: > if (++rover > high) > rover = low; > } while (--remaining > 0); > diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c > index 2b79377..d3e160a 100644 > --- a/net/ipv4/inet_hashtables.c > +++ b/net/ipv4/inet_hashtables.c > @@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, > local_bh_disable(); > for (i = 1; i <= remaining; i++) { > port = low + (i + offset) % remaining; > + if (inet_is_reserved_local_port(port)) > + continue; > head = &hinfo->bhash[inet_bhashfn(net, port, > hinfo->bhash_size)]; > spin_lock(&head->lock); > diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c > index 7e3712c..ce3597a 100644 > --- a/net/ipv4/sysctl_net_ipv4.c > +++ b/net/ipv4/sysctl_net_ipv4.c > @@ -298,6 +298,13 @@ static struct ctl_table ipv4_table[] = { > .mode = 0644, > .proc_handler = ipv4_local_port_range, > }, > + { > + .procname = "ip_local_reserved_ports", > + .data = NULL, /* initialized in sysctl_ipv4_init */ > + .maxlen = 65536, > + .mode = 0644, > + .proc_handler = proc_dobitmap, > + }, Isn't there an off-by-one here? In patch 2/3, you use 0 to set the fist bit, then how about 65535 which writes 65536th bit? This is beyond the range of port number. > #ifdef CONFIG_IP_MULTICAST > { > .procname = "igmp_max_memberships", > @@ -721,6 +728,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = { > static __init int sysctl_ipv4_init(void) > { > struct ctl_table_header *hdr; > + struct ctl_table *i; > + > + for (i = ipv4_table; i->procname; i++) { > + if (strcmp(i->procname, "ip_local_reserved_ports") == 0) { > + i->data = sysctl_local_reserved_ports; > + break; > + } > + } > + if (!i->procname[0]) > + return -EINVAL; > > hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); > if (hdr == NULL) > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index 608a544..bfd0a6a 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -232,7 +232,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, > */ > do { > if (low <= snum && snum <= high && > - !test_bit(snum >> udptable->log, bitmap)) > + !test_bit(snum >> udptable->log, bitmap) && > + !inet_is_reserved_local_port(snum)) > goto found; > snum += rand; > } while (snum != first); > diff --git a/net/sctp/socket.c b/net/sctp/socket.c > index f6d1e59..1f839d0 100644 > --- a/net/sctp/socket.c > +++ b/net/sctp/socket.c > @@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) > rover++; > if ((rover < low) || (rover > high)) > rover = low; > + if (inet_is_reserved_local_port(rover)) > + continue; > index = sctp_phashfn(rover); > head = &sctp_port_hashtable[index]; > sctp_spin_lock(&head->lock); ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-16 9:37 ` Cong Wang @ 2010-02-16 11:06 ` Octavian Purdila 2010-02-16 13:06 ` Cong Wang 0 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 11:06 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman, Eric Dumazet On Tuesday 16 February 2010 11:37:04 you wrote: > > BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); > > > > + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); > > + if (!sysctl_local_reserved_ports) > > + goto out; > > + > > I think we should also consider the ports in ip_local_port_range, > since we can only reserve the ports in that range. > That is subject to changes at runtime, which means we will have to readjust the bitmap at runtime which introduces the need for additional synchronization operations which I would rather avoid. > > + { > > + .procname = "ip_local_reserved_ports", > > + .data = NULL, /* initialized in sysctl_ipv4_init */ > > + .maxlen = 65536, > > + .mode = 0644, > > + .proc_handler = proc_dobitmap, > > + }, > > Isn't there an off-by-one here? > > In patch 2/3, you use 0 to set the fist bit, then how about 65535 which > writes 65536th bit? This is beyond the range of port number. > This seems fine to me, 65535 is the value used by both the port checking function and the proc read/write function. And it translates indeed to 65536th bit, but that is also bit 65535 if you start counting bits from 0 instead of 1. The usual computing/natural arithmetic confusion for the meaning of first :) ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-16 11:06 ` Octavian Purdila @ 2010-02-16 13:06 ` Cong Wang 2010-02-16 13:20 ` Eric Dumazet 2010-02-16 14:25 ` Octavian Purdila 0 siblings, 2 replies; 38+ messages in thread From: Cong Wang @ 2010-02-16 13:06 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman, Eric Dumazet Octavian Purdila wrote: > On Tuesday 16 February 2010 11:37:04 you wrote: >>> BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); >>> >>> + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); >>> + if (!sysctl_local_reserved_ports) >>> + goto out; >>> + >> I think we should also consider the ports in ip_local_port_range, >> since we can only reserve the ports in that range. >> > > That is subject to changes at runtime, which means we will have to readjust > the bitmap at runtime which introduces the need for additional synchronization > operations which I would rather avoid. Why? As long as the bitmap is global, this will not be hard. Consider that if one user writes a port number which is beyond the ip_local_port_range into ip_local_reserved_ports, we should not accept this, because it doesn't make any sense. But with your patch, we do. > >>> + { >>> + .procname = "ip_local_reserved_ports", >>> + .data = NULL, /* initialized in sysctl_ipv4_init */ >>> + .maxlen = 65536, >>> + .mode = 0644, >>> + .proc_handler = proc_dobitmap, >>> + }, >> Isn't there an off-by-one here? >> >> In patch 2/3, you use 0 to set the fist bit, then how about 65535 which >> writes 65536th bit? This is beyond the range of port number. >> > > This seems fine to me, 65535 is the value used by both the port checking > function and the proc read/write function. And it translates indeed to > 65536th bit, but that is also bit 65535 if you start counting bits from 0 > instead of 1. The usual computing/natural arithmetic confusion for the meaning > of first :) > Oh, I see. Thanks. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-16 13:06 ` Cong Wang @ 2010-02-16 13:20 ` Eric Dumazet 2010-02-17 16:13 ` Cong Wang 2010-02-16 14:25 ` Octavian Purdila 1 sibling, 1 reply; 38+ messages in thread From: Eric Dumazet @ 2010-02-16 13:20 UTC (permalink / raw) To: Cong Wang Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman Le mardi 16 février 2010 à 21:06 +0800, Cong Wang a écrit : > Octavian Purdila wrote: > > On Tuesday 16 February 2010 11:37:04 you wrote: > >>> BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); > >>> > >>> + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); > >>> + if (!sysctl_local_reserved_ports) > >>> + goto out; > >>> + > >> I think we should also consider the ports in ip_local_port_range, > >> since we can only reserve the ports in that range. > >> > > > > That is subject to changes at runtime, which means we will have to readjust > > the bitmap at runtime which introduces the need for additional synchronization > > operations which I would rather avoid. > > Why? As long as the bitmap is global, this will not be hard. > > Consider that if one user writes a port number which is beyond > the ip_local_port_range into ip_local_reserved_ports, we should > not accept this, because it doesn't make any sense. But with your > patch, we do. I disagree with you. This is perfectly OK. A port not being flagged in ip_local_reserved_ports doesnt mean it can be used for allocation. If you want to really block ports from being used at boot, you could for example : # temporarly reduce the ip_local_port_range echo "61000 61001" >/proc/sys/net/ipv4/ip_local_port_range # Build our bitmap (could be slow, if a remote database is read) for port in $LIST_RESERVED_PORT do echo $port >/proc/sys/net/ipv4/ip_local_reserved_ports done echo "10000 61000" >/proc/sys/net/ipv4/ip_local_port_range ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-16 13:20 ` Eric Dumazet @ 2010-02-17 16:13 ` Cong Wang 2010-02-17 16:39 ` Eric Dumazet 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-17 16:13 UTC (permalink / raw) To: Eric Dumazet Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman Eric Dumazet wrote: > Le mardi 16 février 2010 à 21:06 +0800, Cong Wang a écrit : >> Octavian Purdila wrote: >>> On Tuesday 16 February 2010 11:37:04 you wrote: >>>>> BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); >>>>> >>>>> + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); >>>>> + if (!sysctl_local_reserved_ports) >>>>> + goto out; >>>>> + >>>> I think we should also consider the ports in ip_local_port_range, >>>> since we can only reserve the ports in that range. >>>> >>> That is subject to changes at runtime, which means we will have to readjust >>> the bitmap at runtime which introduces the need for additional synchronization >>> operations which I would rather avoid. >> Why? As long as the bitmap is global, this will not be hard. >> >> Consider that if one user writes a port number which is beyond >> the ip_local_port_range into ip_local_reserved_ports, we should >> not accept this, because it doesn't make any sense. But with your >> patch, we do. > > I disagree with you. This is perfectly OK. > > A port not being flagged in ip_local_reserved_ports doesnt mean it can > be used for allocation. > > If you want to really block ports from being used at boot, you could for > example : > > # temporarly reduce the ip_local_port_range > echo "61000 61001" >/proc/sys/net/ipv4/ip_local_port_range > # Build our bitmap (could be slow, if a remote database is read) > for port in $LIST_RESERVED_PORT > do > echo $port >/proc/sys/net/ipv4/ip_local_reserved_ports > done > echo "10000 61000" >/proc/sys/net/ipv4/ip_local_port_range > > I don't think so, if you want to avoid race condition, you just need to write the reserved ports before any networking application starts, IOW, as early as possible during boot. Thanks. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-17 16:13 ` Cong Wang @ 2010-02-17 16:39 ` Eric Dumazet 2010-02-17 16:01 ` Octavian Purdila 2010-02-20 8:00 ` Cong Wang 0 siblings, 2 replies; 38+ messages in thread From: Eric Dumazet @ 2010-02-17 16:39 UTC (permalink / raw) To: Cong Wang Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman Le jeudi 18 février 2010 à 00:13 +0800, Cong Wang a écrit : > I don't think so, if you want to avoid race condition, you just need to > write the reserved ports before any networking application starts, IOW, > as early as possible during boot. > Sure, but I was thinking retrieving the list of reserved port by a database query, using network :) Anyway, I just feel your argument is not applicable. Our kernel is capable of doing an intersection for us, we dont need to forbid user to mark a port as 'reserved' if this port is already blacklisted by another mechanism (for example, if this port is already in use) ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-17 16:39 ` Eric Dumazet @ 2010-02-17 16:01 ` Octavian Purdila 2010-02-20 8:00 ` Cong Wang 1 sibling, 0 replies; 38+ messages in thread From: Octavian Purdila @ 2010-02-17 16:01 UTC (permalink / raw) To: Eric Dumazet Cc: Cong Wang, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman On Wednesday 17 February 2010 18:39:28 you wrote: > Le jeudi 18 février 2010 à 00:13 +0800, Cong Wang a écrit : > > I don't think so, if you want to avoid race condition, you just need to > > write the reserved ports before any networking application starts, IOW, > > as early as possible during boot. > > Sure, but I was thinking retrieving the list of reserved port by a > database query, using network :) > > Anyway, I just feel your argument is not applicable. > > Our kernel is capable of doing an intersection for us, we dont need > to forbid user to mark a port as 'reserved' if this port is already > blacklisted by another mechanism (for example, if this port is already > in use) > Also I believe that ip_local_port_range purpose is not to reserve *specific* ports. Changing this setting helps with things like increasing the port space for NAT or for a higher connection rate. We add the new option for reserving *specific* ports. So, even from a functional perspective, it makes more sense to me to keep them independent, as they serve different purposes. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-17 16:39 ` Eric Dumazet 2010-02-17 16:01 ` Octavian Purdila @ 2010-02-20 8:00 ` Cong Wang 1 sibling, 0 replies; 38+ messages in thread From: Cong Wang @ 2010-02-20 8:00 UTC (permalink / raw) To: Eric Dumazet Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman Eric Dumazet wrote: > Le jeudi 18 février 2010 à 00:13 +0800, Cong Wang a écrit : > >> I don't think so, if you want to avoid race condition, you just need to >> write the reserved ports before any networking application starts, IOW, >> as early as possible during boot. >> > > Sure, but I was thinking retrieving the list of reserved port by a > database query, using network :) > > Anyway, I just feel your argument is not applicable. > > Our kernel is capable of doing an intersection for us, we dont need > to forbid user to mark a port as 'reserved' if this port is already > blacklisted by another mechanism (for example, if this port is already > in use) > Oh, I see your points. But this still could make people confused, like me. I think we'd better document this. Thanks. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-16 13:06 ` Cong Wang 2010-02-16 13:20 ` Eric Dumazet @ 2010-02-16 14:25 ` Octavian Purdila 2010-02-17 16:07 ` Cong Wang 1 sibling, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 14:25 UTC (permalink / raw) To: Cong Wang Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman, Eric Dumazet On Tuesday 16 February 2010 15:06:26 you wrote: > Octavian Purdila wrote: > > On Tuesday 16 February 2010 11:37:04 you wrote: > >>> BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); > >>> > >>> + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); > >>> + if (!sysctl_local_reserved_ports) > >>> + goto out; > >>> + > >> > >> I think we should also consider the ports in ip_local_port_range, > >> since we can only reserve the ports in that range. > > > > That is subject to changes at runtime, which means we will have to > > readjust the bitmap at runtime which introduces the need for additional > > synchronization operations which I would rather avoid. > > Why? As long as the bitmap is global, this will not be hard. > For the more important point see bellow, but with regard to reallocation, this means we need to at least use rcu_read_lock() in the fast path to avoid races between freeing the old bitmap and doing a read in progress. Granted, that is a light operation, but would it makes things so much more complicated just so that we save one memory page (assuming the range is the default [32000 64000] one). > Consider that if one user writes a port number which is beyond > the ip_local_port_range into ip_local_reserved_ports, we should > not accept this, because it doesn't make any sense. But with your > patch, we do. > I think it should be allowed. I see ip_local_reserved_ports and ip_local_range as independent settings that can be change at any time. That way I can flag port 8080 even if the current range is [32000, 64000] and then later I can expand the range to [1024, 64000] without loosing the 8080 reservation. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers 2010-02-16 14:25 ` Octavian Purdila @ 2010-02-17 16:07 ` Cong Wang 0 siblings, 0 replies; 38+ messages in thread From: Cong Wang @ 2010-02-17 16:07 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Neil Horman, Eric Dumazet Octavian Purdila wrote: > On Tuesday 16 February 2010 15:06:26 you wrote: >> Octavian Purdila wrote: >>> On Tuesday 16 February 2010 11:37:04 you wrote: >>>>> BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); >>>>> >>>>> + sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL); >>>>> + if (!sysctl_local_reserved_ports) >>>>> + goto out; >>>>> + >>>> I think we should also consider the ports in ip_local_port_range, >>>> since we can only reserve the ports in that range. >>> That is subject to changes at runtime, which means we will have to >>> readjust the bitmap at runtime which introduces the need for additional >>> synchronization operations which I would rather avoid. >> Why? As long as the bitmap is global, this will not be hard. >> > > For the more important point see bellow, but with regard to reallocation, this > means we need to at least use rcu_read_lock() in the fast path to avoid races > between freeing the old bitmap and doing a read in progress. > > Granted, that is a light operation, but would it makes things so much more > complicated just so that we save one memory page (assuming the range is the > default [32000 64000] one). Why not just allocate the bitmap for all ports? 65535/8 bytes are needed. > >> Consider that if one user writes a port number which is beyond >> the ip_local_port_range into ip_local_reserved_ports, we should >> not accept this, because it doesn't make any sense. But with your >> patch, we do. >> > > I think it should be allowed. I see ip_local_reserved_ports and ip_local_range > as independent settings that can be change at any time. According to the original purpose, they are not. > > That way I can flag port 8080 even if the current range is [32000, 64000] and > then later I can expand the range to [1024, 64000] without loosing the 8080 > reservation. Then its meaning is changed, bind(0) will never have chance to get 8080, thus reserving 8080 for this purpose fails. I want to always keep its original meaning, if the local_port_range goes out, then local_reserved_port should be empty at the same time, you have to reset it after changing local_port_range. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-15 22:00 [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Octavian Purdila ` (2 preceding siblings ...) 2010-02-15 22:00 ` [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila @ 2010-02-16 17:25 ` Eric W. Biederman 2010-02-16 18:04 ` Octavian Purdila 3 siblings, 1 reply; 38+ messages in thread From: Eric W. Biederman @ 2010-02-16 17:25 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Amerigo Wang Octavian Purdila <opurdila@ixiacom.com> writes: > This iteration makes the bitmap dynamically allocated since it is > quite big (8192 bytes) and adding that much in BSS may still, > apparently, cause problems on some architectures. > > > Octavian Purdila (3): > sysctl: refactor integer handling proc code > sysctl: add proc_dobitmap > net: reserve ports for applications using fixed port numbers > I don't like the /proc interface for this. That is certainly not the format I would choose for a bitmap. The way you have described this it looks like you are a set of different individual values instead of one large value. History says one value per file is the ideal in a user space facing interface. Intuitively I would not know how to change your new proc interface after catting the file. The classic read the file tweak the value and write the new value back will not work. Also we already have a common function for dealing with bitmaps in /proc. bitmap_parse_user. Used in /proc/irq/NNN/smp_affinity among other places. So can you please use bitmap_parse_user, or break this up into 64k individual files that we can set individually? Eric ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-16 17:25 ` [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Eric W. Biederman @ 2010-02-16 18:04 ` Octavian Purdila 2010-02-16 18:49 ` Eric W. Biederman 0 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 18:04 UTC (permalink / raw) To: Eric W. Biederman Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Amerigo Wang On Tuesday 16 February 2010 19:25:04 you wrote: > I don't like the /proc interface for this. That is certainly not the > format I would choose for a bitmap. The way you have described this > it looks like you are a set of different individual values instead of > one large value. History says one value per file is the ideal in a > user space facing interface. Intuitively I would not know how to > change your new proc interface after catting the file. The classic > read the file tweak the value and write the new value back will not > work. > > Also we already have a common function for dealing with bitmaps > in /proc. bitmap_parse_user. Used in /proc/irq/NNN/smp_affinity > among other places. > > So can you please use bitmap_parse_user, or break this up into > 64k individual files that we can set individually? > Hi Eric, thanks for going over this. The use case (large bitmaps/lists) is different enough from what we have today (small bitmaps) and that is why I think that we need this new interface. If I get bitmap_parse_user correctly, for a 64k bitmap it expects a 2K comma separated values. That is not the most intuitively way for the user to set a list of ports he wants to reserve. Using 64K files has the same practical issues (the user would have to cat all 64K files to determine which ports are reserved) plus it has issues caused by the large number of files: significant memory overhead and also significant time for registering those files. If this new interface is unacceptable I would rather go with a setsockopt approach, since either of the above approaches are more like machine friendly instead of user friendly. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-16 18:04 ` Octavian Purdila @ 2010-02-16 18:49 ` Eric W. Biederman 2010-02-16 19:51 ` Octavian Purdila 0 siblings, 1 reply; 38+ messages in thread From: Eric W. Biederman @ 2010-02-16 18:49 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Amerigo Wang Octavian Purdila <opurdila@ixiacom.com> writes: > Hi Eric, thanks for going over this. > > The use case (large bitmaps/lists) is different enough from what we have today > (small bitmaps) and that is why I think that we need this new interface. > > If I get bitmap_parse_user correctly, for a 64k bitmap it expects a 2K comma > separated values. That is not the most intuitively way for the user to set a > list of ports he wants to reserve. In this case I expect an interface of comma separated ranges would be ideal. Typically compact, and modifiable by writing the new value to the file. I think the default value would be something like 32768-61000. > Using 64K files has the same practical issues (the user would have to cat all > 64K files to determine which ports are reserved) plus it has issues caused by > the large number of files: significant memory overhead and also significant time > for registering those files. "grep -l 1 *" isn't particularly difficult, and it would be one sysctl registration call. It is true that the sysctl memory footprint would be a pain in that case. Eric ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-16 18:49 ` Eric W. Biederman @ 2010-02-16 19:51 ` Octavian Purdila 2010-02-16 20:08 ` Eric W. Biederman 0 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 19:51 UTC (permalink / raw) To: Eric W. Biederman Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Amerigo Wang On Tuesday 16 February 2010 20:49:37 you wrote: > > The use case (large bitmaps/lists) is different enough from what we have > > today (small bitmaps) and that is why I think that we need this new > > interface. > > > > If I get bitmap_parse_user correctly, for a 64k bitmap it expects a 2K > > comma separated values. That is not the most intuitively way for the > > user to set a list of ports he wants to reserve. > > In this case I expect an interface of comma separated ranges would be > ideal. Typically compact, and modifiable by writing the new value to > the file. > Something like bellow? # set bits 8080 and 1666 $echo 8080 1666-1666 > /proc #reset bit 1666 $echo 8080 > /proc #reset whole bitmap $echo > /proc > I think the default value would be something like 32768-61000. Note that this new proc entry will work in conjunction with the existing ip_local_port_range option, so the default bitmap can (and should be) empty. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-16 19:51 ` Octavian Purdila @ 2010-02-16 20:08 ` Eric W. Biederman 2010-02-16 21:22 ` Octavian Purdila 0 siblings, 1 reply; 38+ messages in thread From: Eric W. Biederman @ 2010-02-16 20:08 UTC (permalink / raw) To: Octavian Purdila Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Amerigo Wang Octavian Purdila <opurdila@ixiacom.com> writes: > On Tuesday 16 February 2010 20:49:37 you wrote: > >> > The use case (large bitmaps/lists) is different enough from what we have >> > today (small bitmaps) and that is why I think that we need this new >> > interface. >> > >> > If I get bitmap_parse_user correctly, for a 64k bitmap it expects a 2K >> > comma separated values. That is not the most intuitively way for the >> > user to set a list of ports he wants to reserve. >> >> In this case I expect an interface of comma separated ranges would be >> ideal. Typically compact, and modifiable by writing the new value to >> the file. >> > > Something like bellow? > > # set bits 8080 and 1666 > $echo 8080 1666-1666 > /proc > > #reset bit 1666 > $echo 8080 > /proc > > #reset whole bitmap > $echo > /proc Yes. So something like that. I think I would use commas instead of spaces as that is more traditional. >> I think the default value would be something like 32768-61000. > > Note that this new proc entry will work in conjunction with the existing > ip_local_port_range option, so the default bitmap can (and should be) empty. Do we want userspace to see this implementation detail? Two data structures doing the almost the same thing could get confusing in a hurry. It feels like a recipe for changing one and not the other and then running around trying to figure out why the change did not work. Eric ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-16 20:08 ` Eric W. Biederman @ 2010-02-16 21:22 ` Octavian Purdila 2010-02-17 15:57 ` Cong Wang 0 siblings, 1 reply; 38+ messages in thread From: Octavian Purdila @ 2010-02-16 21:22 UTC (permalink / raw) To: Eric W. Biederman Cc: David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Amerigo Wang, Eric Dumazet On Tuesday 16 February 2010 22:08:13 you wrote: > > Something like bellow? > > > > # set bits 8080 and 1666 > > $echo 8080 1666-1666 > /proc > > > > #reset bit 1666 > > $echo 8080 > /proc > > > > #reset whole bitmap > > $echo > /proc > > Yes. So something like that. > > I think I would use commas instead of spaces as that is more traditional. > OK, I was trying to reuse the existing skip whitespace code :) but if you think its cleaner with commas I can do that. > > Note that this new proc entry will work in conjunction with the existing > > ip_local_port_range option, so the default bitmap can (and should be) > > empty. > > Do we want userspace to see this implementation detail? Two data structures > doing the almost the same thing could get confusing in a hurry. It feels > like a recipe for changing one and not the other and then running around > trying to figure out why the change did not work. > Yes, I believe we want to have reserved_ports contain just those special ports that the user wants to reserve. After all we add this entry for this specific purpose. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-16 21:22 ` Octavian Purdila @ 2010-02-17 15:57 ` Cong Wang 2010-02-17 16:10 ` Eric W. Biederman 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-17 15:57 UTC (permalink / raw) To: Octavian Purdila Cc: Eric W. Biederman, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric Dumazet Octavian Purdila wrote: > On Tuesday 16 February 2010 22:08:13 you wrote: >>> Something like bellow? >>> >>> # set bits 8080 and 1666 >>> $echo 8080 1666-1666 > /proc >>> >>> #reset bit 1666 >>> $echo 8080 > /proc >>> >>> #reset whole bitmap >>> $echo > /proc >> Yes. So something like that. >> >> I think I would use commas instead of spaces as that is more traditional. Why this is better than the current version? For the single port case, currently we use: echo +8080 > /xxxx #set echo -8080 > /xxxx #clear Now we will use: echo 8080 > /xxxx #set echo 8080 > /xxxx #clear I don't think the latter is better... For the multi-port case, yes, we should accept 'echo 8080,10000 >/xxxx'. >> > > OK, I was trying to reuse the existing skip whitespace code :) but if you > think its cleaner with commas I can do that. > >>> Note that this new proc entry will work in conjunction with the existing >>> ip_local_port_range option, so the default bitmap can (and should be) >>> empty. Yes, we don't know which ports the user wants to reserve. >> Do we want userspace to see this implementation detail? Two data structures >> doing the almost the same thing could get confusing in a hurry. It feels >> like a recipe for changing one and not the other and then running around >> trying to figure out why the change did not work. >> > > Yes, I believe we want to have reserved_ports contain just those special ports > that the user wants to reserve. After all we add this entry for this specific > purpose. > This is why I insist we should make sure all ports accepted by ip_local_reserved_ports must be in ip_local_port_range. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-17 15:57 ` Cong Wang @ 2010-02-17 16:10 ` Eric W. Biederman 2010-02-17 16:19 ` Cong Wang 0 siblings, 1 reply; 38+ messages in thread From: Eric W. Biederman @ 2010-02-17 16:10 UTC (permalink / raw) To: Cong Wang Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric Dumazet Cong Wang <amwang@redhat.com> writes: > Octavian Purdila wrote: >> On Tuesday 16 February 2010 22:08:13 you wrote: >>>> Something like bellow? >>>> >>>> # set bits 8080 and 1666 >>>> $echo 8080 1666-1666 > /proc >>>> >>>> #reset bit 1666 >>>> $echo 8080 > /proc >>>> >>>> #reset whole bitmap >>>> $echo > /proc >>> Yes. So something like that. >>> >>> I think I would use commas instead of spaces as that is more traditional. > > > Why this is better than the current version? > > For the single port case, currently we use: > > echo +8080 > /xxxx #set > echo -8080 > /xxxx #clear > > Now we will use: > > echo 8080 > /xxxx #set > echo 8080 > /xxxx #clear No. > I don't think the latter is better... > > For the multi-port case, yes, we should accept 'echo 8080,10000 >/xxxx'. What I was envisioning was: echo 8080 > /xxx # set the bitmap to 8080 echo 8080,10000 > /xxx # add 10000 to the bitmap echo 8080 > /xxxx # remove 10000 from the bitmap. That is when you set it you enter the entire set every time, treating the entire set as a single value. Eric ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-17 16:10 ` Eric W. Biederman @ 2010-02-17 16:19 ` Cong Wang 2010-02-17 16:26 ` Eric W. Biederman 0 siblings, 1 reply; 38+ messages in thread From: Cong Wang @ 2010-02-17 16:19 UTC (permalink / raw) To: Eric W. Biederman Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric Dumazet Eric W. Biederman wrote: > Cong Wang <amwang@redhat.com> writes: > >> Octavian Purdila wrote: >>> On Tuesday 16 February 2010 22:08:13 you wrote: >>>>> Something like bellow? >>>>> >>>>> # set bits 8080 and 1666 >>>>> $echo 8080 1666-1666 > /proc >>>>> >>>>> #reset bit 1666 >>>>> $echo 8080 > /proc >>>>> >>>>> #reset whole bitmap >>>>> $echo > /proc >>>> Yes. So something like that. >>>> >>>> I think I would use commas instead of spaces as that is more traditional. >> >> Why this is better than the current version? >> >> For the single port case, currently we use: >> >> echo +8080 > /xxxx #set >> echo -8080 > /xxxx #clear >> >> Now we will use: >> >> echo 8080 > /xxxx #set >> echo 8080 > /xxxx #clear > > No. > >> I don't think the latter is better... >> >> For the multi-port case, yes, we should accept 'echo 8080,10000 >/xxxx'. > > What I was envisioning was: > > echo 8080 > /xxx # set the bitmap to 8080 > echo 8080,10000 > /xxx # add 10000 to the bitmap > echo 8080 > /xxxx # remove 10000 from the bitmap. > > That is when you set it you enter the entire set every time, treating > the entire set as a single value. > Oh, I see, this is ok. But if we could support multi-port, that will be better, something like: echo '8080,10000-11000' > /xxx #add port 8080 and port range 10000-11000 so that I don't have to construct a long string for all ports within 10000 and 11000. Thanks. ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port 2010-02-17 16:19 ` Cong Wang @ 2010-02-17 16:26 ` Eric W. Biederman 0 siblings, 0 replies; 38+ messages in thread From: Eric W. Biederman @ 2010-02-17 16:26 UTC (permalink / raw) To: Cong Wang Cc: Octavian Purdila, David Miller, Linux Kernel Network Developers, Linux Kernel Developers, Eric Dumazet Cong Wang <amwang@redhat.com> writes: > Eric W. Biederman wrote: >> Cong Wang <amwang@redhat.com> writes: >> >>> Octavian Purdila wrote: >>>> On Tuesday 16 February 2010 22:08:13 you wrote: >>>>>> Something like bellow? >>>>>> >>>>>> # set bits 8080 and 1666 >>>>>> $echo 8080 1666-1666 > /proc >>>>>> >>>>>> #reset bit 1666 >>>>>> $echo 8080 > /proc >>>>>> >>>>>> #reset whole bitmap >>>>>> $echo > /proc >>>>> Yes. So something like that. >>>>> >>>>> I think I would use commas instead of spaces as that is more traditional. >>> >>> Why this is better than the current version? >>> >>> For the single port case, currently we use: >>> >>> echo +8080 > /xxxx #set >>> echo -8080 > /xxxx #clear >>> >>> Now we will use: >>> >>> echo 8080 > /xxxx #set >>> echo 8080 > /xxxx #clear >> >> No. >> >>> I don't think the latter is better... >>> >>> For the multi-port case, yes, we should accept 'echo 8080,10000 >/xxxx'. >> >> What I was envisioning was: >> >> echo 8080 > /xxx # set the bitmap to 8080 >> echo 8080,10000 > /xxx # add 10000 to the bitmap >> echo 8080 > /xxxx # remove 10000 from the bitmap. >> >> That is when you set it you enter the entire set every time, treating >> the entire set as a single value. >> > > Oh, I see, this is ok. > > But if we could support multi-port, that will be better, something like: > > echo '8080,10000-11000' > /xxx #add port 8080 and port range 10000-11000 > > so that I don't have to construct a long string for all ports within > 10000 and 11000. Yes, multi-port ranges are what I suggested. You simply had not gotten confused about that aspect, so I was not repeating it. Except for pathological cases a ranges should keep the string that represents the bitmap small. Eric ^ permalink raw reply [flat|nested] 38+ messages in thread
end of thread, other threads:[~2010-02-20 7:57 UTC | newest] Thread overview: 38+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-02-15 22:00 [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Octavian Purdila 2010-02-15 22:00 ` [net-next PATCH v4 1/3] sysctl: refactor integer handling proc code Octavian Purdila 2010-02-16 8:41 ` Cong Wang 2010-02-16 10:48 ` Octavian Purdila 2010-02-16 13:08 ` Cong Wang 2010-02-16 14:00 ` Octavian Purdila 2010-02-17 16:31 ` Cong Wang 2010-02-17 21:09 ` Octavian Purdila 2010-02-18 3:58 ` Octavian Purdila 2010-02-16 11:41 ` Octavian Purdila 2010-02-16 13:09 ` Cong Wang 2010-02-16 13:44 ` Octavian Purdila 2010-02-17 16:21 ` Cong Wang 2010-02-17 16:33 ` Eric W. Biederman 2010-02-18 4:25 ` Octavian Purdila 2010-02-15 22:00 ` [net-next PATCH v4 2/3] sysctl: add proc_dobitmap Octavian Purdila 2010-02-16 9:12 ` Cong Wang 2010-02-15 22:00 ` [net-next PATCH v4 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila 2010-02-16 9:37 ` Cong Wang 2010-02-16 11:06 ` Octavian Purdila 2010-02-16 13:06 ` Cong Wang 2010-02-16 13:20 ` Eric Dumazet 2010-02-17 16:13 ` Cong Wang 2010-02-17 16:39 ` Eric Dumazet 2010-02-17 16:01 ` Octavian Purdila 2010-02-20 8:00 ` Cong Wang 2010-02-16 14:25 ` Octavian Purdila 2010-02-17 16:07 ` Cong Wang 2010-02-16 17:25 ` [net-next PATCH v4 0/3] net: reserve ports for applications using fixed port Eric W. Biederman 2010-02-16 18:04 ` Octavian Purdila 2010-02-16 18:49 ` Eric W. Biederman 2010-02-16 19:51 ` Octavian Purdila 2010-02-16 20:08 ` Eric W. Biederman 2010-02-16 21:22 ` Octavian Purdila 2010-02-17 15:57 ` Cong Wang 2010-02-17 16:10 ` Eric W. Biederman 2010-02-17 16:19 ` Cong Wang 2010-02-17 16:26 ` Eric W. Biederman
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).