netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC Patch v2] net: reserve ports for applications using fixed port numbers
@ 2010-02-04 10:12 Amerigo Wang
       [not found] ` <20100204101533.4619.34599.sendpatchset-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Amerigo Wang @ 2010-02-04 10:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: Octavian Purdila, Eric Dumazet, linux-rdma, netdev, Neil Horman,
	linux-sctp, Amerigo Wang, David Miller

V2:
update the documentation
update the changelog
fix the checking code in udp

This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports,
it can be used like ip_local_port_range, but this is used to
reserve ports for third-party applications which use fixed
port numbers within ip_local_port_range.

This only affects applications which call socket functions such as
bind(2) with port number 0, or connect(), etc.: the kernel will not
pick ports within the reserved range for them. Applications which
bind to a fixed port number are not affected.

Any comments are welcome.

Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Octavian Purdila <opurdila@ixiacom.com>
Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>

---

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 006b39d..0795ac3 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -564,6 +564,14 @@ ip_local_port_range - 2 INTEGERS
 	(i.e. by default) range 1024-4999 is enough to issue up to
 	2000 connections per second to systems supporting timestamps.
 
+ip_local_reserved_ports - 2 INTEGERS
+	Specify the port range which is reserved for known third-party
+	applications, so that the kernel does not pick those ports for other
+	applications, e.g. when calling connect() or bind() with port
+	number 0. The range shall not go beyond the range specified in
+	ip_local_port_range. "0 0" means no ports are reserved.
+	Default: 0 0
+
 ip_nonlocal_bind - BOOLEAN
 	If set, allows processes to bind() to non-local IP addresses,
 	which can be quite useful - but may break some applications.
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index cc9b594..8248fc6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1979,6 +1979,8 @@ retry:
 	/* FIXME: add proper port randomization per like inet_csk_get_port */
 	do {
 		ret = idr_get_new_above(ps, bind_list, next_port, &port);
+		if (inet_is_reserved_local_port(port))
+			ret = -EAGAIN;
 	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
 
 	if (ret)
@@ -2997,10 +2999,13 @@ static int __init cma_init(void)
 {
 	int ret, low, high, remaining;
 
-	get_random_bytes(&next_port, sizeof next_port);
 	inet_get_local_port_range(&low, &high);
+again:
+	get_random_bytes(&next_port, sizeof next_port);
 	remaining = (high - low) + 1;
 	next_port = ((unsigned int) next_port % remaining) + low;
+	if (inet_is_reserved_local_port(next_port))
+		goto again;
 
 	cma_wq = create_singlethread_workqueue("rdma_cm");
 	if (!cma_wq)
diff --git a/include/net/ip.h b/include/net/ip.h
index fb63371..f70acad 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -181,8 +181,10 @@ extern void snmp_mib_free(void *ptr[2]);
 extern struct local_ports {
 	seqlock_t	lock;
 	int		range[2];
-} sysctl_local_ports;
+} sysctl_local_ports, sysctl_local_reserved_ports;
 extern void inet_get_local_port_range(int *low, int *high);
+extern void inet_get_local_reserved_ports(int *from, int *to);
+extern int inet_is_reserved_local_port(int port);
 
 extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ee16475..ee13e48 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,11 @@ struct local_ports sysctl_local_ports __read_mostly = {
 	.range = { 32768, 61000 },
 };
 
+struct local_ports sysctl_local_reserved_ports __read_mostly = {
+	.lock = SEQLOCK_UNLOCKED,
+	.range = { 0, 0 },
+};
+
 void inet_get_local_port_range(int *low, int *high)
 {
 	unsigned seq;
@@ -49,6 +54,28 @@ void inet_get_local_port_range(int *low, int *high)
 }
 EXPORT_SYMBOL(inet_get_local_port_range);
 
+void inet_get_local_reserved_ports(int *from, int *to)
+{
+	unsigned int seq;
+	do {
+		seq = read_seqbegin(&sysctl_local_reserved_ports.lock);
+
+		*from = sysctl_local_reserved_ports.range[0];
+		*to = sysctl_local_reserved_ports.range[1];
+	} while (read_seqretry(&sysctl_local_reserved_ports.lock, seq));
+}
+
+int inet_is_reserved_local_port(int port)
+{
+	int min, max;
+
+	inet_get_local_reserved_ports(&min, &max);
+	if (min && max)
+		return (port >= min && port <= max);
+	return 0;
+}
+EXPORT_SYMBOL(inet_is_reserved_local_port);
+
 int inet_csk_bind_conflict(const struct sock *sk,
 			   const struct inet_bind_bucket *tb)
 {
@@ -105,6 +132,8 @@ again:
 		inet_get_local_port_range(&low, &high);
 		remaining = (high - low) + 1;
 		smallest_rover = rover = net_random() % remaining + low;
+		if (inet_is_reserved_local_port(rover))
+			goto again;
 
 		smallest_size = -1;
 		do {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377..d3e160a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
 			port = low + (i + offset) % remaining;
+			if (inet_is_reserved_local_port(port))
+				continue;
 			head = &hinfo->bhash[inet_bhashfn(net, port,
 					hinfo->bhash_size)];
 			spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..0791010 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -23,6 +23,7 @@
 
 static int zero;
 static int tcp_retr1_max = 255;
+static int ip_local_reserved_ports_min[] = { 0, 0 };
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 
@@ -63,6 +64,51 @@ static int ipv4_local_port_range(ctl_table *table, int write,
 	return ret;
 }
 
+static void set_reserved_port_range(int range[2])
+{
+	write_seqlock(&sysctl_local_reserved_ports.lock);
+	sysctl_local_reserved_ports.range[0] = range[0];
+	sysctl_local_reserved_ports.range[1] = range[1];
+	write_sequnlock(&sysctl_local_reserved_ports.lock);
+}
+
+static int ipv4_local_reserved_ports(ctl_table *table, int write,
+				     void __user *buffer,
+				     size_t *lenp, loff_t *ppos)
+{
+	int ret;
+	int range[2];
+	int reserved_range[2];
+	ctl_table tmp = {
+		.data = &reserved_range,
+		.maxlen = sizeof(reserved_range),
+		.mode = table->mode,
+		.extra1 = &ip_local_reserved_ports_min,
+		.extra2 = &ip_local_port_range_max,
+	};
+
+	inet_get_local_reserved_ports(reserved_range, reserved_range+1);
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+	if (write && ret == 0) {
+		inet_get_local_port_range(range, range + 1);
+		if (!reserved_range[0] && !reserved_range[1]) {
+			set_reserved_port_range(reserved_range);
+		} else {
+			if (reserved_range[1] < reserved_range[0])
+				ret = -EINVAL;
+			else if (reserved_range[0] < range[0])
+				ret = -EINVAL;
+			else if (reserved_range[1] > range[1])
+				ret = -EINVAL;
+			else
+				set_reserved_port_range(reserved_range);
+		}
+	}
+
+	return ret;
+}
+
 static int proc_tcp_congestion_control(ctl_table *ctl, int write,
 				       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -298,6 +344,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= ipv4_local_port_range,
 	},
+	{
+		.procname	= "ip_local_reserved_ports",
+		.data		= &sysctl_local_reserved_ports.range,
+		.maxlen		= sizeof(sysctl_local_reserved_ports.range),
+		.mode		= 0644,
+		.proc_handler	= ipv4_local_reserved_ports,
+	},
 #ifdef CONFIG_IP_MULTICAST
 	{
 		.procname	= "igmp_max_memberships",
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f0126fd..4bb825e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -203,6 +203,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 
 	if (!snum) {
 		int low, high, remaining;
+		int min, max;
 		unsigned rand;
 		unsigned short first, last;
 		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
@@ -210,6 +211,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		inet_get_local_port_range(&low, &high);
 		remaining = (high - low) + 1;
 
+again:
 		rand = net_random();
 		first = (((u64)rand * remaining) >> 32) + low;
 		/*
@@ -217,6 +219,9 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 		 */
 		rand = (rand | 1) * (udptable->mask + 1);
 		last = first + udptable->mask + 1;
+		inet_get_local_reserved_ports(&min, &max);
+		if (!(first > max || last < min))
+			goto again;
 		do {
 			hslot = udp_hashslot(udptable, net, first);
 			bitmap_zero(bitmap, PORTS_PER_CHAIN);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 67fdac9..d685141 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 			rover++;
 			if ((rover < low) || (rover > high))
 				rover = low;
+			if (inet_is_reserved_local_port(rover))
+				continue;
 			index = sctp_phashfn(rover);
 			head = &sctp_port_hashtable[index];
 			sctp_spin_lock(&head->lock);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC Patch v2] net: reserve ports for applications using fixed port numbers
       [not found] ` <20100204101533.4619.34599.sendpatchset-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-02-04 10:59   ` Tetsuo Handa
  2010-02-05  4:41     ` Cong Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Tetsuo Handa @ 2010-02-04 10:59 UTC (permalink / raw)
  To: amwang-H+wXaHxf7aLQT0dZR+AlfA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-security-module-u79uwXL29TY76Z2rM5mHXA
  Cc: opurdila-+zzKsuq53OdBDgjK7y7TUQ,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
	nhorman-2XuSBdqkA4R54TAoqtyWWQ, linux-sctp-u79uwXL29TY76Z2rM5mHXA,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q

Hello.

Amerigo Wang wrote:
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index 2b79377..d3e160a 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
>  		local_bh_disable();
>  		for (i = 1; i <= remaining; i++) {
>  			port = low + (i + offset) % remaining;
> +			if (inet_is_reserved_local_port(port))
> +				continue;
>  			head = &hinfo->bhash[inet_bhashfn(net, port,
>  					hinfo->bhash_size)];
>  			spin_lock(&head->lock);

I'm planning to add a LSM hook here.

If root user sets min port value less than 1024 to
/proc/sys/net/ipv4/ip_local_port_range , a process without CAP_NET_BIND_SERVICE
capability can bind to privileged port by "bind() with port == 0" or "connect()
without bind()" because the condition is

	err = -EACCES;
	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
		goto out;

I consider this a security problem if MAC is enabled. MAC is used for
dividing the root user's privileges. With MAC, somebody doing some part of the
root user's job may set the min port value to less than 1024.

Also, some applications need fixed local port numbers (e.g. 3128 for Squid,
8080 for Tomcat). The port numbers I want to reserve are more complex than
a simple min-max range like /proc/sys/net/ipv4/ip_local_reserved_ports .

Therefore, TOMOYO wants to insert a LSM hook (
http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/net/ipv4/udp.c#L235
http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/net/ipv4/inet_connection_sock.c#L114
http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/net/ipv4/inet_hashtables.c#L459
) and allow reserving local ports like

  deny_autobind 0-1023
  deny_autobind 3128
  deny_autobind 8080

so that

  applications which need such ports won't be unexpectedly blocked by
  other application's temporary port usage (i.e. "bind() with port == 0" or
  "connect() without bind()")

and

  MAC guarantees that processes without CAP_NET_BIND_SERVICE can never bind
  to privileged port

.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC Patch v2] net: reserve ports for applications using fixed port numbers
  2010-02-04 10:59   ` Tetsuo Handa
@ 2010-02-05  4:41     ` Cong Wang
  2010-02-05 11:21       ` [RFC Patch v2] net: reserve ports for applications using fixedport numbers Tetsuo Handa
  0 siblings, 1 reply; 5+ messages in thread
From: Cong Wang @ 2010-02-05  4:41 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: linux-kernel, linux-security-module, opurdila, eric.dumazet,
	linux-rdma, netdev, nhorman, linux-sctp, davem

Tetsuo Handa wrote:
> Hello.
> 
> Amerigo Wang wrote:
>> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
>> index 2b79377..d3e160a 100644
>> --- a/net/ipv4/inet_hashtables.c
>> +++ b/net/ipv4/inet_hashtables.c
>> @@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
>>  		local_bh_disable();
>>  		for (i = 1; i <= remaining; i++) {
>>  			port = low + (i + offset) % remaining;
>> +			if (inet_is_reserved_local_port(port))
>> +				continue;
>>  			head = &hinfo->bhash[inet_bhashfn(net, port,
>>  					hinfo->bhash_size)];
>>  			spin_lock(&head->lock);
> 
> I'm planning to add a LSM hook here.
> 
> If root user sets min port value less than 1024 to
> /proc/sys/net/ipv4/ip_local_port_range , a process without CAP_NET_BIND_SERVICE
> capability can bind to privileged port by "bind() with port == 0" or "connect()
> without bind()" because the condition is
> 
> 	err = -EACCES;
> 	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
> 		goto out;
> 
> I consider this is a security problem if MAC is enabled. MAC is used for
> dividing root user's privilege. With MAC, somebody doing some part of root
> user's jobs may set min port value to less than 1024.
> 
> Also, some applications needs fixed local port numbers (e.g. 3128 for Squid,
> 8080 for Tomcat). The port numbers I want to reserve are more complex than
> simple min-max range like /proc/sys/net/ipv4/ip_local_reserved_ports .
> 
> Therefore, TOMOYO wants to insert a LSM hook (
> http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/net/ipv4/udp.c#L235
> http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/net/ipv4/inet_connection_sock.c#L114
> http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/net/ipv4/inet_hashtables.c#L459
> ) and allow reserving local ports like
> 
>   deny_autobind 0-1023
>   deny_autobind 3128
>   deny_autobind 8080
> 
> so that
> 
>   applications which need such ports won't be unexpectedly blocked by
>   other application's temporary port usage (i.e. "bind() with port == 0" or
>   "connect() without bind()")
> 
> and
> 
>   MAC guarantees that processes without CAP_NET_BIND_SERVICE can never bind
>   to privileged port
> 

Oh, IIUC, TOMOYO is something like SELinux? So, it is somewhat weird
to let users use TOMOYO to reserve the ports with MAC. For normal
users, the /proc interface seems more friendly.

Thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC Patch v2] net: reserve ports for applications using fixedport numbers
  2010-02-05  4:41     ` Cong Wang
@ 2010-02-05 11:21       ` Tetsuo Handa
  2010-02-08  3:15         ` Cong Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Tetsuo Handa @ 2010-02-05 11:21 UTC (permalink / raw)
  To: amwang
  Cc: linux-kernel, linux-security-module, opurdila, eric.dumazet,
	linux-rdma, netdev, nhorman, linux-sctp, davem

Cong Wang wrote:
> Oh, IIUC, TOMOYO is something like SELinux?

Yes. It is a policy based mandatory access control implementation which is
applied to not only non root users but also root user. If MAC is enabled,
root user cannot freely modify via sysctl() or /proc/sys interface.

> So, it is somewhat weird to let users to use TOMOYO to reserve
> the ports with MAC.

To add reserved port

echo deny_autobind 0-1023 | ccs-loadpolicy -e
echo deny_autobind 3128 | ccs-loadpolicy -e
echo deny_autobind 8080 | ccs-loadpolicy -e

and to delete reserved port

echo delete deny_autobind 0-1023 | ccs-loadpolicy -e
echo delete deny_autobind 3128 | ccs-loadpolicy -e
echo delete deny_autobind 8080 | ccs-loadpolicy -e

That's all. Quite easy.

> For normal users /proc interface seems more friendly.

I think /proc/sys/net/ipv4/ip_local_reserved_ports interface wants
"struct list_head" for handling multiple sets of min/max pairs. I'm using
http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/security/ccsecurity/autobind.c#L29
for that purpose.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC Patch v2] net: reserve ports for applications using fixedport numbers
  2010-02-05 11:21       ` [RFC Patch v2] net: reserve ports for applications using fixedport numbers Tetsuo Handa
@ 2010-02-08  3:15         ` Cong Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Cong Wang @ 2010-02-08  3:15 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: linux-kernel, linux-security-module, opurdila, eric.dumazet,
	linux-rdma, netdev, nhorman, linux-sctp, davem

Tetsuo Handa wrote:
> Cong Wang wrote:
>> Oh, IIUC, TOMOYO is something like SELinux?
> 
> Yes. It is a policy based mandatory access control implementation which is
> applied to not only non root users but also root user. If MAC is enabled,
> root user cannot freely modify via sysctl() or /proc/sys interface.
> 
>> So, it is somewhat weird to let users to use TOMOYO to reserve
>> the ports with MAC.
> 
> To add reserved port
> 
> echo deny_autobind 0-1023 | ccs-loadpolicy -e
> echo deny_autobind 3128 | ccs-loadpolicy -e
> echo deny_autobind 8080 | ccs-loadpolicy -e
> 
> and to delete reserved port
> 
> echo delete deny_autobind 0-1023 | ccs-loadpolicy -e
> echo delete deny_autobind 3128 | ccs-loadpolicy -e
> echo delete deny_autobind 8080 | ccs-loadpolicy -e
> 
> That's all. Quite easy.


Hmm, but you are solving a non-security problem with a security
tool, doesn't this look weird? ;-)

> 
>> For normal users /proc interface seems more friendly.
> 
> I think /proc/sys/net/ipv4/ip_local_reserved_ports interface wants
> "struct list_head" for handling multiple sets of min/max pairs. I'm using
> http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/security/ccsecurity/autobind.c#L29
> for that purpose.


Yes, but I didn't plan to add multiple range support for
ip_local_reserved_ports, like ip_local_port_range.

Having that would be better, but it needs more effort.

Thanks.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-02-08  3:15 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-02-04 10:12 [RFC Patch v2] net: reserve ports for applications using fixed port numbers Amerigo Wang
     [not found] ` <20100204101533.4619.34599.sendpatchset-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-02-04 10:59   ` Tetsuo Handa
2010-02-05  4:41     ` Cong Wang
2010-02-05 11:21       ` [RFC Patch v2] net: reserve ports for applications using fixedport numbers Tetsuo Handa
2010-02-08  3:15         ` Cong Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).