netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Patrick McHardy <kaber@trash.net>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, greearb@candelatech.com,
	Stephen Hemminger <shemminger@osdl.org>
Subject: Re: [RFC NET 00/04]: Increase number of possible routing tables
Date: Mon, 03 Jul 2006 11:23:29 +0200	[thread overview]
Message-ID: <44A8E211.4090005@trash.net> (raw)
In-Reply-To: <20060703075259.6286.67397.sendpatchset@localhost.localdomain>

[-- Attachment #1: Type: text/plain, Size: 1323 bytes --]

Patrick McHardy wrote:
> I took on Ben's challenge to increase the number of possible routing tables,
> these are the resulting patches.
> 
> The table IDs are changed to 32 bit values and are contained in a new netlink
> routing attribute. For compatibility rtm_table in struct rtmsg can still be
> used to access the first 255 tables and contains the low 8 bits of the table
> ID in case of dumps. Unfortunately there are no invalid values for rtm_table,
> so the best userspace can do in case of a new iproute version that tries to
> access tables > 255 on an old kernel is to use RT_TABLE_UNSPEC (0) for rtm_table,
> which will make the kernel allocate an empty table instead of silently adding
> routes to a more or less random table. The iproute patch will follow shortly.

Actually that last part wasn't entirely true. The last couple of
releases of the kernel include the inet_check_attr function,
which (unintentionally) breaks with the tradition of ignoring
unknown attributes and signals an error on receiving the RTA_TABLE
attribute. So the iproute patch only includes the RTA_TABLE
attribute when the table ID is > 255, in which case rtm_table
is set to RT_TABLE_UNSPEC. Older kernels without inet_check_attr
will still have the behaviour I described above. The patch has been
tested to behave as expected on both patched and unpatched kernels.


[-- Attachment #2: x --]
[-- Type: text/plain, Size: 10047 bytes --]

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5e33a20..7573c62 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -238,9 +238,8 @@ enum rt_class_t
 	RT_TABLE_DEFAULT=253,
 	RT_TABLE_MAIN=254,
 	RT_TABLE_LOCAL=255,
-	__RT_TABLE_MAX
 };
-#define RT_TABLE_MAX (__RT_TABLE_MAX - 1)
+#define RT_TABLE_MAX 0xFFFFFFFF
 
 
 
@@ -263,6 +262,7 @@ enum rtattr_type_t
 	RTA_CACHEINFO,
 	RTA_SESSION,
 	RTA_MP_ALGO,
+	RTA_TABLE,
 	__RTA_MAX
 };
 
diff --git a/include/rt_names.h b/include/rt_names.h
index 2d9ef10..07a10e0 100644
--- a/include/rt_names.h
+++ b/include/rt_names.h
@@ -5,7 +5,7 @@ #include <asm/types.h>
 
 char* rtnl_rtprot_n2a(int id, char *buf, int len);
 char* rtnl_rtscope_n2a(int id, char *buf, int len);
-char* rtnl_rttable_n2a(int id, char *buf, int len);
+char* rtnl_rttable_n2a(__u32 id, char *buf, int len);
 char* rtnl_rtrealm_n2a(int id, char *buf, int len);
 char* rtnl_dsfield_n2a(int id, char *buf, int len);
 int rtnl_rtprot_a2n(__u32 *id, char *arg);
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 1fe4a69..8b286b0 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -32,4 +32,12 @@ extern int do_multiaddr(int argc, char *
 extern int do_multiroute(int argc, char **argv);
 extern int do_xfrm(int argc, char **argv);
 
+static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
+{
+	__u32 table = r->rtm_table;
+	if (tb[RTA_TABLE])
+		table = *(__u32*) RTA_DATA(tb[RTA_TABLE]);
+	return table;
+}
+
 extern struct rtnl_handle rth;
diff --git a/ip/iproute.c b/ip/iproute.c
index a43c09e..4ebe617 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -75,7 +75,8 @@ static void usage(void)
 
 static struct
 {
-	int tb;
+	__u32 tb;
+	int cloned;
 	int flushed;
 	char *flushb;
 	int flushp;
@@ -125,6 +126,7 @@ int print_route(const struct sockaddr_nl
 	inet_prefix prefsrc;
 	inet_prefix via;
 	int host_len = -1;
+	__u32 table;
 	SPRINT_BUF(b1);
 	
 
@@ -151,27 +153,23 @@ int print_route(const struct sockaddr_nl
 		host_len = 80;
 
 	if (r->rtm_family == AF_INET6) {
+		if (filter.cloned) {
+			if (!(r->rtm_flags&RTM_F_CLONED))
+				return 0;
+		}
 		if (filter.tb) {
-			if (filter.tb < 0) {
-				if (!(r->rtm_flags&RTM_F_CLONED))
-					return 0;
-			} else {
-				if (r->rtm_flags&RTM_F_CLONED)
+			if (r->rtm_flags&RTM_F_CLONED)
+				return 0;
+			if (filter.tb == RT_TABLE_LOCAL) {
+				if (r->rtm_type != RTN_LOCAL)
 					return 0;
-				if (filter.tb == RT_TABLE_LOCAL) {
-					if (r->rtm_type != RTN_LOCAL)
-						return 0;
-				} else if (filter.tb == RT_TABLE_MAIN) {
-					if (r->rtm_type == RTN_LOCAL)
-						return 0;
-				} else {
+			} else if (filter.tb == RT_TABLE_MAIN) {
+				if (r->rtm_type == RTN_LOCAL)
 					return 0;
-				}
+			} else {
+				return 0;
 			}
 		}
-	} else {
-		if (filter.tb > 0 && filter.tb != r->rtm_table)
-			return 0;
 	}
 	if ((filter.protocol^r->rtm_protocol)&filter.protocolmask)
 		return 0;
@@ -225,6 +223,10 @@ int print_route(const struct sockaddr_nl
 			memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len/8);
 	}
 
+	table = rtm_get_table(r, tb);
+	if (r->rtm_family == AF_INET && filter.tb > 0 && filter.tb != table)
+		return 0;
+
 	if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen))
 		return 0;
 	if (filter.mdst.family && filter.mdst.bitlen >= 0 &&
@@ -354,8 +356,8 @@ int print_route(const struct sockaddr_nl
 		fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
 
 	if (!(r->rtm_flags&RTM_F_CLONED)) {
-		if (r->rtm_table != RT_TABLE_MAIN && !filter.tb)
-			fprintf(fp, " table %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+		if (table != RT_TABLE_MAIN && !filter.tb)
+			fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
 		if (r->rtm_protocol != RTPROT_BOOT && filter.protocolmask != -1)
 			fprintf(fp, " proto %s ", rtnl_rtprot_n2a(r->rtm_protocol, b1, sizeof(b1)));
 		if (r->rtm_scope != RT_SCOPE_UNIVERSE && filter.scopemask != -1)
@@ -840,7 +842,12 @@ #endif
 			NEXT_ARG();
 			if (rtnl_rttable_a2n(&tid, *argv))
 				invarg("\"table\" value is invalid\n", *argv);
-			req.r.rtm_table = tid;
+			if (tid < 256)
+				req.r.rtm_table = tid;
+			else {
+				req.r.rtm_table = RT_TABLE_UNSPEC;
+				addattr32(&req.n, sizeof(req), RTA_TABLE, tid);
+			}
 			table_ok = 1;
 		} else if (strcmp(*argv, "dev") == 0 ||
 			   strcmp(*argv, "oif") == 0) {
@@ -1022,7 +1029,7 @@ static int iproute_list_or_flush(int arg
 			filter.tb = tid;
 		} else if (matches(*argv, "cached") == 0 ||
 			   matches(*argv, "cloned") == 0) {
-			filter.tb = -1;
+			filter.cloned = 1;
 		} else if (strcmp(*argv, "tos") == 0 ||
 			   matches(*argv, "dsfield") == 0) {
 			__u32 tos;
diff --git a/ip/iprule.c b/ip/iprule.c
index ccf699f..6caf573 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -27,6 +27,7 @@ #include <string.h>
 
 #include "rt_names.h"
 #include "utils.h"
+#include "ip_common.h"
 
 extern struct rtnl_handle rth;
 
@@ -51,6 +52,7 @@ static int print_rule(const struct socka
 	struct rtmsg *r = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
 	int host_len = -1;
+	__u32 table;
 	struct rtattr * tb[RTA_MAX+1];
 	char abuf[256];
 	SPRINT_BUF(b1);
@@ -129,8 +131,9 @@ static int print_rule(const struct socka
 		fprintf(fp, "iif %s ", (char*)RTA_DATA(tb[RTA_IIF]));
 	}
 
-	if (r->rtm_table)
-		fprintf(fp, "lookup %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+	table = rtm_get_table(r, tb);
+	if (table)
+		fprintf(fp, "lookup %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
 
 	if (tb[RTA_FLOW]) {
 		__u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
@@ -257,7 +260,12 @@ static int iprule_modify(int cmd, int ar
 			NEXT_ARG();
 			if (rtnl_rttable_a2n(&tid, *argv))
 				invarg("invalid table ID\n", *argv);
-			req.r.rtm_table = tid;
+			if (tid < 256)
+				req.r.rtm_table = tid;
+			else {
+				req.r.rtm_table = RT_TABLE_UNSPEC;
+				addattr32(&req.n, sizeof(req), RTA_TABLE, tid);
+			}
 			table_ok = 1;
 		} else if (strcmp(*argv, "dev") == 0 ||
 			   strcmp(*argv, "iif") == 0) {
diff --git a/lib/rt_names.c b/lib/rt_names.c
index 05046c2..2ff984a 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -23,6 +23,51 @@ #include <linux/rtnetlink.h>
 
 #include "rt_names.h"
 
+struct rtnl_hash_entry {
+	struct rtnl_hash_entry *next;
+	unsigned int		id;
+	char *			name;
+};
+
+static void
+rtnl_hash_initialize(char *file, struct rtnl_hash_entry **hash, int size)
+{
+	struct rtnl_hash_entry *entry;
+	char buf[512];
+	FILE *fp;
+
+	fp = fopen(file, "r");
+	if (!fp)
+		return;
+	while (fgets(buf, sizeof(buf), fp)) {
+		char *p = buf;
+		int id;
+		char namebuf[512];
+
+		while (*p == ' ' || *p == '\t')
+			p++;
+		if (*p == '#' || *p == '\n' || *p == 0)
+			continue;
+		if (sscanf(p, "0x%x %s\n", &id, namebuf) != 2 &&
+		    sscanf(p, "0x%x %s #", &id, namebuf) != 2 &&
+		    sscanf(p, "%d %s\n", &id, namebuf) != 2 &&
+		    sscanf(p, "%d %s #", &id, namebuf) != 2) {
+			fprintf(stderr, "Database %s is corrupted at %s\n",
+				file, p);
+			return;
+		}
+
+		if (id<0)
+			continue;
+		entry = malloc(sizeof(*entry));
+		entry->id   = id;
+		entry->name = strdup(namebuf);
+		entry->next = hash[id & (size - 1)];
+		hash[id & (size - 1)] = entry;
+	}
+	fclose(fp);
+}
+
 static void rtnl_tab_initialize(char *file, char **tab, int size)
 {
 	char buf[512];
@@ -57,7 +102,6 @@ static void rtnl_tab_initialize(char *fi
 	fclose(fp);
 }
 
-
 static char * rtnl_rtprot_tab[256] = {
 	[RTPROT_UNSPEC] = "none",
 	[RTPROT_REDIRECT] ="redirect",
@@ -266,9 +310,14 @@ int rtnl_rtrealm_a2n(__u32 *id, char *ar
 }
 
 
+static struct rtnl_hash_entry dflt_table_entry  = { .id = 253, .name = "default" };
+static struct rtnl_hash_entry main_table_entry  = { .id = 254, .name = "main" };
+static struct rtnl_hash_entry local_table_entry = { .id = 255, .name = "local" };
 
-static char * rtnl_rttable_tab[256] = {
-	"unspec",
+static struct rtnl_hash_entry * rtnl_rttable_hash[256] = {
+	[253] = &dflt_table_entry,
+	[254] = &main_table_entry,
+	[255] = &local_table_entry,
 };
 
 static int rtnl_rttable_init;
@@ -276,26 +325,26 @@ static int rtnl_rttable_init;
 static void rtnl_rttable_initialize(void)
 {
 	rtnl_rttable_init = 1;
-	rtnl_rttable_tab[255] = "local";
-	rtnl_rttable_tab[254] = "main";
-	rtnl_rttable_tab[253] = "default";
-	rtnl_tab_initialize("/etc/iproute2/rt_tables",
-			    rtnl_rttable_tab, 256);
+	rtnl_hash_initialize("/etc/iproute2/rt_tables",
+			     rtnl_rttable_hash, 256);
 }
 
-char * rtnl_rttable_n2a(int id, char *buf, int len)
+char * rtnl_rttable_n2a(__u32 id, char *buf, int len)
 {
-	if (id<0 || id>=256) {
-		snprintf(buf, len, "%d", id);
+	struct rtnl_hash_entry *entry;
+
+	if (id >= RT_TABLE_MAX) {
+		snprintf(buf, len, "%u", id);
 		return buf;
 	}
-	if (!rtnl_rttable_tab[id]) {
-		if (!rtnl_rttable_init)
-			rtnl_rttable_initialize();
-	}
-	if (rtnl_rttable_tab[id])
-		return rtnl_rttable_tab[id];
-	snprintf(buf, len, "%d", id);
+	if (!rtnl_rttable_init)
+		rtnl_rttable_initialize();
+	entry = rtnl_rttable_hash[id & 255];
+	while (entry && entry->id != id)
+		entry = entry->next;
+	if (entry)
+		return entry->name;
+	snprintf(buf, len, "%u", id);
 	return buf;
 }
 
@@ -303,8 +352,9 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
 {
 	static char *cache = NULL;
 	static unsigned long res;
+	struct rtnl_hash_entry *entry;
 	char *end;
-	int i;
+	__u32 i;
 
 	if (cache && strcmp(cache, arg) == 0) {
 		*id = res;
@@ -315,9 +365,11 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
 		rtnl_rttable_initialize();
 
 	for (i=0; i<256; i++) {
-		if (rtnl_rttable_tab[i] &&
-		    strcmp(rtnl_rttable_tab[i], arg) == 0) {
-			cache = rtnl_rttable_tab[i];
+		entry = rtnl_rttable_hash[i];
+		while (entry && strcmp(entry->name, arg))
+			entry = entry->next;
+		if (entry) {
+			cache = entry->name;
 			res = i;
 			*id = res;
 			return 0;
@@ -325,7 +377,7 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
 	}
 
 	i = strtoul(arg, &end, 0);
-	if (!end || end == arg || *end || i > 255)
+	if (!end || end == arg || *end || i > RT_TABLE_MAX)
 		return -1;
 	*id = i;
 	return 0;

  parent reply	other threads:[~2006-07-03  9:23 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-07-03  7:52 [RFC NET 00/04]: Increase number of possible routing tables Patrick McHardy
2006-07-03  7:53 ` [RFC NET 01/04]: Use u32 for routing table IDs Patrick McHardy
2006-07-03  7:53 ` [RFC NET 02/04]: Introduce RTA_TABLE routing attribute Patrick McHardy
2006-07-03  7:53 ` [RFC IPV4 03/04]: Increase number of possible routing tables to 2^32 Patrick McHardy
2006-07-03  7:53 ` [RFC DECNET 04/04]: " Patrick McHardy
2006-07-03 11:20   ` Steven Whitehouse
2006-07-03 11:21     ` Patrick McHardy
2006-07-03  9:23 ` Patrick McHardy [this message]
2006-07-03  9:38   ` [RFC NET 00/04]: Increase number of possible routing tables Patrick McHardy
2006-07-03 11:34     ` Thomas Graf
2006-07-03 11:36       ` Patrick McHardy
2006-07-03 11:41         ` Thomas Graf
2006-07-07  8:05 ` Patrick McHardy
2006-07-07 18:13   ` Ben Greear
2006-07-07 19:58     ` Patrick McHardy
2006-07-07 23:59       ` David Miller
2006-07-08  2:45         ` Patrick McHardy
2006-07-08  1:07       ` Ben Greear
2006-07-08  2:48         ` Patrick McHardy
2006-07-08  5:06           ` Ben Greear

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44A8E211.4090005@trash.net \
    --to=kaber@trash.net \
    --cc=davem@davemloft.net \
    --cc=greearb@candelatech.com \
    --cc=netdev@vger.kernel.org \
    --cc=shemminger@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).