From: Patrick McHardy <kaber@trash.net>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, greearb@candelatech.com,
Stephen Hemminger <shemminger@osdl.org>
Subject: Re: [RFC NET 00/04]: Increase number of possible routing tables
Date: Mon, 03 Jul 2006 11:23:29 +0200 [thread overview]
Message-ID: <44A8E211.4090005@trash.net> (raw)
In-Reply-To: <20060703075259.6286.67397.sendpatchset@localhost.localdomain>
[-- Attachment #1: Type: text/plain, Size: 1323 bytes --]
Patrick McHardy wrote:
> I took on Ben's challenge to increase the number of possible routing tables,
> these are the resulting patches.
>
> The table IDs are changed to 32 bit values and are contained in a new netlink
> routing attribute. For compatibility rtm_table in struct rtmsg can still be
> used to access the first 255 tables and contains the low 8 bit of the table
> ID in case of dumps. Unfortunately there are no invalid values for rtm_table,
> so the best userspace can do in case of a new iproute version that tries to
> access tables > 255 on an old kernel is to use RTM_UNSPEC (0) for rtm_table,
> which will make the kernel allocate an empty table instead of silently adding
> routes to a more or less random table. The iproute patch will follow shortly.
Actually that last part wasn't entirely true. The last couple of
releases of the kernel include the inet_check_attr function,
which (unwillingly) breaks with the tradition of ignoring
unknown attributes and signals an error on receiving the RTA_TABLE
attribute. So the iproute patch only includes the RTA_TABLE
attribute when the table ID is > 255, in which case rtm_table
is set to RT_TABLE_UNSPEC. Old kernels will still have the
behaviour I described above. The patch has been tested to
behave as expected on both patched and unpatched kernels.
[-- Attachment #2: x --]
[-- Type: text/plain, Size: 10047 bytes --]
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5e33a20..7573c62 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -238,9 +238,8 @@ enum rt_class_t
RT_TABLE_DEFAULT=253,
RT_TABLE_MAIN=254,
RT_TABLE_LOCAL=255,
- __RT_TABLE_MAX
};
-#define RT_TABLE_MAX (__RT_TABLE_MAX - 1)
+#define RT_TABLE_MAX 0xFFFFFFFF
@@ -263,6 +262,7 @@ enum rtattr_type_t
RTA_CACHEINFO,
RTA_SESSION,
RTA_MP_ALGO,
+ RTA_TABLE,
__RTA_MAX
};
diff --git a/include/rt_names.h b/include/rt_names.h
index 2d9ef10..07a10e0 100644
--- a/include/rt_names.h
+++ b/include/rt_names.h
@@ -5,7 +5,7 @@ #include <asm/types.h>
char* rtnl_rtprot_n2a(int id, char *buf, int len);
char* rtnl_rtscope_n2a(int id, char *buf, int len);
-char* rtnl_rttable_n2a(int id, char *buf, int len);
+char* rtnl_rttable_n2a(__u32 id, char *buf, int len);
char* rtnl_rtrealm_n2a(int id, char *buf, int len);
char* rtnl_dsfield_n2a(int id, char *buf, int len);
int rtnl_rtprot_a2n(__u32 *id, char *arg);
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 1fe4a69..8b286b0 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -32,4 +32,12 @@ extern int do_multiaddr(int argc, char *
extern int do_multiroute(int argc, char **argv);
extern int do_xfrm(int argc, char **argv);
+static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
+{
+ __u32 table = r->rtm_table;
+ if (tb[RTA_TABLE])
+ table = *(__u32*) RTA_DATA(tb[RTA_TABLE]);
+ return table;
+}
+
extern struct rtnl_handle rth;
diff --git a/ip/iproute.c b/ip/iproute.c
index a43c09e..4ebe617 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -75,7 +75,8 @@ static void usage(void)
static struct
{
- int tb;
+ __u32 tb;
+ int cloned;
int flushed;
char *flushb;
int flushp;
@@ -125,6 +126,7 @@ int print_route(const struct sockaddr_nl
inet_prefix prefsrc;
inet_prefix via;
int host_len = -1;
+ __u32 table;
SPRINT_BUF(b1);
@@ -151,27 +153,23 @@ int print_route(const struct sockaddr_nl
host_len = 80;
if (r->rtm_family == AF_INET6) {
+ if (filter.cloned) {
+ if (!(r->rtm_flags&RTM_F_CLONED))
+ return 0;
+ }
if (filter.tb) {
- if (filter.tb < 0) {
- if (!(r->rtm_flags&RTM_F_CLONED))
- return 0;
- } else {
- if (r->rtm_flags&RTM_F_CLONED)
+ if (r->rtm_flags&RTM_F_CLONED)
+ return 0;
+ if (filter.tb == RT_TABLE_LOCAL) {
+ if (r->rtm_type != RTN_LOCAL)
return 0;
- if (filter.tb == RT_TABLE_LOCAL) {
- if (r->rtm_type != RTN_LOCAL)
- return 0;
- } else if (filter.tb == RT_TABLE_MAIN) {
- if (r->rtm_type == RTN_LOCAL)
- return 0;
- } else {
+ } else if (filter.tb == RT_TABLE_MAIN) {
+ if (r->rtm_type == RTN_LOCAL)
return 0;
- }
+ } else {
+ return 0;
}
}
- } else {
- if (filter.tb > 0 && filter.tb != r->rtm_table)
- return 0;
}
if ((filter.protocol^r->rtm_protocol)&filter.protocolmask)
return 0;
@@ -225,6 +223,10 @@ int print_route(const struct sockaddr_nl
memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len/8);
}
+ table = rtm_get_table(r, tb);
+ if (r->rtm_family == AF_INET && filter.tb > 0 && filter.tb != table)
+ return 0;
+
if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen))
return 0;
if (filter.mdst.family && filter.mdst.bitlen >= 0 &&
@@ -354,8 +356,8 @@ int print_route(const struct sockaddr_nl
fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
if (!(r->rtm_flags&RTM_F_CLONED)) {
- if (r->rtm_table != RT_TABLE_MAIN && !filter.tb)
- fprintf(fp, " table %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+ if (table != RT_TABLE_MAIN && !filter.tb)
+ fprintf(fp, " table %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
if (r->rtm_protocol != RTPROT_BOOT && filter.protocolmask != -1)
fprintf(fp, " proto %s ", rtnl_rtprot_n2a(r->rtm_protocol, b1, sizeof(b1)));
if (r->rtm_scope != RT_SCOPE_UNIVERSE && filter.scopemask != -1)
@@ -840,7 +842,12 @@ #endif
NEXT_ARG();
if (rtnl_rttable_a2n(&tid, *argv))
invarg("\"table\" value is invalid\n", *argv);
- req.r.rtm_table = tid;
+ if (tid < 256)
+ req.r.rtm_table = tid;
+ else {
+ req.r.rtm_table = RT_TABLE_UNSPEC;
+ addattr32(&req.n, sizeof(req), RTA_TABLE, tid);
+ }
table_ok = 1;
} else if (strcmp(*argv, "dev") == 0 ||
strcmp(*argv, "oif") == 0) {
@@ -1022,7 +1029,7 @@ static int iproute_list_or_flush(int arg
filter.tb = tid;
} else if (matches(*argv, "cached") == 0 ||
matches(*argv, "cloned") == 0) {
- filter.tb = -1;
+ filter.cloned = 1;
} else if (strcmp(*argv, "tos") == 0 ||
matches(*argv, "dsfield") == 0) {
__u32 tos;
diff --git a/ip/iprule.c b/ip/iprule.c
index ccf699f..6caf573 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -27,6 +27,7 @@ #include <string.h>
#include "rt_names.h"
#include "utils.h"
+#include "ip_common.h"
extern struct rtnl_handle rth;
@@ -51,6 +52,7 @@ static int print_rule(const struct socka
struct rtmsg *r = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
+ __u32 table;
struct rtattr * tb[RTA_MAX+1];
char abuf[256];
SPRINT_BUF(b1);
@@ -129,8 +131,9 @@ static int print_rule(const struct socka
fprintf(fp, "iif %s ", (char*)RTA_DATA(tb[RTA_IIF]));
}
- if (r->rtm_table)
- fprintf(fp, "lookup %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
+ table = rtm_get_table(r, tb);
+ if (table)
+ fprintf(fp, "lookup %s ", rtnl_rttable_n2a(table, b1, sizeof(b1)));
if (tb[RTA_FLOW]) {
__u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
@@ -257,7 +260,12 @@ static int iprule_modify(int cmd, int ar
NEXT_ARG();
if (rtnl_rttable_a2n(&tid, *argv))
invarg("invalid table ID\n", *argv);
- req.r.rtm_table = tid;
+ if (tid < 256)
+ req.r.rtm_table = tid;
+ else {
+ req.r.rtm_table = RT_TABLE_UNSPEC;
+ addattr32(&req.n, sizeof(req), RTA_TABLE, tid);
+ }
table_ok = 1;
} else if (strcmp(*argv, "dev") == 0 ||
strcmp(*argv, "iif") == 0) {
diff --git a/lib/rt_names.c b/lib/rt_names.c
index 05046c2..2ff984a 100644
--- a/lib/rt_names.c
+++ b/lib/rt_names.c
@@ -23,6 +23,51 @@ #include <linux/rtnetlink.h>
#include "rt_names.h"
+struct rtnl_hash_entry {
+ struct rtnl_hash_entry *next;
+ unsigned int id;
+ char * name;
+};
+
+static void
+rtnl_hash_initialize(char *file, struct rtnl_hash_entry **hash, int size)
+{
+ struct rtnl_hash_entry *entry;
+ char buf[512];
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp)
+ return;
+ while (fgets(buf, sizeof(buf), fp)) {
+ char *p = buf;
+ int id;
+ char namebuf[512];
+
+ while (*p == ' ' || *p == '\t')
+ p++;
+ if (*p == '#' || *p == '\n' || *p == 0)
+ continue;
+ if (sscanf(p, "0x%x %s\n", &id, namebuf) != 2 &&
+ sscanf(p, "0x%x %s #", &id, namebuf) != 2 &&
+ sscanf(p, "%d %s\n", &id, namebuf) != 2 &&
+ sscanf(p, "%d %s #", &id, namebuf) != 2) {
+ fprintf(stderr, "Database %s is corrupted at %s\n",
+ file, p);
+ return;
+ }
+
+ if (id<0)
+ continue;
+ entry = malloc(sizeof(*entry));
+ entry->id = id;
+ entry->name = strdup(namebuf);
+ entry->next = hash[id & (size - 1)];
+ hash[id & (size - 1)] = entry;
+ }
+ fclose(fp);
+}
+
static void rtnl_tab_initialize(char *file, char **tab, int size)
{
char buf[512];
@@ -57,7 +102,6 @@ static void rtnl_tab_initialize(char *fi
fclose(fp);
}
-
static char * rtnl_rtprot_tab[256] = {
[RTPROT_UNSPEC] = "none",
[RTPROT_REDIRECT] ="redirect",
@@ -266,9 +310,14 @@ int rtnl_rtrealm_a2n(__u32 *id, char *ar
}
+static struct rtnl_hash_entry dflt_table_entry = { .id = 253, .name = "default" };
+static struct rtnl_hash_entry main_table_entry = { .id = 254, .name = "main" };
+static struct rtnl_hash_entry local_table_entry = { .id = 255, .name = "local" };
-static char * rtnl_rttable_tab[256] = {
- "unspec",
+static struct rtnl_hash_entry * rtnl_rttable_hash[256] = {
+ [253] = &dflt_table_entry,
+ [254] = &main_table_entry,
+ [255] = &local_table_entry,
};
static int rtnl_rttable_init;
@@ -276,26 +325,26 @@ static int rtnl_rttable_init;
static void rtnl_rttable_initialize(void)
{
rtnl_rttable_init = 1;
- rtnl_rttable_tab[255] = "local";
- rtnl_rttable_tab[254] = "main";
- rtnl_rttable_tab[253] = "default";
- rtnl_tab_initialize("/etc/iproute2/rt_tables",
- rtnl_rttable_tab, 256);
+ rtnl_hash_initialize("/etc/iproute2/rt_tables",
+ rtnl_rttable_hash, 256);
}
-char * rtnl_rttable_n2a(int id, char *buf, int len)
+char * rtnl_rttable_n2a(__u32 id, char *buf, int len)
{
- if (id<0 || id>=256) {
- snprintf(buf, len, "%d", id);
+ struct rtnl_hash_entry *entry;
+
+ if (id >= RT_TABLE_MAX) {
+ snprintf(buf, len, "%u", id);
return buf;
}
- if (!rtnl_rttable_tab[id]) {
- if (!rtnl_rttable_init)
- rtnl_rttable_initialize();
- }
- if (rtnl_rttable_tab[id])
- return rtnl_rttable_tab[id];
- snprintf(buf, len, "%d", id);
+ if (!rtnl_rttable_init)
+ rtnl_rttable_initialize();
+ entry = rtnl_rttable_hash[id & 255];
+ while (entry && entry->id != id)
+ entry = entry->next;
+ if (entry)
+ return entry->name;
+ snprintf(buf, len, "%u", id);
return buf;
}
@@ -303,8 +352,9 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
{
static char *cache = NULL;
static unsigned long res;
+ struct rtnl_hash_entry *entry;
char *end;
- int i;
+ __u32 i;
if (cache && strcmp(cache, arg) == 0) {
*id = res;
@@ -315,9 +365,11 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
rtnl_rttable_initialize();
for (i=0; i<256; i++) {
- if (rtnl_rttable_tab[i] &&
- strcmp(rtnl_rttable_tab[i], arg) == 0) {
- cache = rtnl_rttable_tab[i];
+ entry = rtnl_rttable_hash[i];
+ while (entry && strcmp(entry->name, arg))
+ entry = entry->next;
+ if (entry) {
+ cache = entry->name;
res = i;
*id = res;
return 0;
@@ -325,7 +377,7 @@ int rtnl_rttable_a2n(__u32 *id, char *ar
}
i = strtoul(arg, &end, 0);
- if (!end || end == arg || *end || i > 255)
+ if (!end || end == arg || *end || i > RT_TABLE_MAX)
return -1;
*id = i;
return 0;
next prev parent reply other threads:[~2006-07-03 9:23 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-07-03 7:52 [RFC NET 00/04]: Increase number of possible routing tables Patrick McHardy
2006-07-03 7:53 ` [RFC NET 01/04]: Use u32 for routing table IDs Patrick McHardy
2006-07-03 7:53 ` [RFC NET 02/04]: Introduce RTA_TABLE routing attribute Patrick McHardy
2006-07-03 7:53 ` [RFC IPV4 03/04]: Increase number of possible routing tables to 2^32 Patrick McHardy
2006-07-03 7:53 ` [RFC DECNET 04/04]: " Patrick McHardy
2006-07-03 11:20 ` Steven Whitehouse
2006-07-03 11:21 ` Patrick McHardy
2006-07-03 9:23 ` Patrick McHardy [this message]
2006-07-03 9:38 ` [RFC NET 00/04]: Increase number of possible routing tables Patrick McHardy
2006-07-03 11:34 ` Thomas Graf
2006-07-03 11:36 ` Patrick McHardy
2006-07-03 11:41 ` Thomas Graf
2006-07-07 8:05 ` Patrick McHardy
2006-07-07 18:13 ` Ben Greear
2006-07-07 19:58 ` Patrick McHardy
2006-07-07 23:59 ` David Miller
2006-07-08 2:45 ` Patrick McHardy
2006-07-08 1:07 ` Ben Greear
2006-07-08 2:48 ` Patrick McHardy
2006-07-08 5:06 ` Ben Greear
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=44A8E211.4090005@trash.net \
--to=kaber@trash.net \
--cc=davem@davemloft.net \
--cc=greearb@candelatech.com \
--cc=netdev@vger.kernel.org \
--cc=shemminger@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.