* [PATCH 03/15] ipv4: Remove rt_key_{src,dst,tos} from struct rtable.
From: David Miller @ 2012-07-18 18:23 UTC (permalink / raw)
To: netdev
They are always used in contexts where they can be reconstituted,
or where the finally resolved rt->rt_{src,dst} is semantically
equivalent.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 5 -----
net/ipv4/route.c | 39 +++++++++------------------------------
net/ipv4/xfrm4_policy.c | 3 ---
3 files changed, 9 insertions(+), 38 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index 5c86c47..935fa59 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -44,14 +44,9 @@ struct fib_info;
struct rtable {
struct dst_entry dst;
- /* Lookup key. */
- __be32 rt_key_dst;
- __be32 rt_key_src;
-
int rt_genid;
unsigned int rt_flags;
__u16 rt_type;
- __u8 rt_key_tos;
__be32 rt_dst; /* Path destination */
__be32 rt_src; /* Path source */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 534bc4d..83e9663 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1250,12 +1250,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
rth->dst.output = ip_rt_bug;
- rth->rt_key_dst = daddr;
- rth->rt_key_src = saddr;
rth->rt_genid = rt_genid(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
- rth->rt_key_tos = tos;
rth->rt_dst = daddr;
rth->rt_src = saddr;
rth->rt_route_iif = dev->ifindex;
@@ -1374,12 +1371,9 @@ static int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
- rth->rt_key_dst = daddr;
- rth->rt_key_src = saddr;
rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
rth->rt_flags = flags;
rth->rt_type = res->type;
- rth->rt_key_tos = tos;
rth->rt_dst = daddr;
rth->rt_src = saddr;
rth->rt_route_iif = in_dev->dev->ifindex;
@@ -1545,12 +1539,9 @@ local_input:
rth->dst.tclassid = itag;
#endif
- rth->rt_key_dst = daddr;
- rth->rt_key_src = saddr;
rth->rt_genid = rt_genid(net);
rth->rt_flags = flags|RTCF_LOCAL;
rth->rt_type = res.type;
- rth->rt_key_tos = tos;
rth->rt_dst = daddr;
rth->rt_src = saddr;
rth->rt_route_iif = dev->ifindex;
@@ -1650,9 +1641,7 @@ EXPORT_SYMBOL(ip_route_input);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
- const struct flowi4 *fl4,
- __be32 orig_daddr, __be32 orig_saddr,
- int orig_oif, __u8 orig_rtos,
+ const struct flowi4 *fl4, int orig_oif,
struct net_device *dev_out,
unsigned int flags)
{
@@ -1703,12 +1692,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->dst.output = ip_output;
- rth->rt_key_dst = orig_daddr;
- rth->rt_key_src = orig_saddr;
rth->rt_genid = rt_genid(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
- rth->rt_key_tos = orig_rtos;
rth->rt_dst = fl4->daddr;
rth->rt_src = fl4->saddr;
rth->rt_route_iif = 0;
@@ -1759,16 +1745,12 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
unsigned int flags = 0;
struct fib_result res;
struct rtable *rth;
- __be32 orig_daddr;
- __be32 orig_saddr;
int orig_oif;
res.tclassid = 0;
res.fi = NULL;
res.table = NULL;
- orig_daddr = fl4->daddr;
- orig_saddr = fl4->saddr;
orig_oif = fl4->flowi4_oif;
fl4->flowi4_iif = net->loopback_dev->ifindex;
@@ -1930,8 +1912,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
make_route:
- rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif,
- tos, dev_out, flags);
+ rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
out:
rcu_read_unlock();
@@ -1996,9 +1977,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
if (new->dev)
dev_hold(new->dev);
- rt->rt_key_dst = ort->rt_key_dst;
- rt->rt_key_src = ort->rt_key_src;
- rt->rt_key_tos = ort->rt_key_tos;
rt->rt_route_iif = ort->rt_route_iif;
rt->rt_iif = ort->rt_iif;
rt->rt_oif = ort->rt_oif;
@@ -2040,7 +2018,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
-static int rt_fill_info(struct net *net,
+static int rt_fill_info(struct net *net, __be32 src, u8 tos,
struct sk_buff *skb, u32 pid, u32 seq, int event,
int nowait, unsigned int flags)
{
@@ -2058,7 +2036,7 @@ static int rt_fill_info(struct net *net,
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
- r->rtm_tos = rt->rt_key_tos;
+ r->rtm_tos = tos;
r->rtm_table = RT_TABLE_MAIN;
if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
goto nla_put_failure;
@@ -2071,9 +2049,9 @@ static int rt_fill_info(struct net *net,
if (nla_put_be32(skb, RTA_DST, rt->rt_dst))
goto nla_put_failure;
- if (rt->rt_key_src) {
+ if (src) {
r->rtm_src_len = 32;
- if (nla_put_be32(skb, RTA_SRC, rt->rt_key_src))
+ if (nla_put_be32(skb, RTA_SRC, src))
goto nla_put_failure;
}
if (rt->dst.dev &&
@@ -2085,7 +2063,7 @@ static int rt_fill_info(struct net *net,
goto nla_put_failure;
#endif
if (!rt_is_input_route(rt) &&
- rt->rt_src != rt->rt_key_src) {
+ rt->rt_src != src) {
if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src))
goto nla_put_failure;
}
@@ -2226,7 +2204,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ err = rt_fill_info(net, src, rtm->rtm_tos, skb,
+ NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
if (err <= 0)
goto errout_free;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fcf7678..2a8d5cf 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -79,9 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
struct rtable *rt = (struct rtable *)xdst->route;
const struct flowi4 *fl4 = &fl->u.ip4;
- xdst->u.rt.rt_key_dst = fl4->daddr;
- xdst->u.rt.rt_key_src = fl4->saddr;
- xdst->u.rt.rt_key_tos = fl4->flowi4_tos;
xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
xdst->u.rt.rt_oif = fl4->flowi4_oif;
--
1.7.10.4
^ permalink raw reply related
* RE: r8169: link up, link down
From: J. Christopher Pereira @ 2012-07-18 18:23 UTC (permalink / raw)
To: 'Francois Romieu'; +Cc: netdev
In-Reply-To: <20120718173909.GA12524@electric-eye.fr.zoreil.com>
Hi Francois:
Thanks for answering.
> If it's an XID 98000000 - i.e. old new hardware - you may try to remove
the device then rescan the PCI bus through sysfs.
dmesg says "eth0: RTL8110s at 0xffffc2000067ec00, 00:4f:4a:10:1e:cf, XID
04000000 IRQ 16".
> Building a modern kernel is strongly suggested if the hardware includes a
recent 816x chipset.
Is there any particular patch I could apply and just recompile the driver?
My hope was to first receive feedback and identify some probably related
known bug, in order to avoid searching for a solution by trial and error or
by updating the whole environment.
^ permalink raw reply
* [PATCH 04/15] ipv4: Kill 'rt_src' from 'struct rtable'
From: David Miller @ 2012-07-18 18:23 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 1 -
net/ipv4/route.c | 34 +++++++++++++++-------------------
net/ipv4/xfrm4_policy.c | 1 -
3 files changed, 15 insertions(+), 21 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index 935fa59..85d1093 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -49,7 +49,6 @@ struct rtable {
__u16 rt_type;
__be32 rt_dst; /* Path destination */
- __be32 rt_src; /* Path source */
int rt_route_iif;
int rt_iif;
int rt_oif;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 83e9663..2d4ae01 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1254,7 +1254,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
rth->rt_dst = daddr;
- rth->rt_src = saddr;
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
@@ -1375,7 +1374,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_flags = flags;
rth->rt_type = res->type;
rth->rt_dst = daddr;
- rth->rt_src = saddr;
rth->rt_route_iif = in_dev->dev->ifindex;
rth->rt_iif = in_dev->dev->ifindex;
rth->rt_oif = 0;
@@ -1543,7 +1541,6 @@ local_input:
rth->rt_flags = flags|RTCF_LOCAL;
rth->rt_type = res.type;
rth->rt_dst = daddr;
- rth->rt_src = saddr;
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
@@ -1696,7 +1693,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_flags = flags;
rth->rt_type = type;
rth->rt_dst = fl4->daddr;
- rth->rt_src = fl4->saddr;
rth->rt_route_iif = 0;
rth->rt_iif = orig_oif ? : dev_out->ifindex;
rth->rt_oif = orig_oif;
@@ -1987,7 +1983,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_dst = ort->rt_dst;
- rt->rt_src = ort->rt_src;
rt->rt_gateway = ort->rt_gateway;
rt->fi = ort->fi;
if (rt->fi)
@@ -2018,7 +2013,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
-static int rt_fill_info(struct net *net, __be32 src, u8 tos,
+static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4,
struct sk_buff *skb, u32 pid, u32 seq, int event,
int nowait, unsigned int flags)
{
@@ -2036,7 +2031,7 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos,
r->rtm_family = AF_INET;
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
- r->rtm_tos = tos;
+ r->rtm_tos = fl4->flowi4_tos;
r->rtm_table = RT_TABLE_MAIN;
if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
goto nla_put_failure;
@@ -2063,11 +2058,11 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos,
goto nla_put_failure;
#endif
if (!rt_is_input_route(rt) &&
- rt->rt_src != src) {
- if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src))
+ fl4->saddr != src) {
+ if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (rt->rt_dst != rt->rt_gateway &&
+ if (fl4->daddr != rt->rt_gateway &&
nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
goto nla_put_failure;
@@ -2094,7 +2089,7 @@ static int rt_fill_info(struct net *net, __be32 src, u8 tos,
if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
int err = ipmr_get_route(net, skb,
- rt->rt_src, rt->rt_dst,
+ fl4->saddr, fl4->daddr,
r, nowait);
if (err <= 0) {
if (!nowait) {
@@ -2129,6 +2124,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX+1];
struct rtable *rt = NULL;
+ struct flowi4 fl4;
__be32 dst = 0;
__be32 src = 0;
u32 iif;
@@ -2163,6 +2159,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = dst;
+ fl4.saddr = src;
+ fl4.flowi4_tos = rtm->rtm_tos;
+ fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
+ fl4.flowi4_mark = mark;
+
if (iif) {
struct net_device *dev;
@@ -2183,13 +2186,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
if (err == 0 && rt->dst.error)
err = -rt->dst.error;
} else {
- struct flowi4 fl4 = {
- .daddr = dst,
- .saddr = src,
- .flowi4_tos = rtm->rtm_tos,
- .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
- .flowi4_mark = mark,
- };
rt = ip_route_output_key(net, &fl4);
err = 0;
@@ -2204,7 +2200,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- err = rt_fill_info(net, src, rtm->rtm_tos, skb,
+ err = rt_fill_info(net, src, &fl4, skb,
NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
if (err <= 0)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 2a8d5cf..00d49e4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,7 +92,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
- xdst->u.rt.rt_src = rt->rt_src;
xdst->u.rt.rt_dst = rt->rt_dst;
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 05/15] ipv4: Remove 'rt_mark' from 'struct rtable'
From: David Miller @ 2012-07-18 18:23 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 1 -
net/ipv4/ipmr.c | 2 +-
net/ipv4/route.c | 9 ++-------
net/ipv4/xfrm4_policy.c | 1 -
4 files changed, 3 insertions(+), 10 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index 85d1093..757fe40 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -52,7 +52,6 @@ struct rtable {
int rt_route_iif;
int rt_iif;
int rt_oif;
- __u32 rt_mark;
/* Info on neighbour */
__be32 rt_gateway;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5716c6b..eee3bf6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1797,7 +1797,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
.flowi4_tos = RT_TOS(iph->tos),
.flowi4_oif = rt->rt_oif,
.flowi4_iif = rt->rt_iif,
- .flowi4_mark = rt->rt_mark,
+ .flowi4_mark = skb->mark,
};
struct mr_table *mrt;
int err;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2d4ae01..5ef9f38 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1257,7 +1257,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
- rth->rt_mark = skb->mark;
rth->rt_pmtu = 0;
rth->rt_gateway = daddr;
rth->fi = NULL;
@@ -1377,7 +1376,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_route_iif = in_dev->dev->ifindex;
rth->rt_iif = in_dev->dev->ifindex;
rth->rt_oif = 0;
- rth->rt_mark = skb->mark;
rth->rt_pmtu = 0;
rth->rt_gateway = daddr;
rth->fi = NULL;
@@ -1544,7 +1542,6 @@ local_input:
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
- rth->rt_mark = skb->mark;
rth->rt_pmtu = 0;
rth->rt_gateway = daddr;
rth->fi = NULL;
@@ -1696,7 +1693,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_route_iif = 0;
rth->rt_iif = orig_oif ? : dev_out->ifindex;
rth->rt_oif = orig_oif;
- rth->rt_mark = fl4->flowi4_mark;
rth->rt_pmtu = 0;
rth->rt_gateway = fl4->daddr;
rth->fi = NULL;
@@ -1976,7 +1972,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_route_iif = ort->rt_route_iif;
rt->rt_iif = ort->rt_iif;
rt->rt_oif = ort->rt_oif;
- rt->rt_mark = ort->rt_mark;
rt->rt_pmtu = ort->rt_pmtu;
rt->rt_genid = rt_genid(net);
@@ -2069,8 +2064,8 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4,
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
- if (rt->rt_mark &&
- nla_put_be32(skb, RTA_MARK, rt->rt_mark))
+ if (fl4->flowi4_mark &&
+ nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
goto nla_put_failure;
error = rt->dst.error;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 00d49e4..f73ba82 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -82,7 +82,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
xdst->u.rt.rt_oif = fl4->flowi4_oif;
- xdst->u.rt.rt_mark = fl4->flowi4_mark;
xdst->u.dst.dev = dev;
dev_hold(dev);
--
1.7.10.4
^ permalink raw reply related
* [PATCH 06/15] ipv4: Remove 'rt_dst' from 'struct rtable'
From: David Miller @ 2012-07-18 18:23 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 1 -
net/ipv4/route.c | 45 +++++++++------------------------------------
net/ipv4/xfrm4_policy.c | 1 -
3 files changed, 9 insertions(+), 38 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index 757fe40..6d111bc 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -48,7 +48,6 @@ struct rtable {
unsigned int rt_flags;
__u16 rt_type;
- __be32 rt_dst; /* Path destination */
int rt_route_iif;
int rt_iif;
int rt_oif;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5ef9f38..76eb78e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -838,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
peer->rate_tokens == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, rt->rt_iif,
- &rt->rt_dst, &rt->rt_gateway);
+ &ip_hdr(skb)->daddr, &rt->rt_gateway);
#endif
}
out_put_peer:
@@ -1126,8 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
-
- if (rt->rt_gateway != rt->rt_dst && mtu > 576)
+ if (rt->rt_gateway != 0 && mtu > 576)
mtu = 576;
}
@@ -1253,7 +1252,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_genid = rt_genid(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
- rth->rt_dst = daddr;
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
@@ -1372,7 +1370,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
rth->rt_flags = flags;
rth->rt_type = res->type;
- rth->rt_dst = daddr;
rth->rt_route_iif = in_dev->dev->ifindex;
rth->rt_iif = in_dev->dev->ifindex;
rth->rt_oif = 0;
@@ -1538,7 +1535,6 @@ local_input:
rth->rt_genid = rt_genid(net);
rth->rt_flags = flags|RTCF_LOCAL;
rth->rt_type = res.type;
- rth->rt_dst = daddr;
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
@@ -1689,7 +1685,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_genid = rt_genid(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
- rth->rt_dst = fl4->daddr;
rth->rt_route_iif = 0;
rth->rt_iif = orig_oif ? : dev_out->ifindex;
rth->rt_oif = orig_oif;
@@ -1977,7 +1972,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_genid = rt_genid(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
- rt->rt_dst = ort->rt_dst;
rt->rt_gateway = ort->rt_gateway;
rt->fi = ort->fi;
if (rt->fi)
@@ -2008,9 +2002,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
-static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4,
- struct sk_buff *skb, u32 pid, u32 seq, int event,
- int nowait, unsigned int flags)
+static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
+ struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
+ u32 seq, int event, int nowait, unsigned int flags)
{
struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
@@ -2037,7 +2031,7 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4,
if (rt->rt_flags & RTCF_NOTIFY)
r->rtm_flags |= RTM_F_NOTIFY;
- if (nla_put_be32(skb, RTA_DST, rt->rt_dst))
+ if (nla_put_be32(skb, RTA_DST, dst))
goto nla_put_failure;
if (src) {
r->rtm_src_len = 32;
@@ -2078,29 +2072,8 @@ static int rt_fill_info(struct net *net, __be32 src, struct flowi4 *fl4,
}
if (rt_is_input_route(rt)) {
-#ifdef CONFIG_IP_MROUTE
- __be32 dst = rt->rt_dst;
-
- if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
- IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
- int err = ipmr_get_route(net, skb,
- fl4->saddr, fl4->daddr,
- r, nowait);
- if (err <= 0) {
- if (!nowait) {
- if (err == 0)
- return 0;
- goto nla_put_failure;
- } else {
- if (err == -EMSGSIZE)
- goto nla_put_failure;
- error = err;
- }
- }
- } else
-#endif
- if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
- goto nla_put_failure;
+ if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ goto nla_put_failure;
}
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
@@ -2195,7 +2168,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- err = rt_fill_info(net, src, &fl4, skb,
+ err = rt_fill_info(net, dst, src, &fl4, skb,
NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
if (err <= 0)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f73ba82..6074b69 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -91,7 +91,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
- xdst->u.rt.rt_dst = rt->rt_dst;
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 07/15] ipv4: Adjust semantics of rt->rt_gateway.
From: David Miller @ 2012-07-18 18:23 UTC (permalink / raw)
To: netdev
In order to allow prefixed routes, we have to adjust how rt_gateway
is set and interpreted.
The new interpretation is:
1) rt_gateway == 0, destination is on-link, nexthop is iph->daddr
2) rt_gateway != 0, destination requires a nexthop gateway
Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Vijay Subramanian <subramanian.vijay@gmail.com>
---
net/ipv4/arp.c | 3 ++-
net/ipv4/inet_connection_sock.c | 4 ++--
net/ipv4/ip_gre.c | 2 ++
net/ipv4/ip_output.c | 2 +-
net/ipv4/ipip.c | 2 ++
net/ipv4/netfilter/ipt_MASQUERADE.c | 7 +++++--
net/ipv4/route.c | 18 ++++++++++--------
7 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c38293f..672d6f3 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -476,7 +476,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
}
paddr = skb_rtable(skb)->rt_gateway;
-
+ if (!paddr)
+ paddr = ip_hdr(skb)->daddr;
if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
paddr, dev))
return 0;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c7a4de0..0a290d7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -389,7 +389,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_gateway)
goto route_err;
return &rt->dst;
@@ -422,7 +422,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_gateway)
goto route_err;
return &rt->dst;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 42c44b1..1ff6bf8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -767,6 +767,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (skb->protocol == htons(ETH_P_IP)) {
rt = skb_rtable(skb);
dst = rt->rt_gateway;
+ if (!dst)
+ dst = old_iph->daddr;
}
#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cc52679..6b805e0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (inet_opt && inet_opt->opt.is_strictroute && fl4->daddr != rt->rt_gateway)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2c2c35b..59e0e95 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -488,6 +488,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_error;
}
dst = rt->rt_gateway;
+ if (!dst)
+ dst = old_iph->daddr;
}
rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 2f210c7..b99746b 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -52,7 +52,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_nat_ipv4_range newrange;
const struct nf_nat_ipv4_multi_range_compat *mr;
const struct rtable *rt;
- __be32 newsrc;
+ __be32 newsrc, nh;
NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
@@ -70,7 +70,10 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
mr = par->targinfo;
rt = skb_rtable(skb);
- newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ nh = rt->rt_gateway;
+ if (!nh)
+ nh = ip_hdr(skb)->daddr;
+ newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE);
if (!newsrc) {
pr_info("%s ate my IP address\n", par->out->name);
return NF_DROP;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 76eb78e..7ebf788 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1079,8 +1079,10 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
else
- src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
- RT_SCOPE_UNIVERSE);
+ src = inet_select_addr(rt->dst.dev, (rt->rt_gateway ?
+ rt->rt_gateway :
+ iph->daddr),
+ RT_SCOPE_UNIVERSE);
rcu_read_unlock();
}
memcpy(addr, &src, 4);
@@ -1126,7 +1128,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
- if (rt->rt_gateway != 0 && mtu > 576)
+ if (rt->rt_gateway && mtu > 576)
mtu = 576;
}
@@ -1256,7 +1258,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
rth->rt_pmtu = 0;
- rth->rt_gateway = daddr;
+ rth->rt_gateway = 0;
rth->fi = NULL;
if (our) {
rth->dst.input= ip_local_deliver;
@@ -1374,7 +1376,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = in_dev->dev->ifindex;
rth->rt_oif = 0;
rth->rt_pmtu = 0;
- rth->rt_gateway = daddr;
+ rth->rt_gateway = 0;
rth->fi = NULL;
rth->dst.input = ip_forward;
@@ -1539,7 +1541,7 @@ local_input:
rth->rt_iif = dev->ifindex;
rth->rt_oif = 0;
rth->rt_pmtu = 0;
- rth->rt_gateway = daddr;
+ rth->rt_gateway = 0;
rth->fi = NULL;
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
@@ -1689,7 +1691,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : dev_out->ifindex;
rth->rt_oif = orig_oif;
rth->rt_pmtu = 0;
- rth->rt_gateway = fl4->daddr;
+ rth->rt_gateway = 0;
rth->fi = NULL;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2051,7 +2053,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (fl4->daddr != rt->rt_gateway &&
+ if (rt->rt_gateway &&
nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
goto nla_put_failure;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 08/15] ipv4: Kill routes during PMTU/redirect updates.
From: David Miller @ 2012-07-18 18:23 UTC (permalink / raw)
To: netdev
Mark them obsolete so there will be a re-lookup to fetch the
FIB nexthop exception info.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/ipv4/route.c | 39 +++++++++++++++++++++++++++------------
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7ebf788..4d170a1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -657,7 +657,8 @@ out:
return fnhe;
}
-static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4)
+static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
+ bool kill_route)
{
__be32 new_gw = icmp_hdr(skb)->un.gateway;
__be32 old_gw = ip_hdr(skb)->saddr;
@@ -716,8 +717,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
fnhe->fnhe_gw = new_gw;
spin_unlock_bh(&fnhe_lock);
}
- rt->rt_gateway = new_gw;
- rt->rt_flags |= RTCF_REDIRECTED;
+ if (kill_route)
+ rt->dst.obsolete = -2;
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
neigh_release(n);
@@ -748,7 +749,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
rt = (struct rtable *) dst;
ip_rt_build_flow_key(&fl4, sk, skb);
- __ip_do_redirect(rt, skb, &fl4);
+ __ip_do_redirect(rt, skb, &fl4, true);
}
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
@@ -907,7 +908,7 @@ out: kfree_skb(skb);
return 0;
}
-static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
struct fib_result res;
@@ -926,8 +927,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
}
spin_unlock_bh(&fnhe_lock);
}
- rt->rt_pmtu = mtu;
- dst_set_expires(&rt->dst, ip_rt_mtu_expires);
+ return mtu;
}
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -937,7 +937,14 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
- __ip_rt_update_pmtu(rt, &fl4, mtu);
+ mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);
+
+ if (!rt->rt_pmtu) {
+ dst->obsolete = -2;
+ } else {
+ rt->rt_pmtu = mtu;
+ dst_set_expires(&rt->dst, ip_rt_mtu_expires);
+ }
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
@@ -983,7 +990,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
RT_TOS(iph->tos), protocol, mark, flow_flags);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
- __ip_do_redirect(rt, skb, &fl4);
+ __ip_do_redirect(rt, skb, &fl4, false);
ip_rt_put(rt);
}
}
@@ -998,7 +1005,7 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
rt = __ip_route_output_key(sock_net(sk), &fl4);
if (!IS_ERR(rt)) {
- __ip_do_redirect(rt, skb, &fl4);
+ __ip_do_redirect(rt, skb, &fl4, false);
ip_rt_put(rt);
}
}
@@ -1008,7 +1015,13 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rtable *rt = (struct rtable *) dst;
- if (rt_is_expired(rt))
+ /* All IPV4 dsts are created with obsolete set to -1, that
+ * forces validation calls down into this function always.
+ *
+ * When a PMTU/redirect information update invalidates a
+ * route, this is indicated by setting obsolete to -2.
+ */
+ if (dst->obsolete == -2 || rt_is_expired(rt))
return NULL;
return dst;
}
@@ -1168,8 +1181,10 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr
dst_set_expires(&rt->dst, diff);
}
}
- if (fnhe->fnhe_gw)
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
rt->rt_gateway = fnhe->fnhe_gw;
+ }
fnhe->fnhe_stamp = jiffies;
break;
}
--
1.7.10.4
^ permalink raw reply related
* [PATCH 09/15] ipv4: Cache output routes in fib_info nexthops.
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
If we have an output route that lacks nexthop exceptions, we can cache
it in the FIB info nexthop.
Such routes will have DST_HOST cleared because such routes refer to a
family of destinations, rather than just one.
The sequence of the handling of exceptions during route lookup is
adjusted to make the logic work properly.
Before we allocate the route, we lookup the exception.
Then we know if we will cache this route or not, and therefore whether
DST_HOST should be set on the allocated route.
Then we use DST_HOST to key off whether we should store the resulting
route, during rt_set_nexthop(), in the FIB nexthop cache.
To counter adding a new argument to rt_set_nexthop() I'm removing the
'fl4' arg to rt_set_nexthop() and rt_init_metrics(), which is no
longer used.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/ip_fib.h | 2 +
net/ipv4/fib_semantics.c | 2 +
net/ipv4/route.c | 109 ++++++++++++++++++++++++++++++++--------------
3 files changed, 80 insertions(+), 33 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e9ee1ca..23c9f9e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -46,6 +46,7 @@ struct fib_config {
};
struct fib_info;
+struct rtable;
struct fib_nh_exception {
struct fib_nh_exception __rcu *fnhe_next;
@@ -80,6 +81,7 @@ struct fib_nh {
__be32 nh_gw;
__be32 nh_saddr;
int nh_saddr_genid;
+ struct rtable *nh_rth_output;
struct fnhe_hash_bucket *nh_exceptions;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57d76..83d0f42 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -171,6 +171,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
+ if (nexthop_nh->nh_rth_output)
+ dst_release(&nexthop_nh->nh_rth_output->dst);
} endfor_nexthops(fi);
release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4d170a1..2e66b9a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1151,8 +1151,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu;
}
-static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
- struct fib_info *fi)
+static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
{
if (fi->fib_metrics != (u32 *) dst_default_metrics) {
rt->fi = fi;
@@ -1161,38 +1160,61 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh *nh, __be32 daddr)
+static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
struct fib_nh_exception *fnhe;
u32 hval;
+ if (!hash)
+ return NULL;
+
hval = fnhe_hashfun(daddr);
for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (fnhe->fnhe_daddr == daddr) {
- if (fnhe->fnhe_pmtu) {
- unsigned long expires = fnhe->fnhe_expires;
- unsigned long diff = jiffies - expires;
-
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = fnhe->fnhe_pmtu;
- dst_set_expires(&rt->dst, diff);
- }
- }
- if (fnhe->fnhe_gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = fnhe->fnhe_gw;
- }
- fnhe->fnhe_stamp = jiffies;
- break;
+ if (fnhe->fnhe_daddr == daddr)
+ return fnhe;
+ }
+ return NULL;
+}
+
+static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe)
+{
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = jiffies - expires;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
}
}
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ }
+ fnhe->fnhe_stamp = jiffies;
}
-static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
- const struct fib_result *res,
+static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+{
+ static DEFINE_SPINLOCK(fib_cache_lock);
+ struct rtable **p = &nh->nh_rth_output;
+
+ if (*p)
+ return;
+
+ spin_lock_bh(&fib_cache_lock);
+ if (!*p) {
+ *p = rt;
+ dst_clone(&rt->dst);
+ }
+ spin_unlock_bh(&fib_cache_lock);
+}
+
+static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res,
+ struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
if (fi) {
@@ -1200,12 +1222,15 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(nh->nh_exceptions))
- rt_bind_exception(rt, nh, fl4->daddr);
- rt_init_metrics(rt, fl4, fi);
+ if (unlikely(fnhe))
+ rt_bind_exception(rt, fnhe);
+ rt_init_metrics(rt, fi);
#ifdef CONFIG_IP_ROUTE_CLASSID
- rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+ rt->dst.tclassid = nh->nh_tclassid;
#endif
+ if (!(rt->dst.flags & DST_HOST) &&
+ rt_is_output_route(rt))
+ rt_cache_route(nh, rt);
}
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1217,10 +1242,10 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
}
static struct rtable *rt_dst_alloc(struct net_device *dev,
- bool nopolicy, bool noxfrm)
+ bool nopolicy, bool noxfrm, bool will_cache)
{
return dst_alloc(&ipv4_dst_ops, dev, 1, -1,
- DST_HOST | DST_NOCACHE |
+ (will_cache ? 0 : DST_HOST) | DST_NOCACHE |
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
}
@@ -1257,7 +1282,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto e_err;
}
rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
goto e_nobufs;
@@ -1330,6 +1355,7 @@ static int __mkroute_input(struct sk_buff *skb,
__be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
+ struct fib_nh_exception *fnhe;
struct rtable *rth;
int err;
struct in_device *out_dev;
@@ -1376,9 +1402,13 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
+ fnhe = NULL;
+ if (res->fi)
+ fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+
rth = rt_dst_alloc(out_dev->dev,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(out_dev, NOXFRM));
+ IN_DEV_CONF_GET(out_dev, NOXFRM), false);
if (!rth) {
err = -ENOBUFS;
goto cleanup;
@@ -1397,7 +1427,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
- rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag);
+ rt_set_nexthop(rth, res, fnhe, res->fi, res->type, itag);
*result = rth;
err = 0;
@@ -1539,7 +1569,7 @@ brd_input:
local_input:
rth = rt_dst_alloc(net->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
goto e_nobufs;
@@ -1653,6 +1683,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
unsigned int flags)
{
struct fib_info *fi = res->fi;
+ struct fib_nh_exception *fnhe;
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
@@ -1691,9 +1722,21 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fi = NULL;
}
+ fnhe = NULL;
+ if (fi) {
+ fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
+ if (!fnhe) {
+ rth = FIB_RES_NH(*res).nh_rth_output;
+ if (rth) {
+ dst_use(&rth->dst, jiffies);
+ return rth;
+ }
+ }
+ }
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(in_dev, NOXFRM));
+ IN_DEV_CONF_GET(in_dev, NOXFRM),
+ fi && !fnhe);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1730,7 +1773,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
#endif
}
- rt_set_nexthop(rth, fl4, res, fi, type, 0);
+ rt_set_nexthop(rth, res, fnhe, fi, type, 0);
if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
rth->dst.flags |= DST_NOCACHE;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 10/15] ipv4: Cache input routes in fib_info nexthops.
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
Caching input routes is slightly simpler than output routes, since we
don't need to be concerned with nexthop exceptions (locally
destined and routed packets never trigger PMTU events or redirects
that will be processed by us).
However, we have to elide caching for the DIRECTSRC and non-zero itag
cases.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/ip_fib.h | 1 +
net/ipv4/fib_semantics.c | 2 ++
net/ipv4/route.c | 47 ++++++++++++++++++++++++++++++++++++----------
3 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 23c9f9e..b64a19c 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -82,6 +82,7 @@ struct fib_nh {
__be32 nh_saddr;
int nh_saddr_genid;
struct rtable *nh_rth_output;
+ struct rtable *nh_rth_input;
struct fnhe_hash_bucket *nh_exceptions;
};
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 83d0f42..e55171f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -173,6 +173,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
free_nh_exceptions(nexthop_nh);
if (nexthop_nh->nh_rth_output)
dst_release(&nexthop_nh->nh_rth_output->dst);
+ if (nexthop_nh->nh_rth_input)
+ dst_release(&nexthop_nh->nh_rth_input->dst);
} endfor_nexthops(fi);
release_net(fi->fib_net);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2e66b9a..b2f5c33 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1202,6 +1202,9 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
static DEFINE_SPINLOCK(fib_cache_lock);
struct rtable **p = &nh->nh_rth_output;
+ if (rt_is_input_route(rt))
+ p = &nh->nh_rth_input;
+
if (*p)
return;
@@ -1228,8 +1231,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res,
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_HOST) &&
- rt_is_output_route(rt))
+ if (!(rt->dst.flags & DST_HOST))
rt_cache_route(nh, rt);
}
@@ -1355,11 +1357,11 @@ static int __mkroute_input(struct sk_buff *skb,
__be32 daddr, __be32 saddr, u32 tos,
struct rtable **result)
{
- struct fib_nh_exception *fnhe;
struct rtable *rth;
int err;
struct in_device *out_dev;
unsigned int flags = 0;
+ bool do_cache;
u32 itag;
/* get a working reference to the output device */
@@ -1402,13 +1404,21 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- fnhe = NULL;
- if (res->fi)
- fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+ do_cache = false;
+ if (res->fi) {
+ if (!(flags & RTCF_DIRECTSRC) && !itag) {
+ rth = FIB_RES_NH(*res).nh_rth_input;
+ if (rth) {
+ dst_use(&rth->dst, jiffies);
+ goto out;
+ }
+ do_cache = true;
+ }
+ }
rth = rt_dst_alloc(out_dev->dev,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
- IN_DEV_CONF_GET(out_dev, NOXFRM), false);
+ IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
if (!rth) {
err = -ENOBUFS;
goto cleanup;
@@ -1427,8 +1437,8 @@ static int __mkroute_input(struct sk_buff *skb,
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
- rt_set_nexthop(rth, res, fnhe, res->fi, res->type, itag);
-
+ rt_set_nexthop(rth, res, NULL, res->fi, res->type, itag);
+out:
*result = rth;
err = 0;
cleanup:
@@ -1480,6 +1490,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct rtable *rth;
int err = -EINVAL;
struct net *net = dev_net(dev);
+ bool do_cache;
/* IP on this device is disabled. */
@@ -1493,6 +1504,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
+ res.fi = NULL;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
@@ -1568,8 +1580,20 @@ brd_input:
RT_CACHE_STAT_INC(in_brd);
local_input:
+ do_cache = false;
+ if (res.fi) {
+ if (!(flags & RTCF_DIRECTSRC) && !itag) {
+ rth = FIB_RES_NH(res).nh_rth_input;
+ if (rth) {
+ dst_use(&rth->dst, jiffies);
+ goto set_and_out;
+ }
+ do_cache = true;
+ }
+ }
+
rth = rt_dst_alloc(net->loopback_dev,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
+ IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
if (!rth)
goto e_nobufs;
@@ -1593,6 +1617,9 @@ local_input:
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
+ if (do_cache)
+ rt_cache_route(&FIB_RES_NH(res), rth);
+set_and_out:
skb_dst_set(skb, &rth->dst);
err = 0;
goto out;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 11/15] ipv4: Kill FLOWI_FLAG_RT_NOCACHE and associated code.
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/flow.h | 1 -
include/net/inet_connection_sock.h | 3 +--
net/dccp/ipv4.c | 2 +-
net/ipv4/inet_connection_sock.c | 5 +----
net/ipv4/route.c | 3 ---
net/ipv4/tcp_ipv4.c | 4 ++--
6 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/include/net/flow.h b/include/net/flow.h
index ce9cb76..e1dd508 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -21,7 +21,6 @@ struct flowi_common {
__u8 flowic_flags;
#define FLOWI_FLAG_ANYSRC 0x01
#define FLOWI_FLAG_CAN_SLEEP 0x02
-#define FLOWI_FLAG_RT_NOCACHE 0x04
__u32 flowic_secid;
};
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 2cf44b4..5ee66f5 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -250,8 +250,7 @@ extern int inet_csk_get_port(struct sock *sk, unsigned short snum);
extern struct dst_entry* inet_csk_route_req(struct sock *sk,
struct flowi4 *fl4,
- const struct request_sock *req,
- bool nocache);
+ const struct request_sock *req);
extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk,
struct sock *newsk,
const struct request_sock *req);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ab4f44c..25428d0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -508,7 +508,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
struct dst_entry *dst;
struct flowi4 fl4;
- dst = inet_csk_route_req(sk, &fl4, req, false);
+ dst = inet_csk_route_req(sk, &fl4, req);
if (dst == NULL)
goto out;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0a290d7..db0cf17 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -368,8 +368,7 @@ EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
struct dst_entry *inet_csk_route_req(struct sock *sk,
struct flowi4 *fl4,
- const struct request_sock *req,
- bool nocache)
+ const struct request_sock *req)
{
struct rtable *rt;
const struct inet_request_sock *ireq = inet_rsk(req);
@@ -377,8 +376,6 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct net *net = sock_net(sk);
int flags = inet_sk_flowi_flags(sk);
- if (nocache)
- flags |= FLOWI_FLAG_RT_NOCACHE;
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b2f5c33..d4a3c6e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1802,9 +1802,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rt_set_nexthop(rth, res, fnhe, fi, type, 0);
- if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
- rth->dst.flags |= DST_NOCACHE;
-
return rth;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d9caf5c..de6969a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -824,7 +824,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct sk_buff * skb;
/* First, grab a route. */
- if (!dst && (dst = inet_csk_route_req(sk, &fl4, req, nocache)) == NULL)
+ if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
skb = tcp_make_synack(sk, dst, req, rvp);
@@ -1378,7 +1378,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
*/
if (tmp_opt.saw_tstamp &&
tcp_death_row.sysctl_tw_recycle &&
- (dst = inet_csk_route_req(sk, &fl4, req, want_cookie)) != NULL &&
+ (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
fl4.daddr == saddr) {
if (!tcp_peer_is_proven(req, dst, true)) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
--
1.7.10.4
^ permalink raw reply related
* [PATCH 12/15] ipv4: Dirty less cache lines in route caching paths.
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
Don't bother incrementing dst->__use and setting dst->lastuse,
they are completely pointless and just slow things down.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
net/ipv4/route.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d4a3c6e..2190fc4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1409,7 +1409,7 @@ static int __mkroute_input(struct sk_buff *skb,
if (!(flags & RTCF_DIRECTSRC) && !itag) {
rth = FIB_RES_NH(*res).nh_rth_input;
if (rth) {
- dst_use(&rth->dst, jiffies);
+ dst_hold(&rth->dst);
goto out;
}
do_cache = true;
@@ -1585,7 +1585,7 @@ local_input:
if (!(flags & RTCF_DIRECTSRC) && !itag) {
rth = FIB_RES_NH(res).nh_rth_input;
if (rth) {
- dst_use(&rth->dst, jiffies);
+ dst_hold(&rth->dst);
goto set_and_out;
}
do_cache = true;
@@ -1755,7 +1755,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
if (!fnhe) {
rth = FIB_RES_NH(*res).nh_rth_output;
if (rth) {
- dst_use(&rth->dst, jiffies);
+ dst_hold(&rth->dst);
return rth;
}
}
--
1.7.10.4
^ permalink raw reply related
* [PATCH 13/15] ipv4: Kill rt->rt_oif
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
Never actually used.
It was being set on output routes to the original OIF specified in the
flow key used for the lookup.
But the only user was in ipmr_rt_fib_lookup() which always runs on an
input route.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 1 -
net/ipv4/ipmr.c | 1 -
net/ipv4/route.c | 5 -----
net/ipv4/xfrm4_policy.c | 1 -
4 files changed, 8 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index 6d111bc..ee3bf84 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -50,7 +50,6 @@ struct rtable {
int rt_route_iif;
int rt_iif;
- int rt_oif;
/* Info on neighbour */
__be32 rt_gateway;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index eee3bf6..fa75f73 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1795,7 +1795,6 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowi4_tos = RT_TOS(iph->tos),
- .flowi4_oif = rt->rt_oif,
.flowi4_iif = rt->rt_iif,
.flowi4_mark = skb->mark,
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2190fc4..4da374c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1298,7 +1298,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_type = RTN_MULTICAST;
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
- rth->rt_oif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
rth->fi = NULL;
@@ -1429,7 +1428,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_type = res->type;
rth->rt_route_iif = in_dev->dev->ifindex;
rth->rt_iif = in_dev->dev->ifindex;
- rth->rt_oif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
rth->fi = NULL;
@@ -1608,7 +1606,6 @@ local_input:
rth->rt_type = res.type;
rth->rt_route_iif = dev->ifindex;
rth->rt_iif = dev->ifindex;
- rth->rt_oif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
rth->fi = NULL;
@@ -1774,7 +1771,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_type = type;
rth->rt_route_iif = 0;
rth->rt_iif = orig_oif ? : dev_out->ifindex;
- rth->rt_oif = orig_oif;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
rth->fi = NULL;
@@ -2050,7 +2046,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_route_iif = ort->rt_route_iif;
rt->rt_iif = ort->rt_iif;
- rt->rt_oif = ort->rt_oif;
rt->rt_pmtu = ort->rt_pmtu;
rt->rt_genid = rt_genid(net);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6074b69..3c99b4c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -81,7 +81,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
- xdst->u.rt.rt_oif = fl4->flowi4_oif;
xdst->u.dst.dev = dev;
dev_hold(dev);
--
1.7.10.4
^ permalink raw reply related
* [PATCH 14/15] ipv4: Turn rt->rt_route_iif into rt->rt_is_input.
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
That is this value's only use, as a boolean to indicate whether
a route is an input route or not.
So implement it that way, using a u16 gap present in the struct
already.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 6 +++---
net/ipv4/route.c | 10 +++++-----
net/ipv4/xfrm4_policy.c | 2 +-
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index ee3bf84..f3ef18a 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -47,8 +47,8 @@ struct rtable {
int rt_genid;
unsigned int rt_flags;
__u16 rt_type;
+ __u16 rt_is_input;
- int rt_route_iif;
int rt_iif;
/* Info on neighbour */
@@ -61,12 +61,12 @@ struct rtable {
static inline bool rt_is_input_route(const struct rtable *rt)
{
- return rt->rt_route_iif != 0;
+ return rt->rt_is_input != 0;
}
static inline bool rt_is_output_route(const struct rtable *rt)
{
- return rt->rt_route_iif == 0;
+ return rt->rt_is_input == 0;
}
struct ip_rt_acct {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4da374c..ee35047 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1296,7 +1296,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_genid = rt_genid(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
- rth->rt_route_iif = dev->ifindex;
+ rth->rt_is_input= 1;
rth->rt_iif = dev->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
@@ -1426,7 +1426,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
rth->rt_flags = flags;
rth->rt_type = res->type;
- rth->rt_route_iif = in_dev->dev->ifindex;
+ rth->rt_is_input = 1;
rth->rt_iif = in_dev->dev->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
@@ -1604,7 +1604,7 @@ local_input:
rth->rt_genid = rt_genid(net);
rth->rt_flags = flags|RTCF_LOCAL;
rth->rt_type = res.type;
- rth->rt_route_iif = dev->ifindex;
+ rth->rt_is_input = 1;
rth->rt_iif = dev->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
@@ -1769,7 +1769,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_genid = rt_genid(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
- rth->rt_route_iif = 0;
+ rth->rt_is_input = 0;
rth->rt_iif = orig_oif ? : dev_out->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
@@ -2044,7 +2044,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
if (new->dev)
dev_hold(new->dev);
- rt->rt_route_iif = ort->rt_route_iif;
+ rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 3c99b4c..c628184 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -79,7 +79,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
struct rtable *rt = (struct rtable *)xdst->route;
const struct flowi4 *fl4 = &fl->u.ip4;
- xdst->u.rt.rt_route_iif = fl4->flowi4_iif;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
xdst->u.dst.dev = dev;
@@ -87,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
+ xdst->u.rt.rt_is_input = rt->rt_is_input;
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
--
1.7.10.4
^ permalink raw reply related
* [PATCH 15/15] ipv4: Kill rt->fi
From: David Miller @ 2012-07-18 18:24 UTC (permalink / raw)
To: netdev
It's not really needed.
We only grabbed a reference to the fib_info for the sake of fib_info
local metrics.
However, fib_info objects are freed using RCU, and therefore so are
their private metrics (if any).
We would have triggered a route cache flush if we eliminated a
reference to a fib_info object in the routing tables.
Therefore, any existing cached routes will first check and see that
they have been invalidated before an errant reference to these
metric values would occur.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/net/route.h | 1 -
net/ipv4/route.c | 32 +-------------------------------
2 files changed, 1 insertion(+), 32 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index f3ef18a..665c9ce 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -56,7 +56,6 @@ struct rtable {
/* Miscellaneous cached information */
u32 rt_pmtu;
- struct fib_info *fi; /* for client ref to shared metrics */
};
static inline bool rt_is_input_route(const struct rtable *rt)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ee35047..34be3f2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -141,7 +141,6 @@ static int ip_rt_min_advmss __read_mostly = 256;
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
-static void ipv4_dst_destroy(struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -171,7 +170,6 @@ static struct dst_ops ipv4_dst_ops = {
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
- .destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
@@ -1026,17 +1024,6 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
return dst;
}
-static void ipv4_dst_destroy(struct dst_entry *dst)
-{
- struct rtable *rt = (struct rtable *) dst;
-
- if (rt->fi) {
- fib_info_put(rt->fi);
- rt->fi = NULL;
- }
-}
-
-
static void ipv4_link_failure(struct sk_buff *skb)
{
struct rtable *rt;
@@ -1151,15 +1138,6 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu;
}
-static void rt_init_metrics(struct rtable *rt, struct fib_info *fi)
-{
- if (fi->fib_metrics != (u32 *) dst_default_metrics) {
- rt->fi = fi;
- atomic_inc(&fi->fib_clntref);
- }
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
-}
-
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -1227,7 +1205,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res,
rt->rt_gateway = nh->nh_gw;
if (unlikely(fnhe))
rt_bind_exception(rt, fnhe);
- rt_init_metrics(rt, fi);
+ dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1300,7 +1278,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = dev->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
- rth->fi = NULL;
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1430,7 +1407,6 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = in_dev->dev->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
- rth->fi = NULL;
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1608,7 +1584,6 @@ local_input:
rth->rt_iif = dev->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
- rth->fi = NULL;
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
@@ -1773,7 +1748,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : dev_out->ifindex;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
- rth->fi = NULL;
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2018,7 +1992,6 @@ static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
static struct dst_ops ipv4_dst_blackhole_ops = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
- .destroy = ipv4_dst_destroy,
.check = ipv4_blackhole_dst_check,
.mtu = ipv4_blackhole_mtu,
.default_advmss = ipv4_default_advmss,
@@ -2052,9 +2025,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
- rt->fi = ort->fi;
- if (rt->fi)
- atomic_inc(&rt->fi->fib_clntref);
dst_free(new);
}
--
1.7.10.4
^ permalink raw reply related
* Re: [patch net-next] team: refine IFF_XMIT_DST_RELEASE capability
From: David Miller @ 2012-07-18 18:28 UTC (permalink / raw)
To: eric.dumazet; +Cc: jiri, netdev, edumazet
In-Reply-To: <1342635454.2626.3337.camel@edumazet-glaptop>
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 18 Jul 2012 20:17:34 +0200
> On Wed, 2012-07-18 at 19:39 +0200, Jiri Pirko wrote:
>> Cloned patch of Eric Dumazet for bonding.
>>
>> Some workloads greatly benefit from the IFF_XMIT_DST_RELEASE capability
>> on the output net device, which avoids dirtying the dst refcount.
>>
>> team currently disables IFF_XMIT_DST_RELEASE unconditionally.
>>
>> If all ports have the IFF_XMIT_DST_RELEASE bit set, then
>> team dev can also have it in its priv_flags.
>>
>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>> ---
>> drivers/net/team/team.c | 5 +++++
>> 1 file changed, 5 insertions(+)
>
> Acked-by: Eric Dumazet <edumazet@google.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net-next v4] ipv6: add ipv6_addr_hash() helper
From: David Miller @ 2012-07-18 18:29 UTC (permalink / raw)
To: eric.dumazet; +Cc: joe, netdev, andrewmcgr, dave.taht, therbert
In-Reply-To: <1342635072.2626.3322.camel@edumazet-glaptop>
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 18 Jul 2012 20:11:12 +0200
> From: Eric Dumazet <edumazet@google.com>
>
> Introduce ipv6_addr_hash() helper doing a XOR on all bits
> of an IPv6 address, with an optimized x86_64 version.
>
> Use it in flow dissector, as suggested by Andrew McGregor,
> to reduce hash collision probabilities in fq_codel (and other
> users of flow dissector)
>
> Use it in ip6_tunnel.c and use more bit shuffling, as suggested
> by David Laight, as existing hash was ignoring most of them.
>
> Use it in sunrpc and use more bit shuffling, using hash_32().
>
> Use it in net/ipv6/addrconf.c, using hash_32() as well.
>
> As a cleanup, use it in net/ipv4/tcp_metrics.c
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Andrew McGregor <andrewmcgr@gmail.com>
Applied, thanks.
> v4: net/ipv6/addrconf.c part, sorry again David
The more you test my routing cache removal patches, the
more you will be forgiven :-))))))
^ permalink raw reply
* Re: That's pretty much it for 3.5.0
From: Rustad, Mark D @ 2012-07-18 18:31 UTC (permalink / raw)
To: Eric Dumazet
Cc: Neil Horman, Fastabend, John R,
<h@hmsreliant.think-freely.org>, David Miller,
<netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
<linux-wireless-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
<netfilter-devel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
In-Reply-To: <1342634139.2626.3281.camel@edumazet-glaptop>
On Jul 18, 2012, at 10:55 AM, Eric Dumazet wrote:
> On Wed, 2012-07-18 at 17:36 +0000, Rustad, Mark D wrote:
>>
>> The following change simply statically initializes init_net.dev_base_head. I copied and pasted it into the email, so this rendering may not work, but I can send it if this approach looks reasonable. I have verified that it resolves the issue above.
>>
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index 0f28a9e..db1ba61 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -6283,8 +6283,6 @@ static struct hlist_head *netdev_create_hash(void)
>> /* Initialize per network namespace state */
>> static int __net_init netdev_init(struct net *net)
>> {
>> - INIT_LIST_HEAD(&net->dev_base_head);
>> -
>
> if (net != &init_net)
> INIT_LIST_HEAD(&net->dev_base_head);
Ooooh. Good catch.
>> net->dev_name_head = netdev_create_hash();
>> if (net->dev_name_head == NULL)
>> goto err_name;
>> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
>> index dddbacb..42f1e1c 100644
>> --- a/net/core/net_namespace.c
>> +++ b/net/core/net_namespace.c
>> @@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);
>> LIST_HEAD(net_namespace_list);
>> EXPORT_SYMBOL_GPL(net_namespace_list);
>>
>> -struct net init_net;
>> +struct net init_net = {
>> + .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
>> +};
>> EXPORT_SYMBOL(init_net);
>>
>> #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
If this looks like a good change, I can send the patch. Is there any concern about init_net going from bss to data?
--
Mark Rustad, LAN Access Division, Intel Corporation
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH net-next 2/7] sfc: Add channel specific receive_skb handler and post_remove callback
From: David Miller @ 2012-07-18 18:32 UTC (permalink / raw)
To: bhutchings; +Cc: netdev, linux-net-drivers, ajackson, richardcochran
In-Reply-To: <1342635600.2617.54.camel@bwh-desktop.uk.solarflarecom.com>
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 18 Jul 2012 19:20:00 +0100
> + void (*receive_skb)(struct efx_channel *, struct sk_buff *);
This looks to me like a conduit for proprietary features implemented
in a binary-only blob.
I understand how you're using it here for PTP, but you're really opening
the door for things I really wouldn't be very happy about.
^ permalink raw reply
* Re: That's pretty much it for 3.5.0
From: David Miller @ 2012-07-18 18:33 UTC (permalink / raw)
To: mark.d.rustad-ral2JQCrhuEAvxtiuMwx3w
Cc: eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w,
nhorman-2XuSBdqkA4R54TAoqtyWWQ,
john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w, h,
netdev-u79uwXL29TY76Z2rM5mHXA,
linux-wireless-u79uwXL29TY76Z2rM5mHXA,
netfilter-devel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <FEB6B45E-1CCF-4CBC-AEB7-21D2088E175C-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
From: "Rustad, Mark D" <mark.d.rustad-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Date: Wed, 18 Jul 2012 18:31:31 +0000
> If this looks like a good change, I can send the patch. Is there any
> concern about init_net going from bss to data?
There is no such concern, I like your change a lot.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH 0/3 v2] net: various tilegx networking fixes
From: David Miller @ 2012-07-18 18:36 UTC (permalink / raw)
To: cmetcalf; +Cc: netdev, linux-kernel
In-Reply-To: <201207181650.q6IGodZ7007565@lab-41.internal.tilera.com>
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Sun, 1 Jul 2012 14:43:47 -0400
> The tree is at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile.git net
>
> Chris Metcalf (3):
> net: tilegx driver bugfix (be explicit about percpu queue number)
> tilegx net driver: handle payload data not in frags
> tilegx net: use eth_hw_addr_random(), not random_ether_addr()
These changes look fine, but when I pull from your tree I get tons of
totally unrelated stuff and a merge conflict in this driver.
Can you put together a clean pull against net-next?
Thanks.
^ permalink raw reply
* Re: [PATCH net-next V1 5/9] net/eipoib: Add ethtool file support
From: Ben Hutchings @ 2012-07-18 18:37 UTC (permalink / raw)
To: Or Gerlitz; +Cc: davem, roland, netdev, ali, sean.hefty, shlomop, Erez Shitrit
In-Reply-To: <1342609202-32427-6-git-send-email-ogerlitz@mellanox.com>
On Wed, 2012-07-18 at 13:59 +0300, Or Gerlitz wrote:
> From: Erez Shitrit <erezsh@mellanox.co.il>
>
> Via ethtool the driver describes its version, ABI version, on what PIF
> interface it runs and various statistics.
[...]
> +static const char parent_strings[][ETH_GSTRING_LEN] = {
> + /* private statistics */
> + "tx_parent_dropped",
> + "tx_vif_miss",
> + "tx_neigh_miss",
> + "tx_vlan",
> + "tx_shared",
> + "tx_proto_errors",
> + "tx_skb_errors",
> + "tx_slave_err",
> +
> + "rx_parent_dropped",
> + "rx_vif_miss",
> + "rx_neigh_miss",
> + "rx_vlan",
> + "rx_shared",
> + "rx_proto_errors",
> + "rx_skb_errors",
> + "rx_slave_err",
> +#define PORT_STATS_LEN (8 * 2)
> +};
> +
> +#define PARENT_STATS_LEN (sizeof(parent_strings) / ETH_GSTRING_LEN)
> +
> +static void parent_get_strings(struct net_device *parent_dev,
> + uint32_t stringset, uint8_t *data)
> +{
> + int index = 0, stats_off = 0, i;
> +
> + if (stringset != ETH_SS_STATS)
> + return;
> +
> + for (i = 0; i < PORT_STATS_LEN; i++)
> + strcpy(data + (index++) * ETH_GSTRING_LEN,
> + parent_strings[i + stats_off]);
> +
> + stats_off += PORT_STATS_LEN;
This is a very long-winded way to write:
memcpy(data, parent_strings, sizeof(parent_strings));
> +
> +}
> +
> +static void parent_get_ethtool_stats(struct net_device *parent_dev,
> + struct ethtool_stats *stats,
> + uint64_t *data)
> +{
> + struct parent *parent = netdev_priv(parent_dev);
> + int index = 0, i;
> +
> + read_lock_bh(&parent->lock);
> +
> + for (i = 0; i < PORT_STATS_LEN; i++)
> + data[index++] = ((unsigned long *) &parent->port_stats)[i];
> +
> + read_unlock_bh(&parent->lock);
> +}
> +
> +static int parent_get_sset_count(struct net_device *parent_dev, int sset)
> +{
> + switch (sset) {
> + case ETH_SS_STATS:
> + return PARENT_STATS_LEN;
> + default:
> + return -EOPNOTSUPP;
> + }
> +}
[...]
I get the feeling you've removed some code with unifdef; the result
looks really weird, with PORT_STATS_LEN and PARENT_STATS_LEN used
inconsistently.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [PATCH net-next V1 1/9] IB/ipoib: Add support for clones / multiple childs on the same partition
From: David Miller @ 2012-07-18 18:38 UTC (permalink / raw)
To: ogerlitz; +Cc: roland, netdev, ali, sean.hefty, shlomop, erezsh
In-Reply-To: <1342609202-32427-2-git-send-email-ogerlitz@mellanox.com>
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 18 Jul 2012 13:59:54 +0300
> All sorts of childs are still created/deleted through sysfs, in a
> similar manner to the way legacy child interfaces are.
Network device instantiation of this type is the domain of
rtnl_link_ops rather than ugly sysfs interfaces.
^ permalink raw reply
* Re: [PATCH net-next 0/4] net/mlx4_en: Add accelerated RFS support
From: David Miller @ 2012-07-18 18:41 UTC (permalink / raw)
To: ogerlitz; +Cc: roland, netdev, oren, yevgenyp, amirv
In-Reply-To: <1342621162-18498-1-git-send-email-ogerlitz@mellanox.com>
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Wed, 18 Jul 2012 17:19:18 +0300
> This series from Amir Vadai adds support for Accelerated RFS
> to the mlx4_en Ethernet driver.
>
> The code uses the Accelerated RFS infrastructure and HW flow steering
> to keep CPU affinity of rx interrupts and applications per TCP stream.
>
> To do so, we had to add a little protection to cpu_rmap.h against double
> inclusion. We also added linking between CPUs and IRQs using rmap in the
> mlx4_core driver.
Please use CONFIG_RFS_ACCEL consistently to protect this feature
in your driver sources.
Using CPU_RMAP in a few places is inconsistent, and not what other
drivers do.
Thanks.
^ permalink raw reply
* [PATCH] net: cgroup: null ptr dereference in netprio cgroup during init
From: John Fastabend @ 2012-07-18 18:27 UTC (permalink / raw)
To: davem, gaofeng, nhorman; +Cc: mark.d.rustad, netdev, eric.dumazet
When the netprio cgroup is built into the kernel, cgroup_init will call
cgrp_create, which eventually calls update_netdev_tables. This happens
before do_initcalls(), so a null pointer dereference occurs
on init_net.
This patch adds a check on init_net.count to verify the structure
has been initialized. The failure was introduced here,
commit ef209f15980360f6945873df3cd710c5f62f2a3e
Author: Gao feng <gaofeng@cn.fujitsu.com>
Date: Wed Jul 11 21:50:15 2012 +0000
net: cgroup: fix access the unallocated memory in netprio cgroup
Tested with ping with netprio_cgroup as a module and built in.
[ 0.256451] Initializing cgroup subsys net_prio
[ 0.269948] BUG: unable to handle kernel NULL pointer dereference at
0000000000000698
[ 0.293303] IP: [<ffffffff81512e37>] cgrp_create+0x107/0x1c0
[ 0.310175] PGD 0
[ 0.316157] Oops: 0000 [#1] SMP
[ 0.325775] CPU 0
[ 0.331227] Modules linked in:
[ 0.340846]
[ 0.345264] Pid: 0, comm: swapper/0 Not tainted 3.5.0-rc7+ #1 AMD Dinar/Dinar
[ 0.366555] RIP: 0010:[<ffffffff81512e37>] [<ffffffff81512e37>]
cgrp_create+0x107/0x1c0
[ 0.390681] RSP: 0000:ffffffff81c01ea8 EFLAGS: 00010213
[ 0.406501] RAX: 0000000000000000 RBX: ffffffffffffff10 RCX: 0000000000000000
[ 0.427764] RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffffffff81c9d840
[ 0.449026] RBP: ffffffff81c01ed8 R08: 00000000000164e0 R09: 0000000000000000
[ 0.470289] R10: ffff8804278303c0 R11: 0000000000000000 R12: 0000000000000001
[ 0.491553] R13: ffff8804278303c0 R14: ffff881036fd0700 R15: 0000000000000000
[ 0.512819] FS: 0000000000000000(0000) GS:ffff880427c00000(0000)
knlGS:0000000000000000
[ 0.536932] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 0.554049] CR2: 0000000000000698 CR3: 0000000001c0b000 CR4: 00000000000406b0
[ 0.575311] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 0.596574] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 0.617838] Process swapper/0 (pid: 0, threadinfo ffffffff81c00000, task
ffffffff81c13420)
[ 0.642471] Stack:
[ 0.648442] ffffffff81c01eb8 ffffffff81c9f320 ffffffff81c9f320
ffffffff81c9f320
[ 0.670522] ffffffff81c9f320 ffffffff81d482c0 ffffffff81c01ef8
ffffffff81d10397
[ 0.692604] ffffffff81e99790 0000000000000048 ffffffff81c01f18
ffffffff81d1062e
[ 0.714687] Call Trace:
[ 0.721960] [<ffffffff81d10397>] cgroup_init_subsys+0x51/0xdf
[ 0.739337] [<ffffffff81d1062e>] cgroup_init+0x36/0x119
[ 0.755160] [<ffffffff81cf5c02>] start_kernel+0x38f/0x3c4
[ 0.771501] [<ffffffff81cf5672>] ? repair_env_string+0x5e/0x5e
[ 0.789138] [<ffffffff81cf5356>] x86_64_start_reservations+0x131/0x135
[ 0.808849] [<ffffffff81cf545a>] x86_64_start_kernel+0x100/0x10f
Reported-by: Mark Rustad <mark.d.rustad@intel.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
net/core/net_namespace.c | 4 +++-
net/core/netprio_cgroup.c | 3 +++
2 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dddbacb..0d37c94 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,7 +27,9 @@ static DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);
-struct net init_net;
+struct net init_net = {
+ .count = ATOMIC_INIT(0),
+ };
EXPORT_SYMBOL(init_net);
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index b2e9caa..e9fd7fd 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -116,6 +116,9 @@ static int update_netdev_tables(void)
u32 max_len;
struct netprio_map *map;
+ if (!atomic_read(&init_net.count))
+ return ret;
+
rtnl_lock();
max_len = atomic_read(&max_prioidx) + 1;
for_each_netdev(&init_net, dev) {
^ permalink raw reply related
* Re: [PATCH net-next 2/7] sfc: Add channel specific receive_skb handler and post_remove callback
From: Ben Hutchings @ 2012-07-18 18:42 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-net-drivers, ajackson, richardcochran
In-Reply-To: <20120718.113256.279646201702165485.davem@davemloft.net>
On Wed, 2012-07-18 at 11:32 -0700, David Miller wrote:
> From: Ben Hutchings <bhutchings@solarflare.com>
> Date: Wed, 18 Jul 2012 19:20:00 +0100
>
> > + void (*receive_skb)(struct efx_channel *, struct sk_buff *);
>
> This looks to me like a conduit for proprietary features implemented
> in a binary-only blob.
>
> I understand how you're using it here for PTP, but you're really opening
> the door for things I really wouldn't be very happy about.
Through all the functions that, er, aren't exported?
Even in the out-of-tree version of sfc there is no receive path hook any
more; I converted the client driver that wanted it (which is under GNU
GPL, thank you very much) to use netfilter.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox