From mboxrd@z Thu Jan 1 00:00:00 1970 From: Pavel Emelyanov Subject: Re: 2.6.25-rc: Null dereference in ip_defrag Date: Mon, 17 Mar 2008 20:40:07 +0300 Message-ID: <47DEACF7.10202@openvz.org> References: <20080317170008.GA30338@linuxace.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Cc: netdev@vger.kernel.org To: Phil Oester Return-path: Received: from sacred.ru ([62.205.161.221]:49195 "EHLO sacred.ru" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751697AbYCQRkK (ORCPT ); Mon, 17 Mar 2008 13:40:10 -0400 In-Reply-To: <20080317170008.GA30338@linuxace.com> Sender: netdev-owner@vger.kernel.org List-ID: Phil Oester wrote: > Been seeing occasional panics in my testing of 2.6.25-rc in ip_defrag. > Offending line in ip_defrag is here: > > net = skb->dev->nd_net Yikes :( > where dev is NULL. Bisected the problem down to commit > ac18e7509e7df327e30d6e073a787d922eaf211d ([NETNS][FRAGS]: Make the > inet_frag_queue lookup work in namespaces). > > To prevent panic, I added the below patch (whitespace damaged): > > --- a/net/ipv4/ip_fragment.c > +++ b/net/ipv4/ip_fragment.c > @@ -568,6 +568,14 @@ int ip_defrag(struct sk_buff *skb, u32 user) > > IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); > > + if (!skb->dev) { > + printk("ip_defrag_bug: %u.%u.%u.%u -> %u.%u.%u.%u\n", > + NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr)); > + WARN_ON(1); > + kfree_skb(skb); > + return -ENOMEM; > + } > + > > And the packets causing the problem are all multicast fragments being > generated by Quagga's OSPFD (see debug output below). Tried manually generating > some multicast fragments with iperf, but couldn't reproduce it. > > Any ideas? This is the same as the problem fixed here: commit 4136cd523eb0c0bd53173e16fd7406d31d05824f [IPV4]: route: fix crash ip_route_input The sk_buff does not have a valid dev sometimes in ip_defrag() :( and you have to setup conntrack rules to make packets go this way. But unlike the above problem we cannot even trust the skb->dst to be not NULL... Can you check with this patch, please (untested, but should work)? diff --git a/include/net/ip.h b/include/net/ip.h index 9f50d4f..fc6b650 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -330,7 +330,8 @@ enum ip_defrag_users IP_DEFRAG_VS_FWD }; -int ip_defrag(struct sk_buff *skb, u32 user); +struct net; +int ip_defrag(struct net *net, struct sk_buff *skb, u32 user); int ip_frag_mem(struct net *net); int ip_frag_nqueues(struct net *net); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a2e92f9..aad6652 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -561,14 +561,12 @@ out_fail: } /* Process an incoming IP datagram fragment. */ -int ip_defrag(struct sk_buff *skb, u32 user) +int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct ipq *qp; - struct net *net; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); - net = skb->dev->nd_net; /* Start by cleaning up the memory. */ if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) ip_evictor(net); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 6563139..25c6846 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -172,7 +172,8 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { + if (ip_defrag(skb->dev->nd_net, skb, + IP_DEFRAG_CALL_RA_CHAIN)) { read_unlock(&ip_ra_lock); return 1; } @@ -256,7 +257,7 @@ int ip_local_deliver(struct sk_buff *skb) */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) + if (ip_defrag(skb->dev->nd_net, skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 963981a..d2c3c0f 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -506,7 +506,7 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - int err = ip_defrag(skb, user); + int err = ip_defrag(&init_net, skb, user); if (!err) ip_send_check(ip_hdr(skb)); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index a65b845..00a0f63 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -58,14 +58,15 @@ static int ipv4_print_tuple(struct seq_file *s, } /* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, + u_int32_t user) { int err; skb_orphan(skb); local_bh_disable(); - err = ip_defrag(skb, user); + err = ip_defrag(net, skb, user); local_bh_enable(); if (!err) @@ -138,6 +139,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + u_int32_t user; + struct net *net; + /* Previously seen (loopback)? Ignore. Do this before fragment check. */ if (skb->nfct) @@ -145,10 +149,14 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, /* Gather fragments. */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) + if (hooknum == NF_INET_PRE_ROUTING) { + user = IP_DEFRAG_CONNTRACK_IN; + net = in->nd_net; + } else { + user = IP_DEFRAG_CONNTRACK_OUT; + net = out->nd_net; + } + if (nf_ct_ipv4_gather_frags(net, skb, user)) return NF_STOLEN; } return NF_ACCEPT; > Phil > > > ip_defrag_bug: 10.253.13.122 -> 224.0.0.5 > ------------[ cut here ]------------ > WARNING: at net/ipv4/ip_fragment.c:574 ip_defrag+0x9d/0xa0d() > Pid: 1662, comm: ospfd Not tainted 2.6.25-rc4 #4 > > Call Trace: > [] warn_on_slowpath+0x53/0x66 > [] ? printk+0x67/0x69 > [] ? skb_release_data+0xa8/0xad > [] ? __kfree_skb+0x74/0x78 > [] ip_defrag+0x9d/0xa0d > [] ? sock_def_write_space+0x18/0x89 > [] ipv4_conntrack_defrag+0x67/0x96 > [] nf_iterate+0x41/0x81 > [] ? dst_output+0x0/0x10 > [] nf_hook_slow+0x5e/0xbe > [] ? dst_output+0x0/0x10 > [] raw_sendmsg+0x586/0x758 > [] inet_sendmsg+0x46/0x53 > [] sock_sendmsg+0xdf/0xf8 > [] ? _spin_lock_bh+0x11/0x29 > [] ? release_sock+0x9b/0xa3 > [] ? autoremove_wake_function+0x0/0x38 > [] ? move_addr_to_kernel+0x25/0x35 > [] ? verify_compat_iovec+0x60/0x9e > [] sys_sendmsg+0x1e1/0x253 > [] ? getrusage+0x1c9/0x1e6 > [] ? thread_return+0x3d/0x9c > [] compat_sys_sendmsg+0xf/0x11 > [] compat_sys_socketcall+0x13f/0x158 > [] sysenter_do_call+0x1b/0x66 > > ---[ end trace 48218d00aa061d3c ]--- > -- > To unsubscribe from this list: send the line "unsubscribe netdev" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >