Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH for 2.6.32 (untested)] netns: Add quota for number of NET_NS instances.
From: Eric W. Biederman @ 2011-11-20 23:13 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: netdev
In-Reply-To: <201111201622.FDJ51567.VLFHQFMFOOSOtJ@I-love.SAKURA.ne.jp>

Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:

> In order to solve below problems, can we add sysctl variable for
> restricting number of NET_NS instances?

I don't have any particular problems with the patch, but I don't think it
will result in a working system that is easy to keep working.  Tuning
static limits can be fickle.

Simply throttling the number of processes, as anything reasonable will do,
should keep the problem in check.  The practical issue is that we have
a huge buildup of network namespaces that don't get cleaned up.

My inclination is that the practical fix in this case is for someone,
during network namespace allocation, to take a look at the cleanup_list,
see that there is ongoing cleanup activity, and wait until at least one
network namespace has been cleaned up.  Perhaps by creating a work struct
and waiting for it to cycle through the netns workqueue.

That should throttle network namespace creation to the same speed as
network namespace deletion and prevent the problem of too many
dead network namespaces building up and taking resources.

Eric


> --------------------------------------------------
> [PATCH for 2.6.32 (untested)] netns: Add quota for number of NET_NS instances.
>
> CONFIG_NET_NS support in 2.6.32 has a problem that leads to OOM killer when
> clone(CLONE_NEWNET) is called instantly.
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/720095
> But disabling CONFIG_NET_NS broke lxc containers.
> https://bugs.launchpad.net/ubuntu/+source/linux/+bug/790863
>
> This patch introduces /proc/sys/net/core/netns_max interface that limits
> max number of network namespace instances.
>
> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
> ---
>  include/net/sock.h         |    4 ++++
>  net/core/net_namespace.c   |    9 +++++++++
>  net/core/sysctl_net_core.c |   10 ++++++++++
>  3 files changed, 23 insertions(+)
>
> --- linux-2.6.32.48.orig/include/net/sock.h
> +++ linux-2.6.32.48/include/net/sock.h
> @@ -1598,4 +1598,8 @@ extern int sysctl_optmem_max;
>  extern __u32 sysctl_wmem_default;
>  extern __u32 sysctl_rmem_default;
>  
> +#ifdef CONFIG_NET_NS
> +extern int max_netns_count;
> +#endif
> +
>  #endif	/* _SOCK_H */
> --- linux-2.6.32.48.orig/net/core/net_namespace.c
> +++ linux-2.6.32.48/net/core/net_namespace.c
> @@ -81,12 +81,18 @@ static struct net_generic *net_alloc_gen
>  #ifdef CONFIG_NET_NS
>  static struct kmem_cache *net_cachep;
>  static struct workqueue_struct *netns_wq;
> +static atomic_t used_netns_count = ATOMIC_INIT(0);
> +unsigned int max_netns_count;
>  
>  static struct net *net_alloc(void)
>  {
>  	struct net *net = NULL;
>  	struct net_generic *ng;
>  
> +	atomic_inc(&used_netns_count);
> +	if (atomic_read(&used_netns_count) > max_netns_count)
> +		goto out;
> +
>  	ng = net_alloc_generic();
>  	if (!ng)
>  		goto out;
> @@ -96,7 +102,9 @@ static struct net *net_alloc(void)
>  		goto out_free;
>  
>  	rcu_assign_pointer(net->gen, ng);
> +	return net;
>  out:
> +	atomic_dec(&used_netns_count);
>  	return net;
>  
>  out_free:
> @@ -115,6 +123,7 @@ static void net_free(struct net *net)
>  #endif
>  	kfree(net->gen);
>  	kmem_cache_free(net_cachep, net);
> +	atomic_dec(&used_netns_count);
>  }
>  
>  static struct net *net_create(void)
> --- linux-2.6.32.48.orig/net/core/sysctl_net_core.c
> +++ linux-2.6.32.48/net/core/sysctl_net_core.c
> @@ -89,6 +89,16 @@ static struct ctl_table net_core_table[]
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +#ifdef CONFIG_NET_NS
> +	{
> +		.ctl_name       = CTL_UNNUMBERED,
> +		.procname       = "netns_max",
> +		.data           = &max_netns_count,
> +		.maxlen         = sizeof(int),
> +		.mode           = 0644,
> +		.proc_handler   = proc_dointvec,
> +	},
> +#endif
>  #endif /* CONFIG_NET */
>  	{
>  		.ctl_name	= NET_CORE_BUDGET,

^ permalink raw reply

* Re: [rfc 00/18] slub: irqless/lockless slow allocation paths
From: David Rientjes @ 2011-11-20 23:32 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Lameter, David Miller, Pekka Enberg, Andi Kleen, tj,
	Metathronius Galabant, Matt Mackall, Adrian Drzewiecki,
	Shaohua Li, Alex Shi, linux-mm, netdev
In-Reply-To: <1321465534.4182.37.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On Wed, 16 Nov 2011, Eric Dumazet wrote:

> > Adding SLUB_STATS gives :
> > 
> > $ cd /sys/kernel/slab/skbuff_head_cache ; grep . *
> > aliases:6
> > align:8
> > grep: alloc_calls: Function not implemented
> > alloc_fastpath:89181782 C0=89173048 C1=1599 C2=1357 C3=2140 C4=802 C5=675 C6=638 C7=1523
> > alloc_from_partial:412658 C0=412658
> > alloc_node_mismatch:0
> > alloc_refill:593417 C0=593189 C1=19 C2=15 C3=24 C4=51 C5=18 C6=17 C7=84
> > alloc_slab:2831313 C0=2831285 C1=2 C2=2 C3=2 C4=2 C5=12 C6=4 C7=4
> > alloc_slowpath:4430371 C0=4430112 C1=20 C2=17 C3=25 C4=57 C5=31 C6=21 C7=88
> > cache_dma:0
> > cmpxchg_double_cpu_fail:0
> > cmpxchg_double_fail:1 C0=1
> > cpu_partial:30
> > cpu_partial_alloc:592991 C0=592981 C2=1 C4=5 C5=2 C6=1 C7=1
> > cpu_partial_free:4429836 C0=592981 C1=25 C2=19 C3=23 C4=3836767 C5=6 C6=8 C7=7
> > cpuslab_flush:0
> > cpu_slabs:107
> > deactivate_bypass:3836954 C0=3836923 C1=1 C2=2 C3=1 C4=6 C5=13 C6=4 C7=4
> > deactivate_empty:2831168 C4=2831168
> > deactivate_full:0
> > deactivate_remote_frees:0
> > deactivate_to_head:0
> > deactivate_to_tail:0
> > destroy_by_rcu:0
> > free_add_partial:0
> > grep: free_calls: Function not implemented
> > free_fastpath:21192924 C0=21186268 C1=1420 C2=1204 C3=1966 C4=572 C5=349 C6=380 C7=765
> > free_frozen:67988498 C0=516 C1=121 C2=85 C3=841 C4=67986468 C5=215 C6=76 C7=176
> > free_remove_partial:18 C4=18
> > free_slab:2831186 C4=2831186
> > free_slowpath:71825749 C0=609 C1=146 C2=104 C3=864 C4=71823538 C5=221 C6=84 C7=183
> > hwcache_align:0
> > min_partial:5
> > objects:2494
> > object_size:192
> > objects_partial:121
> > objs_per_slab:21
> > order:0
> > order_fallback:0
> > partial:14
> > poison:0
> > reclaim_account:0
> > red_zone:0
> > reserved:0
> > sanity_checks:0
> > slabs:127
> > slabs_cpu_partial:99(99) C1=25(25) C2=18(18) C3=23(23) C4=16(16) C5=4(4) C6=7(7) C7=6(6)
> > slab_size:192
> > store_user:0
> > total_objects:2667
> > trace:0
> > 
> 
> And the SLUB stats for the 2048 bytes slab is even worse : About every
> alloc/free is slow path
> 
> $ cd /sys/kernel/slab/:t-0002048 ; grep . *
> aliases:0
> align:8
> grep: alloc_calls: Function not implemented
> alloc_fastpath:8199220 C0=8196915 C1=306 C2=63 C3=297 C4=319 C5=550
> C6=722 C7=48
> alloc_from_partial:13931406 C0=13931401 C3=1 C5=4
> alloc_node_mismatch:0
> alloc_refill:70871657 C0=70871629 C1=2 C3=3 C4=9 C5=11 C6=3
> alloc_slab:1335 C0=1216 C1=17 C2=2 C3=15 C4=17 C5=22 C6=44 C7=2
> alloc_slowpath:155455299 C0=155455144 C1=18 C2=1 C3=21 C4=27 C5=40 C6=47
> C7=1

I certainly sympathize with your situation; these stats are even worse 
with netperf TCP_RR where slub regresses very heavily against slab.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* sky2 tx watchdog timeout with 1Gb speed
From: Milan Kocian @ 2011-11-20 23:21 UTC (permalink / raw)
  To: netdev

hi all,

I switched my home pc from 100Mb/s to 1000Mb/s and I see 
this warning below.

The original kernel was 2.6.39.4 then I tested 3.1.1 with the same
result. (self compiled 32bit vanilla). The workaround is to force 10/100 speed
on my new switch (hp).

lspci:

03:00.0 Ethernet controller: Marvell Technology Group Ltd. 88E8056 PCI-E Gigabit Ethernet Controller (rev 13)
        Subsystem: Giga-byte Technology Device e000
        Flags: bus master, fast devsel, latency 0, IRQ 45
        Memory at f5000000 (64-bit, non-prefetchable) [size=16K]
        I/O ports at 9000 [size=256]
        [virtual] Expansion ROM at 80300000 [disabled] [size=128K]
        Capabilities: [48] Power Management version 3
        Capabilities: [50] Vital Product Data
        Capabilities: [5c] MSI: Enable+ Count=1/1 Maskable- 64bit+
        Capabilities: [e0] Express Legacy Endpoint, MSI 00
        Capabilities: [100] Advanced Error Reporting
        Kernel driver in use: sky2


Nov 20 21:32:54 milu kernel: sky2 0000:03:00.0: eth0: Link is up at 1000 Mbps, full duplex, flow control both
Nov 20 21:35:29 milu kernel: ------------[ cut here ]------------
Nov 20 21:35:29 milu kernel: WARNING: at net/sched/sch_generic.c:255 dev_watchdog+0x1fa/0x206()
Nov 20 21:35:29 milu kernel: Hardware name: 965GM-S2
Nov 20 21:35:29 milu kernel: NETDEV WATCHDOG: eth0 (sky2): transmit queue 0 timed out
Nov 20 21:35:29 milu kernel: Modules linked in: parport_pc parport fuse nfsd ipv6 nfs lockd auth_rpcgss nfs_acl sunrpc usbhid snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_intel8x0 sg snd_ac97_codec sr_mod ac97_bus cdrom sky2 snd_pcm_oss snd_mixer_oss snd_pcm snd_seq_dummy snd_seq_oss intel_agp snd_seq_midi snd_rawmidi snd_seq_midi_event snd_seq snd_timer snd_seq_device snd bitrev i2c_i801 crc32 intel_gtt uhci_hcd i2c_core ehci_hcd soundcore usbcore agpgart evdev snd_page_alloc
Nov 20 21:35:29 milu kernel: Pid: 0, comm: swapper Not tainted 3.1.1 #2
Nov 20 21:35:29 milu kernel: Call Trace:
Nov 20 21:35:29 milu kernel: [<c102cd5d>] ? warn_slowpath_common+0x6c/0x94
Nov 20 21:35:29 milu kernel: [<c1254deb>] ? dev_watchdog+0x1fa/0x206
Nov 20 21:35:29 milu kernel: [<c1254deb>] ? dev_watchdog+0x1fa/0x206
Nov 20 21:35:29 milu kernel: [<c102ce0e>] ? warn_slowpath_fmt+0x33/0x37
Nov 20 21:35:29 milu kernel: [<c1254deb>] ? dev_watchdog+0x1fa/0x206
Nov 20 21:35:29 milu kernel: [<c1254bf1>] ? qdisc_reset+0x2d/0x2d
Nov 20 21:35:29 milu kernel: [<c1036434>] ? run_timer_softirq+0xc6/0x1c4
Nov 20 21:35:29 milu kernel: [<c1027e9b>] ? run_rebalance_domains+0x148/0x169
Nov 20 21:35:29 milu kernel: [<c103163b>] ? __do_softirq+0x6e/0xea
Nov 20 21:35:29 milu kernel: [<c10315cd>] ? remote_softirq_receive+0x11/0x11
Nov 20 21:35:29 milu kernel: <IRQ>  [<c1031906>] ? irq_exit+0x5b/0x67
Nov 20 21:35:29 milu kernel: [<c101631f>] ? smp_apic_timer_interrupt+0x51/0x81
Nov 20 21:35:29 milu kernel: [<c12ccd96>] ? apic_timer_interrupt+0x2a/0x30
Nov 20 21:35:29 milu kernel: [<c13f007b>] ? asus_hides_smbus_hostbridge+0xcb/0x249
Nov 20 21:35:29 milu kernel: [<c1008732>] ? mwait_idle+0x41/0x51
Nov 20 21:35:29 milu kernel: [<c10015d8>] ? cpu_idle+0x74/0x84
Nov 20 21:35:29 milu kernel: [<c13d6638>] ? start_kernel+0x28a/0x28f
Nov 20 21:35:29 milu kernel: [<c13d615e>] ? loglevel+0x2b/0x2b
Nov 20 21:35:29 milu kernel: ---[ end trace ef84175f674c7842 ]---
Nov 20 21:35:29 milu kernel: sky2 0000:03:00.0: eth0: tx timeout
Nov 20 21:35:29 milu kernel: sky2 0000:03:00.0: eth0: transmit ring 52 .. 30 report=52 done=52
Nov 20 21:35:32 milu kernel: sky2 0000:03:00.0: eth0: Link is up at 1000 Mbps, full duplex, flow control both
Nov 20 21:37:13 milu kernel: sky2 0000:03:00.0: eth0: tx timeout
Nov 20 21:37:13 milu kernel: sky2 0000:03:00.0: eth0: transmit ring 37 .. 15 report=37 done=37
Nov 20 21:37:16 milu kernel: sky2 0000:03:00.0: eth0: Link is up at 1000 Mbps, full duplex, flow control both

Any suggestions? As I said, it's a home machine, so I can test whatever you want :-).

best regards,

-- 
Milan Kocian

^ permalink raw reply

* MSL WINNING NOTIFICATION 2011                                             XVWJCSZDXM
From: Microsoft @ 2011-11-20 23:44 UTC (permalink / raw)


[-- Attachment #1: Type: text/plain, Size: 159 bytes --]

Att:

You have to urgently contact your claims agent to redeem your winning prize.

Microsoft Lottery
Managment Board
 
NXBHSSXKKJXQVEPVFMHZXEVPQIMUSQIYWILLHN

[-- Attachment #2: MSL-WINNING NOTIFICATION 2011.doc --]
[-- Type: application/msword, Size: 73216 bytes --]

^ permalink raw reply

* Recursive routing causes MTU collapse (was Re: Bug? GRE tunnel periodically won't transmit some packets)
From: Chris Siebenmann @ 2011-11-21  0:23 UTC (permalink / raw)
  To: Eric Dumazet, netdev; +Cc: cks
In-Reply-To: <20111110051649.505C8362D2@apps0.cs.toronto.edu>

 I believe I've identified the root cause of my GRE tunnel packet
transmission problems. The short summary is that I have a 'recursive'
routing, where the route for the tunnel endpoint can nominally be routed
over the tunnel itself. In current kernel versions, when a packet for
the tunnel endpoint is actually routed over the tunnel the path MTUs
determined for the endpoint and the tunnel both collapse down to very
small values.

 Here is the routing I have. First, the links themselves, for the DSL
PPPoE device and the GRE tunnel:

3: ppp0: <POINTOPOINT,MULTICAST,NOARP,UP,LOWER_UP> mtu 1492 qdisc pfifo_fast state UNKNOWN qlen 3
    link/ppp 
    inet 66.96.18.208 peer 66.96.31.6/32 scope global ppp0
5: extun: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1200 qdisc noqueue state UNKNOWN 
    link/gre 66.96.18.208 peer 128.100.3.58
    inet 128.100.3.52/32 scope global extun

(My IPSec policy forces GRE traffic between 128.100.3.58 and
66.96.18.208 to be encrypted in 'esp/tunnel' mode.)

 Now the recursion. To reach other machines on 128.100.3.0/24, I route
128.100.3.0/24 over the GRE tunnel:
	; ip route list match 128.100.3.51
	default dev ppp0  scope link
	128.100.3.0/24 dev extun  scope link

 I also have policy based routing set to force traffic with an IP
origin of 66.96.18.208 out over the PPP link and traffic with an
IP origin of 128.100.3.52 out the GRE tunnel.

 With this setup in place, if I do anything that tries to talk to
128.100.3.58 (such as ping or ssh) what I get is an immediate path
MTU collapse for the 66.96.18.208 -> 128.100.3.58 link used by the
GRE tunnel, ending when the path MTU for 66.96.18.208 -> 128.100.3.58
reaches 552 octets. At this point various things choke (I am guessing
because the GRE tunnel expects a minimum MTU of 576 octets).

 If I add a host route for 128.100.3.58 that forces traffic for it
through ppp0 I can mostly avoid this route collapse:
	; ip route list exact 128.100.3.58
	128.100.3.58 dev ppp0  scope link  src 66.96.18.208  mtu 1492

 However, even with this if I explicitly force traffic for 128.100.3.58
over the GRE tunnel (such as by specifying the IP source address
so as to make my policy based routing kick in) I still see the MTU
collapse. Using 'mtu lock 1492' instead of plain 'mtu 1492' on this
host-based route does not appear to change anything.

 This did not happen back in kernel 2.6.35.14 (the Fedora 14 kernel) and
previous kernels (going back years). In that kernel everything was happy
even without the ppp0-forcing host route for 128.100.3.58 and I could
talk to 128.100.3.58 over the GRE tunnel without causing any path MTU
changes (and without problems in general).

(This always made my head hurt a little bit but since it worked,
I didn't worry about it.)

	- cks

^ permalink raw reply

* Re: [RFC] kvm tools: Implement multiple VQ for virtio-net
From: Rusty Russell @ 2011-11-21  0:41 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Krishna Kumar, gorcunov, kvm, Asias He, virtualization,
	Pekka Enberg, Sasha Levin, netdev, mingo, Stephen Hemminger
In-Reply-To: <20111116072317.GG5433@redhat.com>

On Wed, 16 Nov 2011 09:23:17 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Wed, Nov 16, 2011 at 10:34:42AM +1030, Rusty Russell wrote:
> > On Mon, 14 Nov 2011 15:05:07 +0200, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Mon, Nov 14, 2011 at 02:25:17PM +0200, Pekka Enberg wrote:
> > > > On Mon, Nov 14, 2011 at 4:04 AM, Asias He <asias.hejun@gmail.com> wrote:
> > > > > Why both the bandwidth and latency performance are dropping so dramatically
> > > > > with multiple VQ?
> > > > 
> > > > What's the expected benefit from multiple VQs
> > > 
> > > Heh, the original patchset didn't mention this :) It really should.
> > > They are supposed to speed up networking for high smp guests.
> > 
> > If we have one queue per guest CPU, does this allow us to run lockless?
> > 
> > Thanks,
> > Rusty.
> 
> LLTX? It's supposed to be deprecated, isn't it?

I was referring back to "Subject: virtio net lockless ring" which
Stephen sent back in June, nothing more specific.

I assumed from his query that this was still an active area of
exploration...

Stephen?

Thanks,
Rusty.

^ permalink raw reply

* Re: WARNING: at mm/slub.c:3357, kernel BUG at mm/slub.c:3413
From: Alex,Shi @ 2011-11-21  0:44 UTC (permalink / raw)
  To: Markus Trippelsdorf
  Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Christoph Lameter, Pekka Enberg, Matt Mackall,
	netdev@vger.kernel.org, Eric Dumazet
In-Reply-To: <20111118120201.GA1642@x4.trippels.de>

On Fri, 2011-11-18 at 20:02 +0800, Markus Trippelsdorf wrote:
> On 2011.11.18 at 09:54 +0100, Markus Trippelsdorf wrote:
> > On 2011.11.18 at 16:43 +0800, Alex,Shi wrote:
> > > > > 
> > > > > The dirty flag comes from a bunch of unrelated xfs patches from Christoph, that
> > > > > I'm testing right now.
> > > 
> > > Where is the xfs patchset? I am wondering if it is due to slub code. 
> 
> I begin to wonder if this might be the result of a compiler bug. 
> The kernel in question was compiled with gcc version 4.7.0 20111117. And
> there was commit to the gcc repository today that looks suspicious:
> http://gcc.gnu.org/viewcvs?view=revision&revision=181466
> 

Tell us if it is still there and you can reproduce it.
> Will have to dig deeper, but if this turns out to be the cause of the
> issue, I apologize for the noise.
> 

That's all right. 


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH for 2.6.32 (untested)] netns: Add quota for number of NET_NS instances.
From: Tetsuo Handa @ 2011-11-21  1:57 UTC (permalink / raw)
  To: ebiederm; +Cc: netdev
In-Reply-To: <m1bos6wfnt.fsf@fess.ebiederm.org>

Eric W. Biederman wrote:
> Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:
> 
> > In order to solve below problems, can we add sysctl variable for
> > restricting number of NET_NS instances?
> 
> I don't have any particular problems with patch but I don't think it
> will result in a working system that is easy to keep working.  Tuning
> static limits can be fickle.

What I worry about is that, although clone() is an operation that is allowed to
sleep, waiting for too long might be annoying for users, especially when the
user cannot easily send Ctrl-C or SIGKILL. (I think an ftp client is an example.)

> My inclination in this case the practical fix is that during network
> namespace allocation someone take a look at the cleanup_list.  See
> that there is ongoing cleanup activity, and wait until at least one
> network namespace has cleaned up.  Perhaps by creating a work struct
> and waiting for it to cycle through the netns workqueue.

Are you suggesting that we should wait only when "the number of NET_NS
instances exceeded quota" and "there is a dead NET_NS instance"?
In other words, let clone() fail immediately if "the number of NET_NS
instances exceeded quota" but "cleanup_list is empty"?

If you are suggesting that we should always wait until "the number of NET_NS
instances becomes smaller than quota", clone() might sleep too long when the
user cannot easily send signals.

^ permalink raw reply

* RE: [PATCH 1/1]  PHY configuration for compatible issue
From: Aries Lee @ 2011-11-21  2:13 UTC (permalink / raw)
  To: 'Guo-Fu Tseng', netdev; +Cc: 'AriesLee'
In-Reply-To: <20111119041736.M37290@cooldavid.org>


Yes ~~ that's indeed a good suggestion

-----Original Message-----
From: Guo-Fu Tseng [mailto:cooldavid@cooldavid.org] 
Sent: Saturday, November 19, 2011 12:20 PM
To: Aries Lee; netdev@vger.kernel.org
Cc: 'AriesLee'
Subject: RE: [PATCH 1/1] PHY configuration for compatible issue

On Fri, 18 Nov 2011 15:13:37 +0800, Aries Lee wrote
> Hi Guo-Fu and All
> 
> 	Because jme_phy_on() and jme_phy_off() just turn on/off the PHY, the
> value of extern register is still the power on default value, not the most
> robust value which we collect in the LAB.
Sure, I got it. That's the point of this patch isn't it? :p
+     /*  Turn PHY off */ 
+     bmcr = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_BMCR); 
+     bmcr |= BMCR_PDOWN; 
+     jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_BMCR, bmcr); 
+     /*  Turn PHY on */ 
+     bmcr = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_BMCR); 
+     bmcr &= ~BMCR_PDOWN; 
+     jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_BMCR, bmcr); 
But what I mean is this part of the code.

Guo-Fu Tseng

^ permalink raw reply

* [PATCH 1/1]  PHY configuration for compatible issue
From: AriesLee @ 2011-11-21 10:08 UTC (permalink / raw)
  To: Aries Lee, Guo-Fu Tseng, netdev; +Cc: AriesLee

Perform PHY calibration and set an EA value selected by chip ID.
Whenever the NIC chip powers on, i.e. when booting or resuming, we need
to force the hardware to calibrate the PHY parameters again, and also set
a proper EA value, which was gathered from experiments.

These procedures help to reduce compatibility issues (the NIC being unable
to link up in some special cases) at gigabit speed.

Signed-off-by: AriesLee <AriesLee@jmicron.com>
---
 drivers/net/ethernet/jme.c |  113 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/jme.h |   19 +++++++
 2 files changed, 129 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index df3ab83..4d217b8 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1744,6 +1744,112 @@ jme_phy_off(struct jme_adapter *jme)
 		jme_new_phy_off(jme);
 }
 
+static void
+jme_phy_specreg_read(struct jme_adapter *jme, u32 specreg, u32 *phy_data)
+{
+	u32 phy_addr;
+
+	phy_addr = JM_PHY_SPEC_REG_READ | specreg;
+	jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
+			phy_addr);
+	*phy_data = jme_mdio_read(jme->dev, jme->mii_if.phy_id,
+			JM_PHY_SPEC_DATA_REG);
+}
+
+static void
+jme_phy_specreg_write(struct jme_adapter *jme, u32 ext_reg, u32 phy_data)
+{
+	u32 phy_addr;
+
+	phy_addr = JM_PHY_SPEC_REG_WRITE | ext_reg;
+	jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_DATA_REG,
+			phy_data);
+	jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
+			phy_addr);
+}
+
+static int
+jme_phy_calibration(struct jme_adapter *jme)
+{
+	u32 ctrl1000, phy_data;
+
+	jme_phy_off(jme);
+	jme_phy_on(jme);
+	/*  Enable PHY test mode 1 */
+	ctrl1000 = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_CTRL1000);
+	ctrl1000 &= ~PHY_GAD_TEST_MODE_MSK;
+	ctrl1000 |= PHY_GAD_TEST_MODE_1;
+	jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_CTRL1000, ctrl1000);
+
+	jme_phy_specreg_read(jme, JM_PHY_EXT_COMM_2_REG, &phy_data);
+	phy_data &= ~JM_PHY_EXT_COMM_2_CALI_MODE_0;
+	phy_data |= JM_PHY_EXT_COMM_2_CALI_LATCH |
+			JM_PHY_EXT_COMM_2_CALI_ENABLE;
+	jme_phy_specreg_write(jme, JM_PHY_EXT_COMM_2_REG, phy_data);
+	msleep(20);
+	jme_phy_specreg_read(jme, JM_PHY_EXT_COMM_2_REG, &phy_data);
+	phy_data &= ~(JM_PHY_EXT_COMM_2_CALI_ENABLE |
+			JM_PHY_EXT_COMM_2_CALI_MODE_0 |
+			JM_PHY_EXT_COMM_2_CALI_LATCH);
+	jme_phy_specreg_write(jme, JM_PHY_EXT_COMM_2_REG, phy_data);
+
+	/*  Disable PHY test mode */
+	ctrl1000 = jme_mdio_read(jme->dev, jme->mii_if.phy_id, MII_CTRL1000);
+	ctrl1000 &= ~PHY_GAD_TEST_MODE_MSK;
+	jme_mdio_write(jme->dev, jme->mii_if.phy_id, MII_CTRL1000, ctrl1000);
+	return 0;
+}
+
+static int
+jme_phy_setEA(struct jme_adapter *jme)
+{
+	u32 phy_comm0 = 0, phy_comm1 = 0;
+	u8 nic_ctrl;
+
+	pci_read_config_byte(jme->pdev, PCI_PRIV_SHARE_NICCTRL, &nic_ctrl);
+	if ((nic_ctrl & 0x3) == JME_FLAG_PHYEA_ENABLE)
+		return 0;
+
+	switch (jme->pdev->device) {
+	case PCI_DEVICE_ID_JMICRON_JMC250:
+		if (((jme->chip_main_rev == 5) &&
+			((jme->chip_sub_rev == 0) || (jme->chip_sub_rev == 1) ||
+			(jme->chip_sub_rev == 3))) ||
+			(jme->chip_main_rev >= 6)) {
+			phy_comm0 = 0x008A;
+			phy_comm1 = 0x4109;
+		}
+		if ((jme->chip_main_rev == 3) &&
+			((jme->chip_sub_rev == 1) || (jme->chip_sub_rev == 2)))
+			phy_comm0 = 0xE088;
+		break;
+	case PCI_DEVICE_ID_JMICRON_JMC260:
+		if (((jme->chip_main_rev == 5) &&
+			((jme->chip_sub_rev == 0) || (jme->chip_sub_rev == 1) ||
+			(jme->chip_sub_rev == 3))) ||
+			(jme->chip_main_rev >= 6)) {
+			phy_comm0 = 0x008A;
+			phy_comm1 = 0x4109;
+		}
+		if ((jme->chip_main_rev == 3) &&
+			((jme->chip_sub_rev == 1) || (jme->chip_sub_rev == 2)))
+			phy_comm0 = 0xE088;
+		if ((jme->chip_main_rev == 2) && (jme->chip_sub_rev == 0))
+			phy_comm0 = 0x608A;
+		if ((jme->chip_main_rev == 2) && (jme->chip_sub_rev == 2))
+			phy_comm0 = 0x408A;
+		break;
+	default:
+		return -ENODEV;
+	}
+	if (phy_comm0)
+		jme_phy_specreg_write(jme, JM_PHY_EXT_COMM_0_REG, phy_comm0);
+	if (phy_comm1)
+		jme_phy_specreg_write(jme, JM_PHY_EXT_COMM_1_REG, phy_comm1);
+
+	return 0;
+}
+
 static int
 jme_open(struct net_device *netdev)
 {
@@ -1769,7 +1875,8 @@ jme_open(struct net_device *netdev)
 		jme_set_settings(netdev, &jme->old_ecmd);
 	else
 		jme_reset_phy_processor(jme);
-
+	jme_phy_calibration(jme);
+	jme_phy_setEA(jme);
 	jme_reset_link(jme);
 
 	return 0;
@@ -3184,7 +3291,8 @@ jme_resume(struct device *dev)
 		jme_set_settings(netdev, &jme->old_ecmd);
 	else
 		jme_reset_phy_processor(jme);
-
+	jme_phy_calibration(jme);
+	jme_phy_setEA(jme);
 	jme_start_irq(jme);
 	netif_device_attach(netdev);
 
@@ -3239,4 +3347,3 @@ MODULE_DESCRIPTION("JMicron JMC2x0 PCI Express Ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, jme_pci_tbl);
-
diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h
index 02ea27c..4304072 100644
--- a/drivers/net/ethernet/jme.h
+++ b/drivers/net/ethernet/jme.h
@@ -760,6 +760,25 @@ enum jme_rxmcs_bits {
 				  RXMCS_CHECKSUM,
 };
 
+/*	Extern PHY common register 2	*/
+
+#define PHY_GAD_TEST_MODE_1			0x00002000
+#define PHY_GAD_TEST_MODE_MSK			0x0000E000
+#define JM_PHY_SPEC_REG_READ			0x00004000
+#define JM_PHY_SPEC_REG_WRITE			0x00008000
+#define PHY_CALIBRATION_DELAY			20
+#define JM_PHY_SPEC_ADDR_REG			0x1E
+#define JM_PHY_SPEC_DATA_REG			0x1F
+
+#define JM_PHY_EXT_COMM_0_REG			0x30
+#define JM_PHY_EXT_COMM_1_REG			0x31
+#define JM_PHY_EXT_COMM_2_REG			0x32
+#define JM_PHY_EXT_COMM_2_CALI_ENABLE		0x01
+#define JM_PHY_EXT_COMM_2_CALI_MODE_0		0x02
+#define JM_PHY_EXT_COMM_2_CALI_LATCH		0x10
+#define PCI_PRIV_SHARE_NICCTRL			0xF5
+#define JME_FLAG_PHYEA_ENABLE			0x2
+
 /*
  * Wakeup Frame setup interface registers
  */
-- 
1.7.4.4

^ permalink raw reply related

* Re: [PATCH for 2.6.32 (untested)] netns: Add quota for number of NET_NS instances.
From: Eric W. Biederman @ 2011-11-21  2:45 UTC (permalink / raw)
  To: Tetsuo Handa; +Cc: netdev
In-Reply-To: <201111210157.pAL1vbRo089486@www262.sakura.ne.jp>

Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:

> Eric W. Biederman wrote:
>> Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> writes:
>> 
>> > In order to solve below problems, can we add sysctl variable for
>> > restricting number of NET_NS instances?
>> 
>> I don't have any particular problems with patch but I don't think it
>> will result in a working system that is easy to keep working.  Tuning
>> static limits can be fickle.
>
> What I worry is that, although clone() is an operation that is allowed to
> sleep, waiting for too long might be annoying for users, especially when the
> user cannot easily send Ctrl-C or SIGKILL. (I think ftp client is an
> example.)

An ftp client can always close the connection.  We already have to
contend for the net_mutex when both creating and destroying network
namespaces so I would be surprised if it is actually a problem.

But the reality is that under high connection load if we actually want
to use network namespaces we have to wait for previous network
namespaces to clean up.  So I am not particularly worried.  Especially
since most of the cleanup speed issues when there is a backlog have
been fixed in more recent kernels.

>> My inclination in this case the practical fix is that during network
>> namespace allocation someone take a look at the cleanup_list.  See
>> that there is ongoing cleanup activity, and wait until at least one
>> network namespace has cleaned up.  Perhaps by creating a work struct
>> and waiting for it to cycle through the netns workqueue.
>
> Are you suggesting that we should wait only when "the number of NET_NS
> instances exceeded quota" and "there is a dead NET_NS instance"?
> In other words, let clone() fail immediately if "the number of NET_NS
> instances exceeded quota" but "cleanup_list is empty"?
>
> If you are suggesting that we should always wait until "the number of NET_NS
> instances becomes smaller than quota", clone() might sleep too long when the
> user cannot easily send signals.

I am suggesting that if a netns instance is being cleaned up we should
wait for one netns instance to be cleaned up.  A single netns instance
does not take long to clean up (in general).  But a lot of netns
instances do take a while.

By waiting for one netns instance to be cleaned up we should be able
to guarantee that we don't develop a substantial backlog of network
namespaces to be cleaned up.  And that was the problem.

I don't expect we need to do anything if there are no network namespaces
being cleaned up.

There is of course debian's solution which was to simply tweak vsftp
to not use network namespaces on 2.6.32 and only enable the feature
on later kernels.  But you seem to want to do something a little
more substantial than that.

Eric

^ permalink raw reply

* MSL WINNING NOTIFICATION 2011                                             WQLHPIUVRJ
From: Microsoft @ 2011-11-21  3:02 UTC (permalink / raw)


[-- Attachment #1: Type: text/plain, Size: 159 bytes --]

Att:

You have to urgently contact your claims agent to redeem your winning prize.

Microsoft Lottery
Managment Board
 
GRPVWCCNJTXMMRSGULXXUDPQUSDDXKQBBZPFZB

[-- Attachment #2: MSL-WINNING NOTIFICATION 2011.doc --]
[-- Type: application/msword, Size: 73216 bytes --]

^ permalink raw reply

* Wir bieten Darlehen
From: CAPITAL EQUALITY LOAN COMPANY @ 2011-11-21  2:57 UTC (permalink / raw)


Hiermit möchten wir Sie darüber informieren, dass CAPITAL GLEICHHEIT  
Loan Company ist derzeit ein Darlehen anbieten zu 3% Zins.

Wir bieten eine Vielzahl von Krediten an unsere Kunden. Was auch immer  
Ihr Darlehen braucht, sind große oder kleine, persönliche oder  
Hypotheken, sind wir bereit, mit Ihnen darüber, wie wir Ihre  
Bedürfnisse zu sprechen. Anmeldeformular unter

CAPITAL GLEICHHEIT Loan Company
71 Queen Victoria Street
London, United Kingdom, EC4V 4DE

Bewerbungsformular:
1) Vollständiger Name :.........
2) Land :..............
3) Anschrift: ...
4) Sex :..............
5) Familienstand :......................
6) Beruf :..............
7) Telefon :......
8) Monatliches Einkommen :.....................
9) Darlehensbetrag Benötigte :...............
10) Dauer der Ausleihe :..................
11) Zweck des Darlehens :.................
12) Alter: ..............

Rosmarie Neely
Loan Officer
E-mail: capitalequalityloancompany@yahoo.com.hk
Tell: +447035958575
Fax: +448447742385

----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.

^ permalink raw reply

* [PATCH net-next 1/3] be2net: fix to display Pause autonegotiation setting
From: Ajit Khaparde @ 2011-11-21  5:12 UTC (permalink / raw)
  To: netdev, davem

Pause autonegotiation is supported by default.

Signed-off-by: Ajit Khaparde <ajit.khaparde@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_ethtool.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 575c783..ff53489 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -538,7 +538,7 @@ be_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *ecmd)
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	be_cmd_get_flow_control(adapter, &ecmd->tx_pause, &ecmd->rx_pause);
-	ecmd->autoneg = 0;
+	ecmd->autoneg = 1;
 }
 
 static int
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH net-next 0/3] be2net: patch series
From: Ajit Khaparde @ 2011-11-21  5:12 UTC (permalink / raw)
  To: netdev, davem

Please apply.

Thanks
-Ajit

[1/3] be2net: fix to display Pause autonegotiation setting
[2/3] be2net: update some counters to display via ethtool
[3/3] be2net: workaround to fix a BE bug

^ permalink raw reply

* [PATCH net-next 2/3] be2net: update some counters to display via ethtool
From: Ajit Khaparde @ 2011-11-21  5:12 UTC (permalink / raw)
  To: netdev, davem

update pmem_fifo_overflow_drop, rx_priority_pause_frames counters.

Signed-off-by: Ajit Khaparde <ajit.khaparde@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be_main.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 93869d4..95d41ba 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -315,6 +315,8 @@ static void populate_be3_stats(struct be_adapter *adapter)
 	struct be_drv_stats *drvs = &adapter->drv_stats;
 
 	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
+	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
+	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
 	drvs->rx_pause_frames = port_stats->rx_pause_frames;
 	drvs->rx_crc_errors = port_stats->rx_crc_errors;
 	drvs->rx_control_frames = port_stats->rx_control_frames;
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH] dccp: fix error propagation in dccp_v4_connect
From: roy.qing.li @ 2011-11-21  5:18 UTC (permalink / raw)
  To: netdev

From: RongQing.Li <roy.qing.li@gmail.com>

The errcode is not updated when ip_route_newports() fails.

Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>
---
 net/dccp/ipv4.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 90a919a..3f4e541 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -111,6 +111,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 			       inet->inet_sport, inet->inet_dport, sk);
 	if (IS_ERR(rt)) {
+		err = PTR_ERR(rt);
 		rt = NULL;
 		goto failure;
 	}
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next 3/3] be2net: workaround to fix a BE bug
From: Ajit Khaparde @ 2011-11-21  5:22 UTC (permalink / raw)
  To: netdev, davem

For vlan tagged pkts, BE
1) calculates checksum even when CSO is not requested
2) calculates checksum wrongly for padded pkt less than 60 bytes long.
As a workaround disable TX vlan offloading in such cases.

Signed-off-by: Ajit Khaparde <ajit.khaparde@emulex.com>
---
 drivers/net/ethernet/emulex/benet/be.h      |   16 +++++++++++++
 drivers/net/ethernet/emulex/benet/be_main.c |   32 +++++++++++++++++++-------
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 34f162d..c4fcea69 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -518,6 +518,22 @@ static inline void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
 	memcpy(mac, adapter->netdev->dev_addr, 3);
 }
 
+static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
+					struct sk_buff *skb)
+{
+	u8 vlan_prio;
+	u16 vlan_tag;
+
+	vlan_tag = vlan_tx_tag_get(skb);
+	vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+	/* If vlan priority provided by OS is NOT in available bmap */
+	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
+		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
+				adapter->recommended_prio;
+
+	return vlan_tag;
+}
+
 static inline bool be_multi_rxq(const struct be_adapter *adapter)
 {
 	return adapter->num_rx_qs > 1;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 95d41ba..db27269 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -554,8 +554,7 @@ static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
 static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr,
 		struct sk_buff *skb, u32 wrb_cnt, u32 len)
 {
-	u8 vlan_prio = 0;
-	u16 vlan_tag = 0;
+	u16 vlan_tag;
 
 	memset(hdr, 0, sizeof(*hdr));
 
@@ -584,14 +583,9 @@ static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr,
 			AMAP_SET_BITS(struct amap_eth_hdr_wrb, udpcs, hdr, 1);
 	}
 
-	if (vlan_tx_tag_present(skb)) {
+	if (adapter->vlans_added && vlan_tx_tag_present(skb)) {
 		AMAP_SET_BITS(struct amap_eth_hdr_wrb, vlan, hdr, 1);
-		vlan_tag = vlan_tx_tag_get(skb);
-		vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-		/* If vlan priority provided by OS is NOT in available bmap */
-		if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
-			vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
-					adapter->recommended_prio;
+		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
 		AMAP_SET_BITS(struct amap_eth_hdr_wrb, vlan_tag, hdr, vlan_tag);
 	}
 
@@ -694,6 +688,25 @@ static netdev_tx_t be_xmit(struct sk_buff *skb,
 	u32 start = txq->head;
 	bool dummy_wrb, stopped = false;
 
+	/* For vlan tagged pkts, BE
+	 * 1) calculates checksum even when CSO is not requested
+	 * 2) calculates checksum wrongly for padded pkt less than
+	 * 60 bytes long.
+	 * As a workaround disable TX vlan offloading in such cases.
+	 */
+	if (unlikely(vlan_tx_tag_present(skb) &&
+		(skb->ip_summed != CHECKSUM_PARTIAL || skb->len <= 60))) {
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (unlikely(!skb))
+			goto tx_drop;
+
+		skb = __vlan_put_tag(skb, be_get_tx_vlan_tag(adapter, skb));
+		if (unlikely(!skb))
+			goto tx_drop;
+
+		skb->vlan_tci = 0;
+	}
+
 	wrb_cnt = wrb_cnt_for_skb(adapter, skb, &dummy_wrb);
 
 	copied = make_tx_wrbs(adapter, txq, skb, wrb_cnt, dummy_wrb);
@@ -719,6 +732,7 @@ static netdev_tx_t be_xmit(struct sk_buff *skb,
 				skb_shinfo(skb)->gso_segs, stopped);
 	} else {
 		txq->head = start;
+tx_drop:
 		dev_kfree_skb_any(skb);
 	}
 	return NETDEV_TX_OK;
-- 
1.7.5.4

^ permalink raw reply related

* Problems with dropped packets on bonded interface for 3.x kernels
From: Albert Chin @ 2011-11-21  5:16 UTC (permalink / raw)
  To: netdev

I'm running Ubuntu 11.10 on an Intel SR2625URLXR system with an Intel
S5520UR motherboard and an internal Intel E1G44HT (I340-T4) Quad Port
Server Adapter. I am seeing dropped packets on a bonded interface,
comprised of two GigE ports on the Intel E1G44HT Quad Port Server
Adapter. The following kernels exhibit this problem:
  3.0.0-12-server, 3.0.0-13-server, 3.1.0-2-server, 3.2.0-rc2
Installing Fedora 16 with a 3.1.1-1.fc16.x86_64 also showed dropped
packets.

I also tried RHEL6 with a 2.6.32-131.17.1.el6.x86_64 kernel and didn't
see any dropped packets. Testing an older 2.6.32-28.55-generic Ubuntu
kernel also didn't show any dropped packets.

So, with 2.6, I don't see dropped packets, but every kernel from 3.0
onward shows dropped packets.

# ifconfig bond0
bond0     Link encap:Ethernet  HWaddr 00:1b:21:d3:f6:0a  
          inet6 addr: fe80::21b:21ff:fed3:f60a/64 Scope:Link
          UP BROADCAST RUNNING MASTER MULTICAST  MTU:1500  Metric:1
          RX packets:225 errors:0 dropped:186 overruns:0 frame:0
          TX packets:231 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:0 
          RX bytes:25450 (25.4 KB)  TX bytes:28368 (28.3 KB)

With lacp_rate=fast, I see higher packet loss than with
lacp_rate=slow.

This server has the following network controllers for the two internal
NICs:
  # lspci -vv
  01:00.0 Ethernet controller: Intel Corporation 82575EB Gigabit Network Connection (rev 02)
  01:00.1 Ethernet controller: Intel Corporation 82575EB Gigabit Network Connection (rev 02)

And it has the following network controllers for the four NICs on the
I340-T4 PCI-E card:
  # lspci -vv
  0a:00.0 Ethernet controller: Intel Corporation 82580 Gigabit Network Connection (rev 01)
  0a:00.1 Ethernet controller: Intel Corporation 82580 Gigabit Network Connection (rev 01)
  0a:00.2 Ethernet controller: Intel Corporation 82580 Gigabit Network Connection (rev 01)
  0a:00.3 Ethernet controller: Intel Corporation 82580 Gigabit Network Connection (rev 01)

I tried bonding the two 82575EB NICs rather than two NICs on the 82580
but see the same dropped packet issue.

I have replaced the cables, tested each port individually on the
switch without bonding, and don't see any reason to expect hardware as
the issue. The switch is a Summit Extreme 400-48t.

I am using a 802.3ad configuration:
# cat /proc/net/bonding/bond0
Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011)

Bonding Mode: IEEE 802.3ad Dynamic link aggregation
Transmit Hash Policy: layer2 (0)
MII Status: up
MII Polling Interval (ms): 100
Up Delay (ms): 200
Down Delay (ms): 0

802.3ad info
LACP rate: fast
Aggregator selection policy (ad_select): stable
Active Aggregator Info:
        Aggregator ID: 1
        Number of ports: 1
        Actor Key: 17
        Partner Key: 24
        Partner Mac Address: 00:04:96:18:54:d5

Slave Interface: eth4
MII Status: up
Speed: 1000 Mbps
Duplex: full
Link Failure Count: 0
Permanent HW addr: 00:1b:21:d3:f6:0a
Aggregator ID: 1
Slave queue ID: 0

Slave Interface: eth5
MII Status: up
Speed: 1000 Mbps
Duplex: full
Link Failure Count: 0
Permanent HW addr: 00:1b:21:d3:f6:0b
Aggregator ID: 2
Slave queue ID: 0

Anyone have any ideas?

-- 
albert chin (china@thewrittenword.com)

^ permalink raw reply

* Re: [PATCH 1/1]  PHY configuration for compatible issue
From: Guo-Fu Tseng @ 2011-11-21  5:35 UTC (permalink / raw)
  To: AriesLee, Aries Lee, netdev
In-Reply-To: <1321870127-15541-1-git-send-email-AriesLee@jmicron.com>

On Mon, 21 Nov 2011 18:08:47 +0800, AriesLee wrote
> To perform PHY calibration and set a different EA value by chip ID,
> Whenever the NIC chip power on, ie booting or resuming, we need to
> force HW to calibrate PHY parameter again, and also set a proper EA
> value which gather from experiment.
> 
> Those procedures help to reduce compatible issues(NIC is unable to link
> up in some special case) in giga speed.
> 
> Signed-off-by: AriesLee <AriesLee@jmicron.com>
> ---
>  drivers/net/ethernet/jme.c |  113 
> ++++++++++++++++++++++++++++++++++++++++++- drivers/net/ethernet/jme.h 
> |   19 +++++++ 2 files changed, 129 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
> index df3ab83..4d217b8 100644
> --- a/drivers/net/ethernet/jme.c
> +++ b/drivers/net/ethernet/jme.c
> @@ -1744,6 +1744,112 @@ jme_phy_off(struct jme_adapter *jme)
>  		jme_new_phy_off(jme);
>  }
> 
> +static void
> +jme_phy_specreg_read(struct jme_adapter *jme, u32 specreg, u32 *phy_data) 
> +{
> +	u32 phy_addr;
> +
> +	phy_addr = JM_PHY_SPEC_REG_READ | specreg;
> +	jme_mdio_write(jme->dev, jme->mii_if.phy_id, JM_PHY_SPEC_ADDR_REG,
> +			phy_addr);
> +	*phy_data = jme_mdio_read(jme->dev, jme->mii_if.phy_id,
> +			JM_PHY_SPEC_DATA_REG);
> +}
Is there any particular reason that you pass the address of the read data
(phy_data) instead of just returning the value?
It would be more consistent to return the value in this kind of
read function.

Otherwise this patch all seems to be OK to me. :)

Signed-off-by: Guo-Fu Tseng <cooldavid@cooldavid.org>

Guo-Fu Tseng

^ permalink raw reply

* [PATCH 02/17] netfilter: nat: remove module reference counting from NAT protocols
From: kaber @ 2011-11-21  5:45 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, ulrich.weber
In-Reply-To: <1321854369-3432-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

The only remaining user of NAT protocol module reference counting is NAT
ctnetlink support. Since this is a fairly short sequence of code, convert
over to use RCU and remove module reference counting.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_nat_protocol.h   |    2 --
 net/ipv4/netfilter/nf_nat_core.c          |   26 ++++----------------------
 net/ipv4/netfilter/nf_nat_proto_dccp.c    |    1 -
 net/ipv4/netfilter/nf_nat_proto_gre.c     |    1 -
 net/ipv4/netfilter/nf_nat_proto_icmp.c    |    1 -
 net/ipv4/netfilter/nf_nat_proto_sctp.c    |    1 -
 net/ipv4/netfilter/nf_nat_proto_tcp.c     |    1 -
 net/ipv4/netfilter/nf_nat_proto_udp.c     |    1 -
 net/ipv4/netfilter/nf_nat_proto_udplite.c |    1 -
 net/ipv4/netfilter/nf_nat_proto_unknown.c |    1 -
 10 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index 93cc90d..c951c6f 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -10,8 +10,6 @@ struct nf_nat_protocol {
 	/* Protocol number. */
 	unsigned int protonum;
 
-	struct module *me;
-
 	/* Translate a packet to the target according to manip type.
 	   Return true if succeeded. */
 	bool (*manip_pkt)(struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index ffb0c94..d830a47 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -575,26 +575,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-static const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
-	const struct nf_nat_protocol *p;
-
-	rcu_read_lock();
-	p = __nf_nat_proto_find(protonum);
-	if (!try_module_get(p->me))
-		p = &nf_nat_unknown_protocol;
-	rcu_read_unlock();
-
-	return p;
-}
-
-static void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
-	module_put(p->me);
-}
-
 static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
 	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
@@ -612,16 +592,18 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 	if (err < 0)
 		return err;
 
-	npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
+	rcu_read_lock();
+	npt = __nf_nat_proto_find(nf_ct_protonum(ct));
 	if (npt->nlattr_to_range)
 		err = npt->nlattr_to_range(tb, range);
-	nf_nat_proto_put(npt);
+	rcu_read_unlock();
 	return err;
 }
 
 static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
 	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
+	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
 };
 
 static int
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 570faf2..17edd75 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -80,7 +80,6 @@ dccp_manip_pkt(struct sk_buff *skb,
 
 static const struct nf_nat_protocol nf_nat_protocol_dccp = {
 	.protonum		= IPPROTO_DCCP,
-	.me			= THIS_MODULE,
 	.manip_pkt		= dccp_manip_pkt,
 	.in_range		= nf_nat_proto_in_range,
 	.unique_tuple		= dccp_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index bc8d83a..d177e61 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -119,7 +119,6 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 
 static const struct nf_nat_protocol gre = {
 	.protonum		= IPPROTO_GRE,
-	.me			= THIS_MODULE,
 	.manip_pkt		= gre_manip_pkt,
 	.in_range		= nf_nat_proto_in_range,
 	.unique_tuple		= gre_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 5744c3e..5ff91f1 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -73,7 +73,6 @@ icmp_manip_pkt(struct sk_buff *skb,
 
 const struct nf_nat_protocol nf_nat_protocol_icmp = {
 	.protonum		= IPPROTO_ICMP,
-	.me			= THIS_MODULE,
 	.manip_pkt		= icmp_manip_pkt,
 	.in_range		= icmp_in_range,
 	.unique_tuple		= icmp_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 756331d..eac0b82 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -69,7 +69,6 @@ sctp_manip_pkt(struct sk_buff *skb,
 
 static const struct nf_nat_protocol nf_nat_protocol_sctp = {
 	.protonum		= IPPROTO_SCTP,
-	.me			= THIS_MODULE,
 	.manip_pkt		= sctp_manip_pkt,
 	.in_range		= nf_nat_proto_in_range,
 	.unique_tuple		= sctp_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index aa460a5..ec37036 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -81,7 +81,6 @@ tcp_manip_pkt(struct sk_buff *skb,
 
 const struct nf_nat_protocol nf_nat_protocol_tcp = {
 	.protonum		= IPPROTO_TCP,
-	.me			= THIS_MODULE,
 	.manip_pkt		= tcp_manip_pkt,
 	.in_range		= nf_nat_proto_in_range,
 	.unique_tuple		= tcp_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index dfe65c7..4ba8c2c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -72,7 +72,6 @@ udp_manip_pkt(struct sk_buff *skb,
 
 const struct nf_nat_protocol nf_nat_protocol_udp = {
 	.protonum		= IPPROTO_UDP,
-	.me			= THIS_MODULE,
 	.manip_pkt		= udp_manip_pkt,
 	.in_range		= nf_nat_proto_in_range,
 	.unique_tuple		= udp_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index 3cc8c8a..f45c1a6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -71,7 +71,6 @@ udplite_manip_pkt(struct sk_buff *skb,
 
 static const struct nf_nat_protocol nf_nat_protocol_udplite = {
 	.protonum		= IPPROTO_UDPLITE,
-	.me			= THIS_MODULE,
 	.manip_pkt		= udplite_manip_pkt,
 	.in_range		= nf_nat_proto_in_range,
 	.unique_tuple		= udplite_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a50f2bc..549c5b5 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -46,7 +46,6 @@ unknown_manip_pkt(struct sk_buff *skb,
 }
 
 const struct nf_nat_protocol nf_nat_unknown_protocol = {
-	/* .me isn't set: getting a ref to this cannot fail. */
 	.manip_pkt		= unknown_manip_pkt,
 	.in_range		= unknown_in_range,
 	.unique_tuple		= unknown_unique_tuple,
-- 
1.7.1

^ permalink raw reply related

* [PATCH 04/17] netfilter: nf_nat: remove obsolete code from nf_nat_icmp_reply_translation()
From: kaber @ 2011-11-21  5:45 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, ulrich.weber
In-Reply-To: <1321854369-3432-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

The inner tuple that is extracted from the packet is unused. The code also
doesn't have any useful side-effects like verifying the packet does contain
enough data to extract the inner tuple since conntrack already does the
same, so remove it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_core.c |   14 +-------------
 1 files changed, 1 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index e9cab93..413c9de 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -30,7 +30,6 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
 static DEFINE_SPINLOCK(nf_nat_lock);
@@ -414,8 +413,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 		struct icmphdr icmp;
 		struct iphdr ip;
 	} *inside;
-	const struct nf_conntrack_l4proto *l4proto;
-	struct nf_conntrack_tuple inner, target;
+	struct nf_conntrack_tuple target;
 	int hdrlen = ip_hdrlen(skb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
@@ -463,16 +461,6 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 		 "dir %s\n", skb, manip,
 		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
 
-	/* rcu_read_lock()ed by nf_hook_slow */
-	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
-
-	if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
-			     (hdrlen +
-			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
-			     (u_int16_t)AF_INET, inside->ip.protocol,
-			     &inner, l3proto, l4proto))
-		return 0;
-
 	/* Change inner back to look like incoming packet.  We do the
 	   opposite manip on this hook to normal, because it might not
 	   pass all hooks (locally-generated ICMP).  Consider incoming
-- 
1.7.1

^ permalink raw reply related

* [PATCH 01/17] netfilter: nf_nat: use hash random for bysource hash
From: kaber @ 2011-11-21  5:45 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, ulrich.weber
In-Reply-To: <1321854369-3432-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

Use nf_conntrack_hash_rnd in NAT bysource hash to avoid hash chain attacks.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_core.c  |    2 +-
 net/netfilter/nf_conntrack_core.c |    1 +
 2 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 3346de5..ffb0c94 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -57,7 +57,7 @@ hash_by_src(const struct net *net, u16 zone,
 	/* Original src, to ensure we map it consistently if poss. */
 	hash = jhash_3words((__force u32)tuple->src.u3.ip,
 			    (__force u32)tuple->src.u.all ^ zone,
-			    tuple->dst.protonum, 0);
+			    tuple->dst.protonum, nf_conntrack_hash_rnd);
 	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
 }
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f7af8b8..df12969 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 unsigned int nf_conntrack_hash_rnd __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
 
 static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
 {
-- 
1.7.1

^ permalink raw reply related

* [RFC PATCH 00/17] netfilter: IPv6 NAT
From: kaber @ 2011-11-21  5:45 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, ulrich.weber

The following patches contain an implementation of IPv6 NAT for netfilter.
During the last netfilter workshop it was agreed that there are legitimate
use cases for IPv6 NAT and since vendors are going to (and already have)
implement it anyway, it's better to have one well-tested implementation
everyone can use than having everyone implement it on his own.

The implementation remodels the existing IPv4-only code to provide a layer
3 independent NAT core with layer 3 specific modules for IPv4 and IPv6.
The SNAT/DNAT targets are changed to support IPv6, additionally IPv6
specific versions of the MASQUERADE/REDIRECT/NETMAP targets are included.

For testing purposes the FTP and amanda NAT helpers have been converted
to support IPv6, the SIP helper has also been converted but is not yet
included in this patchset since it still needs a bit more work.

There are two remaining open issues in this patchset:

- IPv6 fragment handling. Currently IPv6 conntrack reassembles the original
  packet from all its fragments, passes the reassembled packet through
  conntrack, associates all individual fragments with the conntrack entry
  of the reassembled packet and drops the reassembled packet. Conntrack
  helpers only see the individual fragments. For NAT helpers to operate
  properly, they need to receive the entire reassembled packet from
  conntrack helpers. In order to be able to mangle the payload, we also
  can't drop the reassembled packet but need to send it out, potentially
  refragmenting it previously.

  In order to not break PMTUD as done in IPv4 conntrack/NAT, the best
  choice seems to be to store the biggest fragment size seen during
  reassembly in the CB of the reassembled packet and use that as a hard
  limit during reassembly. As mentioned before, this is currently not done,
  any other suggestions are welcome.

- NAT protocol registration needs to be reworked slightly; modular
  built protocols currently don't work properly.

The patchset is based on 3.1 and contains three parts:

- minor cleanups and improvements not directly related to IPv6 NAT
- preparatory patches for IPv6 NAT
- NAT core, IPv6 NAT, new IPv6 NAT targets and NAT helpers

I'll forward port it to the current kernel during the week and post an
updated patchset. I'll also try to split up the patches a bit more since
especially the main patch (add protocol independant NAT core) is quite
large. Userspace patches also still need to be split up, patches will
follow shortly.

Comments welcome.


Patrick McHardy (17):
      netfilter: nf_nat: use hash random for bysource hash
      netfilter: nat: remove module reference counting from NAT protocols
      netfilter: nf_nat: export NAT definitions to userspace
      netfilter: nf_nat: remove obsolete code from nf_nat_icmp_reply_translation()
      netfilter: nf_nat: remove obsolete check in nf_nat_mangle_udp_packet()
      netfilter: ctnetlink: remove dead NAT code
      netfilter: conntrack: restrict NAT helper invocation to IPv4
      netfilter: nf_nat: add protoff argument to packet mangling functions
      netfilter: add protocol independant NAT core
      netfilter: ipv6: expand skb head in ip6_route_me_harder after oif change
      net: core: add function for incremental IPv6 pseudo header checksum updates
      netfilter: ipv6: add IPv6 NAT support
      netfilter: ip6tables: add MASQUERADE target
      netfilter: ip6tables: add REDIRECT target
      netfilter: ip6tables: add NETMAP target
      netfilter: nf_nat: support IPv6 in FTP NAT helper
      netfilter: nf_nat: support IPv6 in amanda NAT helper


 include/linux/netfilter.h                          |   14 +-
 include/linux/netfilter/Kbuild                     |    1 +
 include/linux/netfilter/nf_conntrack_amanda.h      |    1 +
 include/linux/netfilter/nf_conntrack_ftp.h         |    1 +
 include/linux/netfilter/nf_conntrack_h323.h        |   15 +-
 include/linux/netfilter/nf_conntrack_irc.h         |    1 +
 include/linux/netfilter/nf_conntrack_pptp.h        |    2 +
 include/linux/netfilter/nf_conntrack_sip.h         |   12 +-
 .../linux/netfilter/nf_conntrack_tuple_common.h    |   27 +
 include/linux/netfilter/nf_nat.h                   |   33 +
 include/linux/netfilter/nfnetlink_conntrack.h      |    8 +-
 include/linux/netfilter_ipv4.h                     |    1 -
 include/net/addrconf.h                             |    2 +-
 include/net/checksum.h                             |    3 +
 include/net/netfilter/nf_conntrack_expect.h        |    2 +-
 include/net/netfilter/nf_conntrack_tuple.h         |   26 -
 include/net/netfilter/nf_nat.h                     |   32 +-
 include/net/netfilter/nf_nat_core.h                |    7 +-
 include/net/netfilter/nf_nat_helper.h              |   11 +-
 include/net/netfilter/nf_nat_l3proto.h             |   52 ++
 include/net/netfilter/nf_nat_l4proto.h             |   73 +++
 include/net/netfilter/nf_nat_protocol.h            |   74 ---
 include/net/netfilter/nf_nat_rule.h                |   15 -
 include/net/netns/conntrack.h                      |    4 +
 include/net/netns/ipv4.h                           |    2 -
 include/net/netns/ipv6.h                           |    1 +
 net/core/secure_seq.c                              |    1 +
 net/core/utils.c                                   |   20 +
 net/ipv4/netfilter.c                               |   37 --
 net/ipv4/netfilter/Kconfig                         |   67 +--
 net/ipv4/netfilter/Makefile                        |   13 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c                |   23 +-
 net/ipv4/netfilter/ipt_NETMAP.c                    |   21 +-
 net/ipv4/netfilter/ipt_REDIRECT.c                  |   23 +-
 .../{nf_nat_standalone.c => iptable_nat.c}         |  265 ++++-----
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c     |    8 +-
 net/ipv4/netfilter/nf_nat_h323.c                   |   83 ++--
 net/ipv4/netfilter/nf_nat_irc.c                    |    4 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c           |  281 +++++++++
 net/ipv4/netfilter/nf_nat_pptp.c                   |   31 +-
 net/ipv4/netfilter/nf_nat_proto_common.c           |  125 ----
 net/ipv4/netfilter/nf_nat_proto_gre.c              |   37 +-
 net/ipv4/netfilter/nf_nat_proto_icmp.c             |   27 +-
 net/ipv4/netfilter/nf_nat_rule.c                   |  214 -------
 net/ipv4/netfilter/nf_nat_sip.c                    |  121 +++--
 net/ipv4/netfilter/nf_nat_tftp.c                   |    1 -
 net/ipv6/addrconf.c                                |    2 +-
 net/ipv6/netfilter.c                               |    7 +
 net/ipv6/netfilter/Kconfig                         |   48 ++
 net/ipv6/netfilter/Makefile                        |    7 +
 net/ipv6/netfilter/ip6t_MASQUERADE.c               |  133 +++++
 net/ipv6/netfilter/ip6t_NETMAP.c                   |   94 +++
 net/ipv6/netfilter/ip6t_REDIRECT.c                 |   98 +++
 net/ipv6/netfilter/ip6table_nat.c                  |  318 ++++++++++
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c     |   14 +
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c           |  285 +++++++++
 net/ipv6/netfilter/nf_nat_proto_icmpv6.c           |   88 +++
 net/netfilter/Kconfig                              |   34 ++
 net/netfilter/Makefile                             |   15 +
 net/netfilter/core.c                               |    5 +
 net/netfilter/ipvs/ip_vs_ftp.c                     |    1 +
 net/netfilter/nf_conntrack_amanda.c                |    5 +-
 net/netfilter/nf_conntrack_core.c                  |    7 +
 net/netfilter/nf_conntrack_ftp.c                   |    3 +-
 net/netfilter/nf_conntrack_h323_main.c             |  232 +++++---
 net/netfilter/nf_conntrack_irc.c                   |    6 +-
 net/netfilter/nf_conntrack_netlink.c               |   30 +-
 net/netfilter/nf_conntrack_pptp.c                  |   18 +-
 net/netfilter/nf_conntrack_proto_tcp.c             |    8 +-
 net/netfilter/nf_conntrack_sip.c                   |  119 +++--
 net/netfilter/nf_conntrack_tftp.c                  |    3 +-
 net/{ipv4 => }/netfilter/nf_nat_amanda.c           |    4 +-
 net/{ipv4 => }/netfilter/nf_nat_core.c             |  617 ++++++++++----------
 net/{ipv4 => }/netfilter/nf_nat_ftp.c              |   33 +-
 net/{ipv4 => }/netfilter/nf_nat_helper.c           |  121 ++---
 net/netfilter/nf_nat_proto_common.c                |  111 ++++
 net/{ipv4 => }/netfilter/nf_nat_proto_dccp.c       |   43 +-
 net/{ipv4 => }/netfilter/nf_nat_proto_sctp.c       |   41 +-
 net/{ipv4 => }/netfilter/nf_nat_proto_tcp.c        |   42 +-
 net/{ipv4 => }/netfilter/nf_nat_proto_udp.c        |   44 +-
 net/{ipv4 => }/netfilter/nf_nat_proto_udplite.c    |   46 +-
 net/{ipv4 => }/netfilter/nf_nat_proto_unknown.c    |   15 +-
 net/netfilter/xt_nat.c                             |  185 ++++++
 83 files changed, 3081 insertions(+), 1623 deletions(-)

^ permalink raw reply

* [PATCH 03/17] netfilter: nf_nat: export NAT definitions to userspace
From: kaber @ 2011-11-21  5:45 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, ulrich.weber
In-Reply-To: <1321854369-3432-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

Export the NAT definitions to userspace. So far userspace (specifically,
iptables) has been copying the header files from include/net. Also
rename some structures and definitions in preparation for IPv6 NAT.
Since these have never been officially exported, this doesn't affect
existing userspace code.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild                     |    1 +
 .../linux/netfilter/nf_conntrack_tuple_common.h    |   27 ++++++++++
 include/linux/netfilter/nf_nat.h                   |   25 +++++++++
 include/net/netfilter/nf_conntrack_tuple.h         |   26 ---------
 include/net/netfilter/nf_nat.h                     |   34 ++-----------
 include/net/netfilter/nf_nat_core.h                |    2 +-
 include/net/netfilter/nf_nat_protocol.h            |   14 +++---
 net/ipv4/netfilter/ipt_MASQUERADE.c                |   16 +++---
 net/ipv4/netfilter/ipt_NETMAP.c                    |   14 +++---
 net/ipv4/netfilter/ipt_REDIRECT.c                  |   16 +++---
 net/ipv4/netfilter/nf_nat_core.c                   |   54 ++++++++++----------
 net/ipv4/netfilter/nf_nat_h323.c                   |   20 ++++----
 net/ipv4/netfilter/nf_nat_helper.c                 |   10 ++--
 net/ipv4/netfilter/nf_nat_pptp.c                   |   14 +++---
 net/ipv4/netfilter/nf_nat_proto_common.c           |   24 ++++----
 net/ipv4/netfilter/nf_nat_proto_dccp.c             |    4 +-
 net/ipv4/netfilter/nf_nat_proto_gre.c              |    8 ++--
 net/ipv4/netfilter/nf_nat_proto_icmp.c             |    4 +-
 net/ipv4/netfilter/nf_nat_proto_sctp.c             |    4 +-
 net/ipv4/netfilter/nf_nat_proto_tcp.c              |    4 +-
 net/ipv4/netfilter/nf_nat_proto_udp.c              |    4 +-
 net/ipv4/netfilter/nf_nat_proto_udplite.c          |    4 +-
 net/ipv4/netfilter/nf_nat_proto_unknown.c          |    2 +-
 net/ipv4/netfilter/nf_nat_rule.c                   |   22 ++++----
 net/ipv4/netfilter/nf_nat_sip.c                    |   10 ++--
 net/ipv4/netfilter/nf_nat_standalone.c             |    2 +-
 net/netfilter/nf_conntrack_netlink.c               |    4 +-
 27 files changed, 185 insertions(+), 184 deletions(-)
 create mode 100644 include/linux/netfilter/nf_nat.h

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a1b410c..d81f771 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
 header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
+header-y += nf_nat.h
 header-y += nfnetlink.h
 header-y += nfnetlink_compat.h
 header-y += nfnetlink_conntrack.h
diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h
index 2ea22b0..2f6bbc5 100644
--- a/include/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/linux/netfilter/nf_conntrack_tuple_common.h
@@ -7,6 +7,33 @@ enum ip_conntrack_dir {
 	IP_CT_DIR_MAX
 };
 
+/* The protocol-specific manipulable parts of the tuple: always in
+ * network order
+ */
+union nf_conntrack_man_proto {
+	/* Add other protocols here. */
+	__be16 all;
+
+	struct {
+		__be16 port;
+	} tcp;
+	struct {
+		__be16 port;
+	} udp;
+	struct {
+		__be16 id;
+	} icmp;
+	struct {
+		__be16 port;
+	} dccp;
+	struct {
+		__be16 port;
+	} sctp;
+	struct {
+		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
+	} gre;
+};
+
 #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
 
 #endif /* _NF_CONNTRACK_TUPLE_COMMON_H */
diff --git a/include/linux/netfilter/nf_nat.h b/include/linux/netfilter/nf_nat.h
new file mode 100644
index 0000000..8df2d13
--- /dev/null
+++ b/include/linux/netfilter/nf_nat.h
@@ -0,0 +1,25 @@
+#ifndef _NETFILTER_NF_NAT_H
+#define _NETFILTER_NF_NAT_H
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+
+#define NF_NAT_RANGE_MAP_IPS		1
+#define NF_NAT_RANGE_PROTO_SPECIFIED	2
+#define NF_NAT_RANGE_PROTO_RANDOM	4
+#define NF_NAT_RANGE_PERSISTENT		8
+
+struct nf_nat_ipv4_range {
+	unsigned int			flags;
+	__be32				min_ip;
+	__be32				max_ip;
+	union nf_conntrack_man_proto	min;
+	union nf_conntrack_man_proto	max;
+};
+
+struct nf_nat_ipv4_multi_range_compat {
+	unsigned int			rangesize;
+	struct nf_nat_ipv4_range	range[1];
+};
+
+#endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 7ca6bdd..aea3f82 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -24,32 +24,6 @@
 
 #define NF_CT_TUPLE_L3SIZE	ARRAY_SIZE(((union nf_inet_addr *)NULL)->all)
 
-/* The protocol-specific manipulable parts of the tuple: always in
-   network order! */
-union nf_conntrack_man_proto {
-	/* Add other protocols here. */
-	__be16 all;
-
-	struct {
-		__be16 port;
-	} tcp;
-	struct {
-		__be16 port;
-	} udp;
-	struct {
-		__be16 id;
-	} icmp;
-	struct {
-		__be16 port;
-	} dccp;
-	struct {
-		__be16 port;
-	} sctp;
-	struct {
-		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
-	} gre;
-};
-
 /* The manipulable part of the tuple. */
 struct nf_conntrack_man {
 	union nf_inet_addr u3;
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 0346b00..b4de990 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,24 +1,18 @@
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-#define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16
-
 enum nf_nat_manip_type {
-	IP_NAT_MANIP_SRC,
-	IP_NAT_MANIP_DST
+	NF_NAT_MANIP_SRC,
+	NF_NAT_MANIP_DST
 };
 
 /* SRC manip occurs POST_ROUTING or LOCAL_IN */
 #define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \
 			     (hooknum) != NF_INET_LOCAL_IN)
 
-#define IP_NAT_RANGE_MAP_IPS 1
-#define IP_NAT_RANGE_PROTO_SPECIFIED 2
-#define IP_NAT_RANGE_PROTO_RANDOM 4
-#define IP_NAT_RANGE_PERSISTENT 8
-
 /* NAT sequence number modifications */
 struct nf_nat_seq {
 	/* position of the last TCP sequence number modification (if any) */
@@ -28,26 +22,6 @@ struct nf_nat_seq {
 	int16_t offset_before, offset_after;
 };
 
-/* Single range specification. */
-struct nf_nat_range {
-	/* Set to OR of flags above. */
-	unsigned int flags;
-
-	/* Inclusive: network order. */
-	__be32 min_ip, max_ip;
-
-	/* Inclusive: network order */
-	union nf_conntrack_man_proto min, max;
-};
-
-/* For backwards compat: don't use in modern code. */
-struct nf_nat_multi_range_compat {
-	unsigned int rangesize; /* Must be 1. */
-
-	/* hangs off end. */
-	struct nf_nat_range range[1];
-};
-
 #include <linux/list.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 #include <net/netfilter/nf_conntrack_extend.h>
@@ -76,7 +50,7 @@ struct nf_conn_nat {
 
 /* Set up the info structure to map into this range. */
 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
-				      const struct nf_nat_range *range,
+				      const struct nf_nat_ipv4_range *range,
 				      enum nf_nat_manip_type maniptype);
 
 /* Is this tuple already taken? (not by us)*/
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 3dc7b98..b13d8d1 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -20,7 +20,7 @@ extern int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 static inline int nf_nat_initialized(struct nf_conn *ct,
 				     enum nf_nat_manip_type manip)
 {
-	if (manip == IP_NAT_MANIP_SRC)
+	if (manip == NF_NAT_MANIP_SRC)
 		return ct->status & IPS_SRC_NAT_DONE;
 	else
 		return ct->status & IPS_DST_NAT_DONE;
diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index c951c6f..eaad0ac 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -4,7 +4,7 @@
 #include <net/netfilter/nf_nat.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-struct nf_nat_range;
+struct nf_nat_ipv4_range;
 
 struct nf_nat_protocol {
 	/* Protocol number. */
@@ -28,15 +28,15 @@ struct nf_nat_protocol {
 	   possible.  Per-protocol part of tuple is initialized to the
 	   incoming packet. */
 	void (*unique_tuple)(struct nf_conntrack_tuple *tuple,
-			     const struct nf_nat_range *range,
+			     const struct nf_nat_ipv4_range *range,
 			     enum nf_nat_manip_type maniptype,
 			     const struct nf_conn *ct);
 
 	int (*range_to_nlattr)(struct sk_buff *skb,
-			       const struct nf_nat_range *range);
+			       const struct nf_nat_ipv4_range *range);
 
 	int (*nlattr_to_range)(struct nlattr *tb[],
-			       struct nf_nat_range *range);
+			       struct nf_nat_ipv4_range *range);
 };
 
 /* Protocol registration. */
@@ -59,14 +59,14 @@ extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 				  const union nf_conntrack_man_proto *max);
 
 extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-				      const struct nf_nat_range *range,
+				      const struct nf_nat_ipv4_range *range,
 				      enum nf_nat_manip_type maniptype,
 				      const struct nf_conn *ct,
 				      u_int16_t *rover);
 
 extern int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
-					const struct nf_nat_range *range);
+					const struct nf_nat_ipv4_range *range);
 extern int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-					struct nf_nat_range *range);
+					struct nf_nat_ipv4_range *range);
 
 #endif /*_NF_NAT_PROTO_H*/
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9931152..2f210c7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 /* FIXME: Multiple targets. --RR */
 static int masquerade_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
 	enum ip_conntrack_info ctinfo;
-	struct nf_nat_range newrange;
-	const struct nf_nat_multi_range_compat *mr;
+	struct nf_nat_ipv4_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr;
 	const struct rtable *rt;
 	__be32 newsrc;
 
@@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	nat->masq_index = par->out->ifindex;
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  newsrc, newsrc,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
 }
 
 static int
@@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV4,
 	.target		= masquerade_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= masquerade_tg_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 6cdb298..b5bfbba 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
 static int netmap_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
+	if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_ipv4_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
@@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  new_ip, new_ip,
 		  mr->range[0].min, mr->range[0].max });
 
@@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.name 		= "NETMAP",
 	.family		= NFPROTO_IPV4,
 	.target 	= netmap_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 18a0656..7c0103a 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 /* FIXME: Take multiple ranges --RR */
 static int redirect_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_ipv4_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	}
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  newdst, newdst,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
 }
 
 static struct xt_target redirect_tg_reg __read_mostly = {
 	.name		= "REDIRECT",
 	.family		= NFPROTO_IPV4,
 	.target		= redirect_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= redirect_tg_check,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index d830a47..e9cab93 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -82,14 +82,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
  * that meet the constraints of range. */
 static int
 in_range(const struct nf_conntrack_tuple *tuple,
-	 const struct nf_nat_range *range)
+	 const struct nf_nat_ipv4_range *range)
 {
 	const struct nf_nat_protocol *proto;
 	int ret = 0;
 
 	/* If we are supposed to map IPs, then we must be in the
 	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
 		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
 		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
 			return 0;
@@ -97,8 +97,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
 
 	rcu_read_lock();
 	proto = __nf_nat_proto_find(tuple->dst.protonum);
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
-	    proto->in_range(tuple, IP_NAT_MANIP_SRC,
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
+	    proto->in_range(tuple, NF_NAT_MANIP_SRC,
 			    &range->min, &range->max))
 		ret = 1;
 	rcu_read_unlock();
@@ -123,7 +123,7 @@ static int
 find_appropriate_src(struct net *net, u16 zone,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
-		     const struct nf_nat_range *range)
+		     const struct nf_nat_ipv4_range *range)
 {
 	unsigned int h = hash_by_src(net, zone, tuple);
 	const struct nf_conn_nat *nat;
@@ -157,7 +157,7 @@ find_appropriate_src(struct net *net, u16 zone,
 */
 static void
 find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range *range,
+		    const struct nf_nat_ipv4_range *range,
 		    const struct nf_conn *ct,
 		    enum nf_nat_manip_type maniptype)
 {
@@ -166,10 +166,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 	u_int32_t minip, maxip, j;
 
 	/* No IP mapping?  Do nothing. */
-	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
 		return;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		var_ipp = &tuple->src.u3.ip;
 	else
 		var_ipp = &tuple->dst.u3.ip;
@@ -189,7 +189,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 	minip = ntohl(range->min_ip);
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words((__force u32)tuple->src.u3.ip,
-			 range->flags & IP_NAT_RANGE_PERSISTENT ?
+			 range->flags & NF_NAT_RANGE_PERSISTENT ?
 				0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
 	j = ((u64)j * (maxip - minip + 1)) >> 32;
 	*var_ipp = htonl(minip + j);
@@ -204,7 +204,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
@@ -219,8 +219,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   This is only required for source (ie. NAT/masq) mappings.
 	   So far, we don't do local source mappings, so multiple
 	   manips not an issue.  */
-	if (maniptype == IP_NAT_MANIP_SRC &&
-	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+	if (maniptype == NF_NAT_MANIP_SRC &&
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
 		/* try the original tuple first */
 		if (in_range(orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -247,8 +247,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (proto->in_range(tuple, maniptype, &range->min,
 					    &range->max) &&
 			    (range->min.all == range->max.all ||
@@ -267,7 +267,7 @@ out:
 
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
 	struct net *net = nf_ct_net(ct);
@@ -284,8 +284,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 		}
 	}
 
-	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
-		     maniptype == IP_NAT_MANIP_DST);
+	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
+		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
@@ -306,13 +306,13 @@ nf_nat_setup_info(struct nf_conn *ct,
 		nf_conntrack_alter_reply(ct, &reply);
 
 		/* Non-atomic: we own this at the moment. */
-		if (maniptype == IP_NAT_MANIP_SRC)
+		if (maniptype == NF_NAT_MANIP_SRC)
 			ct->status |= IPS_SRC_NAT;
 		else
 			ct->status |= IPS_DST_NAT;
 	}
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		unsigned int srchash;
 
 		srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -327,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	}
 
 	/* It's done. */
-	if (maniptype == IP_NAT_MANIP_DST)
+	if (maniptype == NF_NAT_MANIP_DST)
 		ct->status |= IPS_DST_NAT_DONE;
 	else
 		ct->status |= IPS_SRC_NAT_DONE;
@@ -361,7 +361,7 @@ manip_pkt(u_int16_t proto,
 
 	iph = (void *)skb->data + iphdroff;
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 		iph->saddr = target->src.u3.ip;
 	} else {
@@ -381,7 +381,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
 	unsigned long statusbit;
 	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
 
-	if (mtype == IP_NAT_MANIP_SRC)
+	if (mtype == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
 	else
 		statusbit = IPS_DST_NAT;
@@ -447,7 +447,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 			return 0;
 	}
 
-	if (manip == IP_NAT_MANIP_SRC)
+	if (manip == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
 	else
 		statusbit = IPS_DST_NAT;
@@ -582,7 +582,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 
 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     const struct nf_conn *ct,
-				     struct nf_nat_range *range)
+				     struct nf_nat_ipv4_range *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
 	const struct nf_nat_protocol *npt;
@@ -608,7 +608,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range *range)
+		    const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
 {
 	struct nlattr *tb[CTA_NAT_MAX+1];
 	int err;
@@ -628,7 +628,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
 		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
 
 	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
 
 	if (!tb[CTA_NAT_PROTO])
 		return 0;
@@ -645,7 +645,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
 			  const struct nlattr *attr)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
 		return -EINVAL;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 790f316..0235ee2 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
@@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip =
 	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
@@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_callforwarding_expect(struct nf_conn *new,
 					 struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip = this->saved_ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ebc5f88..049e8b7 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -430,22 +430,22 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 void nf_nat_follow_master(struct nf_conn *ct,
 			  struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
 EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 4c06003..04a52b9 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
 	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
 	if (exp->dir == IP_CT_DIR_REPLY) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
 
 /* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index f52d41e..a80658c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -25,7 +25,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 {
 	__be16 port;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		port = tuple->src.u.all;
 	else
 		port = tuple->dst.u.all;
@@ -36,7 +36,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
 
 void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-			       const struct nf_nat_range *range,
+			       const struct nf_nat_ipv4_range *range,
 			       enum nf_nat_manip_type maniptype,
 			       const struct nf_conn *ct,
 			       u_int16_t *rover)
@@ -45,15 +45,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 	__be16 *portptr;
 	u_int16_t off;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		portptr = &tuple->src.u.all;
 	else
 		portptr = &tuple->dst.u.all;
 
 	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
+		if (maniptype == NF_NAT_MANIP_DST)
 			return;
 
 		if (ntohs(*portptr) < 1024) {
@@ -74,9 +74,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.all) - min + 1;
 	}
 
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
 		off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
-						 maniptype == IP_NAT_MANIP_SRC
+						 maniptype == NF_NAT_MANIP_SRC
 						 ? tuple->dst.u.all
 						 : tuple->src.u.all);
 	else
@@ -86,7 +86,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		*portptr = htons(min + off % range_size);
 		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
-		if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
 			*rover = off;
 		return;
 	}
@@ -96,7 +96,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
-				 const struct nf_nat_range *range)
+				 const struct nf_nat_ipv4_range *range)
 {
 	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
 	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
@@ -108,16 +108,16 @@ nla_put_failure:
 EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
 
 int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-				 struct nf_nat_range *range)
+				 struct nf_nat_ipv4_range *range)
 {
 	if (tb[CTA_PROTONAT_PORT_MIN]) {
 		range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
 		range->max.all = range->min.tcp.port;
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 	if (tb[CTA_PROTONAT_PORT_MAX]) {
 		range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 17edd75..466d63d 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover;
 
 static void
 dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
 		newport = tuple->src.u.dccp.port;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d177e61..35cd158 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 /* generate unique tuple ... */
 static void
 gre_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (!ct->master)
 		return;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		keyptr = &tuple->src.u.gre.key;
 	else
 		keyptr = &tuple->dst.u.gre.key;
 
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		pr_debug("%p: NATing GRE PPTP\n", ct);
 		min = 1;
 		range_size = 0xffff;
@@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 
 	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
-	if (maniptype != IP_NAT_MANIP_DST)
+	if (maniptype != NF_NAT_MANIP_DST)
 		return true;
 	switch (greh->version) {
 	case GRE_VERSION_1701:
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 5ff91f1..3872755 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -29,7 +29,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 
 static void
 icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -39,7 +39,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
 	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
 		range_size = 0xFFFF;
 
 	for (i = 0; ; ++id) {
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index eac0b82..ec58b34 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -18,7 +18,7 @@ static u_int16_t nf_sctp_port_rover;
 
 static void
 sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -45,7 +45,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct sctphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index ec37036..a7de264 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -22,7 +22,7 @@ static u_int16_t tcp_port_rover;
 
 static void
 tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -54,7 +54,7 @@ tcp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct tcphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 4ba8c2c..527f39c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -21,7 +21,7 @@ static u_int16_t udp_port_rover;
 
 static void
 udp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -46,7 +46,7 @@ udp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct udphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index f45c1a6..aa9dfea 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -20,7 +20,7 @@ static u_int16_t udplite_port_rover;
 
 static void
 udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
-		     const struct nf_nat_range *range,
+		     const struct nf_nat_ipv4_range *range,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
@@ -46,7 +46,7 @@ udplite_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct udphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index 549c5b5..e0afe81 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 }
 
 static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range *range,
+				 const struct nf_nat_ipv4_range *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 733c9ab..d2a9dc3 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_IN);
@@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 			    ctinfo == IP_CT_RELATED_REPLY));
 	NF_CT_ASSERT(par->out != NULL);
 
-	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
+	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
 }
 
 static unsigned int
@@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
-	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
+	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
 }
 
 static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 
 static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -105,13 +105,13 @@ static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
-	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	   per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
 	*/
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	range.flags = 0;
 	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
-		 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
+		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
 
@@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
 static struct xt_target ipt_snat_reg __read_mostly = {
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
@@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 static struct xt_target ipt_dnat_reg __read_mostly = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= ipt_dnat_checkentry,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e40cf78..03d97d4 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
 static void ip_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip = exp->saved_ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 
 	/* Change src to where master sends to, but only if the connection
 	 * actually came from the same source. */
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
 	    ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
-		range.flags = IP_NAT_RANGE_MAP_IPS;
+		range.flags = NF_NAT_RANGE_MAP_IPS;
 		range.min_ip = range.max_ip
 			= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-		nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 	}
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index a6e606e..0827edb 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
 				return ret;
 		} else
 			pr_debug("Already setup manip %s for ct %p\n",
-				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
 		break;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 7dec88a..4d6c7ed 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1087,14 +1087,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
 
 	if (cda[CTA_NAT_DST]) {
 		ret = ctnetlink_parse_nat_setup(ct,
-						IP_NAT_MANIP_DST,
+						NF_NAT_MANIP_DST,
 						cda[CTA_NAT_DST]);
 		if (ret < 0)
 			return ret;
 	}
 	if (cda[CTA_NAT_SRC]) {
 		ret = ctnetlink_parse_nat_setup(ct,
-						IP_NAT_MANIP_SRC,
+						NF_NAT_MANIP_SRC,
 						cda[CTA_NAT_SRC]);
 		if (ret < 0)
 			return ret;
-- 
1.7.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox