* [PATCH net-next-2.6 v4 06/14] l2tp: Update PPP-over-L2TP driver to work over L2TPv3
From: James Chapman @ 2010-04-02 16:18 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
This patch makes changes to the L2TP PPP code for L2TPv3.
The existing code has some assumptions about the L2TP header which are
broken by L2TPv3. Also the sockaddr_pppol2tp structure of the original
code is too small to support the increased size of the L2TPv3 tunnel
and session id, so a new sockaddr_pppol2tpv3 structure is needed. In
the socket calls, the size of this structure is used to tell if the
operation is for L2TPv2 or L2TPv3.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
include/linux/if_pppol2tp.h | 14 +++++
include/linux/if_pppox.h | 9 +++
net/l2tp/l2tp_ppp.c | 120 +++++++++++++++++++++++++++----------------
3 files changed, 97 insertions(+), 46 deletions(-)
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index 1a1fb6e..184bc55 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -35,6 +35,20 @@ struct pppol2tp_addr {
__u16 d_tunnel, d_session; /* For sending outgoing packets */
};
+/* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
+ * bits. So we need a different sockaddr structure.
+ */
+struct pppol2tpv3_addr {
+ pid_t pid; /* pid that owns the fd.
+ * 0 => current */
+ int fd; /* FD of UDP or IP socket to use */
+
+ struct sockaddr_in addr; /* IP address and port to send to */
+
+ __u32 s_tunnel, s_session; /* For matching incoming packets */
+ __u32 d_tunnel, d_session; /* For sending outgoing packets */
+};
+
/* Socket options:
* DEBUG - bitmask of debug message categories
* SENDSEQ - 0 => don't send packets with sequence numbers
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 90b5fae..a6577af 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -72,6 +72,15 @@ struct sockaddr_pppol2tp {
struct pppol2tp_addr pppol2tp;
}__attribute__ ((packed));
+/* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
+ * bits. So we need a different sockaddr structure.
+ */
+struct sockaddr_pppol2tpv3 {
+ sa_family_t sa_family; /* address family, AF_PPPOX */
+ unsigned int sa_protocol; /* protocol identifier */
+ struct pppol2tpv3_addr pppol2tp;
+} __attribute__ ((packed));
+
/*********************************************************************
*
* ioctl interface for defining forwarding of connections
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bee5b14..e5b5312 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -291,17 +291,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
* Transmit handling
***********************************************************************/
-/* Tell how big L2TP headers are for a particular session. This
- * depends on whether sequence numbers are being used.
- */
-static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session)
-{
- if (session->send_seq)
- return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
-
- return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-}
-
/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket. We come here
* when a user application does a sendmsg() on the session socket. L2TP and
* PPP headers must be inserted into the user's data.
@@ -394,7 +383,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
static const u8 ppph[2] = { 0xff, 0x03 };
struct sock *sk = (struct sock *) chan->private;
struct sock *sk_tun;
- int hdr_len;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
struct pppol2tp_session *ps;
@@ -417,9 +405,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
if (tunnel == NULL)
goto abort_put_sess;
- /* What header length is configured for this session? */
- hdr_len = pppol2tp_l2tp_header_len(session);
-
old_headroom = skb_headroom(skb);
if (skb_cow_head(skb, sizeof(ppph)))
goto abort_put_sess_tun;
@@ -432,7 +417,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
skb->data[0] = ppph[0];
skb->data[1] = ppph[1];
- l2tp_xmit_skb(session, skb, hdr_len);
+ l2tp_xmit_skb(session, skb, session->hdr_len);
sock_put(sk_tun);
sock_put(sk);
@@ -615,6 +600,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
{
struct sock *sk = sock->sk;
struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+ struct sockaddr_pppol2tpv3 *sp3 = (struct sockaddr_pppol2tpv3 *) uservaddr;
struct pppox_sock *po = pppox_sk(sk);
struct l2tp_session *session = NULL;
struct l2tp_tunnel *tunnel;
@@ -622,6 +608,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
struct dst_entry *dst;
struct l2tp_session_cfg cfg = { 0, };
int error = 0;
+ u32 tunnel_id, peer_tunnel_id;
+ u32 session_id, peer_session_id;
+ int ver = 2;
+ int fd;
lock_sock(sk);
@@ -639,21 +629,40 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (sk->sk_user_data)
goto end; /* socket is already attached */
- /* Don't bind if s_tunnel is 0 */
+ /* Get params from socket address. Handle L2TPv2 and L2TPv3 */
+ if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
+ fd = sp->pppol2tp.fd;
+ tunnel_id = sp->pppol2tp.s_tunnel;
+ peer_tunnel_id = sp->pppol2tp.d_tunnel;
+ session_id = sp->pppol2tp.s_session;
+ peer_session_id = sp->pppol2tp.d_session;
+ } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
+ ver = 3;
+ fd = sp3->pppol2tp.fd;
+ tunnel_id = sp3->pppol2tp.s_tunnel;
+ peer_tunnel_id = sp3->pppol2tp.d_tunnel;
+ session_id = sp3->pppol2tp.s_session;
+ peer_session_id = sp3->pppol2tp.d_session;
+ } else {
+ error = -EINVAL;
+ goto end; /* bad socket address */
+ }
+
+ /* Don't bind if tunnel_id is 0 */
error = -EINVAL;
- if (sp->pppol2tp.s_tunnel == 0)
+ if (tunnel_id == 0)
goto end;
- /* Special case: create tunnel context if s_session and
- * d_session is 0. Otherwise look up tunnel using supplied
+ /* Special case: create tunnel context if session_id and
+ * peer_session_id is 0. Otherwise look up tunnel using supplied
* tunnel id.
*/
- if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
- error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel);
+ if ((session_id == 0) && (peer_session_id == 0)) {
+ error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel);
if (error < 0)
goto end;
} else {
- tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
+ tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
/* Error if we can't find the tunnel */
error = -ENOENT;
@@ -670,20 +679,21 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Check that this session doesn't already exist */
error = -EEXIST;
- session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session);
+ session = l2tp_session_find(sock_net(sk), tunnel, session_id);
if (session != NULL)
goto end;
- /* Default MTU must allow space for UDP/L2TP/PPP
- * headers.
- */
- cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ /* Default MTU values. */
+ if (cfg.mtu == 0)
+ cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ if (cfg.mru == 0)
+ cfg.mru = cfg.mtu;
cfg.debug = tunnel->debug;
/* Allocate and initialize a new session context. */
session = l2tp_session_create(sizeof(struct pppol2tp_session),
- tunnel, sp->pppol2tp.s_session,
- sp->pppol2tp.d_session, &cfg);
+ tunnel, session_id,
+ peer_session_id, &cfg);
if (session == NULL) {
error = -ENOMEM;
goto end;
@@ -756,8 +766,7 @@ end:
static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
int *usockaddr_len, int peer)
{
- int len = sizeof(struct sockaddr_pppol2tp);
- struct sockaddr_pppol2tp sp;
+ int len = 0;
int error = 0;
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
@@ -783,21 +792,40 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
goto end_put_sess;
}
- memset(&sp, 0, len);
- sp.sa_family = AF_PPPOX;
- sp.sa_protocol = PX_PROTO_OL2TP;
- sp.pppol2tp.fd = tunnel->fd;
- sp.pppol2tp.pid = pls->owner;
- sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
- sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
- sp.pppol2tp.s_session = session->session_id;
- sp.pppol2tp.d_session = session->peer_session_id;
inet = inet_sk(sk);
- sp.pppol2tp.addr.sin_family = AF_INET;
- sp.pppol2tp.addr.sin_port = inet->inet_dport;
- sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
-
- memcpy(uaddr, &sp, len);
+ if (tunnel->version == 2) {
+ struct sockaddr_pppol2tp sp;
+ len = sizeof(sp);
+ memset(&sp, 0, len);
+ sp.sa_family = AF_PPPOX;
+ sp.sa_protocol = PX_PROTO_OL2TP;
+ sp.pppol2tp.fd = tunnel->fd;
+ sp.pppol2tp.pid = pls->owner;
+ sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+ sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+ sp.pppol2tp.s_session = session->session_id;
+ sp.pppol2tp.d_session = session->peer_session_id;
+ sp.pppol2tp.addr.sin_family = AF_INET;
+ sp.pppol2tp.addr.sin_port = inet->inet_dport;
+ sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+ memcpy(uaddr, &sp, len);
+ } else if (tunnel->version == 3) {
+ struct sockaddr_pppol2tpv3 sp;
+ len = sizeof(sp);
+ memset(&sp, 0, len);
+ sp.sa_family = AF_PPPOX;
+ sp.sa_protocol = PX_PROTO_OL2TP;
+ sp.pppol2tp.fd = tunnel->fd;
+ sp.pppol2tp.pid = pls->owner;
+ sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+ sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+ sp.pppol2tp.s_session = session->session_id;
+ sp.pppol2tp.d_session = session->peer_session_id;
+ sp.pppol2tp.addr.sin_family = AF_INET;
+ sp.pppol2tp.addr.sin_port = inet->inet_dport;
+ sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+ memcpy(uaddr, &sp, len);
+ }
*usockaddr_len = len;
^ permalink raw reply related
* [PATCH net-next-2.6 v4 07/14] l2tp: Add L2TPv3 IP encapsulation (no UDP) support
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
This patch adds a new L2TPIP socket family and modifies the core to
handle the case where there is no UDP header in the L2TP
packet. L2TP/IP uses IP protocol 115. Since L2TP/UDP and L2TP/IP
packets differ in layout, the datapath packet handling code needs
changes too. Userspace uses an L2TPIP socket instead of a UDP socket
when IP encapsulation is required.
We can't use raw sockets for this because the semantics of raw sockets
don't lend themselves to the socket-per-tunnel model - we need to
differentiate sockets between the same IP peers by L2TP tunnel ids.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
include/linux/l2tp.h | 38 +++
net/l2tp/Kconfig | 17 +
net/l2tp/Makefile | 1
net/l2tp/l2tp_core.c | 163 ++++++++----
net/l2tp/l2tp_core.h | 7 +
net/l2tp/l2tp_ip.c | 679 ++++++++++++++++++++++++++++++++++++++++++++++++++
net/l2tp/l2tp_ppp.c | 7 -
7 files changed, 850 insertions(+), 62 deletions(-)
create mode 100644 include/linux/l2tp.h
create mode 100644 net/l2tp/l2tp_ip.c
diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h
new file mode 100644
index 0000000..deff7bc
--- /dev/null
+++ b/include/linux/l2tp.h
@@ -0,0 +1,38 @@
+/*
+ * L2TP-over-IP socket for L2TPv3.
+ *
+ * Author: James Chapman <jchapman@katalix.com>
+ */
+
+#ifndef _LINUX_L2TP_H_
+#define _LINUX_L2TP_H_
+
+#include <linux/types.h>
+#ifdef __KERNEL__
+#include <linux/socket.h>
+#include <linux/in.h>
+#endif
+
+#define IPPROTO_L2TP 115
+
+/**
+ * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets
+ * @l2tp_family: address family number AF_L2TPIP.
+ * @l2tp_addr: protocol specific address information
+ * @l2tp_conn_id: connection id of tunnel
+ */
+struct sockaddr_l2tpip {
+ /* The first fields must match struct sockaddr_in */
+ sa_family_t l2tp_family; /* AF_INET */
+ __be16 l2tp_unused; /* INET port number (unused) */
+ struct in_addr l2tp_addr; /* Internet address */
+
+ __u32 l2tp_conn_id; /* Connection ID of tunnel */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[sizeof(struct sockaddr) - sizeof(sa_family_t) -
+ sizeof(__be16) - sizeof(struct in_addr) -
+ sizeof(__u32)];
+};
+
+#endif
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index d60758d..0a11ccf 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -51,3 +51,20 @@ config L2TP_V3
If you are connecting to L2TPv3 equipment, or you want to
tunnel raw ethernet frames using L2TP, say Y here. If
unsure, say N.
+
+config L2TP_IP
+ tristate "L2TP IP encapsulation for L2TPv3"
+ depends on L2TP_V3
+ help
+ Support for L2TP-over-IP socket family.
+
+ The L2TPv3 protocol defines two possible encapsulations for
+ L2TP frames, namely UDP and plain IP (without UDP). This
+ driver provides a new L2TPIP socket family with which
+ userspace L2TPv3 daemons may create L2TP/IP tunnel sockets
+ when UDP encapsulation is not required. When L2TP is carried
+ in IP packets, it used IP protocol number 115, so this port
+ must be enabled in firewalls.
+
+ To compile this driver as a module, choose M here. The module
+ will be called l2tp_ip.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index c91f208..ef28b16 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
# Build l2tp as modules if L2TP is M
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 0eee1a6..1739d04 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -36,8 +36,10 @@
#include <linux/inetdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
+#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
+#include <linux/l2tp.h>
#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/file.h>
@@ -48,6 +50,7 @@
#include <net/ip.h>
#include <net/udp.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
#include <asm/byteorder.h>
#include <asm/atomic.h>
@@ -849,15 +852,21 @@ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
{
+ struct l2tp_tunnel *tunnel = session->tunnel;
char *bufp = buf;
char *optr = bufp;
- u16 flags = L2TP_HDR_VER_3;
- /* Setup L2TP header. */
- *((__be16 *) bufp) = htons(flags);
- bufp += 2;
- *((__be16 *) bufp) = 0;
- bufp += 2;
+ /* Setup L2TP header. The header differs slightly for UDP and
+ * IP encapsulations. For UDP, there is 4 bytes of flags.
+ */
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+ u16 flags = L2TP_HDR_VER_3;
+ *((__be16 *) bufp) = htons(flags);
+ bufp += 2;
+ *((__be16 *) bufp) = 0;
+ bufp += 2;
+ }
+
*((__be32 *) bufp) = htonl(session->peer_session_id);
bufp += 4;
if (session->cookie_len) {
@@ -902,10 +911,11 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
if (session->debug & L2TP_MSG_DATA) {
int i;
- unsigned char *datap = skb->data + sizeof(struct udphdr);
+ int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+ unsigned char *datap = skb->data + uhlen;
printk(KERN_DEBUG "%s: xmit:", session->name);
- for (i = 0; i < (len - sizeof(struct udphdr)); i++) {
+ for (i = 0; i < (len - uhlen); i++) {
printk(" %02X", *datap++);
if (i == 31) {
printk(" ...");
@@ -956,21 +966,23 @@ static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
{
int data_len = skb->len;
- struct sock *sk = session->tunnel->sock;
+ struct l2tp_tunnel *tunnel = session->tunnel;
+ struct sock *sk = tunnel->sock;
struct udphdr *uh;
- unsigned int udp_len;
struct inet_sock *inet;
__wsum csum;
int old_headroom;
int new_headroom;
int headroom;
+ int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+ int udp_len;
/* Check that there's enough headroom in the skb to insert IP,
* UDP and L2TP headers. If not enough, expand it to
* make room. Adjust truesize.
*/
headroom = NET_SKB_PAD + sizeof(struct iphdr) +
- sizeof(struct udphdr) + hdr_len;
+ uhlen + hdr_len;
old_headroom = skb_headroom(skb);
if (skb_cow_head(skb, headroom))
goto abort;
@@ -981,18 +993,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Setup L2TP header */
session->build_header(session, __skb_push(skb, hdr_len));
- udp_len = sizeof(struct udphdr) + hdr_len + data_len;
-
- /* Setup UDP header */
- inet = inet_sk(sk);
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
- uh->source = inet->inet_sport;
- uh->dest = inet->inet_dport;
- uh->len = htons(udp_len);
- uh->check = 0;
+ /* Reset skb netfilter state */
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
@@ -1001,29 +1003,48 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Get routing info from the tunnel socket */
skb_dst_drop(skb);
skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
- l2tp_skb_set_owner_w(skb, sk);
- /* Calculate UDP checksum if configured to do so */
- if (sk->sk_no_check == UDP_CSUM_NOXMIT)
- skb->ip_summed = CHECKSUM_NONE;
- else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
- (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
- skb->ip_summed = CHECKSUM_COMPLETE;
- csum = skb_checksum(skb, 0, udp_len, 0);
- uh->check = csum_tcpudp_magic(inet->inet_saddr,
- inet->inet_daddr,
- udp_len, IPPROTO_UDP, csum);
- if (uh->check == 0)
- uh->check = CSUM_MANGLED_0;
- } else {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
- inet->inet_daddr,
- udp_len, IPPROTO_UDP, 0);
+ switch (tunnel->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ /* Setup UDP header */
+ inet = inet_sk(sk);
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source = inet->inet_sport;
+ uh->dest = inet->inet_dport;
+ udp_len = uhlen + hdr_len + data_len;
+ uh->len = htons(udp_len);
+ uh->check = 0;
+
+ /* Calculate UDP checksum if configured to do so */
+ if (sk->sk_no_check == UDP_CSUM_NOXMIT)
+ skb->ip_summed = CHECKSUM_NONE;
+ else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
+ (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ csum = skb_checksum(skb, 0, udp_len, 0);
+ uh->check = csum_tcpudp_magic(inet->inet_saddr,
+ inet->inet_daddr,
+ udp_len, IPPROTO_UDP, csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+ inet->inet_daddr,
+ udp_len, IPPROTO_UDP, 0);
+ }
+ break;
+
+ case L2TP_ENCAPTYPE_IP:
+ break;
}
+ l2tp_skb_set_owner_w(skb, sk);
+
l2tp_xmit_core(session, skb, data_len);
abort:
@@ -1053,9 +1074,15 @@ void l2tp_tunnel_destruct(struct sock *sk)
/* Close all sessions */
l2tp_tunnel_closeall(tunnel);
- /* No longer an encapsulation socket. See net/ipv4/udp.c */
- (udp_sk(sk))->encap_type = 0;
- (udp_sk(sk))->encap_rcv = NULL;
+ switch (tunnel->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ /* No longer an encapsulation socket. See net/ipv4/udp.c */
+ (udp_sk(sk))->encap_type = 0;
+ (udp_sk(sk))->encap_rcv = NULL;
+ break;
+ case L2TP_ENCAPTYPE_IP:
+ break;
+ }
/* Remove hooks into tunnel socket */
tunnel->sock = NULL;
@@ -1168,6 +1195,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
struct socket *sock = NULL;
struct sock *sk = NULL;
struct l2tp_net *pn;
+ enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
/* Get the tunnel socket from the fd, which was opened by
* the userspace L2TP daemon.
@@ -1182,18 +1210,27 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
sk = sock->sk;
+ if (cfg != NULL)
+ encap = cfg->encap;
+
/* Quick sanity checks */
- err = -EPROTONOSUPPORT;
- if (sk->sk_protocol != IPPROTO_UDP) {
- printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
- tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
- goto err;
- }
- err = -EAFNOSUPPORT;
- if (sock->ops->family != AF_INET) {
- printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n",
- tunnel_id, fd, sock->ops->family, AF_INET);
- goto err;
+ switch (encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ err = -EPROTONOSUPPORT;
+ if (sk->sk_protocol != IPPROTO_UDP) {
+ printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+ tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+ goto err;
+ }
+ break;
+ case L2TP_ENCAPTYPE_IP:
+ err = -EPROTONOSUPPORT;
+ if (sk->sk_protocol != IPPROTO_L2TP) {
+ printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+ tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
+ goto err;
+ }
+ break;
}
/* Check if this socket has already been prepped */
@@ -1223,12 +1260,16 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->l2tp_net = net;
pn = l2tp_pernet(net);
- if (cfg)
+ if (cfg != NULL)
tunnel->debug = cfg->debug;
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
- udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
- udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+ tunnel->encap = encap;
+ if (encap == L2TP_ENCAPTYPE_UDP) {
+ /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+ udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
+ udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+ }
sk->sk_user_data = tunnel;
@@ -1318,7 +1359,9 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
if (session->send_seq)
session->hdr_len += 4;
} else {
- session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset;
+ session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+ if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
+ session->hdr_len += 4;
}
}
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 5c53eb2..d239598 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -48,6 +48,11 @@ enum l2tp_l2spec_type {
L2TP_L2SPECTYPE_DEFAULT,
};
+enum l2tp_encap_type {
+ L2TP_ENCAPTYPE_UDP,
+ L2TP_ENCAPTYPE_IP,
+};
+
struct sk_buff;
struct l2tp_stats {
@@ -155,6 +160,7 @@ struct l2tp_session {
struct l2tp_tunnel_cfg {
int debug; /* bitmask of debug message
* categories */
+ enum l2tp_encap_type encap;
};
struct l2tp_tunnel {
@@ -170,6 +176,7 @@ struct l2tp_tunnel {
char name[20]; /* for logging */
int debug; /* bitmask of debug message
* categories */
+ enum l2tp_encap_type encap;
struct l2tp_stats stats;
struct list_head list; /* Keep a list of all tunnels */
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
new file mode 100644
index 0000000..75bf784
--- /dev/null
+++ b/net/l2tp/l2tp_ip.c
@@ -0,0 +1,679 @@
+/*
+ * L2TPv3 IP encapsulation support
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/icmp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/socket.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+#include "l2tp_core.h"
+
+struct l2tp_ip_sock {
+ /* inet_sock has to be the first member of l2tp_ip_sock */
+ struct inet_sock inet;
+
+ __u32 conn_id;
+ __u32 peer_conn_id;
+
+ __u64 tx_packets;
+ __u64 tx_bytes;
+ __u64 tx_errors;
+ __u64 rx_packets;
+ __u64 rx_bytes;
+ __u64 rx_errors;
+};
+
+static DEFINE_RWLOCK(l2tp_ip_lock);
+static struct hlist_head l2tp_ip_table;
+static struct hlist_head l2tp_ip_bind_table;
+
+static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
+{
+ return (struct l2tp_ip_sock *)sk;
+}
+
+static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+{
+ struct hlist_node *node;
+ struct sock *sk;
+
+ sk_for_each_bound(sk, node, &l2tp_ip_bind_table) {
+ struct inet_sock *inet = inet_sk(sk);
+ struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
+
+ if (l2tp == NULL)
+ continue;
+
+ if ((l2tp->conn_id == tunnel_id) &&
+#ifdef CONFIG_NET_NS
+ (sk->sk_net == net) &&
+#endif
+ !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+ !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ goto found;
+ }
+
+ sk = NULL;
+found:
+ return sk;
+}
+
+static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+{
+ struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
+ if (sk)
+ sock_hold(sk);
+
+ return sk;
+}
+
+/* When processing receive frames, there are two cases to
+ * consider. Data frames consist of a non-zero session-id and an
+ * optional cookie. Control frames consist of a regular L2TP header
+ * preceded by 32-bits of zeros.
+ *
+ * L2TPv3 Session Header Over IP
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Cookie (optional, maximum 64 bits)...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 Control Message Header Over IP
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | (32 bits of zeros) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |T|L|x|x|S|x|x|x|x|x|x|x| Ver | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Control Connection ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ns | Nr |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * All control frames are passed to userspace.
+ */
+static int l2tp_ip_recv(struct sk_buff *skb)
+{
+ struct sock *sk;
+ u32 session_id;
+ u32 tunnel_id;
+ unsigned char *ptr, *optr;
+ struct l2tp_session *session;
+ struct l2tp_tunnel *tunnel = NULL;
+ int length;
+ int offset;
+
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+
+ if (!pskb_may_pull(skb, 4))
+ goto discard;
+
+ session_id = ntohl(*((__be32 *) ptr));
+ ptr += 4;
+
+ /* RFC3931: L2TP/IP packets have the first 4 bytes containing
+ * the session_id. If it is 0, the packet is a L2TP control
+ * frame and the session_id value can be discarded.
+ */
+ if (session_id == 0) {
+ __skb_pull(skb, 4);
+ goto pass_up;
+ }
+
+ /* Ok, this is a data packet. Lookup the session. */
+ session = l2tp_session_find(&init_net, NULL, session_id);
+ if (session == NULL)
+ goto discard;
+
+ tunnel = session->tunnel;
+ if (tunnel == NULL)
+ goto discard;
+
+ /* Trace packet contents, if enabled */
+ if (tunnel->debug & L2TP_MSG_DATA) {
+ length = min(32u, skb->len);
+ if (!pskb_may_pull(skb, length))
+ goto discard;
+
+ printk(KERN_DEBUG "%s: ip recv: ", tunnel->name);
+
+ offset = 0;
+ do {
+ printk(" %02X", ptr[offset]);
+ } while (++offset < length);
+
+ printk("\n");
+ }
+
+ l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+
+ return 0;
+
+pass_up:
+ /* Get the tunnel_id from the L2TP header */
+ if (!pskb_may_pull(skb, 12))
+ goto discard;
+
+ if ((skb->data[0] & 0xc0) != 0xc0)
+ goto discard;
+
+ tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+ tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
+ if (tunnel != NULL)
+ sk = tunnel->sock;
+ else {
+ struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
+
+ read_lock_bh(&l2tp_ip_lock);
+ sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id);
+ read_unlock_bh(&l2tp_ip_lock);
+ }
+
+ if (sk == NULL)
+ goto discard;
+
+ sock_hold(sk);
+
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto discard_put;
+
+ nf_reset(skb);
+
+ return sk_receive_skb(sk, skb, 1);
+
+discard_put:
+ sock_put(sk);
+
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int l2tp_ip_open(struct sock *sk)
+{
+ /* Prevent autobind. We don't have ports. */
+ inet_sk(sk)->inet_num = IPPROTO_L2TP;
+
+ write_lock_bh(&l2tp_ip_lock);
+ sk_add_node(sk, &l2tp_ip_table);
+ write_unlock_bh(&l2tp_ip_lock);
+
+ return 0;
+}
+
+static void l2tp_ip_close(struct sock *sk, long timeout)
+{
+ write_lock_bh(&l2tp_ip_lock);
+ hlist_del_init(&sk->sk_bind_node);
+ hlist_del_init(&sk->sk_node);
+ write_unlock_bh(&l2tp_ip_lock);
+ sk_common_release(sk);
+}
+
+static void l2tp_ip_destroy_sock(struct sock *sk)
+{
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+ kfree_skb(skb);
+
+ sk_refcnt_debug_dec(sk);
+}
+
+static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
+ int ret = -EINVAL;
+ int chk_addr_ret;
+
+ ret = -EADDRINUSE;
+ read_lock_bh(&l2tp_ip_lock);
+ if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id))
+ goto out_in_use;
+
+ read_unlock_bh(&l2tp_ip_lock);
+
+ lock_sock(sk);
+ if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
+ goto out;
+
+ chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr);
+ ret = -EADDRNOTAVAIL;
+ if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
+ chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
+ goto out;
+
+ inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
+ if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
+ inet->inet_saddr = 0; /* Use device */
+ sk_dst_reset(sk);
+
+ l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
+
+ write_lock_bh(&l2tp_ip_lock);
+ sk_add_bind_node(sk, &l2tp_ip_bind_table);
+ sk_del_node_init(sk);
+ write_unlock_bh(&l2tp_ip_lock);
+ ret = 0;
+out:
+ release_sock(sk);
+
+ return ret;
+
+out_in_use:
+ read_unlock_bh(&l2tp_ip_lock);
+
+ return ret;
+}
+
+static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int rc;
+ struct inet_sock *inet = inet_sk(sk);
+ struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
+ struct rtable *rt;
+ __be32 saddr;
+ int oif;
+
+ rc = -EINVAL;
+ if (addr_len < sizeof(*lsa))
+ goto out;
+
+ rc = -EAFNOSUPPORT;
+ if (lsa->l2tp_family != AF_INET)
+ goto out;
+
+ sk_dst_reset(sk);
+
+ oif = sk->sk_bound_dev_if;
+ saddr = inet->inet_saddr;
+
+ rc = -EINVAL;
+ if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
+ goto out;
+
+ rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr,
+ RT_CONN_FLAGS(sk), oif,
+ IPPROTO_L2TP,
+ 0, 0, sk, 1);
+ if (rc) {
+ if (rc == -ENETUNREACH)
+ IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
+ goto out;
+ }
+
+ rc = -ENETUNREACH;
+ if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+ ip_rt_put(rt);
+ goto out;
+ }
+
+ l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
+
+ if (!inet->inet_saddr)
+ inet->inet_saddr = rt->rt_src;
+ if (!inet->inet_rcv_saddr)
+ inet->inet_rcv_saddr = rt->rt_src;
+ inet->inet_daddr = rt->rt_dst;
+ sk->sk_state = TCP_ESTABLISHED;
+ inet->inet_id = jiffies;
+
+ sk_dst_set(sk, &rt->u.dst);
+
+ write_lock_bh(&l2tp_ip_lock);
+ hlist_del_init(&sk->sk_bind_node);
+ sk_add_bind_node(sk, &l2tp_ip_bind_table);
+ write_unlock_bh(&l2tp_ip_lock);
+
+ rc = 0;
+out:
+ return rc;
+}
+
+static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
+ int *uaddr_len, int peer)
+{
+ struct sock *sk = sock->sk;
+ struct inet_sock *inet = inet_sk(sk);
+ struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
+ struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
+
+ memset(lsa, 0, sizeof(*lsa));
+ lsa->l2tp_family = AF_INET;
+ if (peer) {
+ if (!inet->inet_dport)
+ return -ENOTCONN;
+ lsa->l2tp_conn_id = lsk->peer_conn_id;
+ lsa->l2tp_addr.s_addr = inet->inet_daddr;
+ } else {
+ __be32 addr = inet->inet_rcv_saddr;
+ if (!addr)
+ addr = inet->inet_saddr;
+ lsa->l2tp_conn_id = lsk->conn_id;
+ lsa->l2tp_addr.s_addr = addr;
+ }
+ *uaddr_len = sizeof(*lsa);
+ return 0;
+}
+
+static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
+{
+ int rc;
+
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ nf_reset(skb);
+
+ /* Charge it to the socket, dropping if the queue is full. */
+ rc = sock_queue_rcv_skb(sk, skb);
+ if (rc < 0)
+ goto drop;
+
+ return 0;
+
+drop:
+ IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
+ kfree_skb(skb);
+ return -1;
+}
+
+/* Userspace will call sendmsg() on the tunnel socket to send L2TP
+ * control frames.
+ */
+static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
+{
+ struct sk_buff *skb;
+ int rc;
+ struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ struct ip_options *opt = inet->opt;
+ struct rtable *rt = NULL;
+ int connected = 0;
+ __be32 daddr;
+
+ if (sock_flag(sk, SOCK_DEAD))
+ return -ENOTCONN;
+
+ /* Get and verify the address. */
+ if (msg->msg_name) {
+ struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
+ if (msg->msg_namelen < sizeof(*lip))
+ return -EINVAL;
+
+ if (lip->l2tp_family != AF_INET) {
+ if (lip->l2tp_family != AF_UNSPEC)
+ return -EAFNOSUPPORT;
+ }
+
+ daddr = lip->l2tp_addr.s_addr;
+ } else {
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -EDESTADDRREQ;
+
+ daddr = inet->inet_daddr;
+ connected = 1;
+ }
+
+ /* Allocate a socket buffer */
+ rc = -ENOMEM;
+ skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) +
+ 4 + len, 0, GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ /* Reserve space for headers, putting IP header on 4-byte boundary. */
+ skb_reserve(skb, 2 + NET_SKB_PAD);
+ skb_reset_network_header(skb);
+ skb_reserve(skb, sizeof(struct iphdr));
+ skb_reset_transport_header(skb);
+
+ /* Insert 0 session_id */
+ *((__be32 *) skb_put(skb, 4)) = 0;
+
+ /* Copy user data into skb */
+ rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+ if (rc < 0) {
+ kfree_skb(skb);
+ goto error;
+ }
+
+ if (connected)
+ rt = (struct rtable *) __sk_dst_check(sk, 0);
+
+ if (rt == NULL) {
+ /* Use correct destination address if we have options. */
+ if (opt && opt->srr)
+ daddr = opt->faddr;
+
+ {
+ struct flowi fl = { .oif = sk->sk_bound_dev_if,
+ .nl_u = { .ip4_u = {
+ .daddr = daddr,
+ .saddr = inet->inet_saddr,
+ .tos = RT_CONN_FLAGS(sk) } },
+ .proto = sk->sk_protocol,
+ .flags = inet_sk_flowi_flags(sk),
+ .uli_u = { .ports = {
+ .sport = inet->inet_sport,
+ .dport = inet->inet_dport } } };
+
+ /* If this fails, retransmit mechanism of transport layer will
+ * keep trying until route appears or the connection times
+ * itself out.
+ */
+ security_sk_classify_flow(sk, &fl);
+ if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
+ goto no_route;
+ }
+ sk_setup_caps(sk, &rt->u.dst);
+ }
+ skb_dst_set(skb, dst_clone(&rt->u.dst));
+
+ /* Queue the packet to IP for output */
+ rc = ip_queue_xmit(skb, 0);
+
+error:
+ /* Update stats */
+ if (rc >= 0) {
+ lsa->tx_packets++;
+ lsa->tx_bytes += len;
+ rc = len;
+ } else {
+ lsa->tx_errors++;
+ }
+
+ return rc;
+
+no_route:
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+}
+
+static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t len, int noblock, int flags, int *addr_len)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
+ size_t copied = 0;
+ int err = -EOPNOTSUPP;
+ struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+ struct sk_buff *skb;
+
+ if (flags & MSG_OOB)
+ goto out;
+
+ if (addr_len)
+ *addr_len = sizeof(*sin);
+
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
+ if (!skb)
+ goto out;
+
+ copied = skb->len;
+ if (len < copied) {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
+
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto done;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ /* Copy the address. */
+ if (sin) {
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
+ sin->sin_port = 0;
+ memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ }
+ if (inet->cmsg_flags)
+ ip_cmsg_recv(msg, skb);
+ if (flags & MSG_TRUNC)
+ copied = skb->len;
+done:
+ skb_free_datagram(sk, skb);
+out:
+ if (err) {
+ lsk->rx_errors++;
+ return err;
+ }
+
+ lsk->rx_packets++;
+ lsk->rx_bytes += copied;
+
+ return copied;
+}
+
+struct proto l2tp_ip_prot = {
+ .name = "L2TP/IP",
+ .owner = THIS_MODULE,
+ .init = l2tp_ip_open,
+ .close = l2tp_ip_close,
+ .bind = l2tp_ip_bind,
+ .connect = l2tp_ip_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .destroy = l2tp_ip_destroy_sock,
+ .setsockopt = ip_setsockopt,
+ .getsockopt = ip_getsockopt,
+ .sendmsg = l2tp_ip_sendmsg,
+ .recvmsg = l2tp_ip_recvmsg,
+ .backlog_rcv = l2tp_ip_backlog_recv,
+ .hash = inet_hash,
+ .unhash = inet_unhash,
+ .obj_size = sizeof(struct l2tp_ip_sock),
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_ip_setsockopt,
+ .compat_getsockopt = compat_ip_getsockopt,
+#endif
+};
+
+static const struct proto_ops l2tp_ip_ops = {
+ .family = PF_INET,
+ .owner = THIS_MODULE,
+ .release = inet_release,
+ .bind = inet_bind,
+ .connect = inet_dgram_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = l2tp_ip_getname,
+ .poll = datagram_poll,
+ .ioctl = inet_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = inet_shutdown,
+ .setsockopt = sock_common_setsockopt,
+ .getsockopt = sock_common_getsockopt,
+ .sendmsg = inet_sendmsg,
+ .recvmsg = sock_common_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+ .compat_setsockopt = compat_sock_common_setsockopt,
+ .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw l2tp_ip_protosw = {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_L2TP,
+ .prot = &l2tp_ip_prot,
+ .ops = &l2tp_ip_ops,
+ .no_check = 0,
+};
+
+static struct net_protocol l2tp_ip_protocol __read_mostly = {
+ .handler = l2tp_ip_recv,
+};
+
+static int __init l2tp_ip_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "L2TP IP encapsulation support (L2TPv3)\n");
+
+ err = proto_register(&l2tp_ip_prot, 1);
+ if (err != 0)
+ goto out;
+
+ err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
+ if (err)
+ goto out1;
+
+ inet_register_protosw(&l2tp_ip_protosw);
+ return 0;
+
+out1:
+ proto_unregister(&l2tp_ip_prot);
+out:
+ return err;
+}
+
+static void __exit l2tp_ip_exit(void)
+{
+ inet_unregister_protosw(&l2tp_ip_protosw);
+ inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
+ proto_unregister(&l2tp_ip_prot);
+}
+
+module_init(l2tp_ip_init);
+module_exit(l2tp_ip_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP over IP");
+MODULE_VERSION("1.0");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e5b5312..63fc62b 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -305,6 +305,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
struct l2tp_session *session;
struct l2tp_tunnel *tunnel;
struct pppol2tp_session *ps;
+ int uhlen;
error = -ENOTCONN;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -321,10 +322,12 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
if (tunnel == NULL)
goto error_put_sess;
+ uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+
/* Allocate a socket buffer */
error = -ENOMEM;
skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
- sizeof(struct udphdr) + session->hdr_len +
+ uhlen + session->hdr_len +
sizeof(ppph) + total_len,
0, GFP_KERNEL);
if (!skb)
@@ -335,7 +338,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
skb_reset_network_header(skb);
skb_reserve(skb, sizeof(struct iphdr));
skb_reset_transport_header(skb);
- skb_reserve(skb, sizeof(struct udphdr));
+ skb_reserve(skb, uhlen);
/* Add PPP header */
skb->data[0] = ppph[0];
^ permalink raw reply related
* [PATCH net-next-2.6 v4 05/14] l2tp: Add L2TPv3 protocol support
From: James Chapman @ 2010-04-02 16:18 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
The L2TPv3 protocol changes the layout of the L2TP packet
header. Tunnel and session ids change from 16-bit to 32-bit values,
data sequence numbers change from 16-bit to 24-bit values and PPP-specific
fields are moved into protocol-specific subheaders.
Although this patch introduces L2TPv3 protocol support, there are no
userspace interfaces to create L2TPv3 sessions yet.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
net/l2tp/Kconfig | 25 ++
net/l2tp/l2tp_core.c | 532 +++++++++++++++++++++++++++++++++++++-------------
net/l2tp/l2tp_core.h | 54 ++++-
net/l2tp/l2tp_ppp.c | 21 ++
4 files changed, 484 insertions(+), 148 deletions(-)
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index ec88c5c..d60758d 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -19,6 +19,10 @@ menuconfig L2TP
connections. L2TP is also used as a VPN protocol, popular
with home workers to connect to their offices.
+ L2TPv3 allows other protocols as well as PPP to be carried
+ over L2TP tunnels. L2TPv3 is defined in RFC 3931
+ <http://www.ietf.org/rfc/rfc3931.txt>.
+
The kernel component handles only L2TP data packets: a
userland daemon handles L2TP the control protocol (tunnel
and session setup). One such daemon is OpenL2TP
@@ -26,3 +30,24 @@ menuconfig L2TP
If you don't need L2TP, say N. To compile all L2TP code as
modules, choose M here.
+
+config L2TP_V3
+ bool "L2TPv3 support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && L2TP
+ help
+ Layer Two Tunneling Protocol Version 3
+
+ From RFC 3931 <http://www.ietf.org/rfc/rfc3931.txt>.
+
+ The Layer Two Tunneling Protocol (L2TP) provides a dynamic
+ mechanism for tunneling Layer 2 (L2) "circuits" across a
+ packet-oriented data network (e.g., over IP). L2TP, as
+ originally defined in RFC 2661, is a standard method for
+ tunneling Point-to-Point Protocol (PPP) [RFC1661] sessions.
+ L2TP has since been adopted for tunneling a number of other
+ L2 protocols, including ATM, Frame Relay, HDLC and even raw
+ ethernet frames.
+
+ If you are connecting to L2TPv3 equipment, or you want to
+ tunnel raw ethernet frames using L2TP, say Y here. If
+ unsure, say N.
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 4b6da36..0eee1a6 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -65,6 +65,7 @@
#define L2TP_HDR_VER_MASK 0x000F
#define L2TP_HDR_VER_2 0x0002
+#define L2TP_HDR_VER_3 0x0003
/* L2TPv3 default L2-specific sublayer */
#define L2TP_SLFLAG_S 0x40000000
@@ -85,7 +86,7 @@
/* Private data stored for received packets in the skb.
*/
struct l2tp_skb_cb {
- u16 ns;
+ u32 ns;
u16 has_seq;
u16 length;
unsigned long expires;
@@ -101,6 +102,8 @@ static unsigned int l2tp_net_id;
struct l2tp_net {
struct list_head l2tp_tunnel_list;
rwlock_t l2tp_tunnel_list_lock;
+ struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
+ rwlock_t l2tp_session_hlist_lock;
};
static inline struct l2tp_net *l2tp_pernet(struct net *net)
@@ -110,6 +113,40 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
return net_generic(net, l2tp_net_id);
}
+/* Session hash global list for L2TPv3.
+ * The session_id SHOULD be random according to RFC3931, but several
+ * L2TP implementations use incrementing session_ids. So we do a real
+ * hash on the session_id, rather than a simple bitmask.
+ */
+static inline struct hlist_head *
+l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
+{
+ return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
+
+}
+
+/* Lookup a session by id in the global session list
+ */
+static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct hlist_head *session_list =
+ l2tp_session_id_hash_2(pn, session_id);
+ struct l2tp_session *session;
+ struct hlist_node *walk;
+
+ read_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_for_each_entry(session, walk, session_list, global_hlist) {
+ if (session->session_id == session_id) {
+ read_unlock_bh(&pn->l2tp_session_hlist_lock);
+ return session;
+ }
+ }
+ read_unlock_bh(&pn->l2tp_session_hlist_lock);
+
+ return NULL;
+}
+
/* Session hash list.
* The session_id SHOULD be random according to RFC2661, but several
* L2TP implementations (Cisco and Microsoft) use incrementing
@@ -124,13 +161,20 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
/* Lookup a session by id
*/
-struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id)
+struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
{
- struct hlist_head *session_list =
- l2tp_session_id_hash(tunnel, session_id);
+ struct hlist_head *session_list;
struct l2tp_session *session;
struct hlist_node *walk;
+ /* In L2TPv3, session_ids are unique over all tunnels and we
+ * sometimes need to look them up before we know the
+ * tunnel.
+ */
+ if (tunnel == NULL)
+ return l2tp_session_find_2(net, session_id);
+
+ session_list = l2tp_session_id_hash(tunnel, session_id);
read_lock_bh(&tunnel->hlist_lock);
hlist_for_each_entry(session, walk, session_list, hlist) {
if (session->session_id == session_id) {
@@ -218,7 +262,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
{
struct sk_buff *skbp;
struct sk_buff *tmp;
- u16 ns = L2TP_SKB_CB(skb)->ns;
+ u32 ns = L2TP_SKB_CB(skb)->ns;
spin_lock_bh(&session->reorder_q.lock);
skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
@@ -259,6 +303,11 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
if (L2TP_SKB_CB(skb)->has_seq) {
/* Bump our Nr */
session->nr++;
+ if (tunnel->version == L2TP_HDR_VER_2)
+ session->nr &= 0xffff;
+ else
+ session->nr &= 0xffffff;
+
PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
"%s: updated nr to %hu\n", session->name, session->nr);
}
@@ -291,8 +340,8 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
session->stats.rx_seq_discards++;
session->stats.rx_errors++;
PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
- "%s: oos pkt %hu len %d discarded (too old), "
- "waiting for %hu, reorder_q_len=%d\n",
+ "%s: oos pkt %u len %d discarded (too old), "
+ "waiting for %u, reorder_q_len=%d\n",
session->name, L2TP_SKB_CB(skb)->ns,
L2TP_SKB_CB(skb)->length, session->nr,
skb_queue_len(&session->reorder_q));
@@ -306,8 +355,8 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
if (L2TP_SKB_CB(skb)->has_seq) {
if (L2TP_SKB_CB(skb)->ns != session->nr) {
PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
- "%s: holding oos pkt %hu len %d, "
- "waiting for %hu, reorder_q_len=%d\n",
+ "%s: holding oos pkt %u len %d, "
+ "waiting for %u, reorder_q_len=%d\n",
session->name, L2TP_SKB_CB(skb)->ns,
L2TP_SKB_CB(skb)->length, session->nr,
skb_queue_len(&session->reorder_q));
@@ -352,100 +401,73 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
return __skb_checksum_complete(skb);
}
-/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded. All such packets are passed up to userspace to deal with.
+/* Do receive processing of L2TP data frames. We handle both L2TPv2
+ * and L2TPv3 data frames here.
+ *
+ * L2TPv2 Data Message Header
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |T|L|x|x|S|x|O|P|x|x|x|x| Ver | Length (opt) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Tunnel ID | Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ns (opt) | Nr (opt) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Offset Size (opt) | Offset pad... (opt)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Data frames are marked by T=0. All other fields are the same as
+ * those in L2TP control frames.
+ *
+ * L2TPv3 Data Message Header
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | L2TP Session Header |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | L2-Specific Sublayer |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Tunnel Payload ...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 Session Header Over IP
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Cookie (optional, maximum 64 bits)...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 L2-Specific Sublayer Format
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |x|S|x|x|x|x|x|x| Sequence Number |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Cookie value, sublayer format and offset (pad) are negotiated with
+ * the peer when the session is set up. Unlike L2TPv2, we do not need
+ * to parse the packet header to determine if optional fields are
+ * present.
+ *
+ * Caller must already have parsed the frame and determined that it is
+ * a data (not control) frame before coming here. Fields up to the
+ * session-id have already been parsed and ptr points to the data
+ * after the session-id.
*/
-int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
- int (*payload_hook)(struct sk_buff *skb))
+void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
+ unsigned char *ptr, unsigned char *optr, u16 hdrflags,
+ int length, int (*payload_hook)(struct sk_buff *skb))
{
- struct l2tp_session *session = NULL;
- unsigned char *ptr, *optr;
- u16 hdrflags;
- u32 tunnel_id, session_id;
- int length;
+ struct l2tp_tunnel *tunnel = session->tunnel;
int offset;
- u16 version;
- u16 ns, nr;
-
- if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
- goto discard_bad_csum;
-
- /* UDP always verifies the packet length. */
- __skb_pull(skb, sizeof(struct udphdr));
-
- /* Short packet? */
- if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
- PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
- "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
- goto error;
- }
-
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
- /* Trace packet contents, if enabled */
- if (tunnel->debug & L2TP_MSG_DATA) {
- length = min(32u, skb->len);
- if (!pskb_may_pull(skb, length))
- goto error;
-
- printk(KERN_DEBUG "%s: recv: ", tunnel->name);
-
- offset = 0;
- do {
- printk(" %02X", ptr[offset]);
- } while (++offset < length);
-
- printk("\n");
- }
-
- /* Get L2TP header flags */
- hdrflags = ntohs(*(__be16 *)ptr);
-
- /* Check protocol version */
- version = hdrflags & L2TP_HDR_VER_MASK;
- if (version != tunnel->version) {
- PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
- "%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
- goto error;
- }
-
- /* Get length of L2TP packet */
- length = skb->len;
-
- /* If type is control packet, it is handled by userspace. */
- if (hdrflags & L2TP_HDRFLAG_T) {
- PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
- "%s: recv control packet, len=%d\n", tunnel->name, length);
- goto error;
- }
-
- /* Skip flags */
- ptr += 2;
-
- /* If length is present, skip it */
- if (hdrflags & L2TP_HDRFLAG_L)
- ptr += 2;
-
- /* Extract tunnel and session ID */
- tunnel_id = ntohs(*(__be16 *) ptr);
- ptr += 2;
- session_id = ntohs(*(__be16 *) ptr);
- ptr += 2;
-
- /* Find the session context */
- session = l2tp_session_find(tunnel, session_id);
- if (!session) {
- /* Not found? Pass to userspace to deal with */
- PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
- "%s: no session found (%hu/%hu). Passing up.\n",
- tunnel->name, tunnel_id, session_id);
- goto error;
- }
+ u32 ns, nr;
/* The ref count is increased since we now hold a pointer to
* the session. Take care to decrement the refcnt when exiting
@@ -455,6 +477,18 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
if (session->ref)
(*session->ref)(session);
+ /* Parse and check optional cookie */
+ if (session->peer_cookie_len > 0) {
+ if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
+ PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+ "%s: cookie mismatch (%u/%u). Discarding.\n",
+ tunnel->name, tunnel->tunnel_id, session->session_id);
+ session->stats.rx_cookie_discards++;
+ goto discard;
+ }
+ ptr += session->peer_cookie_len;
+ }
+
/* Handle the optional sequence numbers. Sequence numbers are
* in different places for L2TPv2 and L2TPv3.
*
@@ -464,21 +498,40 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
*/
ns = nr = 0;
L2TP_SKB_CB(skb)->has_seq = 0;
- if (hdrflags & L2TP_HDRFLAG_S) {
- ns = (u16) ntohs(*(__be16 *) ptr);
- ptr += 2;
- nr = ntohs(*(__be16 *) ptr);
- ptr += 2;
+ if (tunnel->version == L2TP_HDR_VER_2) {
+ if (hdrflags & L2TP_HDRFLAG_S) {
+ ns = ntohs(*(__be16 *) ptr);
+ ptr += 2;
+ nr = ntohs(*(__be16 *) ptr);
+ ptr += 2;
- /* Store L2TP info in the skb */
- L2TP_SKB_CB(skb)->ns = ns;
- L2TP_SKB_CB(skb)->has_seq = 1;
+ /* Store L2TP info in the skb */
+ L2TP_SKB_CB(skb)->ns = ns;
+ L2TP_SKB_CB(skb)->has_seq = 1;
- PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
- "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
- session->name, ns, nr, session->nr);
+ PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+ "%s: recv data ns=%u, nr=%u, session nr=%u\n",
+ session->name, ns, nr, session->nr);
+ }
+ } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
+ u32 l2h = ntohl(*(__be32 *) ptr);
+
+ if (l2h & 0x40000000) {
+ ns = l2h & 0x00ffffff;
+
+ /* Store L2TP info in the skb */
+ L2TP_SKB_CB(skb)->ns = ns;
+ L2TP_SKB_CB(skb)->has_seq = 1;
+
+ PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+ "%s: recv data ns=%u, session nr=%u\n",
+ session->name, ns, session->nr);
+ }
}
+ /* Advance past L2-specific header, if present */
+ ptr += session->l2specific_len;
+
if (L2TP_SKB_CB(skb)->has_seq) {
/* Received a packet with sequence numbers. If we're the LNS,
* check if we sre sending sequence numbers and if not,
@@ -489,6 +542,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
"%s: requested to enable seq numbers by LNS\n",
session->name);
session->send_seq = -1;
+ l2tp_session_set_header_len(session, tunnel->version);
}
} else {
/* No sequence numbers.
@@ -512,6 +566,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
"%s: requested to disable seq numbers by LNS\n",
session->name);
session->send_seq = 0;
+ l2tp_session_set_header_len(session, tunnel->version);
} else if (session->send_seq) {
PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
"%s: recv data has no seq numbers when required. "
@@ -521,11 +576,19 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
}
- /* If offset bit set, skip it. */
- if (hdrflags & L2TP_HDRFLAG_O) {
- offset = ntohs(*(__be16 *)ptr);
- ptr += 2 + offset;
- }
+ /* Session data offset is handled differently for L2TPv2 and
+ * L2TPv3. For L2TPv2, there is an optional 16-bit value in
+ * the header. For L2TPv3, the offset is negotiated using AVPs
+ * in the session setup control protocol.
+ */
+ if (tunnel->version == L2TP_HDR_VER_2) {
+ /* If offset bit set, skip it. */
+ if (hdrflags & L2TP_HDRFLAG_O) {
+ offset = ntohs(*(__be16 *)ptr);
+ ptr += 2 + offset;
+ }
+ } else
+ ptr += session->offset;
offset = ptr - optr;
if (!pskb_may_pull(skb, offset))
@@ -564,8 +627,8 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
if (L2TP_SKB_CB(skb)->ns != session->nr) {
session->stats.rx_seq_discards++;
PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
- "%s: oos pkt %hu len %d discarded, "
- "waiting for %hu, reorder_q_len=%d\n",
+ "%s: oos pkt %u len %d discarded, "
+ "waiting for %u, reorder_q_len=%d\n",
session->name, L2TP_SKB_CB(skb)->ns,
L2TP_SKB_CB(skb)->length, session->nr,
skb_queue_len(&session->reorder_q));
@@ -586,7 +649,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
l2tp_session_dec_refcount(session);
- return 0;
+ return;
discard:
session->stats.rx_errors++;
@@ -596,6 +659,111 @@ discard:
(*session->deref)(session);
l2tp_session_dec_refcount(session);
+}
+EXPORT_SYMBOL(l2tp_recv_common);
+
+/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
+ * here. The skb is not on a list when we get here.
+ * Returns 0 if the packet was a data packet and was successfully passed on.
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded. All such packets are passed up to userspace to deal with.
+ */
+int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
+ int (*payload_hook)(struct sk_buff *skb))
+{
+ struct l2tp_session *session = NULL;
+ unsigned char *ptr, *optr;
+ u16 hdrflags;
+ u32 tunnel_id, session_id;
+ int offset;
+ u16 version;
+ int length;
+
+ if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
+ goto discard_bad_csum;
+
+ /* UDP always verifies the packet length. */
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Short packet? */
+ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+ PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+ "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+ goto error;
+ }
+
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+
+ /* Trace packet contents, if enabled */
+ if (tunnel->debug & L2TP_MSG_DATA) {
+ length = min(32u, skb->len);
+ if (!pskb_may_pull(skb, length))
+ goto error;
+
+ printk(KERN_DEBUG "%s: recv: ", tunnel->name);
+
+ offset = 0;
+ do {
+ printk(" %02X", ptr[offset]);
+ } while (++offset < length);
+
+ printk("\n");
+ }
+
+ /* Get L2TP header flags */
+ hdrflags = ntohs(*(__be16 *) ptr);
+
+ /* Check protocol version */
+ version = hdrflags & L2TP_HDR_VER_MASK;
+ if (version != tunnel->version) {
+ PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+ "%s: recv protocol version mismatch: got %d expected %d\n",
+ tunnel->name, version, tunnel->version);
+ goto error;
+ }
+
+ /* Get length of L2TP packet */
+ length = skb->len;
+
+ /* If type is control packet, it is handled by userspace. */
+ if (hdrflags & L2TP_HDRFLAG_T) {
+ PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
+ "%s: recv control packet, len=%d\n", tunnel->name, length);
+ goto error;
+ }
+
+ /* Skip flags */
+ ptr += 2;
+
+ if (tunnel->version == L2TP_HDR_VER_2) {
+ /* If length is present, skip it */
+ if (hdrflags & L2TP_HDRFLAG_L)
+ ptr += 2;
+
+ /* Extract tunnel and session ID */
+ tunnel_id = ntohs(*(__be16 *) ptr);
+ ptr += 2;
+ session_id = ntohs(*(__be16 *) ptr);
+ ptr += 2;
+ } else {
+ ptr += 2; /* skip reserved bits */
+ tunnel_id = tunnel->tunnel_id;
+ session_id = ntohl(*(__be32 *) ptr);
+ ptr += 4;
+ }
+
+ /* Find the session context */
+ session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+ if (!session) {
+ /* Not found? Pass to userspace to deal with */
+ PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+ "%s: no session found (%u/%u). Passing up.\n",
+ tunnel->name, tunnel_id, session_id);
+ goto error;
+ }
+
+ l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
return 0;
@@ -651,11 +819,11 @@ EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
/* Build an L2TP header for the session into the buffer provided.
*/
-static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel,
- struct l2tp_session *session,
- void *buf)
+static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
{
+ struct l2tp_tunnel *tunnel = session->tunnel;
__be16 *bufp = buf;
+ __be16 *optr = buf;
u16 flags = L2TP_HDR_VER_2;
u32 tunnel_id = tunnel->peer_tunnel_id;
u32 session_id = session->peer_session_id;
@@ -671,19 +839,51 @@ static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel,
*bufp++ = htons(session->ns);
*bufp++ = 0;
session->ns++;
+ session->ns &= 0xffff;
PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
- "%s: updated ns to %hu\n", session->name, session->ns);
+ "%s: updated ns to %u\n", session->name, session->ns);
}
+
+ return bufp - optr;
}
-void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf)
+static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
{
- struct l2tp_tunnel *tunnel = session->tunnel;
+ char *bufp = buf;
+ char *optr = bufp;
+ u16 flags = L2TP_HDR_VER_3;
+
+ /* Setup L2TP header. */
+ *((__be16 *) bufp) = htons(flags);
+ bufp += 2;
+ *((__be16 *) bufp) = 0;
+ bufp += 2;
+ *((__be32 *) bufp) = htonl(session->peer_session_id);
+ bufp += 4;
+ if (session->cookie_len) {
+ memcpy(bufp, &session->cookie[0], session->cookie_len);
+ bufp += session->cookie_len;
+ }
+ if (session->l2specific_len) {
+ if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
+ u32 l2h = 0;
+ if (session->send_seq) {
+ l2h = 0x40000000 | session->ns;
+ session->ns++;
+ session->ns &= 0xffffff;
+ PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+ "%s: updated ns to %u\n", session->name, session->ns);
+ }
+
+ *((__be32 *) bufp) = htonl(l2h);
+ }
+ bufp += session->l2specific_len;
+ }
+ if (session->offset)
+ bufp += session->offset;
- BUG_ON(tunnel->version != L2TP_HDR_VER_2);
- l2tp_build_l2tpv2_header(tunnel, session, buf);
+ return bufp - optr;
}
-EXPORT_SYMBOL_GPL(l2tp_build_l2tp_header);
int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
{
@@ -694,7 +894,7 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
/* Debug */
if (session->send_seq)
PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
- "%s: send %Zd bytes, ns=%hu\n", session->name,
+ "%s: send %Zd bytes, ns=%u\n", session->name,
data_len, session->ns - 1);
else
PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
@@ -780,7 +980,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
skb->truesize += new_headroom - old_headroom;
/* Setup L2TP header */
- l2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
+ session->build_header(session, __skb_push(skb, hdr_len));
udp_len = sizeof(struct udphdr) + hdr_len + data_len;
/* Setup UDP header */
@@ -791,7 +991,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
uh->source = inet->inet_sport;
uh->dest = inet->inet_dport;
uh->len = htons(udp_len);
-
uh->check = 0;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -911,6 +1110,14 @@ again:
write_unlock_bh(&tunnel->hlist_lock);
+ if (tunnel->version != L2TP_HDR_VER_2) {
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+ write_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_del_init(&session->global_hlist);
+ write_unlock_bh(&pn->l2tp_session_hlist_lock);
+ }
+
if (session->session_close != NULL)
(*session->session_close)(session);
@@ -997,9 +1204,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
goto err;
}
- if (version != L2TP_HDR_VER_2)
- goto err;
-
tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
if (tunnel == NULL) {
err = -ENOMEM;
@@ -1077,6 +1281,15 @@ void l2tp_session_free(struct l2tp_session *session)
hlist_del_init(&session->hlist);
write_unlock_bh(&tunnel->hlist_lock);
+ /* Unlink from the global hash if not L2TPv2 */
+ if (tunnel->version != L2TP_HDR_VER_2) {
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+ write_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_del_init(&session->global_hlist);
+ write_unlock_bh(&pn->l2tp_session_hlist_lock);
+ }
+
if (session->session_id != 0)
atomic_dec(&l2tp_session_count);
@@ -1095,6 +1308,22 @@ void l2tp_session_free(struct l2tp_session *session)
}
EXPORT_SYMBOL_GPL(l2tp_session_free);
+/* We come here whenever a session's send_seq, cookie_len or
+ * l2specific_len parameters are set.
+ */
+void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+{
+ if (version == L2TP_HDR_VER_2) {
+ session->hdr_len = 6;
+ if (session->send_seq)
+ session->hdr_len += 4;
+ } else {
+ session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset;
+ }
+
+}
+EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
+
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
@@ -1106,6 +1335,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
session->session_id = session_id;
session->peer_session_id = peer_session_id;
+ session->nr = 1;
sprintf(&session->name[0], "sess %u/%u",
tunnel->tunnel_id, session->session_id);
@@ -1113,20 +1343,36 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
skb_queue_head_init(&session->reorder_q);
INIT_HLIST_NODE(&session->hlist);
+ INIT_HLIST_NODE(&session->global_hlist);
/* Inherit debug options from tunnel */
session->debug = tunnel->debug;
if (cfg) {
+ session->pwtype = cfg->pw_type;
session->debug = cfg->debug;
- session->hdr_len = cfg->hdr_len;
session->mtu = cfg->mtu;
session->mru = cfg->mru;
session->send_seq = cfg->send_seq;
session->recv_seq = cfg->recv_seq;
session->lns_mode = cfg->lns_mode;
+ session->reorder_timeout = cfg->reorder_timeout;
+ session->offset = cfg->offset;
+ session->l2specific_type = cfg->l2specific_type;
+ session->l2specific_len = cfg->l2specific_len;
+ session->cookie_len = cfg->cookie_len;
+ memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
+ session->peer_cookie_len = cfg->peer_cookie_len;
+ memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
}
+ if (tunnel->version == L2TP_HDR_VER_2)
+ session->build_header = l2tp_build_l2tpv2_header;
+ else
+ session->build_header = l2tp_build_l2tpv3_header;
+
+ l2tp_session_set_header_len(session, tunnel->version);
+
/* Bump the reference count. The session context is deleted
* only when this drops to zero.
*/
@@ -1142,6 +1388,16 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
l2tp_session_id_hash(tunnel, session_id));
write_unlock_bh(&tunnel->hlist_lock);
+ /* And to the global session list if L2TPv3 */
+ if (tunnel->version != L2TP_HDR_VER_2) {
+ struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+ write_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_add_head(&session->global_hlist,
+ l2tp_session_id_hash_2(pn, session_id));
+ write_unlock_bh(&pn->l2tp_session_hlist_lock);
+ }
+
/* Ignore management session in session count value */
if (session->session_id != 0)
atomic_inc(&l2tp_session_count);
@@ -1159,6 +1415,7 @@ static __net_init int l2tp_init_net(struct net *net)
{
struct l2tp_net *pn;
int err;
+ int hash;
pn = kzalloc(sizeof(*pn), GFP_KERNEL);
if (!pn)
@@ -1167,6 +1424,11 @@ static __net_init int l2tp_init_net(struct net *net)
INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
rwlock_init(&pn->l2tp_tunnel_list_lock);
+ for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+ INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
+
+ rwlock_init(&pn->l2tp_session_hlist_lock);
+
err = net_assign_generic(net, l2tp_net_id, pn);
if (err)
goto out;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2efe1a3..5c53eb2 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -15,9 +15,14 @@
#define L2TP_TUNNEL_MAGIC 0x42114DDA
#define L2TP_SESSION_MAGIC 0x0C04EB7D
+/* Per tunnel, session hash table size */
#define L2TP_HASH_BITS 4
#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS)
+/* System-wide, session hash table size */
+#define L2TP_HASH_BITS_2 8
+#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
+
/* Debug message categories for the DEBUG socket option */
enum {
L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if
@@ -28,6 +33,21 @@ enum {
L2TP_MSG_DATA = (1 << 3), /* data packets */
};
+enum l2tp_pwtype {
+ L2TP_PWTYPE_NONE = 0x0000,
+ L2TP_PWTYPE_ETH_VLAN = 0x0004,
+ L2TP_PWTYPE_ETH = 0x0005,
+ L2TP_PWTYPE_PPP = 0x0007,
+ L2TP_PWTYPE_PPP_AC = 0x0008,
+ L2TP_PWTYPE_IP = 0x000b,
+ __L2TP_PWTYPE_MAX
+};
+
+enum l2tp_l2spec_type {
+ L2TP_L2SPECTYPE_NONE,
+ L2TP_L2SPECTYPE_DEFAULT,
+};
+
struct sk_buff;
struct l2tp_stats {
@@ -39,6 +59,7 @@ struct l2tp_stats {
u64 rx_seq_discards;
u64 rx_oos_packets;
u64 rx_errors;
+ u64 rx_cookie_discards;
};
struct l2tp_tunnel;
@@ -47,6 +68,7 @@ struct l2tp_tunnel;
* packets and transmit outgoing ones.
*/
struct l2tp_session_cfg {
+ enum l2tp_pwtype pw_type;
unsigned data_seq:2; /* data sequencing level
* 0 => none, 1 => IP only,
* 2 => all
@@ -60,12 +82,17 @@ struct l2tp_session_cfg {
* control of LNS. */
int debug; /* bitmask of debug message
* categories */
- int offset; /* offset to payload */
+ u16 offset; /* offset to payload */
+ u16 l2specific_len; /* Layer 2 specific length */
+ u16 l2specific_type; /* Layer 2 specific type */
+ u8 cookie[8]; /* optional cookie */
+ int cookie_len; /* 0, 4 or 8 bytes */
+ u8 peer_cookie[8]; /* peer's cookie */
+ int peer_cookie_len; /* 0, 4 or 8 bytes */
int reorder_timeout; /* configured reorder timeout
* (in jiffies) */
int mtu;
int mru;
- int hdr_len;
};
struct l2tp_session {
@@ -76,8 +103,17 @@ struct l2tp_session {
* context */
u32 session_id;
u32 peer_session_id;
- u16 nr; /* session NR state (receive) */
- u16 ns; /* session NR state (send) */
+ u8 cookie[8];
+ int cookie_len;
+ u8 peer_cookie[8];
+ int peer_cookie_len;
+ u16 offset; /* offset from end of L2TP header
+ to beginning of data */
+ u16 l2specific_len;
+ u16 l2specific_type;
+ u16 hdr_len;
+ u32 nr; /* session NR state (receive) */
+ u32 ns; /* session NR state (send) */
struct sk_buff_head reorder_q; /* receive reorder queue */
struct hlist_node hlist; /* Hash list node */
atomic_t ref_count;
@@ -100,9 +136,11 @@ struct l2tp_session {
* (in jiffies) */
int mtu;
int mru;
- int hdr_len;
+ enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
+ struct hlist_node global_hlist; /* Global hash list node */
+ int (*build_header)(struct l2tp_session *session, void *buf);
void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
void (*session_close)(struct l2tp_session *session);
void (*ref)(struct l2tp_session *session);
@@ -132,7 +170,6 @@ struct l2tp_tunnel {
char name[20]; /* for logging */
int debug; /* bitmask of debug message
* categories */
- int hdr_len;
struct l2tp_stats stats;
struct list_head list; /* Keep a list of all tunnels */
@@ -178,7 +215,7 @@ out:
return tunnel;
}
-extern struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id);
+extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
@@ -187,14 +224,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
extern void l2tp_session_free(struct l2tp_session *session);
+extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
-extern void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf);
extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
extern void l2tp_tunnel_destruct(struct sock *sk);
extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
/* Tunnel reference counts. Incremented per session that is added to
* the tunnel.
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 3ad290d..bee5b14 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -670,7 +670,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Check that this session doesn't already exist */
error = -EEXIST;
- session = l2tp_session_find(tunnel, sp->pppol2tp.s_session);
+ session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session);
if (session != NULL)
goto end;
@@ -678,7 +678,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
* headers.
*/
cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
- cfg.hdr_len = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
cfg.debug = tunnel->debug;
/* Allocate and initialize a new session context. */
@@ -999,7 +998,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
if (stats.session_id != 0) {
/* resend to session ioctl handler */
struct l2tp_session *session =
- l2tp_session_find(tunnel, stats.session_id);
+ l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
if (session != NULL)
err = pppol2tp_session_ioctl(session, cmd, arg);
else
@@ -1375,6 +1374,8 @@ end:
/*****************************************************************************
* /proc filesystem for debug
+ * Since the original pppol2tp driver provided /proc/net/pppol2tp for
+ * L2TPv2, we dump only L2TPv2 tunnels and sessions here.
*****************************************************************************/
static unsigned int pppol2tp_net_id;
@@ -1391,14 +1392,24 @@ struct pppol2tp_seq_data {
static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
{
- pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ for (;;) {
+ pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
+ pd->tunnel_idx++;
+
+ if (pd->tunnel == NULL)
+ break;
+
+ /* Ignore L2TPv3 tunnels */
+ if (pd->tunnel->version < 3)
+ break;
+ }
}
static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
+
if (pd->session == NULL) {
pd->session_idx = 0;
pppol2tp_next_tunnel(net, pd);
^ permalink raw reply related
* [PATCH net-next-2.6 v4 08/14] netlink: Export genl_lock() API for use by modules
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
This lets kernel modules which use genl netlink APIs serialize netlink
processing.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
include/linux/genetlink.h | 8 ++++++++
net/netlink/genetlink.c | 6 ++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index b834ef6..61549b2 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -80,4 +80,12 @@ enum {
#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
+#ifdef __KERNEL__
+
+/* All generic netlink requests are serialized by a global lock. */
+extern void genl_lock(void);
+extern void genl_unlock(void);
+
+#endif /* __KERNEL__ */
+
#endif /* __LINUX_GENERIC_NETLINK_H */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a4b6e14..a28fda7 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -20,15 +20,17 @@
static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
-static inline void genl_lock(void)
+void genl_lock(void)
{
mutex_lock(&genl_mutex);
}
+EXPORT_SYMBOL(genl_lock);
-static inline void genl_unlock(void)
+void genl_unlock(void)
{
mutex_unlock(&genl_mutex);
}
+EXPORT_SYMBOL(genl_unlock);
#define GENL_FAM_TAB_SIZE 16
#define GENL_FAM_TAB_MASK (GENL_FAM_TAB_SIZE - 1)
^ permalink raw reply related
* [PATCH net-next-2.6 v4 10/14] l2tp: Convert rwlock to RCU
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
Reader/write locks are discouraged because they are slower than spin
locks. So this patch converts the rwlocks used in the per_net structs
to rcu.
Signed-off-by: James Chapman <jchapman@katalix.com>
---
net/l2tp/l2tp_core.c | 78 +++++++++++++++++++++++++++-----------------------
1 files changed, 42 insertions(+), 36 deletions(-)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index fbd1f21..473cf2d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/list.h>
+#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
@@ -105,9 +106,9 @@ static atomic_t l2tp_session_count;
static unsigned int l2tp_net_id;
struct l2tp_net {
struct list_head l2tp_tunnel_list;
- rwlock_t l2tp_tunnel_list_lock;
+ spinlock_t l2tp_tunnel_list_lock;
struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
- rwlock_t l2tp_session_hlist_lock;
+ spinlock_t l2tp_session_hlist_lock;
};
static inline struct l2tp_net *l2tp_pernet(struct net *net)
@@ -139,14 +140,14 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
struct l2tp_session *session;
struct hlist_node *walk;
- read_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_for_each_entry(session, walk, session_list, global_hlist) {
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
if (session->session_id == session_id) {
- read_unlock_bh(&pn->l2tp_session_hlist_lock);
+ rcu_read_unlock_bh();
return session;
}
}
- read_unlock_bh(&pn->l2tp_session_hlist_lock);
+ rcu_read_unlock_bh();
return NULL;
}
@@ -225,17 +226,17 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
struct hlist_node *walk;
struct l2tp_session *session;
- read_lock_bh(&pn->l2tp_session_hlist_lock);
+ rcu_read_lock_bh();
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
- hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
+ hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
if (!strcmp(session->ifname, ifname)) {
- read_unlock_bh(&pn->l2tp_session_hlist_lock);
+ rcu_read_unlock_bh();
return session;
}
}
}
- read_unlock_bh(&pn->l2tp_session_hlist_lock);
+ rcu_read_unlock_bh();
return NULL;
}
@@ -248,14 +249,14 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
struct l2tp_tunnel *tunnel;
struct l2tp_net *pn = l2tp_pernet(net);
- read_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) {
+ rcu_read_lock_bh();
+ list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
if (tunnel->tunnel_id == tunnel_id) {
- read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ rcu_read_unlock_bh();
return tunnel;
}
}
- read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ rcu_read_unlock_bh();
return NULL;
}
@@ -267,15 +268,15 @@ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
struct l2tp_tunnel *tunnel;
int count = 0;
- read_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) {
+ rcu_read_lock_bh();
+ list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
if (++count > nth) {
- read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ rcu_read_unlock_bh();
return tunnel;
}
}
- read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ rcu_read_unlock_bh();
return NULL;
}
@@ -1167,9 +1168,10 @@ again:
if (tunnel->version != L2TP_HDR_VER_2) {
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
- write_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_del_init(&session->global_hlist);
- write_unlock_bh(&pn->l2tp_session_hlist_lock);
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_del_init_rcu(&session->global_hlist);
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ synchronize_rcu();
}
if (session->session_close != NULL)
@@ -1206,9 +1208,10 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
"%s: free...\n", tunnel->name);
/* Remove from tunnel list */
- write_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_del_init(&tunnel->list);
- write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+ list_del_rcu(&tunnel->list);
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ synchronize_rcu();
atomic_dec(&l2tp_tunnel_count);
kfree(tunnel);
@@ -1310,9 +1313,10 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Add tunnel to our list */
INIT_LIST_HEAD(&tunnel->list);
- write_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_add(&tunnel->list, &pn->l2tp_tunnel_list);
- write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+ list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+ spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ synchronize_rcu();
atomic_inc(&l2tp_tunnel_count);
/* Bump the reference count. The tunnel context is deleted
@@ -1370,9 +1374,10 @@ void l2tp_session_free(struct l2tp_session *session)
if (tunnel->version != L2TP_HDR_VER_2) {
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
- write_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_del_init(&session->global_hlist);
- write_unlock_bh(&pn->l2tp_session_hlist_lock);
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_del_init_rcu(&session->global_hlist);
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ synchronize_rcu();
}
if (session->session_id != 0)
@@ -1494,10 +1499,11 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
if (tunnel->version != L2TP_HDR_VER_2) {
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
- write_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_add_head(&session->global_hlist,
- l2tp_session_id_hash_2(pn, session_id));
- write_unlock_bh(&pn->l2tp_session_hlist_lock);
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_add_head_rcu(&session->global_hlist,
+ l2tp_session_id_hash_2(pn, session_id));
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ synchronize_rcu();
}
/* Ignore management session in session count value */
@@ -1524,12 +1530,12 @@ static __net_init int l2tp_init_net(struct net *net)
return -ENOMEM;
INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
- rwlock_init(&pn->l2tp_tunnel_list_lock);
+ spin_lock_init(&pn->l2tp_tunnel_list_lock);
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
- rwlock_init(&pn->l2tp_session_hlist_lock);
+ spin_lock_init(&pn->l2tp_session_hlist_lock);
err = net_assign_generic(net, l2tp_net_id, pn);
if (err)
^ permalink raw reply related
* [PATCH net-next-2.6 v4 09/14] l2tp: Add netlink control API for L2TP
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
In L2TPv3, we need to create/delete/modify/query L2TP tunnel and
session contexts. The number of parameters is significant. So let's
use netlink. Userspace uses this API to control L2TP tunnel/session
contexts in the kernel.
The previous pppol2tp driver was managed using [gs]etsockopt(). This
API is retained for backwards compatibility. Unlike L2TPv2 which
carries only PPP frames, L2TPv3 can carry raw ethernet frames or other
frame types and these do not always have an associated socket
family. Therefore, we need a way to use L2TP sessions that doesn't
require a socket type for each supported frame type. Hence netlink is
used.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
include/linux/l2tp.h | 125 +++++++
net/l2tp/Makefile | 1
net/l2tp/l2tp_core.c | 61 +++
net/l2tp/l2tp_core.h | 34 +-
net/l2tp/l2tp_netlink.c | 830 +++++++++++++++++++++++++++++++++++++++++++++++
net/l2tp/l2tp_ppp.c | 162 ++++++++-
6 files changed, 1169 insertions(+), 44 deletions(-)
create mode 100644 net/l2tp/l2tp_netlink.c
diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h
index deff7bc..4bdb31d 100644
--- a/include/linux/l2tp.h
+++ b/include/linux/l2tp.h
@@ -11,6 +11,8 @@
#ifdef __KERNEL__
#include <linux/socket.h>
#include <linux/in.h>
+#else
+#include <netinet/in.h>
#endif
#define IPPROTO_L2TP 115
@@ -21,6 +23,7 @@
* @l2tp_addr: protocol specific address information
* @l2tp_conn_id: connection id of tunnel
*/
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
struct sockaddr_l2tpip {
/* The first fields must match struct sockaddr_in */
sa_family_t l2tp_family; /* AF_INET */
@@ -35,4 +38,126 @@ struct sockaddr_l2tpip {
sizeof(__u32)];
};
+/*****************************************************************************
+ * NETLINK_GENERIC netlink family.
+ *****************************************************************************/
+
+/*
+ * Commands.
+ * Valid TLVs of each command are:-
+ * TUNNEL_CREATE - CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum, vlanid
+ * TUNNEL_DELETE - CONN_ID
+ * TUNNEL_MODIFY - CONN_ID, udpcsum
+ * TUNNEL_GETSTATS - CONN_ID, (stats)
+ * TUNNEL_GET - CONN_ID, (...)
+ * SESSION_CREATE - SESSION_ID, PW_TYPE, offset, data_seq, cookie, peer_cookie, offset, l2spec
+ * SESSION_DELETE - SESSION_ID
+ * SESSION_MODIFY - SESSION_ID, data_seq
+ * SESSION_GET - SESSION_ID, (...)
+ * SESSION_GETSTATS - SESSION_ID, (stats)
+ *
+ */
+enum {
+ L2TP_CMD_NOOP,
+ L2TP_CMD_TUNNEL_CREATE,
+ L2TP_CMD_TUNNEL_DELETE,
+ L2TP_CMD_TUNNEL_MODIFY,
+ L2TP_CMD_TUNNEL_GET,
+ L2TP_CMD_SESSION_CREATE,
+ L2TP_CMD_SESSION_DELETE,
+ L2TP_CMD_SESSION_MODIFY,
+ L2TP_CMD_SESSION_GET,
+ __L2TP_CMD_MAX,
+};
+
+#define L2TP_CMD_MAX (__L2TP_CMD_MAX - 1)
+
+/*
+ * ATTR types defined for L2TP
+ */
+enum {
+ L2TP_ATTR_NONE, /* no data */
+ L2TP_ATTR_PW_TYPE, /* u16, enum l2tp_pwtype */
+ L2TP_ATTR_ENCAP_TYPE, /* u16, enum l2tp_encap_type */
+ L2TP_ATTR_OFFSET, /* u16 */
+ L2TP_ATTR_DATA_SEQ, /* u16 */
+ L2TP_ATTR_L2SPEC_TYPE, /* u8, enum l2tp_l2spec_type */
+ L2TP_ATTR_L2SPEC_LEN, /* u8, enum l2tp_l2spec_type */
+ L2TP_ATTR_PROTO_VERSION, /* u8 */
+ L2TP_ATTR_IFNAME, /* string */
+ L2TP_ATTR_CONN_ID, /* u32 */
+ L2TP_ATTR_PEER_CONN_ID, /* u32 */
+ L2TP_ATTR_SESSION_ID, /* u32 */
+ L2TP_ATTR_PEER_SESSION_ID, /* u32 */
+ L2TP_ATTR_UDP_CSUM, /* u8 */
+ L2TP_ATTR_VLAN_ID, /* u16 */
+ L2TP_ATTR_COOKIE, /* 0, 4 or 8 bytes */
+ L2TP_ATTR_PEER_COOKIE, /* 0, 4 or 8 bytes */
+ L2TP_ATTR_DEBUG, /* u32 */
+ L2TP_ATTR_RECV_SEQ, /* u8 */
+ L2TP_ATTR_SEND_SEQ, /* u8 */
+ L2TP_ATTR_LNS_MODE, /* u8 */
+ L2TP_ATTR_USING_IPSEC, /* u8 */
+ L2TP_ATTR_RECV_TIMEOUT, /* msec */
+ L2TP_ATTR_FD, /* int */
+ L2TP_ATTR_IP_SADDR, /* u32 */
+ L2TP_ATTR_IP_DADDR, /* u32 */
+ L2TP_ATTR_UDP_SPORT, /* u16 */
+ L2TP_ATTR_UDP_DPORT, /* u16 */
+ L2TP_ATTR_MTU, /* u16 */
+ L2TP_ATTR_MRU, /* u16 */
+ L2TP_ATTR_STATS, /* nested */
+ __L2TP_ATTR_MAX,
+};
+
+#define L2TP_ATTR_MAX (__L2TP_ATTR_MAX - 1)
+
+/* Nested in L2TP_ATTR_STATS */
+enum {
+ L2TP_ATTR_STATS_NONE, /* no data */
+ L2TP_ATTR_TX_PACKETS, /* u64 */
+ L2TP_ATTR_TX_BYTES, /* u64 */
+ L2TP_ATTR_TX_ERRORS, /* u64 */
+ L2TP_ATTR_RX_PACKETS, /* u64 */
+ L2TP_ATTR_RX_BYTES, /* u64 */
+ L2TP_ATTR_RX_SEQ_DISCARDS, /* u64 */
+ L2TP_ATTR_RX_OOS_PACKETS, /* u64 */
+ L2TP_ATTR_RX_ERRORS, /* u64 */
+ __L2TP_ATTR_STATS_MAX,
+};
+
+#define L2TP_ATTR_STATS_MAX (__L2TP_ATTR_STATS_MAX - 1)
+
+enum l2tp_pwtype {
+ L2TP_PWTYPE_NONE = 0x0000,
+ L2TP_PWTYPE_ETH_VLAN = 0x0004,
+ L2TP_PWTYPE_ETH = 0x0005,
+ L2TP_PWTYPE_PPP = 0x0007,
+ L2TP_PWTYPE_PPP_AC = 0x0008,
+ L2TP_PWTYPE_IP = 0x000b,
+ __L2TP_PWTYPE_MAX
+};
+
+enum l2tp_l2spec_type {
+ L2TP_L2SPECTYPE_NONE,
+ L2TP_L2SPECTYPE_DEFAULT,
+};
+
+enum l2tp_encap_type {
+ L2TP_ENCAPTYPE_UDP,
+ L2TP_ENCAPTYPE_IP,
+};
+
+enum l2tp_seqmode {
+ L2TP_SEQ_NONE = 0,
+ L2TP_SEQ_IP = 1,
+ L2TP_SEQ_ALL = 2,
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define L2TP_GENL_NAME "l2tp"
+#define L2TP_GENL_VERSION 0x1
+
#endif
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index ef28b16..2c4a14b 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
# Build l2tp as modules if L2TP is M
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1739d04..fbd1f21 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -49,6 +49,7 @@
#include <net/dst.h>
#include <net/ip.h>
#include <net/udp.h>
+#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/protocol.h>
@@ -214,6 +215,32 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
}
EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+/* Lookup a session by interface name.
+ * This is very inefficient but is only used by management interfaces.
+ */
+struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ int hash;
+ struct hlist_node *walk;
+ struct l2tp_session *session;
+
+ read_lock_bh(&pn->l2tp_session_hlist_lock);
+ for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
+ hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
+ if (!strcmp(session->ifname, ifname)) {
+ read_unlock_bh(&pn->l2tp_session_hlist_lock);
+ return session;
+ }
+ }
+ }
+
+ read_unlock_bh(&pn->l2tp_session_hlist_lock);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+
/* Lookup a tunnel by id
*/
struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
@@ -758,7 +785,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
/* Find the session context */
session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
- if (!session) {
+ if (!session || !session->recv_skb) {
/* Not found? Pass to userspace to deal with */
PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
"%s: no session found (%u/%u). Passing up.\n",
@@ -1305,6 +1332,23 @@ err:
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+/* This function is used by the netlink TUNNEL_DELETE command.
+ */
+int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+{
+ int err = 0;
+
+ /* Force the tunnel socket to close. This will eventually
+ * cause the tunnel to be deleted via the normal socket close
+ * mechanisms when userspace closes the tunnel socket.
+ */
+ if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL))
+ err = inet_shutdown(tunnel->sock->sk_socket, 2);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
+
/* Really kill the session.
*/
void l2tp_session_free(struct l2tp_session *session)
@@ -1349,6 +1393,21 @@ void l2tp_session_free(struct l2tp_session *session)
}
EXPORT_SYMBOL_GPL(l2tp_session_free);
+/* This function is used by the netlink SESSION_DELETE command and by
+ pseudowire modules.
+ */
+int l2tp_session_delete(struct l2tp_session *session)
+{
+ if (session->session_close != NULL)
+ (*session->session_close)(session);
+
+ l2tp_session_dec_refcount(session);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_delete);
+
+
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_len parameters are set.
*/
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index d239598..2974d9a 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -33,26 +33,6 @@ enum {
L2TP_MSG_DATA = (1 << 3), /* data packets */
};
-enum l2tp_pwtype {
- L2TP_PWTYPE_NONE = 0x0000,
- L2TP_PWTYPE_ETH_VLAN = 0x0004,
- L2TP_PWTYPE_ETH = 0x0005,
- L2TP_PWTYPE_PPP = 0x0007,
- L2TP_PWTYPE_PPP_AC = 0x0008,
- L2TP_PWTYPE_IP = 0x000b,
- __L2TP_PWTYPE_MAX
-};
-
-enum l2tp_l2spec_type {
- L2TP_L2SPECTYPE_NONE,
- L2TP_L2SPECTYPE_DEFAULT,
-};
-
-enum l2tp_encap_type {
- L2TP_ENCAPTYPE_UDP,
- L2TP_ENCAPTYPE_IP,
-};
-
struct sk_buff;
struct l2tp_stats {
@@ -87,6 +67,7 @@ struct l2tp_session_cfg {
* control of LNS. */
int debug; /* bitmask of debug message
* categories */
+ u16 vlan_id; /* VLAN pseudowire only */
u16 offset; /* offset to payload */
u16 l2specific_len; /* Layer 2 specific length */
u16 l2specific_type; /* Layer 2 specific type */
@@ -98,6 +79,7 @@ struct l2tp_session_cfg {
* (in jiffies) */
int mtu;
int mru;
+ char *ifname;
};
struct l2tp_session {
@@ -124,6 +106,7 @@ struct l2tp_session {
atomic_t ref_count;
char name[32]; /* for logging */
+ char ifname[IFNAMSIZ];
unsigned data_seq:2; /* data sequencing level
* 0 => none, 1 => IP only,
* 2 => all
@@ -192,6 +175,11 @@ struct l2tp_tunnel {
uint8_t priv[0]; /* private data */
};
+struct l2tp_nl_cmd_ops {
+ int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+ int (*session_delete)(struct l2tp_session *session);
+};
+
static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
{
return &tunnel->priv[0];
@@ -224,11 +212,14 @@ out:
extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
+extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+extern int l2tp_session_delete(struct l2tp_session *session);
extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
extern void l2tp_session_free(struct l2tp_session *session);
extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
@@ -241,6 +232,9 @@ extern void l2tp_tunnel_destruct(struct sock *sk);
extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
+extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
+
/* Tunnel reference counts. Incremented per session that is added to
* the tunnel.
*/
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
new file mode 100644
index 0000000..3d0f7f6
--- /dev/null
+++ b/net/l2tp/l2tp_netlink.c
@@ -0,0 +1,830 @@
+/*
+ * L2TP netlink layer, for management
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * Partly based on the IrDA nelink implementation
+ * (see net/irda/irnetlink.c) which is:
+ * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
+ * which is in turn partly based on the wireless netlink code:
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/udp.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <net/net_namespace.h>
+
+#include <linux/l2tp.h>
+
+#include "l2tp_core.h"
+
+
+static struct genl_family l2tp_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .name = L2TP_GENL_NAME,
+ .version = L2TP_GENL_VERSION,
+ .hdrsize = 0,
+ .maxattr = L2TP_ATTR_MAX,
+};
+
+/* Accessed under genl lock */
+static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
+
+static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+{
+ u32 tunnel_id;
+ u32 session_id;
+ char *ifname;
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_session *session = NULL;
+ struct net *net = genl_info_net(info);
+
+ if (info->attrs[L2TP_ATTR_IFNAME]) {
+ ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
+ session = l2tp_session_find_by_ifname(net, ifname);
+ } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
+ (info->attrs[L2TP_ATTR_CONN_ID])) {
+ tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+ session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (tunnel)
+ session = l2tp_session_find(net, tunnel, session_id);
+ }
+
+ return session;
+}
+
+static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+ int ret = -ENOBUFS;
+
+ msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+ &l2tp_nl_family, 0, L2TP_CMD_NOOP);
+ if (IS_ERR(hdr)) {
+ ret = PTR_ERR(hdr);
+ goto err_out;
+ }
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+
+err_out:
+ nlmsg_free(msg);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 tunnel_id;
+ u32 peer_tunnel_id;
+ int proto_version;
+ int fd;
+ int ret = 0;
+ struct l2tp_tunnel_cfg cfg = { 0, };
+ struct l2tp_tunnel *tunnel;
+ struct net *net = genl_info_net(info);
+
+ if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+ if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
+
+ if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
+
+ if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
+
+ if (!info->attrs[L2TP_ATTR_FD]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
+
+ if (info->attrs[L2TP_ATTR_DEBUG])
+ cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (tunnel != NULL) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ ret = -EINVAL;
+ switch (cfg.encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ case L2TP_ENCAPTYPE_IP:
+ ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
+ peer_tunnel_id, &cfg, &tunnel);
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info)
+{
+ struct l2tp_tunnel *tunnel;
+ u32 tunnel_id;
+ int ret = 0;
+ struct net *net = genl_info_net(info);
+
+ if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (tunnel == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ (void) l2tp_tunnel_delete(tunnel);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info)
+{
+ struct l2tp_tunnel *tunnel;
+ u32 tunnel_id;
+ int ret = 0;
+ struct net *net = genl_info_net(info);
+
+ if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (tunnel == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (info->attrs[L2TP_ATTR_DEBUG])
+ tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+ struct l2tp_tunnel *tunnel)
+{
+ void *hdr;
+ struct nlattr *nest;
+ struct sock *sk = NULL;
+ struct inet_sock *inet;
+
+ hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
+ L2TP_CMD_TUNNEL_GET);
+ if (IS_ERR(hdr))
+ return PTR_ERR(hdr);
+
+ NLA_PUT_U8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version);
+ NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
+ NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
+ NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, tunnel->debug);
+ NLA_PUT_U16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap);
+
+ nest = nla_nest_start(skb, L2TP_ATTR_STATS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, tunnel->stats.tx_packets);
+ NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, tunnel->stats.tx_bytes);
+ NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, tunnel->stats.tx_errors);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, tunnel->stats.rx_packets);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, tunnel->stats.rx_bytes);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, tunnel->stats.rx_seq_discards);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, tunnel->stats.rx_oos_packets);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, tunnel->stats.rx_errors);
+ nla_nest_end(skb, nest);
+
+ sk = tunnel->sock;
+ if (!sk)
+ goto out;
+
+ inet = inet_sk(sk);
+
+ switch (tunnel->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ NLA_PUT_U16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport));
+ NLA_PUT_U16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport));
+ NLA_PUT_U8(skb, L2TP_ATTR_UDP_CSUM, (sk->sk_no_check != UDP_CSUM_NOXMIT));
+ /* NOBREAK */
+ case L2TP_ENCAPTYPE_IP:
+ NLA_PUT_BE32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr);
+ NLA_PUT_BE32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr);
+ break;
+ }
+
+out:
+ return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -1;
+}
+
+static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct l2tp_tunnel *tunnel;
+ struct sk_buff *msg;
+ u32 tunnel_id;
+ int ret = -ENOBUFS;
+ struct net *net = genl_info_net(info);
+
+ if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (tunnel == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
+ NLM_F_ACK, tunnel);
+ if (ret < 0)
+ goto err_out;
+
+ return genlmsg_unicast(net, msg, info->snd_pid);
+
+err_out:
+ nlmsg_free(msg);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int ti = cb->args[0];
+ struct l2tp_tunnel *tunnel;
+ struct net *net = sock_net(skb->sk);
+
+ for (;;) {
+ tunnel = l2tp_tunnel_find_nth(net, ti);
+ if (tunnel == NULL)
+ goto out;
+
+ if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ tunnel) <= 0)
+ goto out;
+
+ ti++;
+ }
+
+out:
+ cb->args[0] = ti;
+
+ return skb->len;
+}
+
+static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 tunnel_id = 0;
+ u32 session_id;
+ u32 peer_session_id;
+ int ret = 0;
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_session *session;
+ struct l2tp_session_cfg cfg = { 0, };
+ struct net *net = genl_info_net(info);
+
+ if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (!tunnel) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
+ session = l2tp_session_find(net, tunnel, session_id);
+ if (session) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
+
+ if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
+ if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (tunnel->version > 2) {
+ if (info->attrs[L2TP_ATTR_OFFSET])
+ cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
+
+ if (info->attrs[L2TP_ATTR_DATA_SEQ])
+ cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
+
+ cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
+ if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
+ cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
+
+ cfg.l2specific_len = 4;
+ if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
+ cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
+
+ if (info->attrs[L2TP_ATTR_COOKIE]) {
+ u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
+ if (len > 8) {
+ ret = -EINVAL;
+ goto out;
+ }
+ cfg.cookie_len = len;
+ memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
+ }
+ if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
+ u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
+ if (len > 8) {
+ ret = -EINVAL;
+ goto out;
+ }
+ cfg.peer_cookie_len = len;
+ memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
+ }
+ if (info->attrs[L2TP_ATTR_IFNAME])
+ cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
+
+ if (info->attrs[L2TP_ATTR_VLAN_ID])
+ cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
+ }
+
+ if (info->attrs[L2TP_ATTR_DEBUG])
+ cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+ if (info->attrs[L2TP_ATTR_RECV_SEQ])
+ cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
+
+ if (info->attrs[L2TP_ATTR_SEND_SEQ])
+ cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+
+ if (info->attrs[L2TP_ATTR_LNS_MODE])
+ cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
+
+ if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
+ cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
+
+ if (info->attrs[L2TP_ATTR_MTU])
+ cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
+
+ if (info->attrs[L2TP_ATTR_MRU])
+ cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+
+ if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
+ (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
+ ret = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ /* Check that pseudowire-specific params are present */
+ switch (cfg.pw_type) {
+ case L2TP_PWTYPE_NONE:
+ break;
+ case L2TP_PWTYPE_ETH_VLAN:
+ if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case L2TP_PWTYPE_ETH:
+ break;
+ case L2TP_PWTYPE_PPP:
+ case L2TP_PWTYPE_PPP_AC:
+ break;
+ case L2TP_PWTYPE_IP:
+ default:
+ ret = -EPROTONOSUPPORT;
+ break;
+ }
+
+ ret = -EPROTONOSUPPORT;
+ if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
+ ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
+ session_id, peer_session_id, &cfg);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret = 0;
+ struct l2tp_session *session;
+ u16 pw_type;
+
+ session = l2tp_nl_session_find(info);
+ if (session == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ pw_type = session->pwtype;
+ if (pw_type < __L2TP_PWTYPE_MAX)
+ if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
+ ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info)
+{
+ int ret = 0;
+ struct l2tp_session *session;
+
+ session = l2tp_nl_session_find(info);
+ if (session == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (info->attrs[L2TP_ATTR_DEBUG])
+ session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+ if (info->attrs[L2TP_ATTR_DATA_SEQ])
+ session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
+
+ if (info->attrs[L2TP_ATTR_RECV_SEQ])
+ session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
+
+ if (info->attrs[L2TP_ATTR_SEND_SEQ])
+ session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+
+ if (info->attrs[L2TP_ATTR_LNS_MODE])
+ session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
+
+ if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
+ session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
+
+ if (info->attrs[L2TP_ATTR_MTU])
+ session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
+
+ if (info->attrs[L2TP_ATTR_MRU])
+ session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+ struct l2tp_session *session)
+{
+ void *hdr;
+ struct nlattr *nest;
+ struct l2tp_tunnel *tunnel = session->tunnel;
+ struct sock *sk = NULL;
+
+ sk = tunnel->sock;
+
+ hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
+ if (IS_ERR(hdr))
+ return PTR_ERR(hdr);
+
+ NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
+ NLA_PUT_U32(skb, L2TP_ATTR_SESSION_ID, session->session_id);
+ NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
+ NLA_PUT_U32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id);
+ NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, session->debug);
+ NLA_PUT_U16(skb, L2TP_ATTR_PW_TYPE, session->pwtype);
+ NLA_PUT_U16(skb, L2TP_ATTR_MTU, session->mtu);
+ if (session->mru)
+ NLA_PUT_U16(skb, L2TP_ATTR_MRU, session->mru);
+
+ if (session->ifname && session->ifname[0])
+ NLA_PUT_STRING(skb, L2TP_ATTR_IFNAME, session->ifname);
+ if (session->cookie_len)
+ NLA_PUT(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0]);
+ if (session->peer_cookie_len)
+ NLA_PUT(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, &session->peer_cookie[0]);
+ NLA_PUT_U8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq);
+ NLA_PUT_U8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq);
+ NLA_PUT_U8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode);
+#ifdef CONFIG_XFRM
+ if ((sk) && (sk->sk_policy[0] || sk->sk_policy[1]))
+ NLA_PUT_U8(skb, L2TP_ATTR_USING_IPSEC, 1);
+#endif
+ if (session->reorder_timeout)
+ NLA_PUT_MSECS(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout);
+
+ nest = nla_nest_start(skb, L2TP_ATTR_STATS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, session->stats.tx_packets);
+ NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, session->stats.tx_bytes);
+ NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, session->stats.tx_errors);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, session->stats.rx_packets);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, session->stats.rx_bytes);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, session->stats.rx_seq_discards);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, session->stats.rx_oos_packets);
+ NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, session->stats.rx_errors);
+ nla_nest_end(skb, nest);
+
+ return genlmsg_end(skb, hdr);
+
+ nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -1;
+}
+
+static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct l2tp_session *session;
+ struct sk_buff *msg;
+ int ret;
+
+ session = l2tp_nl_session_find(info);
+ if (session == NULL) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
+ 0, session);
+ if (ret < 0)
+ goto err_out;
+
+ return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+
+err_out:
+ nlmsg_free(msg);
+
+out:
+ return ret;
+}
+
+static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct l2tp_session *session;
+ struct l2tp_tunnel *tunnel = NULL;
+ int ti = cb->args[0];
+ int si = cb->args[1];
+
+ for (;;) {
+ if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_find_nth(net, ti);
+ if (tunnel == NULL)
+ goto out;
+ }
+
+ session = l2tp_session_find_nth(tunnel, si);
+ if (session == NULL) {
+ ti++;
+ tunnel = NULL;
+ si = 0;
+ continue;
+ }
+
+ if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ session) <= 0)
+ break;
+
+ si++;
+ }
+
+out:
+ cb->args[0] = ti;
+ cb->args[1] = si;
+
+ return skb->len;
+}
+
+static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
+ [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, },
+ [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, },
+ [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, },
+ [L2TP_ATTR_OFFSET] = { .type = NLA_U16, },
+ [L2TP_ATTR_DATA_SEQ] = { .type = NLA_U8, },
+ [L2TP_ATTR_L2SPEC_TYPE] = { .type = NLA_U8, },
+ [L2TP_ATTR_L2SPEC_LEN] = { .type = NLA_U8, },
+ [L2TP_ATTR_PROTO_VERSION] = { .type = NLA_U8, },
+ [L2TP_ATTR_CONN_ID] = { .type = NLA_U32, },
+ [L2TP_ATTR_PEER_CONN_ID] = { .type = NLA_U32, },
+ [L2TP_ATTR_SESSION_ID] = { .type = NLA_U32, },
+ [L2TP_ATTR_PEER_SESSION_ID] = { .type = NLA_U32, },
+ [L2TP_ATTR_UDP_CSUM] = { .type = NLA_U8, },
+ [L2TP_ATTR_VLAN_ID] = { .type = NLA_U16, },
+ [L2TP_ATTR_DEBUG] = { .type = NLA_U32, },
+ [L2TP_ATTR_RECV_SEQ] = { .type = NLA_U8, },
+ [L2TP_ATTR_SEND_SEQ] = { .type = NLA_U8, },
+ [L2TP_ATTR_LNS_MODE] = { .type = NLA_U8, },
+ [L2TP_ATTR_USING_IPSEC] = { .type = NLA_U8, },
+ [L2TP_ATTR_RECV_TIMEOUT] = { .type = NLA_MSECS, },
+ [L2TP_ATTR_FD] = { .type = NLA_U32, },
+ [L2TP_ATTR_IP_SADDR] = { .type = NLA_U32, },
+ [L2TP_ATTR_IP_DADDR] = { .type = NLA_U32, },
+ [L2TP_ATTR_UDP_SPORT] = { .type = NLA_U16, },
+ [L2TP_ATTR_UDP_DPORT] = { .type = NLA_U16, },
+ [L2TP_ATTR_MTU] = { .type = NLA_U16, },
+ [L2TP_ATTR_MRU] = { .type = NLA_U16, },
+ [L2TP_ATTR_STATS] = { .type = NLA_NESTED, },
+ [L2TP_ATTR_IFNAME] = {
+ .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ - 1,
+ },
+ [L2TP_ATTR_COOKIE] = {
+ .type = NLA_BINARY,
+ .len = 8,
+ },
+ [L2TP_ATTR_PEER_COOKIE] = {
+ .type = NLA_BINARY,
+ .len = 8,
+ },
+};
+
+static struct genl_ops l2tp_nl_ops[] = {
+ {
+ .cmd = L2TP_CMD_NOOP,
+ .doit = l2tp_nl_cmd_noop,
+ .policy = l2tp_nl_policy,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = L2TP_CMD_TUNNEL_CREATE,
+ .doit = l2tp_nl_cmd_tunnel_create,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_TUNNEL_DELETE,
+ .doit = l2tp_nl_cmd_tunnel_delete,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_TUNNEL_MODIFY,
+ .doit = l2tp_nl_cmd_tunnel_modify,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_TUNNEL_GET,
+ .doit = l2tp_nl_cmd_tunnel_get,
+ .dumpit = l2tp_nl_cmd_tunnel_dump,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_SESSION_CREATE,
+ .doit = l2tp_nl_cmd_session_create,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_SESSION_DELETE,
+ .doit = l2tp_nl_cmd_session_delete,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_SESSION_MODIFY,
+ .doit = l2tp_nl_cmd_session_modify,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = L2TP_CMD_SESSION_GET,
+ .doit = l2tp_nl_cmd_session_get,
+ .dumpit = l2tp_nl_cmd_session_dump,
+ .policy = l2tp_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
+int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
+{
+ int ret;
+
+ ret = -EINVAL;
+ if (pw_type >= __L2TP_PWTYPE_MAX)
+ goto err;
+
+ genl_lock();
+ ret = -EBUSY;
+ if (l2tp_nl_cmd_ops[pw_type])
+ goto out;
+
+ l2tp_nl_cmd_ops[pw_type] = ops;
+
+out:
+ genl_unlock();
+err:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_nl_register_ops);
+
+void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
+{
+ if (pw_type < __L2TP_PWTYPE_MAX) {
+ genl_lock();
+ l2tp_nl_cmd_ops[pw_type] = NULL;
+ genl_unlock();
+ }
+}
+EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
+
+static int l2tp_nl_init(void)
+{
+ int err;
+
+ printk(KERN_INFO "L2TP netlink interface\n");
+ err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops,
+ ARRAY_SIZE(l2tp_nl_ops));
+
+ return err;
+}
+
+static void l2tp_nl_cleanup(void)
+{
+ genl_unregister_family(&l2tp_nl_family);
+}
+
+module_init(l2tp_nl_init);
+module_exit(l2tp_nl_cleanup);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP netlink");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
+ __stringify(NETLINK_GENERIC) "-type-" "l2tp")
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 63fc62b..d64f081 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -87,6 +87,7 @@
#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/proc_fs.h>
+#include <linux/l2tp.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -656,17 +657,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel_id == 0)
goto end;
+ tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+
/* Special case: create tunnel context if session_id and
* peer_session_id is 0. Otherwise look up tunnel using supplied
* tunnel id.
*/
if ((session_id == 0) && (peer_session_id == 0)) {
- error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel);
- if (error < 0)
- goto end;
+ if (tunnel == NULL) {
+ struct l2tp_tunnel_cfg tcfg = {
+ .encap = L2TP_ENCAPTYPE_UDP,
+ .debug = 0,
+ };
+ error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
+ if (error < 0)
+ goto end;
+ }
} else {
- tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
-
/* Error if we can't find the tunnel */
error = -ENOENT;
if (tunnel == NULL)
@@ -680,28 +687,46 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->recv_payload_hook == NULL)
tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
- /* Check that this session doesn't already exist */
- error = -EEXIST;
- session = l2tp_session_find(sock_net(sk), tunnel, session_id);
- if (session != NULL)
- goto end;
-
- /* Default MTU values. */
- if (cfg.mtu == 0)
- cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
- if (cfg.mru == 0)
- cfg.mru = cfg.mtu;
- cfg.debug = tunnel->debug;
+ if (tunnel->peer_tunnel_id == 0) {
+ if (ver == 2)
+ tunnel->peer_tunnel_id = sp->pppol2tp.d_tunnel;
+ else
+ tunnel->peer_tunnel_id = sp3->pppol2tp.d_tunnel;
+ }
- /* Allocate and initialize a new session context. */
- session = l2tp_session_create(sizeof(struct pppol2tp_session),
- tunnel, session_id,
- peer_session_id, &cfg);
+ /* Create session if it doesn't already exist. We handle the
+ * case where a session was previously created by the netlink
+ * interface by checking that the session doesn't already have
+ * a socket and its tunnel socket are what we expect. If any
+ * of those checks fail, return EEXIST to the caller.
+ */
+ session = l2tp_session_find(sock_net(sk), tunnel, session_id);
if (session == NULL) {
- error = -ENOMEM;
- goto end;
+ /* Default MTU must allow space for UDP/L2TP/PPP
+ * headers.
+ */
+ cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+ /* Allocate and initialize a new session context. */
+ session = l2tp_session_create(sizeof(struct pppol2tp_session),
+ tunnel, session_id,
+ peer_session_id, &cfg);
+ if (session == NULL) {
+ error = -ENOMEM;
+ goto end;
+ }
+ } else {
+ ps = l2tp_session_priv(session);
+ error = -EEXIST;
+ if (ps->sock != NULL)
+ goto end;
+
+ /* consistency checks */
+ if (ps->tunnel_sock != tunnel->sock)
+ goto end;
}
+ /* Associate session with its PPPoL2TP socket */
ps = l2tp_session_priv(session);
ps->owner = current->pid;
ps->sock = sk;
@@ -764,6 +789,74 @@ end:
return error;
}
+#ifdef CONFIG_L2TP_V3
+
+/* Called when creating sessions via the netlink interface.
+ */
+static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{
+ int error;
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_session *session;
+ struct pppol2tp_session *ps;
+
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+
+ /* Error if we can't find the tunnel */
+ error = -ENOENT;
+ if (tunnel == NULL)
+ goto out;
+
+ /* Error if tunnel socket is not prepped */
+ if (tunnel->sock == NULL)
+ goto out;
+
+ /* Check that this session doesn't already exist */
+ error = -EEXIST;
+ session = l2tp_session_find(net, tunnel, session_id);
+ if (session != NULL)
+ goto out;
+
+ /* Default MTU values. */
+ if (cfg->mtu == 0)
+ cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ if (cfg->mru == 0)
+ cfg->mru = cfg->mtu;
+
+ /* Allocate and initialize a new session context. */
+ error = -ENOMEM;
+ session = l2tp_session_create(sizeof(struct pppol2tp_session),
+ tunnel, session_id,
+ peer_session_id, cfg);
+ if (session == NULL)
+ goto out;
+
+ ps = l2tp_session_priv(session);
+ ps->tunnel_sock = tunnel->sock;
+
+ PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+ "%s: created\n", session->name);
+
+ error = 0;
+
+out:
+ return error;
+}
+
+/* Called when deleting sessions via the netlink interface.
+ */
+static int pppol2tp_session_delete(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps = l2tp_session_priv(session);
+
+ if (ps->sock == NULL)
+ l2tp_session_dec_refcount(session);
+
+ return 0;
+}
+
+#endif /* CONFIG_L2TP_V3 */
+
/* getname() support.
*/
static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
@@ -1660,6 +1753,15 @@ static struct pppox_proto pppol2tp_proto = {
.ioctl = pppol2tp_ioctl
};
+#ifdef CONFIG_L2TP_V3
+
+static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = {
+ .session_create = pppol2tp_session_create,
+ .session_delete = pppol2tp_session_delete,
+};
+
+#endif /* CONFIG_L2TP_V3 */
+
static int __init pppol2tp_init(void)
{
int err;
@@ -1676,11 +1778,22 @@ static int __init pppol2tp_init(void)
if (err)
goto out_unregister_pppol2tp_proto;
+#ifdef CONFIG_L2TP_V3
+ err = l2tp_nl_register_ops(L2TP_PWTYPE_PPP, &pppol2tp_nl_cmd_ops);
+ if (err)
+ goto out_unregister_pppox;
+#endif
+
printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
PPPOL2TP_DRV_VERSION);
out:
return err;
+
+#ifdef CONFIG_L2TP_V3
+out_unregister_pppox:
+ unregister_pppox_proto(PX_PROTO_OL2TP);
+#endif
out_unregister_pppol2tp_proto:
proto_unregister(&pppol2tp_sk_proto);
out_unregister_pppol2tp_pernet:
@@ -1690,6 +1803,9 @@ out_unregister_pppol2tp_pernet:
static void __exit pppol2tp_exit(void)
{
+#ifdef CONFIG_L2TP_V3
+ l2tp_nl_unregister_ops(L2TP_PWTYPE_PPP);
+#endif
unregister_pppox_proto(PX_PROTO_OL2TP);
proto_unregister(&pppol2tp_sk_proto);
unregister_pernet_device(&pppol2tp_net_ops);
^ permalink raw reply related
* [PATCH net-next-2.6 v4 12/14] l2tp: Add debugfs files for dumping l2tp debug info
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
The existing pppol2tp driver exports debug info to
/proc/net/pppol2tp. Rather than adding info to that file for the new
functionality added in this patch series, we add new files in debugfs,
leaving the old /proc file for backwards compatibility (L2TPv2 only).
Currently only one file is provided: l2tp/tunnels, which lists
internal debug info for all l2tp tunnels and sessions. More files may
be added later. The info is for debug and problem analysis only -
userspace apps should use netlink to obtain status about l2tp tunnels
and sessions.
Although debugfs does not support net namespaces, the tunnels and
sessions dumped in l2tp/tunnels are only those in the net namespace of
the process reading the file.
Signed-off-by: James Chapman <jchapman@katalix.com>
---
net/l2tp/Kconfig | 13 ++
net/l2tp/Makefile | 1
net/l2tp/l2tp_core.h | 8 +
net/l2tp/l2tp_debugfs.c | 341 +++++++++++++++++++++++++++++++++++++++++++++++
net/l2tp/l2tp_eth.c | 14 ++
net/l2tp/l2tp_ppp.c | 17 ++
6 files changed, 392 insertions(+), 2 deletions(-)
create mode 100644 net/l2tp/l2tp_debugfs.c
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index a292270..4b1e717 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -31,6 +31,19 @@ menuconfig L2TP
If you don't need L2TP, say N. To compile all L2TP code as
modules, choose M here.
+config L2TP_DEBUGFS
+ tristate "L2TP debugfs support"
+ depends on L2TP && DEBUG_FS
+ help
+ Support for l2tp directory in debugfs filesystem. This may be
+ used to dump internal state of the l2tp drivers for problem
+ analysis.
+
+ If unsure, say 'Y'.
+
+ To compile this driver as a module, choose M here. The module
+ will be called l2tp_debugfs.
+
config L2TP_V3
bool "L2TPv3 support (EXPERIMENTAL)"
depends on EXPERIMENTAL && L2TP
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index bddbf04..110e7bc 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -9,3 +9,4 @@ obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_DEBUGFS)) += l2tp_debugfs.o
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2974d9a..5713355 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -133,7 +133,9 @@ struct l2tp_session {
void (*session_close)(struct l2tp_session *session);
void (*ref)(struct l2tp_session *session);
void (*deref)(struct l2tp_session *session);
-
+#ifdef CONFIG_L2TP_DEBUGFS
+ void (*show)(struct seq_file *m, void *priv);
+#endif
uint8_t priv[0]; /* private data */
};
@@ -166,7 +168,9 @@ struct l2tp_tunnel {
struct net *l2tp_net; /* the net we belong to */
atomic_t ref_count;
-
+#ifdef CONFIG_DEBUG_FS
+ void (*show)(struct seq_file *m, void *arg);
+#endif
int (*recv_payload_hook)(struct sk_buff *skb);
void (*old_sk_destruct)(struct sock *);
struct sock *sock; /* Parent socket */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
new file mode 100644
index 0000000..908f10f
--- /dev/null
+++ b/net/l2tp/l2tp_debugfs.c
@@ -0,0 +1,341 @@
+/*
+ * L2TP subsystem debugfs
+ *
+ * Copyright (c) 2010 Katalix Systems Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/hash.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include "l2tp_core.h"
+
+static struct dentry *rootdir;
+static struct dentry *tunnels;
+
+struct l2tp_dfs_seq_data {
+ struct net *net;
+ int tunnel_idx; /* current tunnel */
+ int session_idx; /* index of session within current tunnel */
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_session *session; /* NULL means get next tunnel */
+};
+
+static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
+{
+ pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx);
+ pd->tunnel_idx++;
+}
+
+static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
+{
+ pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+ pd->session_idx++;
+
+ if (pd->session == NULL) {
+ pd->session_idx = 0;
+ l2tp_dfs_next_tunnel(pd);
+ }
+
+}
+
+static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
+{
+ struct l2tp_dfs_seq_data *pd = SEQ_START_TOKEN;
+ loff_t pos = *offs;
+
+ if (!pos)
+ goto out;
+
+ BUG_ON(m->private == NULL);
+ pd = m->private;
+
+ if (pd->tunnel == NULL)
+ l2tp_dfs_next_tunnel(pd);
+ else
+ l2tp_dfs_next_session(pd);
+
+ /* NULL tunnel and session indicates end of list */
+ if ((pd->tunnel == NULL) && (pd->session == NULL))
+ pd = NULL;
+
+out:
+ return pd;
+}
+
+
+static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return NULL;
+}
+
+static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
+{
+ /* nothing to do */
+}
+
+static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
+{
+ struct l2tp_tunnel *tunnel = v;
+ int session_count = 0;
+ int hash;
+ struct hlist_node *walk;
+ struct hlist_node *tmp;
+
+ read_lock_bh(&tunnel->hlist_lock);
+ for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
+ hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+ struct l2tp_session *session;
+
+ session = hlist_entry(walk, struct l2tp_session, hlist);
+ if (session->session_id == 0)
+ continue;
+
+ session_count++;
+ }
+ }
+ read_unlock_bh(&tunnel->hlist_lock);
+
+ seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id);
+ if (tunnel->sock) {
+ struct inet_sock *inet = inet_sk(tunnel->sock);
+ seq_printf(m, " from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
+ NIPQUAD(inet->inet_saddr), NIPQUAD(inet->inet_daddr));
+ if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+ seq_printf(m, " source port %hu, dest port %hu\n",
+ ntohs(inet->inet_sport), ntohs(inet->inet_dport));
+ }
+ seq_printf(m, " L2TPv%d, %s\n", tunnel->version,
+ tunnel->encap == L2TP_ENCAPTYPE_UDP ? "UDP" :
+ tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
+ "");
+ seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
+ tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
+ atomic_read(&tunnel->ref_count));
+
+ seq_printf(m, " %08x rx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+ tunnel->debug,
+ (unsigned long long)tunnel->stats.tx_packets,
+ (unsigned long long)tunnel->stats.tx_bytes,
+ (unsigned long long)tunnel->stats.tx_errors,
+ (unsigned long long)tunnel->stats.rx_packets,
+ (unsigned long long)tunnel->stats.rx_bytes,
+ (unsigned long long)tunnel->stats.rx_errors);
+
+ if (tunnel->show != NULL)
+ tunnel->show(m, tunnel);
+}
+
+static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
+{
+ struct l2tp_session *session = v;
+
+ seq_printf(m, " SESSION %u, peer %u, %s\n", session->session_id,
+ session->peer_session_id,
+ session->pwtype == L2TP_PWTYPE_ETH ? "ETH" :
+ session->pwtype == L2TP_PWTYPE_PPP ? "PPP" :
+ "");
+ if (session->send_seq || session->recv_seq)
+ seq_printf(m, " nr %hu, ns %hu\n", session->nr, session->ns);
+ seq_printf(m, " refcnt %d\n", atomic_read(&session->ref_count));
+ seq_printf(m, " config %d/%d/%c/%c/%s/%s %08x %u\n",
+ session->mtu, session->mru,
+ session->recv_seq ? 'R' : '-',
+ session->send_seq ? 'S' : '-',
+ session->data_seq == 1 ? "IPSEQ" :
+ session->data_seq == 2 ? "DATASEQ" : "-",
+ session->lns_mode ? "LNS" : "LAC",
+ session->debug,
+ jiffies_to_msecs(session->reorder_timeout));
+ seq_printf(m, " offset %hu l2specific %hu/%hu\n",
+ session->offset, session->l2specific_type, session->l2specific_len);
+ if (session->cookie_len) {
+ seq_printf(m, " cookie %02x%02x%02x%02x",
+ session->cookie[0], session->cookie[1],
+ session->cookie[2], session->cookie[3]);
+ if (session->cookie_len == 8)
+ seq_printf(m, "%02x%02x%02x%02x",
+ session->cookie[4], session->cookie[5],
+ session->cookie[6], session->cookie[7]);
+ seq_printf(m, "\n");
+ }
+ if (session->peer_cookie_len) {
+ seq_printf(m, " peer cookie %02x%02x%02x%02x",
+ session->peer_cookie[0], session->peer_cookie[1],
+ session->peer_cookie[2], session->peer_cookie[3]);
+ if (session->peer_cookie_len == 8)
+ seq_printf(m, "%02x%02x%02x%02x",
+ session->peer_cookie[4], session->peer_cookie[5],
+ session->peer_cookie[6], session->peer_cookie[7]);
+ seq_printf(m, "\n");
+ }
+
+ seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+ session->nr, session->ns,
+ (unsigned long long)session->stats.tx_packets,
+ (unsigned long long)session->stats.tx_bytes,
+ (unsigned long long)session->stats.tx_errors,
+ (unsigned long long)session->stats.rx_packets,
+ (unsigned long long)session->stats.rx_bytes,
+ (unsigned long long)session->stats.rx_errors);
+
+ if (session->show != NULL)
+ session->show(m, session);
+}
+
+static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
+{
+ struct l2tp_dfs_seq_data *pd = v;
+
+ /* display header on line 1 */
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(m, "TUNNEL ID, peer ID from IP to IP\n");
+ seq_puts(m, " L2TPv2/L2TPv3, UDP/IP\n");
+ seq_puts(m, " sessions session-count, refcnt refcnt/sk->refcnt\n");
+ seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+ seq_puts(m, " SESSION ID, peer ID, PWTYPE\n");
+ seq_puts(m, " refcnt cnt\n");
+ seq_puts(m, " offset OFFSET l2specific TYPE/LEN\n");
+ seq_puts(m, " [ cookie ]\n");
+ seq_puts(m, " [ peer cookie ]\n");
+ seq_puts(m, " config mtu/mru/rcvseq/sendseq/dataseq/lns debug reorderto\n");
+ seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+ goto out;
+ }
+
+ /* Show the tunnel or session context */
+ if (pd->session == NULL)
+ l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
+ else
+ l2tp_dfs_seq_session_show(m, pd->session);
+
+out:
+ return 0;
+}
+
+static const struct seq_operations l2tp_dfs_seq_ops = {
+ .start = l2tp_dfs_seq_start,
+ .next = l2tp_dfs_seq_next,
+ .stop = l2tp_dfs_seq_stop,
+ .show = l2tp_dfs_seq_show,
+};
+
+static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
+{
+ struct l2tp_dfs_seq_data *pd;
+ struct seq_file *seq;
+ int rc = -ENOMEM;
+
+ pd = kzalloc(GFP_KERNEL, sizeof(*pd));
+ if (pd == NULL)
+ goto out;
+
+ /* Derive the network namespace from the pid opening the
+ * file.
+ */
+ pd->net = get_net_ns_by_pid(current->pid);
+ if (IS_ERR(pd->net)) {
+ rc = -PTR_ERR(pd->net);
+ goto err_free_pd;
+ }
+
+ rc = seq_open(file, &l2tp_dfs_seq_ops);
+ if (rc)
+ goto err_free_net;
+
+ seq = file->private_data;
+ seq->private = pd;
+
+out:
+ return rc;
+
+err_free_net:
+ put_net(pd->net);
+err_free_pd:
+ kfree(pd);
+ goto out;
+}
+
+static int l2tp_dfs_seq_release(struct inode *inode, struct file *file)
+{
+ struct l2tp_dfs_seq_data *pd;
+ struct seq_file *seq;
+
+ seq = file->private_data;
+ pd = seq->private;
+ if (pd->net)
+ put_net(pd->net);
+ kfree(pd);
+ seq_release(inode, file);
+
+ return 0;
+}
+
+static const struct file_operations l2tp_dfs_fops = {
+ .owner = THIS_MODULE,
+ .open = l2tp_dfs_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = l2tp_dfs_seq_release,
+};
+
+static int __init l2tp_debugfs_init(void)
+{
+ int rc = 0;
+
+ rootdir = debugfs_create_dir("l2tp", NULL);
+ if (IS_ERR(rootdir)) {
+ rc = PTR_ERR(rootdir);
+ rootdir = NULL;
+ goto out;
+ }
+
+ tunnels = debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops);
+ if (tunnels == NULL)
+ rc = -EIO;
+
+ printk(KERN_INFO "L2TP debugfs support\n");
+
+out:
+ if (rc)
+ printk(KERN_WARNING "l2tp debugfs: unable to init\n");
+
+ return rc;
+}
+
+static void __exit l2tp_debugfs_exit(void)
+{
+ debugfs_remove(tunnels);
+ debugfs_remove(rootdir);
+}
+
+module_init(l2tp_debugfs_init);
+module_exit(l2tp_debugfs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP debugfs driver");
+MODULE_VERSION("1.0");
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 755c297..9848faa 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -172,6 +172,17 @@ static void l2tp_eth_delete(struct l2tp_session *session)
}
}
+#ifdef CONFIG_L2TP_DEBUGFS
+static void l2tp_eth_show(struct seq_file *m, void *arg)
+{
+ struct l2tp_session *session = arg;
+ struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
+ struct net_device *dev = spriv->dev;
+
+ seq_printf(m, " interface %s\n", dev->name);
+}
+#endif
+
static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct net_device *dev;
@@ -233,6 +244,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
priv->tunnel_sock = tunnel->sock;
session->recv_skb = l2tp_eth_dev_recv;
session->session_close = l2tp_eth_delete;
+#ifdef CONFIG_L2TP_DEBUGFS
+ session->show = l2tp_eth_show;
+#endif
spriv = l2tp_session_priv(session);
spriv->dev = dev;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d64f081..1ef10e4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -597,6 +597,20 @@ out:
return error;
}
+#ifdef CONFIG_L2TP_DEBUGFS
+static void pppol2tp_show(struct seq_file *m, void *arg)
+{
+ struct l2tp_session *session = arg;
+ struct pppol2tp_session *ps = l2tp_session_priv(session);
+
+ if (ps) {
+ struct pppox_sock *po = pppox_sk(ps->sock);
+ if (po)
+ seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ }
+}
+#endif
+
/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
*/
static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -734,6 +748,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
session->recv_skb = pppol2tp_recv;
session->session_close = pppol2tp_session_close;
+#ifdef CONFIG_L2TP_DEBUGFS
+ session->show = pppol2tp_show;
+#endif
/* We need to know each time a skb is dropped from the reorder
* queue.
^ permalink raw reply related
* [PATCH net-next-2.6 v4 11/14] l2tp: Add L2TP ethernet pseudowire support
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
This driver presents a regular net_device for each L2TP ethernet
pseudowire instance. These interfaces are named l2tpethN by default,
though userspace can specify an alternative name when the L2TP
session is created, if preferred. When the pseudowire is established,
regular Linux networking utilities may be used to configure the
interface, i.e. give it IP address info or add it to a bridge. Any
data passed over the interface is carried over an L2TP tunnel.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
net/l2tp/Kconfig | 24 ++++
net/l2tp/Makefile | 1
net/l2tp/l2tp_eth.c | 347 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 372 insertions(+), 0 deletions(-)
create mode 100644 net/l2tp/l2tp_eth.c
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index 0a11ccf..a292270 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -68,3 +68,27 @@ config L2TP_IP
To compile this driver as a module, choose M here. The module
will be called l2tp_ip.
+
+config L2TP_ETH
+ tristate "L2TP ethernet pseudowire support for L2TPv3"
+ depends on L2TP_V3
+ help
+ Support for carrying raw ethernet frames over L2TPv3.
+
+ From RFC 4719 <http://www.ietf.org/rfc/rfc4719.txt>.
+
+ The Layer 2 Tunneling Protocol, Version 3 (L2TPv3) can be
+ used as a control protocol and for data encapsulation to set
+ up Pseudowires for transporting layer 2 Packet Data Units
+ across an IP network [RFC3931].
+
+ This driver provides an ethernet virtual interface for each
+ L2TP ethernet pseudowire instance. Standard Linux tools may
+ be used to assign an IP address to the local virtual
+ interface, or add the interface to a bridge.
+
+ If you are using L2TPv3, you will almost certainly want to
+ enable this option.
+
+ To compile this driver as a module, choose M here. The module
+ will be called l2tp_eth.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 2c4a14b..bddbf04 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
new file mode 100644
index 0000000..755c297
--- /dev/null
+++ b/net/l2tp/l2tp_eth.c
@@ -0,0 +1,347 @@
+/*
+ * L2TPv3 ethernet pseudowire driver
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/hash.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include "l2tp_core.h"
+
+/* Default device name. May be overridden by name specified by user */
+#define L2TP_ETH_DEV_NAME "l2tpeth%d"
+
+/* via netdev_priv() */
+struct l2tp_eth {
+ struct net_device *dev;
+ struct sock *tunnel_sock;
+ struct l2tp_session *session;
+ struct list_head list;
+};
+
+/* via l2tp_session_priv() */
+struct l2tp_eth_sess {
+ struct net_device *dev;
+};
+
+/* per-net private data for this module */
+static unsigned int l2tp_eth_net_id;
+struct l2tp_eth_net {
+ struct list_head l2tp_eth_dev_list;
+ spinlock_t l2tp_eth_lock;
+};
+
+static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
+{
+ return net_generic(net, l2tp_eth_net_id);
+}
+
+static int l2tp_eth_dev_init(struct net_device *dev)
+{
+ struct l2tp_eth *priv = netdev_priv(dev);
+
+ priv->dev = dev;
+ random_ether_addr(dev->dev_addr);
+ memset(&dev->broadcast[0], 0xff, 6);
+
+ return 0;
+}
+
+static void l2tp_eth_dev_uninit(struct net_device *dev)
+{
+ struct l2tp_eth *priv = netdev_priv(dev);
+ struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+
+ spin_lock(&pn->l2tp_eth_lock);
+ list_del_init(&priv->list);
+ spin_unlock(&pn->l2tp_eth_lock);
+ dev_put(dev);
+}
+
+static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct l2tp_eth *priv = netdev_priv(dev);
+ struct l2tp_session *session = priv->session;
+
+ l2tp_xmit_skb(session, skb, session->hdr_len);
+
+ dev->stats.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
+
+ return 0;
+}
+
+static struct net_device_ops l2tp_eth_netdev_ops = {
+ .ndo_init = l2tp_eth_dev_init,
+ .ndo_uninit = l2tp_eth_dev_uninit,
+ .ndo_start_xmit = l2tp_eth_dev_xmit,
+};
+
+static void l2tp_eth_dev_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+
+ dev->netdev_ops = &l2tp_eth_netdev_ops;
+ dev->destructor = free_netdev;
+}
+
+static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
+{
+ struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
+ struct net_device *dev = spriv->dev;
+
+ if (session->debug & L2TP_MSG_DATA) {
+ unsigned int length;
+ int offset;
+ u8 *ptr = skb->data;
+
+ length = min(32u, skb->len);
+ if (!pskb_may_pull(skb, length))
+ goto error;
+
+ printk(KERN_DEBUG "%s: eth recv: ", session->name);
+
+ offset = 0;
+ do {
+ printk(" %02X", ptr[offset]);
+ } while (++offset < length);
+
+ printk("\n");
+ }
+
+ if (data_len < ETH_HLEN)
+ goto error;
+
+ secpath_reset(skb);
+
+ /* checksums verified by L2TP */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb_dst_drop(skb);
+ nf_reset(skb);
+
+ if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
+ dev->last_rx = jiffies;
+ dev->stats.rx_packets++;
+ dev->stats.rx_bytes += data_len;
+ } else
+ dev->stats.rx_errors++;
+
+ return;
+
+error:
+ dev->stats.rx_errors++;
+ kfree_skb(skb);
+}
+
+static void l2tp_eth_delete(struct l2tp_session *session)
+{
+ struct l2tp_eth_sess *spriv;
+ struct net_device *dev;
+
+ if (session) {
+ spriv = l2tp_session_priv(session);
+ dev = spriv->dev;
+ if (dev) {
+ unregister_netdev(dev);
+ spriv->dev = NULL;
+ }
+ }
+}
+
+static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_session *session;
+ struct l2tp_eth *priv;
+ struct l2tp_eth_sess *spriv;
+ int rc;
+ struct l2tp_eth_net *pn;
+
+ tunnel = l2tp_tunnel_find(net, tunnel_id);
+ if (!tunnel) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ session = l2tp_session_find(net, tunnel, session_id);
+ if (session) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ if (cfg->ifname) {
+ dev = dev_get_by_name(net, cfg->ifname);
+ if (dev) {
+ dev_put(dev);
+ rc = -EEXIST;
+ goto out;
+ }
+ strlcpy(name, cfg->ifname, IFNAMSIZ);
+ } else
+ strcpy(name, L2TP_ETH_DEV_NAME);
+
+ session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
+ peer_session_id, cfg);
+ if (!session) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup);
+ if (!dev) {
+ rc = -ENOMEM;
+ goto out_del_session;
+ }
+
+ dev_net_set(dev, net);
+ if (session->mtu == 0)
+ session->mtu = dev->mtu - session->hdr_len;
+ dev->mtu = session->mtu;
+ dev->needed_headroom += session->hdr_len;
+
+ priv = netdev_priv(dev);
+ priv->dev = dev;
+ priv->session = session;
+ INIT_LIST_HEAD(&priv->list);
+
+ priv->tunnel_sock = tunnel->sock;
+ session->recv_skb = l2tp_eth_dev_recv;
+ session->session_close = l2tp_eth_delete;
+
+ spriv = l2tp_session_priv(session);
+ spriv->dev = dev;
+
+ rc = register_netdev(dev);
+ if (rc < 0)
+ goto out_del_dev;
+
+ /* Must be done after register_netdev() */
+ strlcpy(session->ifname, dev->name, IFNAMSIZ);
+
+ dev_hold(dev);
+ pn = l2tp_eth_pernet(dev_net(dev));
+ spin_lock(&pn->l2tp_eth_lock);
+ list_add(&priv->list, &pn->l2tp_eth_dev_list);
+ spin_unlock(&pn->l2tp_eth_lock);
+
+ return 0;
+
+out_del_dev:
+ free_netdev(dev);
+out_del_session:
+ l2tp_session_delete(session);
+out:
+ return rc;
+}
+
+static __net_init int l2tp_eth_init_net(struct net *net)
+{
+ struct l2tp_eth_net *pn;
+ int err;
+
+ pn = kzalloc(sizeof(*pn), GFP_KERNEL);
+ if (!pn)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
+ spin_lock_init(&pn->l2tp_eth_lock);
+
+ err = net_assign_generic(net, l2tp_eth_net_id, pn);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ kfree(pn);
+ return err;
+}
+
+static __net_exit void l2tp_eth_exit_net(struct net *net)
+{
+ struct l2tp_eth_net *pn;
+
+ pn = net_generic(net, l2tp_eth_net_id);
+ /*
+ * if someone has cached our net then
+ * further net_generic call will return NULL
+ */
+ net_assign_generic(net, l2tp_eth_net_id, NULL);
+ kfree(pn);
+}
+
+static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
+ .init = l2tp_eth_init_net,
+ .exit = l2tp_eth_exit_net,
+ .id = &l2tp_eth_net_id,
+ .size = sizeof(struct l2tp_eth_net),
+};
+
+
+static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
+ .session_create = l2tp_eth_create,
+ .session_delete = l2tp_session_delete,
+};
+
+
+static int __init l2tp_eth_init(void)
+{
+ int err = 0;
+
+ err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
+ if (err)
+ goto out;
+
+ err = register_pernet_device(&l2tp_eth_net_ops);
+ if (err)
+ goto out_unreg;
+
+ printk(KERN_INFO "L2TP ethernet pseudowire support (L2TPv3)\n");
+
+ return 0;
+
+out_unreg:
+ l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
+out:
+ return err;
+}
+
+static void __exit l2tp_eth_exit(void)
+{
+ unregister_pernet_device(&l2tp_eth_net_ops);
+ l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
+}
+
+module_init(l2tp_eth_init);
+module_exit(l2tp_eth_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
+MODULE_VERSION("1.0");
^ permalink raw reply related
* [PATCH net-next-2.6 v4 13/14] l2tp: Add support for static unmanaged L2TPv3 tunnels
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
This patch adds support for static (unmanaged) L2TPv3 tunnels, where
the tunnel socket is created by the kernel rather than being created
by userspace. This means L2TP tunnels and sessions can be created
manually, without needing an L2TP control protocol implemented in
userspace. This might be useful where the user wants a simple ethernet
over IP tunnel.
A patch to iproute2 adds a new command set under "ip l2tp" to make use
of this feature. This will be submitted separately.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
net/l2tp/l2tp_core.c | 115 +++++++++++++++++++++++++++++++++++++++++++----
net/l2tp/l2tp_core.h | 7 +++
net/l2tp/l2tp_netlink.c | 18 ++++++-
3 files changed, 126 insertions(+), 14 deletions(-)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 473cf2d..13ed85b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1218,6 +1218,82 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
+/* Create a socket for the tunnel, if one isn't set up by
+ * userspace. This is used for static tunnels where there is no
+ * managing L2TP daemon.
+ */
+static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp)
+{
+ int err = -EINVAL;
+ struct sockaddr_in udp_addr;
+ struct sockaddr_l2tpip ip_addr;
+ struct socket *sock;
+
+ switch (cfg->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp);
+ if (err < 0)
+ goto out;
+
+ sock = *sockp;
+
+ memset(&udp_addr, 0, sizeof(udp_addr));
+ udp_addr.sin_family = AF_INET;
+ udp_addr.sin_addr = cfg->local_ip;
+ udp_addr.sin_port = htons(cfg->local_udp_port);
+ err = kernel_bind(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr));
+ if (err < 0)
+ goto out;
+
+ udp_addr.sin_family = AF_INET;
+ udp_addr.sin_addr = cfg->peer_ip;
+ udp_addr.sin_port = htons(cfg->peer_udp_port);
+ err = kernel_connect(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr), 0);
+ if (err < 0)
+ goto out;
+
+ if (!cfg->use_udp_checksums)
+ sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
+
+ break;
+
+ case L2TP_ENCAPTYPE_IP:
+ err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, sockp);
+ if (err < 0)
+ goto out;
+
+ sock = *sockp;
+
+ memset(&ip_addr, 0, sizeof(ip_addr));
+ ip_addr.l2tp_family = AF_INET;
+ ip_addr.l2tp_addr = cfg->local_ip;
+ ip_addr.l2tp_conn_id = tunnel_id;
+ err = kernel_bind(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr));
+ if (err < 0)
+ goto out;
+
+ ip_addr.l2tp_family = AF_INET;
+ ip_addr.l2tp_addr = cfg->peer_ip;
+ ip_addr.l2tp_conn_id = peer_tunnel_id;
+ err = kernel_connect(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr), 0);
+ if (err < 0)
+ goto out;
+
+ break;
+
+ default:
+ goto out;
+ }
+
+out:
+ if ((err < 0) && sock) {
+ sock_release(sock);
+ *sockp = NULL;
+ }
+
+ return err;
+}
+
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
struct l2tp_tunnel *tunnel = NULL;
@@ -1228,14 +1304,21 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
/* Get the tunnel socket from the fd, which was opened by
- * the userspace L2TP daemon.
+ * the userspace L2TP daemon. If not specified, create a
+ * kernel socket.
*/
- err = -EBADF;
- sock = sockfd_lookup(fd, &err);
- if (!sock) {
- printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
- tunnel_id, fd, err);
- goto err;
+ if (fd < 0) {
+ err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock);
+ if (err < 0)
+ goto err;
+ } else {
+ err = -EBADF;
+ sock = sockfd_lookup(fd, &err);
+ if (!sock) {
+ printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+ tunnel_id, fd, err);
+ goto err;
+ }
}
sk = sock->sk;
@@ -1329,7 +1412,10 @@ err:
if (tunnelp)
*tunnelp = tunnel;
- if (sock)
+ /* If tunnel's socket was created by the kernel, it doesn't
+ * have a file.
+ */
+ if (sock && sock->file)
sockfd_put(sock);
return err;
@@ -1341,13 +1427,22 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
int err = 0;
+ struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL;
/* Force the tunnel socket to close. This will eventually
* cause the tunnel to be deleted via the normal socket close
* mechanisms when userspace closes the tunnel socket.
*/
- if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL))
- err = inet_shutdown(tunnel->sock->sk_socket, 2);
+ if (sock != NULL) {
+ err = inet_shutdown(sock, 2);
+
+ /* If the tunnel's socket was created by the kernel,
+ * close the socket here since the socket was not
+ * created by userspace.
+ */
+ if (sock->file == NULL)
+ err = inet_release(sock);
+ }
return err;
}
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 5713355..a961c77 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -146,6 +146,13 @@ struct l2tp_tunnel_cfg {
int debug; /* bitmask of debug message
* categories */
enum l2tp_encap_type encap;
+
+ /* Used only for kernel-created sockets */
+ struct in_addr local_ip;
+ struct in_addr peer_ip;
+ u16 local_udp_port;
+ u16 peer_udp_port;
+ int use_udp_checksums:1;
};
struct l2tp_tunnel {
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 3d0f7f6..12341a6 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -129,11 +129,21 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
}
cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
- if (!info->attrs[L2TP_ATTR_FD]) {
- ret = -EINVAL;
- goto out;
+ fd = -1;
+ if (info->attrs[L2TP_ATTR_FD]) {
+ fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
+ } else {
+ if (info->attrs[L2TP_ATTR_IP_SADDR])
+ cfg.local_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_SADDR]);
+ if (info->attrs[L2TP_ATTR_IP_DADDR])
+ cfg.peer_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_DADDR]);
+ if (info->attrs[L2TP_ATTR_UDP_SPORT])
+ cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
+ if (info->attrs[L2TP_ATTR_UDP_DPORT])
+ cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
+ if (info->attrs[L2TP_ATTR_UDP_CSUM])
+ cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
}
- fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
if (info->attrs[L2TP_ATTR_DEBUG])
cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
^ permalink raw reply related
* [PATCH net-next-2.6 v4 14/14] l2tp: Update documentation
From: James Chapman @ 2010-04-02 16:19 UTC (permalink / raw)
To: netdev
In-Reply-To: <20100402161822.11367.70454.stgit@bert.katalix.com>
This patch adds documentation about the L2TPv3 functionality.
Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
---
Documentation/networking/l2tp.txt | 247 ++++++++++++++++++++++++++++++++-----
1 files changed, 213 insertions(+), 34 deletions(-)
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
index 63214b2..e7bf397 100644
--- a/Documentation/networking/l2tp.txt
+++ b/Documentation/networking/l2tp.txt
@@ -1,44 +1,95 @@
-This brief document describes how to use the kernel's PPPoL2TP driver
-to provide L2TP functionality. L2TP is a protocol that tunnels one or
-more PPP sessions over a UDP tunnel. It is commonly used for VPNs
+This document describes how to use the kernel's L2TP drivers to
+provide L2TP functionality. L2TP is a protocol that tunnels one or
+more sessions over an IP tunnel. It is commonly used for VPNs
(L2TP/IPSec) and by ISPs to tunnel subscriber PPP sessions over an IP
-network infrastructure.
+network infrastructure. With L2TPv3, it is also useful as a Layer-2
+tunneling infrastructure.
+
+Features
+========
+
+L2TPv2 (PPP over L2TP (UDP tunnels)).
+L2TPv3 ethernet pseudowires.
+L2TPv3 PPP pseudowires.
+L2TPv3 IP encapsulation.
+Netlink sockets for L2TPv3 configuration management.
+
+History
+=======
+
+The original pppol2tp driver was introduced in 2.6.23 and provided
+L2TPv2 functionality (rfc2661). L2TPv2 is used to tunnel one or more PPP
+sessions over a UDP tunnel.
+
+L2TPv3 (rfc3931) changes the protocol to allow different frame types
+to be passed over an L2TP tunnel by moving the PPP-specific parts of
+the protocol out of the core L2TP packet headers. Each frame type is
+known as a pseudowire type. Ethernet, PPP, HDLC, Frame Relay and ATM
+pseudowires for L2TP are defined in separate RFC standards. Another
+change for L2TPv3 is that it can be carried directly over IP with no
+UDP header (UDP is optional). It is also possible to create static
+unmanaged L2TPv3 tunnels manually without a control protocol
+(userspace daemon) to manage them.
+
+To support L2TPv3, the original pppol2tp driver was split up to
+separate the L2TP and PPP functionality. Existing L2TPv2 userspace
+apps should be unaffected as the original pppol2tp sockets API is
+retained. L2TPv3, however, uses netlink to manage L2TPv3 tunnels and
+sessions.
Design
======
-The PPPoL2TP driver, drivers/net/pppol2tp.c, provides a mechanism by
-which PPP frames carried through an L2TP session are passed through
-the kernel's PPP subsystem. The standard PPP daemon, pppd, handles all
-PPP interaction with the peer. PPP network interfaces are created for
-each local PPP endpoint.
-
-The L2TP protocol http://www.faqs.org/rfcs/rfc2661.html defines L2TP
-control and data frames. L2TP control frames carry messages between
-L2TP clients/servers and are used to setup / teardown tunnels and
-sessions. An L2TP client or server is implemented in userspace and
-will use a regular UDP socket per tunnel. L2TP data frames carry PPP
-frames, which may be PPP control or PPP data. The kernel's PPP
+The L2TP protocol separates control and data frames. The L2TP kernel
+drivers handle only L2TP data frames; control frames are always
+handled by userspace. L2TP control frames carry messages between L2TP
+clients/servers and are used to setup / teardown tunnels and
+sessions. An L2TP client or server is implemented in userspace.
+
+Each L2TP tunnel is implemented using a UDP or L2TPIP socket; L2TPIP
+provides L2TPv3 IP encapsulation (no UDP) and is implemented using a
+new l2tpip socket family. The tunnel socket is typically created by
+userspace, though for unmanaged L2TPv3 tunnels, the socket can also be
+created by the kernel. Each L2TP session (pseudowire) gets a network
+interface instance. In the case of PPP, these interfaces are created
+indirectly by pppd using a pppol2tp socket. In the case of ethernet,
+the netdevice is created upon a netlink request to create an L2TPv3
+ethernet pseudowire.
+
+For PPP, the PPPoL2TP driver, net/l2tp/l2tp_ppp.c, provides a
+mechanism by which PPP frames carried through an L2TP session are
+passed through the kernel's PPP subsystem. The standard PPP daemon,
+pppd, handles all PPP interaction with the peer. PPP network
+interfaces are created for each local PPP endpoint. The kernel's PPP
subsystem arranges for PPP control frames to be delivered to pppd,
while data frames are forwarded as usual.
+For ethernet, the L2TPETH driver, net/l2tp/l2tp_eth.c, implements a
+netdevice driver, managing virtual ethernet devices, one per
+pseudowire. These interfaces can be managed using standard Linux tools
+such as "ip" and "ifconfig". If only IP frames are passed over the
+tunnel, the interface can be given an IP addresses of itself and its
+peer. If non-IP frames are to be passed over the tunnel, the interface
+can be added to a bridge using brctl. All L2TP datapath protocol
+functions are handled by the L2TP core driver.
+
Each tunnel and session within a tunnel is assigned a unique tunnel_id
and session_id. These ids are carried in the L2TP header of every
-control and data packet. The pppol2tp driver uses them to lookup
-internal tunnel and/or session contexts. Zero tunnel / session ids are
-treated specially - zero ids are never assigned to tunnels or sessions
-in the network. In the driver, the tunnel context keeps a pointer to
-the tunnel UDP socket. The session context keeps a pointer to the
-PPPoL2TP socket, as well as other data that lets the driver interface
-to the kernel PPP subsystem.
-
-Note that the pppol2tp kernel driver handles only L2TP data frames;
-L2TP control frames are simply passed up to userspace in the UDP
-tunnel socket. The kernel handles all datapath aspects of the
-protocol, including data packet resequencing (if enabled).
-
-There are a number of requirements on the userspace L2TP daemon in
-order to use the pppol2tp driver.
+control and data packet. (Actually, in L2TPv3, the tunnel_id isn't
+present in data frames - it is inferred from the IP connection on
+which the packet was received.) The L2TP driver uses the ids to lookup
+internal tunnel and/or session contexts to determine how to handle the
+packet. Zero tunnel / session ids are treated specially - zero ids are
+never assigned to tunnels or sessions in the network. In the driver,
+the tunnel context keeps a reference to the tunnel UDP or L2TPIP
+socket. The session context holds data that lets the driver interface
+to the kernel's network frame type subsystems, i.e. PPP, ethernet.
+
+Userspace Programming
+=====================
+
+For L2TPv2, there are a number of requirements on the userspace L2TP
+daemon in order to use the pppol2tp driver.
1. Use a UDP socket per tunnel.
@@ -86,6 +137,35 @@ In addition to the standard PPP ioctls, a PPPIOCGL2TPSTATS is provided
to retrieve tunnel and session statistics from the kernel using the
PPPoX socket of the appropriate tunnel or session.
+For L2TPv3, userspace must use the netlink API defined in
+include/linux/l2tp.h to manage tunnel and session contexts. The
+general procedure to create a new L2TP tunnel with one session is:-
+
+1. Open a GENL socket using L2TP_GENL_NAME for configuring the kernel
+ using netlink.
+
+2. Create a UDP or L2TPIP socket for the tunnel.
+
+3. Create a new L2TP tunnel using a L2TP_CMD_TUNNEL_CREATE
+ request. Set attributes according to desired tunnel parameters,
+ referencing the UDP or L2TPIP socket created in the previous step.
+
+4. Create a new L2TP session in the tunnel using a
+ L2TP_CMD_SESSION_CREATE request.
+
+The tunnel and all of its sessions are closed when the tunnel socket
+is closed. The netlink API may also be used to delete sessions and
+tunnels. Configuration and status info may be set or read using netlink.
+
+The L2TP driver also supports static (unmanaged) L2TPv3 tunnels. These
+are where there is no L2TP control message exchange with the peer to
+setup the tunnel; the tunnel is configured manually at each end of the
+tunnel. There is no need for an L2TP userspace application in this
+case -- the tunnel socket is created by the kernel and configured
+using parameters sent in the L2TP_CMD_TUNNEL_CREATE netlink
+request. The "ip" utility of iproute2 has commands for managing static
+L2TPv3 tunnels; do "ip l2tp help" for more information.
+
Debugging
=========
@@ -102,6 +182,69 @@ PPPOL2TP_MSG_CONTROL userspace - kernel interface
PPPOL2TP_MSG_SEQ sequence numbers handling
PPPOL2TP_MSG_DATA data packets
+If enabled, files under a l2tp debugfs directory can be used to dump
+kernel state about L2TP tunnels and sessions. To access it, the
+debugfs filesystem must first be mounted.
+
+# mount -t debugfs debugfs /debug
+
+Files under the l2tp directory can then be accessed.
+
+# cat /debug/l2tp/tunnels
+
+The debugfs files should not be used by applications to obtain L2TP
+state information because the file format is subject to change. It is
+implemented to provide extra debug information to help diagnose
+problems.) Users should use the netlink API.
+
+/proc/net/pppol2tp is also provided for backwards compaibility with
+the original pppol2tp driver. It lists information about L2TPv2
+tunnels and sessions only. Its use is discouraged.
+
+Unmanaged L2TPv3 Tunnels
+========================
+
+Some commercial L2TP products support unmanaged L2TPv3 ethernet
+tunnels, where there is no L2TP control protocol; tunnels are
+configured at each side manually. New commands are available in
+iproute2's ip utility to support this.
+
+To create an L2TPv3 ethernet pseudowire between local host 192.168.1.1
+and peer 192.168.1.2, using IP addresses 10.5.1.1 and 10.5.1.2 for the
+tunnel endpoints:-
+
+# modprobe l2tp_eth
+# modprobe l2tp_netlink
+
+# ip l2tp add tunnel tunnel_id 1 peer_tunnel_id 1 udp_sport 5000 \
+ udp_dport 5000 encap udp local 192.168.1.1 remote 192.168.1.2
+# ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
+# ifconfig -a
+# ip addr add 10.5.1.2/32 peer 10.5.1.1/32 dev l2tpeth0
+# ifconfig l2tpeth0 up
+
+Choose IP addresses to be the address of a local IP interface and that
+of the remote system. The IP addresses of the l2tpeth0 interface can be
+anything suitable.
+
+Repeat the above at the peer, with ports, tunnel/session ids and IP
+addresses reversed. The tunnel and session IDs can be any non-zero
+32-bit number, but the values must be reversed at the peer.
+
+Host 1 Host2
+udp_sport=5000 udp_sport=5001
+udp_dport=5001 udp_dport=5000
+tunnel_id=42 tunnel_id=45
+peer_tunnel_id=45 peer_tunnel_id=42
+session_id=128 session_id=5196755
+peer_session_id=5196755 peer_session_id=128
+
+When done at both ends of the tunnel, it should be possible to send
+data over the network. e.g.
+
+# ping 10.5.1.1
+
+
Sample Userspace Code
=====================
@@ -158,12 +301,48 @@ Sample Userspace Code
}
return 0;
+Internal Implementation
+=======================
+
+The driver keeps a struct l2tp_tunnel context per L2TP tunnel and a
+struct l2tp_session context for each session. The l2tp_tunnel is
+always associated with a UDP or L2TP/IP socket and keeps a list of
+sessions in the tunnel. The l2tp_session context keeps kernel state
+about the session. It has private data which is used for data specific
+to the session type. With L2TPv2, the session always carried PPP
+traffic. With L2TPv3, the session can also carry ethernet frames
+(ethernet pseudowire) or other data types such as ATM, HDLC or Frame
+Relay.
+
+When a tunnel is first opened, the reference count on the socket is
+increased using sock_hold(). This ensures that the kernel socket
+cannot be removed while L2TP's data structures reference it.
+
+Some L2TP sessions also have a socket (PPP pseudowires) while others
+do not (ethernet pseudowires). We can't use the socket reference count
+as the reference count for session contexts. The L2TP implementation
+therefore has its own internal reference counts on the session
+contexts.
+
+To Do
+=====
+
+Add L2TP tunnel switching support. This would route tunneled traffic
+from one L2TP tunnel into another. Specified in
+http://tools.ietf.org/html/draft-ietf-l2tpext-tunnel-switching-08
+
+Add L2TPv3 VLAN pseudowire support.
+
+Add L2TPv3 IP pseudowire support.
+
+Add L2TPv3 ATM pseudowire support.
+
Miscellaneous
-============
+=============
-The PPPoL2TP driver was developed as part of the OpenL2TP project by
+The L2TP drivers were developed as part of the OpenL2TP project by
Katalix Systems Ltd. OpenL2TP is a full-featured L2TP client / server,
designed from the ground up to have the L2TP datapath in the
kernel. The project also implemented the pppol2tp plugin for pppd
which allows pppd to use the kernel driver. Details can be found at
-http://openl2tp.sourceforge.net.
+http://www.openl2tp.org.
^ permalink raw reply related
* Re: [PATCH v3 10/12] l2tp: Add L2TP ethernet pseudowire support
From: Paul E. McKenney @ 2010-04-02 16:21 UTC (permalink / raw)
To: Eric Dumazet; +Cc: James Chapman, Stephen Hemminger, netdev
In-Reply-To: <1270223931.11099.7.camel@edumazet-laptop>
On Fri, Apr 02, 2010 at 05:58:51PM +0200, Eric Dumazet wrote:
> Le vendredi 02 avril 2010 à 08:24 -0700, Paul E. McKenney a écrit :
> > On Wed, Mar 31, 2010 at 10:38:36AM -0700, Paul E. McKenney wrote:
>
> > > The list_is_last() is RCU-safe already, since the ->next pointer is
> > > only compared, never dereferenced. I suggest adding a comment to its
> > > header stating that it is RCU-safe.
> > >
> > > Feel free to create a list_for_each_entry_safe_rcu(), and I will be
> > > happy to review it.
> >
> > Right. When using RCU, list_for_each_entry_safe_rcu() is not needed,
> > because destruction of the element you removed doesn't happen until the
> > end of a subsequent grace period. By which time you will be done with
> > your RCU read-side critical section.
> >
> > So you can just use list_for_each_entry_rcu(), you do not need a new
> > list_for_each_entry_safe_rcu().
> >
> > But you did have me going for a bit!!! ;-)
>
> Just wondering Paul if you need to rest :)
In this case, I most certainly needed to have engaged my brain more
fully before hitting "send". :-/
> list_for_each_entry_safe_rcu() is not needed not because of various (and
> very intersting) reasons you gave !!!
>
> But because list_for_each_entry_safe() is OK, since if we are in a
> destroy phase, we are a writer, and can use regular list primitive.
>
> Remember _rcu() versions of list iterators are for readers, and readers
> are NOT supposed to delete an item from the list !
Yes, RCU readers had better have acquired the update-side lock before
modifying the list. And then there would be some debate as to whether
they were still readers. For example, while reading one list under RCU
protection, you might be modifying a subordinate list while holding the
corresponding lock.
And in the networking code, you would also need to convince the usual
suspects, including Eric, that upgrading to writer in the middle of an
RCU read-side critical section was the right thing to be doing in the
first place!
Thanx, Paul
^ permalink raw reply
* Re: [PATCH] rfs: Receive Flow Steering
From: Eric Dumazet @ 2010-04-02 16:28 UTC (permalink / raw)
To: Tom Herbert; +Cc: davem, netdev
In-Reply-To: <1270211871.11099.2.camel@edumazet-laptop>
Some more thoughts ...
Do we really want to call inet_rps_record_flow(sk) from inet_sendmsg() &
inet_sendpage() ?
This seems not necessary to me...
This bit :
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f4df5f9..3060c17 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1674,6 +1674,10 @@ process:
skb->dev = NULL;
+#ifdef CONFIG_RPS
+ inet_sk(sk)->rxhash = skb->rxhash;
+#endif
+
bh_lock_sock_nested(sk);
ret = 0;
if (!sock_owned_by_user(sk)) {
1) It's not pretty, could you define a helper ?
2) Why should we set inet_sk(sk)->rxhash at each packet ?
rxhash is a constant given the (src,dst,sport,dport) tuple and could be
set at connection time. This way we could set inet_sk(sk)->rxhash both
for TCP and UDP.
(Currently, you dont set inet_sk(sk)->rxhash on UDP)
^ permalink raw reply related
* Re: [PATCH] rfs: Receive Flow Steering
From: Rick Jones @ 2010-04-02 17:01 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Changli Gao, Tom Herbert, davem, netdev
In-Reply-To: <1270193393.1936.52.camel@edumazet-laptop>
Eric Dumazet wrote:
>
> Your claim of RPS being not good for applications is wrong, our test
> results show an improvement as is. Maybe your applications dont scale,
> because of bad habits, or collidings heuristics, I dont know.
The progression in HP-UX was IPS (10.20) (aka RPS) then TOPS (11.0) (aka RFS).
We found that IPS was great for single-flow-per-thread-of-execution stuff and
that TOPS was better for multiple-flow-per-thread-of-execution stuff. It was
long enough ago now that I can safely say for one system-level benchmark not
known to be a "networking" benchmark, and without a massive kernel component,
TOPS was a 10% win. Not too shabby.
It wasn't that IPS wasn't good in its context - just that TOPS was even better.
We also preferred the concept of the scheduler giving networking clues as to
where to process an application's packets rather than networking trying to tell
the scheduler. There was some discussion of out of order worries, but we were
willing to trust to the basic soundness of the scheduler - if it was moving
threads around willy nilly at a rate able to cause big packet reordering it had
fundamental problems that would have to be addressed anyway. And while it may
be incindiary to point this out :) I suspect (without concrete data :) that
bonding mode 0 is a much, Much, MUCH larger source of out-of-order traffic than
any plausible scheduler thrashing.
happy benchmarking,
rick jones
^ permalink raw reply
* Re: [PATCH 0/2] Fix ethernet driver for Octeon based Movidis hardware
From: Greg KH @ 2010-04-02 17:04 UTC (permalink / raw)
To: David Miller; +Cc: ddaney, ralf, linux-mips, netdev
In-Reply-To: <20100401.182045.106908204.davem@davemloft.net>
On Thu, Apr 01, 2010 at 06:20:45PM -0700, David Miller wrote:
> From: David Daney <ddaney@caviumnetworks.com>
> Date: Thu, 1 Apr 2010 18:17:53 -0700
>
> > The Movidis X16 bootloader doesn't enable the mdio bus. The first
> > patch fixes this by enabling the mdio bus when the driver is
> > initialized.
> >
> > Also the addresses of the PHYs was unknown for this device. The
> > second patch adds the corresponding PHY addresses.
> >
> > With both patches applied I can successfully use all eight Ethernet
> > ports.
> >
> > Please consider for 2.6.34. Since Octeon is an embedded MIPS SOC it
> > is unlikely to break the kernel for any workstations. Any or all of
> > these could be considered for merging via Ralf's linux-mips.org tree.
>
> Ralf please merge this via your MIPS tree, thanks:
>
> Acked-by: David S. Miller <davem@davemloft.net>
I agree, Ralf, please take these and you can add:
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
to them.
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCHv3] drivers/net/usb: Add new driver ipheth
From: "L. Alberto Giménez" @ 2010-04-02 17:15 UTC (permalink / raw)
To: Ben Hutchings
Cc: linux-kernel, netdev, linux-usb, oliver, linville, j.dumon,
steve.glendinning, davem, gregkh, dgiagio, dborca
In-Reply-To: <1270077538.8653.484.camel@localhost>
On 04/01/2010 01:18 AM, Ben Hutchings wrote:
> On Wed, 2010-03-31 at 21:42 +0200, L. Alberto Giménez wrote:
> [...]
>> --- /dev/null
>> +++ b/drivers/net/usb/ipheth.c
> [...]
Hi Ben,
Upstream has fixed several errors pointed out by you and Oliver (thanks
for that), but some of them are still pending.
I will send patches on top of my last driver submission (if the proper
way would be resubmit the whole code, please tell me. Anyway I need to
clarify some doubts...
>> + usb_fill_bulk_urb(dev->tx_urb, udev,
>> + usb_sndbulkpipe(udev, dev->bulk_out),
>> + dev->tx_buf, IPHETH_BUF_SIZE,
>> + ipheth_sndbulk_callback,
>> + dev);
>> + dev->tx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
>> +
>> + retval = usb_submit_urb(dev->tx_urb, GFP_ATOMIC);
>> + if (retval) {
>> + err("%s: usb_submit_urb: %d", __func__, retval);
>> + dev->stats.tx_errors++;
>> + dev_kfree_skb_irq(skb);
>> + } else {
>> + net->trans_start = jiffies;
>
> No longer needed.
What is not longer needed? The assignment, the whole "else" branch? If
the assignment is what is not needed, can I just remove that line, right?
> [...]
>> +#ifdef HAVE_NET_DEVICE_OPS
>> +static const struct net_device_ops ipheth_netdev_ops = {
>> + .ndo_open = &ipheth_open,
>> + .ndo_stop = &ipheth_close,
>> + .ndo_start_xmit = &ipheth_tx,
>> + .ndo_tx_timeout = &ipheth_tx_timeout,
>> + .ndo_get_stats = &ipheth_stats,
>> +};
>> +#endif
>
> Remove the #ifdef, there is no question whether we have net_device_ops.
Ok, I will just remove both #ifdefs, but why is that? Maybe in previous
versions of the kernel the net_device_ops struct was introduced and now
it's present no matter how you configure your kernel?
> I have no idea about USB so I haven't checked the USB API usage at all.
I think that Greg is the maintainer for the USB subsystem, so if he has
no further commets, I will try to submit fixes for both your and
Oliver's comments along with the upstream developers.
Thanks for your comments.
Best regards,
--
L. Alberto Giménez
JabberID agimenez@jabber.sysvalve.es
GnuPG key ID 0x3BAABDE1
^ permalink raw reply
* Re: [PATCHv3] drivers/net/usb: Add new driver ipheth
From: Ben Hutchings @ 2010-04-02 17:21 UTC (permalink / raw)
To: "L. Alberto Giménez"
Cc: linux-kernel, netdev, linux-usb, oliver, linville, j.dumon,
steve.glendinning, davem, gregkh, dgiagio, dborca
In-Reply-To: <4BB62620.3070807@sysvalve.es>
On Fri, 2010-04-02 at 19:15 +0200, "L. Alberto Giménez" wrote:
> On 04/01/2010 01:18 AM, Ben Hutchings wrote:
> > On Wed, 2010-03-31 at 21:42 +0200, L. Alberto Giménez wrote:
> > [...]
> >> --- /dev/null
> >> +++ b/drivers/net/usb/ipheth.c
> > [...]
>
> Hi Ben,
>
> Upstream has fixed several errors pointed out by you and Oliver (thanks
> for that), but some of them are still pending.
>
> I will send patches on top of my last driver submission (if the proper
> way would be resubmit the whole code, please tell me. Anyway I need to
> clarify some doubts...
Since David Miller has not merged your original patch, you should send a
single new patch with the changes incorporated.
> >> + usb_fill_bulk_urb(dev->tx_urb, udev,
> >> + usb_sndbulkpipe(udev, dev->bulk_out),
> >> + dev->tx_buf, IPHETH_BUF_SIZE,
> >> + ipheth_sndbulk_callback,
> >> + dev);
> >> + dev->tx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
> >> +
> >> + retval = usb_submit_urb(dev->tx_urb, GFP_ATOMIC);
> >> + if (retval) {
> >> + err("%s: usb_submit_urb: %d", __func__, retval);
> >> + dev->stats.tx_errors++;
> >> + dev_kfree_skb_irq(skb);
> >> + } else {
> >> + net->trans_start = jiffies;
> >
> > No longer needed.
>
> What is not longer needed? The assignment, the whole "else" branch? If
> the assignment is what is not needed, can I just remove that line, right?
The assignment is not needed.
> > [...]
> >> +#ifdef HAVE_NET_DEVICE_OPS
> >> +static const struct net_device_ops ipheth_netdev_ops = {
> >> + .ndo_open = &ipheth_open,
> >> + .ndo_stop = &ipheth_close,
> >> + .ndo_start_xmit = &ipheth_tx,
> >> + .ndo_tx_timeout = &ipheth_tx_timeout,
> >> + .ndo_get_stats = &ipheth_stats,
> >> +};
> >> +#endif
> >
> > Remove the #ifdef, there is no question whether we have net_device_ops.
>
> Ok, I will just remove both #ifdefs, but why is that? Maybe in previous
> versions of the kernel the net_device_ops struct was introduced and now
> it's present no matter how you configure your kernel?
[...]
Correct, it is now mandatory.
Ben.
--
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
^ permalink raw reply
* Re: [PATCH] ethtool: RXHASH flag support (v2)
From: Jeff Garzik @ 2010-04-02 17:26 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <20100402081515.1b963562@nehalam>
Both patches look great, thanks. Will apply after Easter holiday...
Jeff
^ permalink raw reply
* Receive steering and hash and cache misses
From: Stephen Hemminger @ 2010-04-02 17:26 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev
Although Receive Packet Steering can use a hardware generated receive hash
the device driver still causes an unnecessary cache miss on the interrupt
processing CPU. The current Ethernet network device driver receive processing
has the device driver calling eth_type_trans() which causes a the
interrupt CPU to read the received frame header.
Is there some way the hardware receive hash value could be used to
steer to the receive CPU, then have the receive CPU find the Ethernet
type field (eth_type_trans)?
^ permalink raw reply
* [PATCH] vhost: Make it more scalable by creating a vhost thread per device.
From: Sridhar Samudrala @ 2010-04-02 17:31 UTC (permalink / raw)
To: Michael S. Tsirkin, Tom Lendacky; +Cc: netdev, kvm@vger.kernel.org
Make vhost scalable by creating a separate vhost thread per vhost
device. This provides better scaling across multiple guests and with
multiple interfaces in a guest.
I am seeing better aggregated througput/latency when running netperf
across multiple guests or multiple interfaces in a guest in parallel
with this patch.
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index a6a88df..29aa80f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -339,8 +339,10 @@ static int vhost_net_open(struct inode *inode, struct file *f)
return r;
}
- vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT);
- vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT,
+ &n->dev);
+ vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN,
+ &n->dev);
n->tx_poll_state = VHOST_NET_POLL_DISABLED;
f->private_data = n;
@@ -643,25 +645,14 @@ static struct miscdevice vhost_net_misc = {
int vhost_net_init(void)
{
- int r = vhost_init();
- if (r)
- goto err_init;
- r = misc_register(&vhost_net_misc);
- if (r)
- goto err_reg;
- return 0;
-err_reg:
- vhost_cleanup();
-err_init:
- return r;
-
+ return misc_register(&vhost_net_misc);
}
+
module_init(vhost_net_init);
void vhost_net_exit(void)
{
misc_deregister(&vhost_net_misc);
- vhost_cleanup();
}
module_exit(vhost_net_exit);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 7bd7a1e..243f4d3 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -36,8 +36,6 @@ enum {
VHOST_MEMORY_F_LOG = 0x1,
};
-static struct workqueue_struct *vhost_workqueue;
-
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
@@ -56,18 +54,19 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
if (!((unsigned long)key & poll->mask))
return 0;
- queue_work(vhost_workqueue, &poll->work);
+ queue_work(poll->dev->wq, &poll->work);
return 0;
}
/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
- unsigned long mask)
+ unsigned long mask, struct vhost_dev *dev)
{
INIT_WORK(&poll->work, func);
init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
init_poll_funcptr(&poll->table, vhost_poll_func);
poll->mask = mask;
+ poll->dev = dev;
}
/* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -96,7 +95,7 @@ void vhost_poll_flush(struct vhost_poll *poll)
void vhost_poll_queue(struct vhost_poll *poll)
{
- queue_work(vhost_workqueue, &poll->work);
+ queue_work(poll->dev->wq, &poll->work);
}
static void vhost_vq_reset(struct vhost_dev *dev,
@@ -128,6 +127,11 @@ long vhost_dev_init(struct vhost_dev *dev,
struct vhost_virtqueue *vqs, int nvqs)
{
int i;
+
+ dev->wq = create_singlethread_workqueue("vhost");
+ if (!dev->wq)
+ return -ENOMEM;
+
dev->vqs = vqs;
dev->nvqs = nvqs;
mutex_init(&dev->mutex);
@@ -143,7 +147,7 @@ long vhost_dev_init(struct vhost_dev *dev,
if (dev->vqs[i].handle_kick)
vhost_poll_init(&dev->vqs[i].poll,
dev->vqs[i].handle_kick,
- POLLIN);
+ POLLIN, dev);
}
return 0;
}
@@ -216,6 +220,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
+
+ destroy_workqueue(dev->wq);
}
static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -1095,16 +1101,3 @@ void vhost_disable_notify(struct vhost_virtqueue *vq)
vq_err(vq, "Failed to enable notification at %p: %d\n",
&vq->used->flags, r);
}
-
-int vhost_init(void)
-{
- vhost_workqueue = create_singlethread_workqueue("vhost");
- if (!vhost_workqueue)
- return -ENOMEM;
- return 0;
-}
-
-void vhost_cleanup(void)
-{
- destroy_workqueue(vhost_workqueue);
-}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 44591ba..60fefd0 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -29,10 +29,11 @@ struct vhost_poll {
/* struct which will handle all actual work. */
struct work_struct work;
unsigned long mask;
+ struct vhost_dev *dev;
};
void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
- unsigned long mask);
+ unsigned long mask, struct vhost_dev *dev);
void vhost_poll_start(struct vhost_poll *poll, struct file *file);
void vhost_poll_stop(struct vhost_poll *poll);
void vhost_poll_flush(struct vhost_poll *poll);
@@ -110,6 +111,7 @@ struct vhost_dev {
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
+ struct workqueue_struct *wq;
};
long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
@@ -136,9 +138,6 @@ bool vhost_enable_notify(struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len);
-int vhost_init(void);
-void vhost_cleanup(void);
-
#define vq_err(vq, fmt, ...) do { \
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
if ((vq)->error_ctx) \
^ permalink raw reply related
* Re: [PATCH 1/2] phylib: Support phy module autoloading
From: Andy Fleming @ 2010-04-02 17:51 UTC (permalink / raw)
To: David Woodhouse; +Cc: davem, netdev, ben
In-Reply-To: <1270206327.3101.2436.camel@macbook.infradead.org>
On Fri, Apr 2, 2010 at 6:05 AM, David Woodhouse <dwmw2@infradead.org> wrote:
> We don't use the normal hotplug mechanism because it doesn't work. It will
> load the module some time after the device appears, but that's not good
> enough for us -- we need the driver loaded _immediately_ because otherwise
> the NIC driver may just abort and then the phy 'device' goes away.
>
> [bwh: s/phy/mdio/ in module alias, kerneldoc for struct mdio_device_id]
>
> Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
> Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Neat!
Acked-by: Andy Fleming <afleming@freescale.com>
^ permalink raw reply
* Re: [PATCHv3] drivers/net/usb: Add new driver ipheth
From: "L. Alberto Giménez" @ 2010-04-02 17:53 UTC (permalink / raw)
To: Ben Hutchings
Cc: "L. Alberto Giménez", linux-kernel, netdev,
linux-usb, oliver, linville, j.dumon, steve.glendinning, davem,
gregkh, dgiagio, Daniel Borca
In-Reply-To: <1270228883.12516.140.camel@localhost>
On 04/02/2010 07:21 PM, Ben Hutchings wrote:
> On Fri, 2010-04-02 at 19:15 +0200, "L. Alberto Giménez" wrote:
>> On 04/01/2010 01:18 AM, Ben Hutchings wrote:
>>> On Wed, 2010-03-31 at 21:42 +0200, L. Alberto Giménez wrote:
>>> [...]
>>>> --- /dev/null
>>>> +++ b/drivers/net/usb/ipheth.c
>>> [...]
>>>> + usb_fill_bulk_urb(dev->tx_urb, udev,
>>>> + usb_sndbulkpipe(udev, dev->bulk_out),
>>>> + dev->tx_buf, IPHETH_BUF_SIZE,
>>>> + ipheth_sndbulk_callback,
>>>> + dev);
>>>> + dev->tx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
>>>> +
>>>> + retval = usb_submit_urb(dev->tx_urb, GFP_ATOMIC);
>>>> + if (retval) {
>>>> + err("%s: usb_submit_urb: %d", __func__, retval);
>>>> + dev->stats.tx_errors++;
>>>> + dev_kfree_skb_irq(skb);
>>>> + } else {
>>>> + net->trans_start = jiffies;
>>> No longer needed.
>> What is not longer needed? The assignment, the whole "else" branch? If
>> the assignment is what is not needed, can I just remove that line, right?
>
> The assignment is not needed.
Hi,
I've been looking into this and it seems that the net_device.trans_start
field is now deprecated in favor of net_device.rx_queue.trans_start
-rx_queue is a struct netdev_queue- (file include/linux/netdevice.h), as
states the comment:
512 /*
513 * please use this field instead of dev->trans_start
514 */
515 unsigned long trans_start;
Reading LDD3 book, it says that the driver is reponsible for updating
trans_start (as well as trans_rx, but we're not talking about that one).
So, I guess that the LDD book is outdated (again ;) ), but what I don't
understand at all is wether the driver should keep updating the right
field (dev->rx_queue.trans_start) or if the fact that it's inside a
"queue" implies that the code that is responsible for that queue would
update the trans_start field by itself?
Thanks in advance.
Best regards,
--
L. Alberto Giménez
JabberID agimenez@jabber.sysvalve.es
GnuPG key ID 0x3BAABDE1
^ permalink raw reply
* Re: Receive steering and hash and cache misses
From: Tom Herbert @ 2010-04-02 17:59 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: Eric Dumazet, netdev
In-Reply-To: <20100402102650.5bdb5b52@nehalam>
On Fri, Apr 2, 2010 at 10:26 AM, Stephen Hemminger
<shemminger@vyatta.com> wrote:
>
> Although Receive Packet Steering can use a hardware generated receive hash
> the device driver still causes an unnecessary cache miss on the interrupt
> processing CPU. The current Ethernet network device driver receive processing
> has the device driver calling eth_type_trans() which causes a the
> interrupt CPU to read the received frame header.
>
It should be possible to deduce the values set by eth_type_trans from
the RX descriptor along with the RX hash. I'll post the patch getting
rxhash from bnx2x which does this.
> Is there some way the hardware receive hash value could be used to
> steer to the receive CPU, then have the receive CPU find the Ethernet
> type field (eth_type_trans)?
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: Any ideas about a crash on reboot with igb and intel_iommu?
From: Roland Dreier @ 2010-04-02 18:13 UTC (permalink / raw)
To: netdev; +Cc: iommu, David Woodhouse
In-Reply-To: <adabpe3f3c4.fsf@roland-alpha.cisco.com>
So actually I found the following change that went into 2.6.31:
commit 91615f765a2935b6cbae424b9eee1585ed681ae6
Author: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Tue Jun 30 12:45:15 2009 +0000
igb: fix unmap length bug
driver was mixing NET_IP_ALIGN count bytes in map/unmap calls
unevenly. Only map the bytes that the hardware might dma into
igb in 2.6.30.y is doing pci_map_single(<some length>) and doing
pci_unmap_single(<some other length>).
However I haven't been able to provoke the crash yet, even by bouncing
an igb interface (with VT-d turned on and with CONFIG_DMAR_DEFAULT_ON enabled).
Does this make sense as the sort of thing that might corrupt the iova
rbtree and lead to a crash in the __free_iova / rb_erase code?
Thanks,
Roland
--
Roland Dreier <rolandd@cisco.com> || For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/index.html
^ permalink raw reply
* Re: [PATCH 5/5] netfilter: xt_TEE: have cloned packet travel through Xtables too
From: Jan Engelhardt @ 2010-04-02 18:15 UTC (permalink / raw)
To: Patrick McHardy; +Cc: netfilter-devel, netdev
In-Reply-To: <4BB4A796.8010708@trash.net>
On Thursday 2010-04-01 16:03, Patrick McHardy wrote:
>>>>[detecting teed packets getting teed again by means of
>>>> iptables -A OUTPUT -j TEE]
>>>
>>> What's wrong with adding a reentrancy counter?
>>
>> Sounds like a plan.
Should we be using a percpu variable, or is a simplistic
array ok too?
static bool tee_active;
target(...)
{
if (tee_active[smp_processor_id()])
return XT_CONTINUE;
...
if (tee_tg4_route(...)) {
tee_active[cpu] = true;
ip_local_out(skb);
tee_active[cpu] = false;
}
}
^ permalink raw reply
* Re: [PATCHv3] drivers/net/usb: Add new driver ipheth
From: "L. Alberto Giménez" @ 2010-04-02 18:23 UTC (permalink / raw)
To: Oliver Neukum
Cc: linux-kernel, netdev, linux-usb, linville, j.dumon,
steve.glendinning, davem, gregkh, dgiagio, dborca
In-Reply-To: <201003312233.26130.oliver@neukum.org>
On 03/31/2010 10:33 PM, Oliver Neukum wrote:
> Am Mittwoch, 31. März 2010 21:42:07 schrieb L. Alberto Giménez:
Hi Oliver,
Just like with Ben's comments I still have a couple of doubts about your
comments.
>> +
>> +static int ipheth_open(struct net_device *net)
>> +{
>> + struct ipheth_device *dev = netdev_priv(net);
>> + struct usb_device *udev = dev->udev;
>> + int retval = 0;
>> +
>> + usb_set_interface(udev, IPHETH_INTFNUM, IPHETH_ALT_INTFNUM);
>> + usb_clear_halt(udev, usb_rcvbulkpipe(udev, dev->bulk_in));
>> + usb_clear_halt(udev, usb_sndbulkpipe(udev, dev->bulk_out));
>
> Is this really needed? If so, please add a comment.
I understand that usb_clear_halt is only needed when the device has
transmitted data, and as it is "open" time, we can assume that no
transmissions ere made, so we don't need to clear anything (aka: remove
both lines), am I right?
>> +
>> + retval = ipheth_carrier_set(dev);
>> + if (retval)
>> + goto error;
>> +
>> + retval = ipheth_rx_submit(dev, GFP_KERNEL);
>> + if (retval)
>> + goto error;
>> +
>> + schedule_delayed_work(&dev->carrier_work, IPHETH_CARRIER_CHECK_TIMEOUT);
>
> Does it make sense to start rx while you have no carrier?
Well, I have no clue about this one. I think that upstream developers
should take a look into this (Dario, Daniel, could you?) since I don't
have the knowledge to decide what to do about it.
But I assume that as with the previous one, we have just opened the
device and we aren't (yet) doing anything with it, so we shouldn't start rx?
>> +static void ipheth_disconnect(struct usb_interface *intf)
>> +{
>> + struct ipheth_device *dev;
>> +
>> + dev = usb_get_intfdata(intf);
>> + if (dev != NULL) {
>
> is this check needed?
Does usb_get_infdata always return not NULL? I haven't found anything
about it (just manual pages for the function, but can't spot if it
cannot return NULL). We disconnected the device, but I understand that
the kernel still has the information and the allocated memory, so the
cleanup code is still needed, isn't it?
>
>> +static struct usb_driver ipheth_driver = {
>> + .name = "ipheth",
>> + .probe = ipheth_probe,
>> + .disconnect = ipheth_disconnect,
>> + .id_table = ipheth_table,
>> + .supports_autosuspend = 0,
>
> redundant
Why?
Thanks a lot in advance.
Best regards,
--
L. Alberto Giménez
JabberID agimenez@jabber.sysvalve.es
GnuPG key ID 0x3BAABDE1
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox