All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch] vnet-module   ( for  3.1  && unstable )
@ 2007-07-15 16:26 Robert Valentan
  2007-07-16  2:15 ` Zhu Han
  0 siblings, 1 reply; 3+ messages in thread
From: Robert Valentan @ 2007-07-15 16:26 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1057 bytes --]

Hallo!

The attached patch makes the vnet-module working for i386
and x86_64. (I had no other systems for testing)
(My patch from 6.6.2007 is included)

 - x86_64: the x86-64 kernel has __ARCH_WANT_SYS_SOCKETCALL
   defined, but has no socketcall. Replacing the _syscall's
   to a working one.

 - moves the "skb_pull" function from kernel to skb_util.c
   because pulling in the data will rise a "BUG_ON" in the
   kernel. The kernel-function does not allow to pull into
   the payload!

 - the skb_buff is not always possible to modify a received
   buffer to send it again to the network.
   (vnet_forward.c and etherip.c) The code now work's..
   I think with no or less time-penalty.


Summary:
Solved:
  With this patch the vnet-module is working on i386 and x86_64,
  including peering-option.   (except the following "open")


Open:
 - Vnet's only with security-option "none" are working.
 - The module-option "vnet_encaps" is ignored, always "udp" is used.



Signed-off-by: Robert Valentan <R.Valentan@solid-soft.at>


-- 
regards
Robert Valentan

[-- Attachment #2: patch_2.txt --]
[-- Type: text/plain, Size: 16522 bytes --]

diff -r aee991c7723a tools/vnet/vnet-module/esp.c
--- a/tools/vnet/vnet-module/esp.c	Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/esp.c	Sun Jul 15 17:03:47 2007
@@ -341,12 +341,13 @@
         dprintf("> ETH header pull...\n");
         memmove(skb->data, skb->mac.raw, ETH_HLEN);
         skb->mac.raw = skb->data; 
-        __skb_pull(skb, ETH_HLEN);
+        skb_pull_vn(skb, ETH_HLEN);
     }
     dprintf("> IP header pull...\n");
     memmove(skb->data, skb->nh.raw, ip_n);
     skb->nh.raw = skb->data;
-    __skb_pull(skb, ip_n);
+    skb_pull_vn(skb, ip_n);
+    skb->h.raw = skb->data;
     esph = (void*)skb->data;
     // Add spi and sequence number.
     esph->spi = sa->ident.spi;
@@ -457,7 +458,7 @@
     // Move skb->data back to ethernet header.
     // Do in 2 moves to ensure offsets are +ve,
     // since args to skb_pull/skb_push are unsigned.
-    __skb_pull(skb, head_n);
+    skb_pull_vn(skb, head_n);
     __skb_push(skb, skb->data - skb->mac.raw);
     // After this esph is invalid.
     esph = NULL;
@@ -763,7 +764,7 @@
     dprintf(">\n");
 #ifdef DEBUG
     dprintf("> recv skb=\n"); 
-    skb_print_bits(skb, 0, skb->len);
+    skb_print_bits("", skb, 0, skb->len);
 #endif
     ip_n = (skb->nh.iph->ihl << 2);
     if(skb->data == skb->mac.raw){
@@ -773,7 +774,7 @@
             err = -EINVAL;
             goto exit;
         }
-        skb_pull(skb, eth_n + ip_n);
+        skb_pull_vn(skb, eth_n + ip_n);
     }
     addr = skb->nh.iph->daddr;
     err = esp_skb_header(skb, &esph);
diff -r aee991c7723a tools/vnet/vnet-module/etherip.c
--- a/tools/vnet/vnet-module/etherip.c	Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/etherip.c	Sun Jul 15 17:03:47 2007
@@ -270,6 +270,7 @@
     u32 saddr, daddr;
     char vnetbuf[VNET_ID_BUF];
     struct ethhdr *eth;
+    struct sk_buff *newskb;
 
     dprintf(">\n");
     saddr = skb->nh.iph->saddr;
@@ -293,7 +294,7 @@
             err = -EINVAL;
             goto exit;
         }
-        skb_pull(skb, pull_n);
+        skb_pull_vn(skb, pull_n);
     }
     // Assume skb->data points at etherip header.
     etheriph = (void*)skb->data;
@@ -318,7 +319,18 @@
         goto exit;
     }
     // Point at the headers in the contained ethernet frame.
-    skb->mac.raw = skb_pull(skb, etherip_n);
+    skb->mac.raw = skb_pull_vn(skb, etherip_n);
+
+    newskb = alloc_skb(skb->len, GFP_ATOMIC);
+    if (!newskb) {
+        wprintf("> alloc new sk_buff failed \n");
+        goto exit;
+    }
+    newskb->mac.raw = skb_put(newskb, skb->len);
+    skb_copy_bits(skb, 0, newskb->data, skb->len);
+    kfree_skb(skb);
+    skb = newskb;
+
     eth = eth_hdr(skb);
 
     // Simulate the logic from eth_type_trans()
@@ -340,27 +352,12 @@
     
     // Assuming a standard Ethernet frame.
     // Should check for protocol? Support ETH_P_8021Q too.
-    skb->nh.raw = skb_pull(skb, ETH_HLEN);
-
-#ifdef __KERNEL__
-    // Fix IP options, checksum, skb dst, netfilter state.
-    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
-    if (skb->ip_summed == CHECKSUM_HW){
-        skb->ip_summed = CHECKSUM_NONE;
-    }
-    dst_release(skb->dst);
-    skb->dst = NULL;
-    nf_reset(skb);
-#ifdef CONFIG_BRIDGE_NETFILTER
-    if(skb->nf_bridge){
-        // Stop the eth header being clobbered by nf_bridge_maybe_copy_header().
-        _nf_bridge_save_header(skb);
-    }
-#endif
-#endif // __KERNEL__
-
-    dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
+    skb->nh.raw = skb_pull_vn(skb, ETH_HLEN);
+    skb->h.raw = newskb->nh.raw + sizeof(struct iphdr);
+
+    dprintf("> Unpacked srcaddr=" IPFMT " dstaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
             NIPQUAD(skb->nh.iph->saddr),
+            NIPQUAD(skb->nh.iph->daddr),
             VnetId_ntoa(&vnet, vnetbuf),
             MAC6TUPLE(eth->h_source),
             MAC6TUPLE(eth->h_dest));
diff -r aee991c7723a tools/vnet/vnet-module/skb_util.h
--- a/tools/vnet/vnet-module/skb_util.h	Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/skb_util.h	Sun Jul 15 17:03:47 2007
@@ -66,6 +66,21 @@
 }
 
 #endif
+
+/*
+ * It's a copy from {kernel}/include/linux/skbuff.h func '__skb_pull' and 'skb_pull'
+ * to aviodthe BUG_ON when pulling into the data (getting forwarded ip-frames)
+ */
+static inline unsigned char *__skb_pull_vn(struct sk_buff *skb, unsigned int len)
+{
+        skb->len -= len;
+        //BUG_ON(skb->len < skb->data_len);
+        return skb->data += len;
+}
+static inline unsigned char *skb_pull_vn(struct sk_buff *skb, unsigned int len)
+{
+        return unlikely(len > skb->len) ? NULL : __skb_pull_vn(skb, len);
+}
 
 
 #ifdef __KERNEL__
diff -r aee991c7723a tools/vnet/vnet-module/varp.c
--- a/tools/vnet/vnet-module/varp.c	Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/varp.c	Sun Jul 15 17:03:47 2007
@@ -1365,7 +1365,7 @@
             goto exit;
         }
     }
-    varph = (void*)skb_pull(skb, sizeof(struct udphdr));
+    varph = (void*)skb_pull_vn(skb, sizeof(struct udphdr));
     if(skb->len < sizeof(struct VnetMsgHdr)){
         wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct VnetMsgHdr));
         goto exit;
@@ -1378,11 +1378,11 @@
         }
         break;
     case VUDP_ID: // Etherip-in-udp packet.
-        skb_pull(skb, sizeof(struct VnetMsgHdr));
+        skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
         err = etherip_protocol_recv(skb);
         goto exit;
     case VFWD_ID: // Forwarded.
-        skb_pull(skb, sizeof(struct VnetMsgHdr));
+        skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
         err = vnet_forward_recv(skb);
         goto exit;
     default:
diff -r aee991c7723a tools/vnet/vnet-module/varp_socket.c
--- a/tools/vnet/vnet-module/varp_socket.c	Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/varp_socket.c	Sun Jul 15 17:03:47 2007
@@ -79,8 +79,8 @@
  * Architectures using socketcall() define __ARCH_WANT_SYS_SOCKETCALL.
  */
 
-#ifdef __ARCH_WANT_SYS_SOCKETCALL
-
+#if defined(__ARCH_WANT_SYS_SOCKETCALLxx) && defined(__NR_socketcall)
+ 
 /* Define the socketcall() syscall.
  * Multiplexes all the socket-related calls.
  *
@@ -190,59 +190,260 @@
 
 /* No socketcall - define the individual syscalls. */
 
-static inline _syscall3(int, socket,
-                        int, family,
-                        int, type,
-                        int, protocol);
-
-static inline _syscall3(int, bind,
-                        int, fd,
-                        struct sockaddr *, umyaddr,
-                        int, addrlen);
-
-static inline _syscall3(int, connect,
-                        int, fd,
-                        struct sockaddr *, uservaddr,
-                        int, addrlen);
-
-static inline _syscall6(int, sendto,
-                        int, fd,
-                        void *, buff,
-                        size_t, len,
-                        unsigned, flags,
-                        struct sockaddr *, addr,
-                        int, addr_len);
-
-static inline _syscall6(int, recvfrom,
-                        int, fd,
-                        void *, ubuf,
-                        size_t, size,
-                        unsigned, flags,
-                        struct sockaddr *, addr,
-                        int *, addr_len);
-
-static inline _syscall5(int, setsockopt,
-                        int, fd,
-                        int, level,
-                        int, optname,
-                        void *, optval,
-                        int, optlen);
-
-static inline _syscall5(int, getsockopt,
-                        int, fd,
-                        int, level,
-                        int, optname,
-                        void *, optval,
-                        int *, optlen);
-
-static inline _syscall2(int, shutdown,
-                        int, fd,
-                        int, how);
-
-static inline _syscall3(int, getsockname,
-                        int, fd,
-                        struct sockaddr *, usockaddr,
-                        int *, usockaddr_len);
+/* the following code is copied from linux-kernel/net/socket.c
+ * As replacement of the __NR_socketcall, which exists not in x86_64 and
+ * same other systems.
+ * An alternate will be an export of the copied-functions in net/socket.c
+ */ 
+#define MAX_SOCK_ADDR 128
+
+int socket(int family, int type, int protocol){
+
+       int retval;
+       struct socket *sock;
+       retval = sock_create(family, type, protocol, &sock);
+       if (retval < 0)
+               goto out;
+
+       retval = sock_map_fd(sock);
+       if (retval < 0)
+               goto out_release;
+
+out:
+       /* It may be already another descriptor 8) Not kernel problem. */
+       return retval;
+
+out_release:
+       sock_release(sock);
+       return retval;
+
+}
+
+
+int bind(int fd, struct sockaddr *umyaddr, int addrlen){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int err;
+
+       if((sock = sockfd_lookup(fd,&err))!=NULL)
+       {
+               if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
+                       err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
+                       if (err) {
+                               sockfd_put(sock);
+                               return err;
+                       }
+                       err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
+               }
+               sockfd_put(sock);
+       }
+       return err;
+}
+
+
+int connect(int fd, struct sockaddr *uservaddr, int addrlen){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int err;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+       err = move_addr_to_kernel(uservaddr, addrlen, address);
+       if (err < 0)
+               goto out_put;
+
+       err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
+       if (err)
+               goto out_put;
+
+       err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
+                                sock->file->f_flags);
+out_put:
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+int sendto(int fd, void * buff, size_t len,
+           unsigned flags, struct sockaddr *addr,
+           int addr_len){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int err;
+       struct msghdr msg;
+       struct iovec iov;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+       iov.iov_base=buff;
+       iov.iov_len=len;
+       msg.msg_name=NULL;
+       msg.msg_iov=&iov;
+       msg.msg_iovlen=1;
+       msg.msg_control=NULL;
+       msg.msg_controllen=0;
+       msg.msg_namelen=0;
+       if(addr)
+       {
+               err = move_addr_to_kernel(addr, addr_len, address);
+               if (err < 0)
+                       goto out_put;
+               msg.msg_name=address;
+               msg.msg_namelen=addr_len;
+       }
+       if (sock->file->f_flags & O_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       msg.msg_flags = flags;
+       err = sock_sendmsg(sock, &msg, len);
+
+out_put:
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+
+int recvfrom(int fd, void * ubuf, size_t size,
+             unsigned flags, struct sockaddr *addr,
+             int *addr_len){
+
+       struct socket *sock;
+       struct iovec iov;
+       struct msghdr msg;
+       char address[MAX_SOCK_ADDR];
+       int err,err2;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+
+       msg.msg_control=NULL;
+       msg.msg_controllen=0;
+       msg.msg_iovlen=1;
+       msg.msg_iov=&iov;
+       iov.iov_len=size;
+       iov.iov_base=ubuf;
+       msg.msg_name=address;
+       msg.msg_namelen=MAX_SOCK_ADDR;
+       if (sock->file->f_flags & O_NONBLOCK)
+               flags |= MSG_DONTWAIT;
+       err=sock_recvmsg(sock, &msg, size, flags);
+
+       if(err >= 0 && addr != NULL)
+       {
+               err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
+               if(err2<0)
+                       err=err2;
+       }
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+int setsockopt(int fd, int level, int optname, void *optval, int optlen){
+
+       int err;
+       struct socket *sock;
+
+       if (optlen < 0)
+               return -EINVAL;
+
+       if ((sock = sockfd_lookup(fd, &err))!=NULL)
+       {
+               err = security_socket_setsockopt(sock,level,optname);
+               if (err) {
+                       sockfd_put(sock);
+                       return err;
+               }
+
+               if (level == SOL_SOCKET)
+                       err=sock_setsockopt(sock,level,optname,optval,optlen);
+               else
+                       err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
+               sockfd_put(sock);
+       }
+       return err;
+}
+
+
+/*  not possible, because sock_getsockopt is not exported ...
+int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
+
+       int err;
+       struct socket *sock;
+
+       if ((sock = sockfd_lookup(fd, &err))!=NULL)
+       {
+               err = security_socket_getsockopt(sock, level, optname);
+               if (err) {
+                       sockfd_put(sock);
+                       return err;
+               }
+
+               if (level == SOL_SOCKET)
+                       err=sock_getsockopt(sock,level,optname,optval,optlen);
+               else
+                       err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
+               sockfd_put(sock);
+       }
+       return err;
+}
+*/
+
+
+int shutdown(int fd, int how){
+
+       int err;
+       struct socket *sock;
+
+       if ((sock = sockfd_lookup(fd, &err))!=NULL)
+       {
+               err = security_socket_shutdown(sock, how);
+               if (err) {
+                       sockfd_put(sock);
+                       return err;
+               }
+
+               err=sock->ops->shutdown(sock, how);
+               sockfd_put(sock);
+       }
+       return err;
+}
+
+int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
+
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       int len, err;
+
+       sock = sockfd_lookup(fd, &err);
+       if (!sock)
+               goto out;
+
+       err = security_socket_getsockname(sock);
+       if (err)
+               goto out_put;
+
+       err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
+       if (err)
+               goto out_put;
+       err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
+
+out_put:
+       sockfd_put(sock);
+out:
+       return err;
+}
+
+/**
+ * End of copy from net/socket.c
+ */
+
 
 #endif /* __ARCH_WANT_SYS_SOCKETCALL */
 
diff -r aee991c7723a tools/vnet/vnet-module/vnet_forward.c
--- a/tools/vnet/vnet-module/vnet_forward.c	Wed May  9 15:34:47 2007
+++ b/tools/vnet/vnet-module/vnet_forward.c	Sun Jul 15 17:03:47 2007
@@ -186,7 +186,7 @@
     printk("\nWrapped packet:\n");
     print_iphdr(__FUNCTION__, skb);
     print_udphdr(__FUNCTION__, skb);
-    skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len);
+    skb_print_bits(__FUNCTION__, skb, 0, skb->len);
 #endif
 
     err = _skb_xmit(skb, saddr);
@@ -304,7 +304,7 @@
     peer->rx_packets++;
     skb->mac.raw = NULL;
     skb->nh.raw = skb->data;
-    skb->h.raw = (void*)(skb->nh.iph + 1);
+    skb->h.raw = skb->data + sizeof(struct iphdr);
     if(!skb->nh.iph->saddr){
         skb->nh.iph->saddr = addr.u.ip4.s_addr;
     }
@@ -328,12 +328,17 @@
 
     // Handle (a copy of) it ourselves, because
     // if it is looped-back by xmit it will be ignored.
-    //recvskb = skb_clone(skb, GFP_ATOMIC);
-    recvskb = pskb_copy(skb, GFP_ATOMIC);
+    recvskb = alloc_skb(skb->len, GFP_ATOMIC);
     if(recvskb){
+        recvskb->protocol = htons(ETH_P_IP);
+
+        recvskb->nh.raw = skb_put(recvskb, skb->len);
+        recvskb->h.raw = recvskb->data + sizeof(struct iphdr); 
+        skb_copy_bits(skb, 0, recvskb->data, skb->len);
+        
         // Data points at the unwrapped iphdr, but varp_handle_message()
         // expects it to point at the udphdr, so pull.
-        skb_pull(recvskb, sizeof(struct iphdr));
+        skb_pull_vn(recvskb, sizeof(struct iphdr));
         if(varp_handle_message(recvskb) <= 0){
             kfree_skb(recvskb);
         }

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Patch] vnet-module ( for 3.1 && unstable )
  2007-07-15 16:26 [Patch] vnet-module ( for 3.1 && unstable ) Robert Valentan
@ 2007-07-16  2:15 ` Zhu Han
       [not found]   ` <469B07E4.3030805@solid-soft.at>
  0 siblings, 1 reply; 3+ messages in thread
From: Zhu Han @ 2007-07-16  2:15 UTC (permalink / raw)
  To: Robert Valentan; +Cc: Mike Wray, xen-devel

Hi, Robert,

Here are some questions:

1) If we just pull Eth Header and IP header, does it reach the
position out of the head room?  I dont' observe it before.

2) You don't move_addr_to_kernel/move_addr_to_user, you just need
change_fs to KERNEL_DS and then change it back.

3) Why not use "struct sock" directly to manipulate the socket instead
of using int file descriptor?

On 7/16/07, Robert Valentan <R.Valentan@solid-soft.at> wrote:
> Hallo!
>
> The attached patch makes the vnet-module working for i386
> and x86_64. (I had no other systems for testing)
> (My patch from 6.6.2007 is included)
>
>  - x86_64: the x86-64 kernel has __ARCH_WANT_SYS_SOCKETCALL
>    defined, but has no socketcall. Replacing the _syscall's
>    to a working one.
>
>  - moves the "skb_pull" function from kernel to skb_util.c
>    because pulling in the data will rise a "BUG_ON" in the
>    kernel. The kernel-function does not allow to pull into
>    the payload!
>
>  - the skb_buff is not always possible to modify a received
>    buffer to send it again to the network.
>    (vnet_forward.c and etherip.c) The code now work's..
>    I think with no or less time-penalty.
>
>
> Summary:
> Solved:
>   With this patch the vnet-module is working on i386 and x86_64,
>   including peering-option.   (except the following "open")
>
>
> Open:
>  - Vnet's only with security-option "none" are working.
>  - The module-option "vnet_encaps" is ignored, always "udp" is used.
>
>
>
> Signed-off-by: Robert Valentan <R.Valentan@solid-soft.at>
>
>
> --
> regards
> Robert Valentan
>
> diff -r aee991c7723a tools/vnet/vnet-module/esp.c
> --- a/tools/vnet/vnet-module/esp.c      Wed May  9 15:34:47 2007
> +++ b/tools/vnet/vnet-module/esp.c      Sun Jul 15 17:03:47 2007
> @@ -341,12 +341,13 @@
>          dprintf("> ETH header pull...\n");
>          memmove(skb->data, skb->mac.raw, ETH_HLEN);
>          skb->mac.raw = skb->data;
> -        __skb_pull(skb, ETH_HLEN);
> +        skb_pull_vn(skb, ETH_HLEN);
>      }
>      dprintf("> IP header pull...\n");
>      memmove(skb->data, skb->nh.raw, ip_n);
>      skb->nh.raw = skb->data;
> -    __skb_pull(skb, ip_n);
> +    skb_pull_vn(skb, ip_n);
> +    skb->h.raw = skb->data;
>      esph = (void*)skb->data;
>      // Add spi and sequence number.
>      esph->spi = sa->ident.spi;
> @@ -457,7 +458,7 @@
>      // Move skb->data back to ethernet header.
>      // Do in 2 moves to ensure offsets are +ve,
>      // since args to skb_pull/skb_push are unsigned.
> -    __skb_pull(skb, head_n);
> +    skb_pull_vn(skb, head_n);
>      __skb_push(skb, skb->data - skb->mac.raw);
>      // After this esph is invalid.
>      esph = NULL;
> @@ -763,7 +764,7 @@
>      dprintf(">\n");
>  #ifdef DEBUG
>      dprintf("> recv skb=\n");
> -    skb_print_bits(skb, 0, skb->len);
> +    skb_print_bits("", skb, 0, skb->len);
>  #endif
>      ip_n = (skb->nh.iph->ihl << 2);
>      if(skb->data == skb->mac.raw){
> @@ -773,7 +774,7 @@
>              err = -EINVAL;
>              goto exit;
>          }
> -        skb_pull(skb, eth_n + ip_n);
> +        skb_pull_vn(skb, eth_n + ip_n);
>      }
>      addr = skb->nh.iph->daddr;
>      err = esp_skb_header(skb, &esph);
> diff -r aee991c7723a tools/vnet/vnet-module/etherip.c
> --- a/tools/vnet/vnet-module/etherip.c  Wed May  9 15:34:47 2007
> +++ b/tools/vnet/vnet-module/etherip.c  Sun Jul 15 17:03:47 2007
> @@ -270,6 +270,7 @@
>      u32 saddr, daddr;
>      char vnetbuf[VNET_ID_BUF];
>      struct ethhdr *eth;
> +    struct sk_buff *newskb;
>
>      dprintf(">\n");
>      saddr = skb->nh.iph->saddr;
> @@ -293,7 +294,7 @@
>              err = -EINVAL;
>              goto exit;
>          }
> -        skb_pull(skb, pull_n);
> +        skb_pull_vn(skb, pull_n);
>      }
>      // Assume skb->data points at etherip header.
>      etheriph = (void*)skb->data;
> @@ -318,7 +319,18 @@
>          goto exit;
>      }
>      // Point at the headers in the contained ethernet frame.
> -    skb->mac.raw = skb_pull(skb, etherip_n);
> +    skb->mac.raw = skb_pull_vn(skb, etherip_n);
> +
> +    newskb = alloc_skb(skb->len, GFP_ATOMIC);
> +    if (!newskb) {
> +        wprintf("> alloc new sk_buff failed \n");
> +        goto exit;
> +    }
> +    newskb->mac.raw = skb_put(newskb, skb->len);
> +    skb_copy_bits(skb, 0, newskb->data, skb->len);
> +    kfree_skb(skb);
> +    skb = newskb;
> +
>      eth = eth_hdr(skb);
>
>      // Simulate the logic from eth_type_trans()
> @@ -340,27 +352,12 @@
>
>      // Assuming a standard Ethernet frame.
>      // Should check for protocol? Support ETH_P_8021Q too.
> -    skb->nh.raw = skb_pull(skb, ETH_HLEN);
> -
> -#ifdef __KERNEL__
> -    // Fix IP options, checksum, skb dst, netfilter state.
> -    memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
> -    if (skb->ip_summed == CHECKSUM_HW){
> -        skb->ip_summed = CHECKSUM_NONE;
> -    }
> -    dst_release(skb->dst);
> -    skb->dst = NULL;
> -    nf_reset(skb);
> -#ifdef CONFIG_BRIDGE_NETFILTER
> -    if(skb->nf_bridge){
> -        // Stop the eth header being clobbered by nf_bridge_maybe_copy_header().
> -        _nf_bridge_save_header(skb);
> -    }
> -#endif
> -#endif // __KERNEL__
> -
> -    dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
> +    skb->nh.raw = skb_pull_vn(skb, ETH_HLEN);
> +    skb->h.raw = newskb->nh.raw + sizeof(struct iphdr);
> +
> +    dprintf("> Unpacked srcaddr=" IPFMT " dstaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n",
>              NIPQUAD(skb->nh.iph->saddr),
> +            NIPQUAD(skb->nh.iph->daddr),
>              VnetId_ntoa(&vnet, vnetbuf),
>              MAC6TUPLE(eth->h_source),
>              MAC6TUPLE(eth->h_dest));
> diff -r aee991c7723a tools/vnet/vnet-module/skb_util.h
> --- a/tools/vnet/vnet-module/skb_util.h Wed May  9 15:34:47 2007
> +++ b/tools/vnet/vnet-module/skb_util.h Sun Jul 15 17:03:47 2007
> @@ -66,6 +66,21 @@
>  }
>
>  #endif
> +
> +/*
> + * It's a copy from {kernel}/include/linux/skbuff.h func '__skb_pull' and 'skb_pull'
> + * to aviodthe BUG_ON when pulling into the data (getting forwarded ip-frames)
> + */
> +static inline unsigned char *__skb_pull_vn(struct sk_buff *skb, unsigned int len)
> +{
> +        skb->len -= len;
> +        //BUG_ON(skb->len < skb->data_len);
> +        return skb->data += len;
> +}
> +static inline unsigned char *skb_pull_vn(struct sk_buff *skb, unsigned int len)
> +{
> +        return unlikely(len > skb->len) ? NULL : __skb_pull_vn(skb, len);
> +}
>
>
>  #ifdef __KERNEL__
> diff -r aee991c7723a tools/vnet/vnet-module/varp.c
> --- a/tools/vnet/vnet-module/varp.c     Wed May  9 15:34:47 2007
> +++ b/tools/vnet/vnet-module/varp.c     Sun Jul 15 17:03:47 2007
> @@ -1365,7 +1365,7 @@
>              goto exit;
>          }
>      }
> -    varph = (void*)skb_pull(skb, sizeof(struct udphdr));
> +    varph = (void*)skb_pull_vn(skb, sizeof(struct udphdr));
>      if(skb->len < sizeof(struct VnetMsgHdr)){
>          wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct VnetMsgHdr));
>          goto exit;
> @@ -1378,11 +1378,11 @@
>          }
>          break;
>      case VUDP_ID: // Etherip-in-udp packet.
> -        skb_pull(skb, sizeof(struct VnetMsgHdr));
> +        skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
>          err = etherip_protocol_recv(skb);
>          goto exit;
>      case VFWD_ID: // Forwarded.
> -        skb_pull(skb, sizeof(struct VnetMsgHdr));
> +        skb_pull_vn(skb, sizeof(struct VnetMsgHdr));
>          err = vnet_forward_recv(skb);
>          goto exit;
>      default:
> diff -r aee991c7723a tools/vnet/vnet-module/varp_socket.c
> --- a/tools/vnet/vnet-module/varp_socket.c      Wed May  9 15:34:47 2007
> +++ b/tools/vnet/vnet-module/varp_socket.c      Sun Jul 15 17:03:47 2007
> @@ -79,8 +79,8 @@
>   * Architectures using socketcall() define __ARCH_WANT_SYS_SOCKETCALL.
>   */
>
> -#ifdef __ARCH_WANT_SYS_SOCKETCALL
> -
> +#if defined(__ARCH_WANT_SYS_SOCKETCALLxx) && defined(__NR_socketcall)
> +
>  /* Define the socketcall() syscall.
>   * Multiplexes all the socket-related calls.
>   *
> @@ -190,59 +190,260 @@
>
>  /* No socketcall - define the individual syscalls. */
>
> -static inline _syscall3(int, socket,
> -                        int, family,
> -                        int, type,
> -                        int, protocol);
> -
> -static inline _syscall3(int, bind,
> -                        int, fd,
> -                        struct sockaddr *, umyaddr,
> -                        int, addrlen);
> -
> -static inline _syscall3(int, connect,
> -                        int, fd,
> -                        struct sockaddr *, uservaddr,
> -                        int, addrlen);
> -
> -static inline _syscall6(int, sendto,
> -                        int, fd,
> -                        void *, buff,
> -                        size_t, len,
> -                        unsigned, flags,
> -                        struct sockaddr *, addr,
> -                        int, addr_len);
> -
> -static inline _syscall6(int, recvfrom,
> -                        int, fd,
> -                        void *, ubuf,
> -                        size_t, size,
> -                        unsigned, flags,
> -                        struct sockaddr *, addr,
> -                        int *, addr_len);
> -
> -static inline _syscall5(int, setsockopt,
> -                        int, fd,
> -                        int, level,
> -                        int, optname,
> -                        void *, optval,
> -                        int, optlen);
> -
> -static inline _syscall5(int, getsockopt,
> -                        int, fd,
> -                        int, level,
> -                        int, optname,
> -                        void *, optval,
> -                        int *, optlen);
> -
> -static inline _syscall2(int, shutdown,
> -                        int, fd,
> -                        int, how);
> -
> -static inline _syscall3(int, getsockname,
> -                        int, fd,
> -                        struct sockaddr *, usockaddr,
> -                        int *, usockaddr_len);
> +/* the following code is copied from linux-kernel/net/socket.c
> + * As replacement of the __NR_socketcall, which exists not in x86_64 and
> + * same other systems.
> + * An alternate will be an export of the copied-functions in net/socket.c
> + */
> +#define MAX_SOCK_ADDR 128
> +
> +int socket(int family, int type, int protocol){
> +
> +       int retval;
> +       struct socket *sock;
> +       retval = sock_create(family, type, protocol, &sock);
> +       if (retval < 0)
> +               goto out;
> +
> +       retval = sock_map_fd(sock);
> +       if (retval < 0)
> +               goto out_release;
> +
> +out:
> +       /* It may be already another descriptor 8) Not kernel problem. */
> +       return retval;
> +
> +out_release:
> +       sock_release(sock);
> +       return retval;
> +
> +}
> +
> +
> +int bind(int fd, struct sockaddr *umyaddr, int addrlen){
> +
> +       struct socket *sock;
> +       char address[MAX_SOCK_ADDR];
> +       int err;
> +
> +       if((sock = sockfd_lookup(fd,&err))!=NULL)
> +       {
> +               if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
> +                       err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
> +                       if (err) {
> +                               sockfd_put(sock);
> +                               return err;
> +                       }
> +                       err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
> +               }
> +               sockfd_put(sock);
> +       }
> +       return err;
> +}
> +
> +
> +int connect(int fd, struct sockaddr *uservaddr, int addrlen){
> +
> +       struct socket *sock;
> +       char address[MAX_SOCK_ADDR];
> +       int err;
> +
> +       sock = sockfd_lookup(fd, &err);
> +       if (!sock)
> +               goto out;
> +       err = move_addr_to_kernel(uservaddr, addrlen, address);
> +       if (err < 0)
> +               goto out_put;
> +
> +       err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
> +       if (err)
> +               goto out_put;
> +
> +       err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
> +                                sock->file->f_flags);
> +out_put:
> +       sockfd_put(sock);
> +out:
> +       return err;
> +}
> +
> +int sendto(int fd, void * buff, size_t len,
> +           unsigned flags, struct sockaddr *addr,
> +           int addr_len){
> +
> +       struct socket *sock;
> +       char address[MAX_SOCK_ADDR];
> +       int err;
> +       struct msghdr msg;
> +       struct iovec iov;
> +
> +       sock = sockfd_lookup(fd, &err);
> +       if (!sock)
> +               goto out;
> +       iov.iov_base=buff;
> +       iov.iov_len=len;
> +       msg.msg_name=NULL;
> +       msg.msg_iov=&iov;
> +       msg.msg_iovlen=1;
> +       msg.msg_control=NULL;
> +       msg.msg_controllen=0;
> +       msg.msg_namelen=0;
> +       if(addr)
> +       {
> +               err = move_addr_to_kernel(addr, addr_len, address);
> +               if (err < 0)
> +                       goto out_put;
> +               msg.msg_name=address;
> +               msg.msg_namelen=addr_len;
> +       }
> +       if (sock->file->f_flags & O_NONBLOCK)
> +               flags |= MSG_DONTWAIT;
> +       msg.msg_flags = flags;
> +       err = sock_sendmsg(sock, &msg, len);
> +
> +out_put:
> +       sockfd_put(sock);
> +out:
> +       return err;
> +}
> +
> +
> +int recvfrom(int fd, void * ubuf, size_t size,
> +             unsigned flags, struct sockaddr *addr,
> +             int *addr_len){
> +
> +       struct socket *sock;
> +       struct iovec iov;
> +       struct msghdr msg;
> +       char address[MAX_SOCK_ADDR];
> +       int err,err2;
> +
> +       sock = sockfd_lookup(fd, &err);
> +       if (!sock)
> +               goto out;
> +
> +       msg.msg_control=NULL;
> +       msg.msg_controllen=0;
> +       msg.msg_iovlen=1;
> +       msg.msg_iov=&iov;
> +       iov.iov_len=size;
> +       iov.iov_base=ubuf;
> +       msg.msg_name=address;
> +       msg.msg_namelen=MAX_SOCK_ADDR;
> +       if (sock->file->f_flags & O_NONBLOCK)
> +               flags |= MSG_DONTWAIT;
> +       err=sock_recvmsg(sock, &msg, size, flags);
> +
> +       if(err >= 0 && addr != NULL)
> +       {
> +               err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
> +               if(err2<0)
> +                       err=err2;
> +       }
> +       sockfd_put(sock);
> +out:
> +       return err;
> +}
> +
> +int setsockopt(int fd, int level, int optname, void *optval, int optlen){
> +
> +       int err;
> +       struct socket *sock;
> +
> +       if (optlen < 0)
> +               return -EINVAL;
> +
> +       if ((sock = sockfd_lookup(fd, &err))!=NULL)
> +       {
> +               err = security_socket_setsockopt(sock,level,optname);
> +               if (err) {
> +                       sockfd_put(sock);
> +                       return err;
> +               }
> +
> +               if (level == SOL_SOCKET)
> +                       err=sock_setsockopt(sock,level,optname,optval,optlen);
> +               else
> +                       err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
> +               sockfd_put(sock);
> +       }
> +       return err;
> +}
> +
> +
> +/*  not possible, because sock_getsockopt is not exported ...
> +int getsockopt(int fd, int level, int optname, void *optval, int *optlen){
> +
> +       int err;
> +       struct socket *sock;
> +
> +       if ((sock = sockfd_lookup(fd, &err))!=NULL)
> +       {
> +               err = security_socket_getsockopt(sock, level, optname);
> +               if (err) {
> +                       sockfd_put(sock);
> +                       return err;
> +               }
> +
> +               if (level == SOL_SOCKET)
> +                       err=sock_getsockopt(sock,level,optname,optval,optlen);
> +               else
> +                       err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
> +               sockfd_put(sock);
> +       }
> +       return err;
> +}
> +*/
> +
> +
> +int shutdown(int fd, int how){
> +
> +       int err;
> +       struct socket *sock;
> +
> +       if ((sock = sockfd_lookup(fd, &err))!=NULL)
> +       {
> +               err = security_socket_shutdown(sock, how);
> +               if (err) {
> +                       sockfd_put(sock);
> +                       return err;
> +               }
> +
> +               err=sock->ops->shutdown(sock, how);
> +               sockfd_put(sock);
> +       }
> +       return err;
> +}
> +
> +int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){
> +
> +       struct socket *sock;
> +       char address[MAX_SOCK_ADDR];
> +       int len, err;
> +
> +       sock = sockfd_lookup(fd, &err);
> +       if (!sock)
> +               goto out;
> +
> +       err = security_socket_getsockname(sock);
> +       if (err)
> +               goto out_put;
> +
> +       err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
> +       if (err)
> +               goto out_put;
> +       err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
> +
> +out_put:
> +       sockfd_put(sock);
> +out:
> +       return err;
> +}
> +
> +/**
> + * End of copy from net/socket.c
> + */
> +
>
>  #endif /* __ARCH_WANT_SYS_SOCKETCALL */
>
> diff -r aee991c7723a tools/vnet/vnet-module/vnet_forward.c
> --- a/tools/vnet/vnet-module/vnet_forward.c     Wed May  9 15:34:47 2007
> +++ b/tools/vnet/vnet-module/vnet_forward.c     Sun Jul 15 17:03:47 2007
> @@ -186,7 +186,7 @@
>      printk("\nWrapped packet:\n");
>      print_iphdr(__FUNCTION__, skb);
>      print_udphdr(__FUNCTION__, skb);
> -    skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len);
> +    skb_print_bits(__FUNCTION__, skb, 0, skb->len);
>  #endif
>
>      err = _skb_xmit(skb, saddr);
> @@ -304,7 +304,7 @@
>      peer->rx_packets++;
>      skb->mac.raw = NULL;
>      skb->nh.raw = skb->data;
> -    skb->h.raw = (void*)(skb->nh.iph + 1);
> +    skb->h.raw = skb->data + sizeof(struct iphdr);
>      if(!skb->nh.iph->saddr){
>          skb->nh.iph->saddr = addr.u.ip4.s_addr;
>      }
> @@ -328,12 +328,17 @@
>
>      // Handle (a copy of) it ourselves, because
>      // if it is looped-back by xmit it will be ignored.
> -    //recvskb = skb_clone(skb, GFP_ATOMIC);
> -    recvskb = pskb_copy(skb, GFP_ATOMIC);
> +    recvskb = alloc_skb(skb->len, GFP_ATOMIC);
>      if(recvskb){
> +        recvskb->protocol = htons(ETH_P_IP);
> +
> +        recvskb->nh.raw = skb_put(recvskb, skb->len);
> +        recvskb->h.raw = recvskb->data + sizeof(struct iphdr);
> +        skb_copy_bits(skb, 0, recvskb->data, skb->len);
> +
>          // Data points at the unwrapped iphdr, but varp_handle_message()
>          // expects it to point at the udphdr, so pull.
> -        skb_pull(recvskb, sizeof(struct iphdr));
> +        skb_pull_vn(recvskb, sizeof(struct iphdr));
>          if(varp_handle_message(recvskb) <= 0){
>              kfree_skb(recvskb);
>          }
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>
>


-- 
best regards,
hanzhu

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Patch] vnet-module ( for 3.1 && unstable )
       [not found]   ` <469B07E4.3030805@solid-soft.at>
@ 2007-07-16  6:22     ` Zhu Han
  0 siblings, 0 replies; 3+ messages in thread
From: Zhu Han @ 2007-07-16  6:22 UTC (permalink / raw)
  To: Robert Valentan; +Cc: Mike Wray, xen-devel

On 7/16/07, Robert Valentan <R.Valentan@solid-soft.at> wrote:
> Zhu Han schrieb:
> > Hi, Robert,
> >
> > Here are some questions:
> >
> > 1) If we just pull Eth Header and IP header, does it reach the
> > position out of the head room?  I dont' observe it before.
>
> The __skb_pull  in include/linux/skbuff.h has a BUG_ON, which was
> always "active". The problem are not the header of the received
> packet, but the encapsed header. Without the change, my systems
> are always been killed by the BUG_ON in skbuff.h
> To observe it: use a vnet with peer's on different networks, eg
>  each part in an other datacenter...

The BUG_ON only wakes up when you pulls data more than head room
contained. Sometime s the vnet header will be put to fragmented page
instead of in head room. So when pull out the vnet header, it will hit
on the BUG_ON. That's what you observed, I'm afraid.


>
> > 2) You don't move_addr_to_kernel/move_addr_to_user, you just need
> > change_fs to KERNEL_DS and then change it back.
>
> ? The UDP & IP-header was not always changeable on received packets
> after pulling data, possible there is a better (or nicer) way..

I mean you don't need move_addr_to_kernel/move_addr_to_user. Whatever,
you code is already in kernel mode.

>
> > 3) Why not use "struct sock" directly to manipulate the socket instead
> > of using int file descriptor?
>
> I had want only to get it to work, without writing it new, i changed
> only the code, to make it work on systms without sys-socketcall.
>
> I don't think, that anyone had used vnet_module in 3.* before. With
> my patch, the vnet_module is now working local, in local-net and
> with peer's, as long you need no security-option. I tested it on
> i386 and x86_64 systems.

Unfortunately, I'm pretty sure vnet was broken serveral months ago.
When I tried to use it, I have to build my own patch.


>
> regards
> Robert Valentan
>
> > On 7/16/07, Robert Valentan <R.Valentan@solid-soft.at> wrote:
> >> Hallo!
> >>
> >> The attached patch makes the vnet-module working for i386
> >> and x86_64. (I had no other systems for testing)
> >> (My patch from 6.6.2007 is included)
> >>
> >>  - x86_64: the x86-64 kernel has __ARCH_WANT_SYS_SOCKETCALL
> >>    defined, but has no socketcall. Replacing the _syscall's
> >>    to a working one.
> >>
> >>  - moves the "skb_pull" function from kernel to skb_util.c
> >>    because pulling in the data will rise a "BUG_ON" in the
> >>    kernel. The kernel-function does not allow to pull into
> >>    the payload!
> >>
> >>  - the skb_buff is not always possible to modify a received
> >>    buffer to send it again to the network.
> >>    (vnet_forward.c and etherip.c) The code now work's..
> >>    I think with no or less time-penalty.
>


-- 
best regards,
hanzhu

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2007-07-16  6:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-15 16:26 [Patch] vnet-module ( for 3.1 && unstable ) Robert Valentan
2007-07-16  2:15 ` Zhu Han
     [not found]   ` <469B07E4.3030805@solid-soft.at>
2007-07-16  6:22     ` Zhu Han

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.