All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kelly Daly <kelly@au1.ibm.com>
To: netdev@vger.kernel.org
Cc: rusty@rustcorp.com.au, davem@davemloft.net
Subject: [PATCH 3/3] Rough VJ Channel Implementation - vj_udp.patch
Date: Wed, 26 Apr 2006 11:47:46 +0000	[thread overview]
Message-ID: <200604261147.46969.kelly@au.ibm.com> (raw)

Signed-off-by: Kelly Daly <kelly@au.ibm.com>

Hacked udp.c so that UDP sockets receive directly from a VJ channel.
This breaks normal UDP - sockets no longer speak non-VJ!

----


diff -r 47031a1f466c linux-2.6.16/include/linux/udp.h
--- linux-2.6.16/include/linux/udp.h	Thu Mar 23 06:32:12 2006
+++ linux-2.6.16/include/linux/udp.h	Mon Apr 24 19:50:46 2006
@@ -51,6 +51,8 @@
 	 * when the socket is uncorked.
 	 */
 	__u16		 len;		/* total length of pending frames */
+	struct vj_channel *chan;        /* VJ net channel */
+	int		vj_reg_flag; 	/* is the vj channel registered */
 };
 
 static inline struct udp_sock *udp_sk(const struct sock *sk)
diff -r 47031a1f466c linux-2.6.16/net/ipv4/udp.c
--- linux-2.6.16/net/ipv4/udp.c	Thu Mar 23 06:32:12 2006
+++ linux-2.6.16/net/ipv4/udp.c	Mon Apr 24 19:50:46 2006
@@ -1,3 +1,4 @@
+
 /*
  * INET		An implementation of the TCP/IP protocol suite for the LINUX
  *		operating system.  INET is implemented using the  BSD Socket
@@ -89,6 +90,7 @@
 #include <linux/igmp.h>
 #include <linux/in.h>
 #include <linux/errno.h>
+#include <linux/err.h>
 #include <linux/timer.h>
 #include <linux/mm.h>
 #include <linux/config.h>
@@ -109,6 +111,7 @@
 #include <net/inet_common.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
+#include <linux/vjchan.h>
 
 /*
  *	Snmp MIB for the UDP layer
@@ -127,6 +130,7 @@
 	struct hlist_node *node;
 	struct sock *sk2;
 	struct inet_sock *inet = inet_sk(sk);
+	struct vj_flowid flowid;
 
 	write_lock_bh(&udp_hash_lock);
 	if (snum == 0) {
@@ -195,6 +199,17 @@
 		sk_add_node(sk, h);
 		sock_prot_inc_use(sk->sk_prot);
 	}
+
+	/* copied from udp_v4_lookup_longway */
+	flowid.saddr = inet->daddr;
+	flowid.daddr = inet->rcv_saddr;
+	flowid.sport = inet->dport;
+	flowid.dport = htons(inet->num);
+	flowid.ifindex = sk->sk_bound_dev_if;
+	flowid.proto = IPPROTO_UDP;
+	vj_register_chan(udp_sk(sk)->chan, &flowid);
+	udp_sk(sk)->vj_reg_flag = 1;
+
 	write_unlock_bh(&udp_hash_lock);
 	return 0;
 
@@ -771,18 +786,158 @@
 		__udp_checksum_complete(skb);
 }
 
+/* Verify the UDP checksum of @buffer; returns 0 if it is valid or was not sent. */
+static inline unsigned short int vj_udp_csum(struct vj_buffer *buffer)
+{
+	struct iphdr *ip = (struct iphdr *)(buffer->data + buffer->header_len);
+	int udpoff = buffer->header_len + (ip->ihl * 4);
+	struct udphdr *up = (struct udphdr *)(buffer->data + udpoff);
+	int udplen = buffer->data_len - (ip->ihl * 4);	/* UDP header + payload */
+
+	/* Too short for a UDP header: call it bad rather than read past the data. */
+	if (udplen < (int)sizeof(struct udphdr))
+		return 1;
+	if (up->check == 0)	/* checksum field of zero: nothing to verify */
+		return 0;
+
+	return csum_tcpudp_magic(ip->saddr, ip->daddr, udplen, IPPROTO_UDP,
+				 csum_partial((buffer->data + udpoff), udplen, 0));
+}
+
+/*
+ *	True (nonzero) for connection-oriented socket types.
+ */
+static inline int connection_based(struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET;
+}
+
+/* Wait once on the channel's wait queue: returns 1 if we need to keep waiting, <= 0 indicates stop waiting */
+static int wait_for_vj_buffer(struct sock *sk, long *timeo_p)
+{
+	int error;
+	wait_queue_head_t *wq = &udp_sk(sk)->chan->wq;
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
+	vj_inc_wakecnt(udp_sk(sk)->chan);
+
+	error = sock_error(sk);
+	if (error)
+		goto out;
+	if (vj_peek_next_buffer(udp_sk(sk)->chan)) {
+		error = 1;	/* a buffer is already there: don't sleep, let the caller peek again */
+		goto out;
+	}
+	if (sk->sk_shutdown & RCV_SHUTDOWN) {
+		error = 0;	/* receive side shut down: stop waiting, no error */
+		goto out;
+	}
+	if (connection_based(sk) && !(sk->sk_state == TCP_ESTABLISHED ||
+				      sk->sk_state == TCP_LISTEN)) {
+		error = -ENOTCONN;
+		goto out;
+	}
+	if (signal_pending(current)) {
+		error = sock_intr_errno(*timeo_p);
+		goto out;
+	}
+
+	error = 1;
+
+	*timeo_p = schedule_timeout(*timeo_p);	/* sleep; the result is stored back as the new timeout */
+out:
+	finish_wait(wq, &wait);
+	return error;
+}
+
+/* Near-copy of skb_recv_datagram() for VJ channels: returns the next buffer, or NULL with *err set. */
+struct vj_buffer *vj_recv_datagram(struct sock *sk, unsigned flags,
+				   int noblock, int *err)
+{
+	struct vj_buffer *next;
+	long remaining;
+
+	*err = sock_error(sk);
+	if (*err)
+		return NULL;
+
+	remaining = sock_rcvtimeo(sk, noblock);
+	for (;;) {
+		/* Always just a peek: the caller consumes the buffer afterwards unless
+		 * MSG_PEEK is set (consume, then write barrier, before moving on). */
+		next = vj_peek_next_buffer(udp_sk(sk)->chan);
+		if (next)
+			return next;
+
+		/* User doesn't want to wait */
+		*err = -EAGAIN;
+		if (!remaining)
+			return NULL;
+		*err = wait_for_vj_buffer(sk, &remaining);
+		if (*err <= 0)
+			return NULL;
+	}
+}
+
+/* Copy @len bytes of @buffer, starting @offset bytes into buffer->data, to the
+ * user iovec @to.  Returns 0, or -EFAULT if the copy fails. */
+static int vj_copy_datagram_iovec(struct vj_buffer *buffer, int offset,
+				  struct iovec *to, int len)
+{
+	/* the caller's @offset already skips the link, IP and UDP headers */
+	return memcpy_toiovec(to, buffer->data + offset, len) ? -EFAULT : 0;
+}
+
+/* FIXME: original code did timestamp in netif_rx.  Stamps now; cmsg only if SO_TIMESTAMP is on. */
+static __inline__ void vj_sock_recv_timestamp(struct msghdr *msg, struct sock *sk)
+{
+	do_gettimeofday(&sk->sk_stamp);
+	if (sock_flag(sk, SOCK_RCVTSTAMP))	/* same gate as sock_recv_timestamp() */
+		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), &sk->sk_stamp);
+}
+
+/* Sanity-check the IPv4 header in @buffer, trimming any trailing padding from
+ * data_len.  Returns offset in buffer past ip hdr, or 0 if something wrong. */
+static unsigned check_ip_packet(struct vj_buffer *buffer)
+{
+	struct iphdr *iph = (struct iphdr *)(buffer->data + buffer->header_len);
+	int hdr_bytes, total;
+
+	if (buffer->data_len < sizeof(*iph))
+		return 0;
+
+	if (iph->version != 4 || iph->ihl < 5)
+		return 0;
+
+	hdr_bytes = iph->ihl * 4;
+	total = ntohs(iph->tot_len);
+	if (hdr_bytes > total)		/* less than 0 data? */
+		return 0;
+	if (total > buffer->data_len)	/* truncated */
+		return 0;
+	if (total < buffer->data_len)	/* padded - trim it */
+		buffer->data_len = total;
+
+	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
+		return 0;
+
+	return buffer->header_len + hdr_bytes;
+}
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
  */
-
 static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		       size_t len, int noblock, int flags, int *addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
   	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
-  	struct sk_buff *skb;
-  	int copied, err;
+  	struct vj_buffer *buffer;
+	struct iphdr *ip;
+	struct udphdr *udph;
+  	int copied, err, udpoff;
 
 	/*
 	 *	Check any passed addresses
@@ -794,63 +949,71 @@
 		return ip_recv_error(sk, msg, len);
 
 try_again:
-	skb = skb_recv_datagram(sk, flags, noblock, &err);
-	if (!skb)
+	buffer = vj_recv_datagram(sk, flags, noblock, &err);
+	if (!buffer)
 		goto out;
-  
-  	copied = skb->len - sizeof(struct udphdr);
+
+	ip = (struct iphdr *)(buffer->data + buffer->header_len);
+	udpoff = check_ip_packet(buffer);
+	if (udpoff == 0)
+		goto bad_packet;
+
+	udph = (struct udphdr *)(buffer->data + udpoff);
+
+	buffer->data_len = ntohs(ip->tot_len);
+
+	if (((ip->ihl * 4) + ntohs(udph->len)) > buffer->data_len)
+		goto bad_packet;
+	buffer->data_len = (ip->ihl * 4) + ntohs(udph->len);
+
+	copied = buffer->data_len - ((ip->ihl * 4) + sizeof(struct udphdr));
+
 	if (copied > len) {
 		copied = len;
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
-		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
-					      copied);
-	} else if (msg->msg_flags&MSG_TRUNC) {
-		if (__udp_checksum_complete(skb))
-			goto csum_copy_err;
-		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
-					      copied);
-	} else {
-		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
-
-		if (err == -EINVAL)
-			goto csum_copy_err;
-	}
-
-	if (err)
-		goto out_free;
-
-	sock_recv_timestamp(msg, sk, skb);
+/* FIXME: if card is calculating csum, should be using that rather
+ * than calculating here */
+	if (vj_udp_csum(buffer) != 0) //bad checksum
+		goto bad_packet;
+
+	err = vj_copy_datagram_iovec(buffer, udpoff + sizeof(struct udphdr), msg->msg_iov, copied);
+
+	if (err) {
+		vj_done_with_buffer(udp_sk(sk)->chan);
+		return err;
+	}
+
+	vj_sock_recv_timestamp(msg, sk);
 
 	/* Copy the address. */
 	if (sin)
 	{
 		sin->sin_family = AF_INET;
-		sin->sin_port = skb->h.uh->source;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_port = udph->source;
+		sin->sin_addr.s_addr = ip->saddr;
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
   	}
+
+#if 0 				/* FIXME: implement this! */
 	if (inet->cmsg_flags)
 		ip_cmsg_recv(msg, skb);
+#endif
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-		err = skb->len - sizeof(struct udphdr);
+		err = buffer->data_len - (ip->ihl * 4) - sizeof(struct udphdr);
+	if (!(flags & MSG_PEEK))
+		vj_done_with_buffer(udp_sk(sk)->chan);
   
-out_free:
-  	skb_free_datagram(sk, skb);
 out:
   	return err;
 
-csum_copy_err:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
-
-	skb_kill_datagram(sk, skb, flags);
-
-	if (noblock)
-		return -EAGAIN;	
+bad_packet:
+	vj_done_with_buffer(udp_sk(sk)->chan);
+	if(noblock)
+		return -EAGAIN;
 	goto try_again;
 }
 
@@ -858,10 +1021,15 @@
 int udp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct udp_sock *up = udp_sk(sk);
 	/*
 	 *	1003.1g - break association.
 	 */
-	 
+	if (up->vj_reg_flag) {
+		vj_unregister_chan(up->chan);
+		up->vj_reg_flag = 0;
+	}	
+ 
 	sk->sk_state = TCP_CLOSE;
 	inet->daddr = 0;
 	inet->dport = 0;
@@ -879,6 +1047,14 @@
 
 static void udp_close(struct sock *sk, long timeout)
 {
+	struct udp_sock *up = udp_sk(sk);
+
+	if (up->vj_reg_flag) {	/* channel is still registered: unregister it before freeing */
+		vj_unregister_chan(up->chan);
+		up->vj_reg_flag = 0;
+	}
+	vj_free_chan(up->chan);	/* the channel obtained from vj_alloc_chan() in udp_init() */
+
 	sk_common_release(sk);
 }
 
@@ -1293,6 +1469,46 @@
   	return 0;
 }
 
+/*
+ * datagram_poll() variant that waits on, and tests readability of, the socket's VJ channel.
+ */
+unsigned int vj_datagram_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct vj_channel *chan = udp_sk(sk)->chan;
+	unsigned int events = 0;
+
+	poll_wait(file, &chan->wq, wait);
+	vj_inc_wakecnt(chan);
+
+	/* exceptional events? */
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		events |= POLLERR;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		events |= POLLHUP;
+
+	/* readable: a buffer is waiting, or the receive side is shut down */
+	if (vj_peek_next_buffer(chan) || (sk->sk_shutdown & RCV_SHUTDOWN))
+		events |= POLLIN | POLLRDNORM;
+
+	/* Connection-based need to check for termination and startup */
+	if (connection_based(sk)) {
+		if (sk->sk_state == TCP_CLOSE)
+			events |= POLLHUP;
+		/* connection hasn't started yet? */
+		if (sk->sk_state == TCP_SYN_SENT)
+			return events;
+	}
+
+	/* writable? */
+	if (!sock_writeable(sk))
+		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	else
+		events |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return events;
+}
+
 /**
  * 	udp_poll - wait for a UDP event.
  *	@file - file struct
@@ -1308,41 +1524,47 @@
  */
 unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
-	unsigned int mask = datagram_poll(file, sock, wait);
+	unsigned int mask = vj_datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 	
 	/* Check for false positives due to checksum errors */
 	if ( (mask & POLLRDNORM) &&
 	     !(file->f_flags & O_NONBLOCK) &&
 	     !(sk->sk_shutdown & RCV_SHUTDOWN)){
-		struct sk_buff_head *rcvq = &sk->sk_receive_queue;
-		struct sk_buff *skb;
-
-		spin_lock_bh(&rcvq->lock);
-		while ((skb = skb_peek(rcvq)) != NULL) {
-			if (udp_checksum_complete(skb)) {
-				UDP_INC_STATS_BH(UDP_MIB_INERRORS);
-				__skb_unlink(skb, rcvq);
-				kfree_skb(skb);
-			} else {
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
+		struct vj_buffer *buffer;
+
+		while ((buffer = vj_peek_next_buffer(udp_sk(sk)->chan)) != NULL) {
+			/* Only checksum a packet whose IP header check_ip_packet() accepted (and trimmed). */
+			if (check_ip_packet(buffer) != 0 &&
+			    vj_udp_csum(buffer) == 0)
 				break;
-			}
-		}
-		spin_unlock_bh(&rcvq->lock);
+			UDP_INC_STATS_BH(UDP_MIB_INERRORS);
+			vj_done_with_buffer(udp_sk(sk)->chan);
+		}
 
 		/* nothing to see, move along */
-		if (skb == NULL)
+		if (buffer == NULL)
 			mask &= ~(POLLIN | POLLRDNORM);
 	}
 
 	return mask;
 	
 }
+
+/* .init hook: give the new socket its own VJ channel, initially unregistered. */
+static int udp_init(struct sock *sk)
+{
+	struct udp_sock *up = udp_sk(sk);
+
+	up->vj_reg_flag = 0;
+	up->chan = vj_alloc_chan(0);
+	return up->chan ? 0 : -ENOMEM;
+}
 
 struct proto udp_prot = {
  	.name =		"UDP",
 	.owner =	THIS_MODULE,
+	.init = 	udp_init,
 	.close =	udp_close,
 	.connect =	ip4_datagram_connect,
 	.disconnect =	udp_disconnect,

                 reply	other threads:[~2006-04-26  1:47 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200604261147.46969.kelly@au.ibm.com \
    --to=kelly@au1.ibm.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.