From: Kelly Daly <kelly@au1.ibm.com>
To: netdev@vger.kernel.org
Cc: rusty@rustcorp.com.au, davem@davemloft.net
Subject: [PATCH 3/3] Rough VJ Channel Implementation - vj_udp.patch
Date: Wed, 26 Apr 2006 11:47:46 +0000 [thread overview]
Message-ID: <200604261147.46969.kelly@au.ibm.com> (raw)
Signed-off-by: Kelly Daly <kelly@au.ibm.com>
Hacked udp.c to receive directly to VJ Channel socket.
Breaks normal UDP - sockets don't speak non-VJ anymore!
----
diff -r 47031a1f466c linux-2.6.16/include/linux/udp.h
--- linux-2.6.16/include/linux/udp.h Thu Mar 23 06:32:12 2006
+++ linux-2.6.16/include/linux/udp.h Mon Apr 24 19:50:46 2006
@@ -51,6 +51,8 @@
* when the socket is uncorked.
*/
__u16 len; /* total length of pending frames */
+ struct vj_channel *chan; /* VJ net channel */
+ int vj_reg_flag; /* is the vj channel registered */
};
static inline struct udp_sock *udp_sk(const struct sock *sk)
diff -r 47031a1f466c linux-2.6.16/net/ipv4/udp.c
--- linux-2.6.16/net/ipv4/udp.c Thu Mar 23 06:32:12 2006
+++ linux-2.6.16/net/ipv4/udp.c Mon Apr 24 19:50:46 2006
@@ -1,3 +1,4 @@
+
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -89,6 +90,7 @@
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/config.h>
@@ -109,6 +111,7 @@
#include <net/inet_common.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <linux/vjchan.h>
/*
* Snmp MIB for the UDP layer
@@ -127,6 +130,7 @@
struct hlist_node *node;
struct sock *sk2;
struct inet_sock *inet = inet_sk(sk);
+ struct vj_flowid flowid;
write_lock_bh(&udp_hash_lock);
if (snum == 0) {
@@ -195,6 +199,17 @@
sk_add_node(sk, h);
sock_prot_inc_use(sk->sk_prot);
}
+
+ /* copied from udp_v4_lookup_longway */
+ flowid.saddr = inet->daddr;
+ flowid.daddr = inet->rcv_saddr;
+ flowid.sport = inet->dport;
+ flowid.dport = htons(inet->num);
+ flowid.ifindex = sk->sk_bound_dev_if;
+ flowid.proto = IPPROTO_UDP;
+ vj_register_chan(udp_sk(sk)->chan, &flowid);
+ udp_sk(sk)->vj_reg_flag = 1;
+
write_unlock_bh(&udp_hash_lock);
return 0;
@@ -771,18 +786,158 @@
__udp_checksum_complete(skb);
}
+static inline unsigned short int vj_udp_csum(struct vj_buffer *buffer) /* UDP checksum check for a VJ buffer: returns 0 if the datagram carries no checksum, else the folded pseudo-header sum (callers in this patch treat non-zero as bad) */
+{
+ struct iphdr *ip = (struct iphdr *)(buffer->data + buffer->header_len); /* IP header starts header_len bytes into the buffer */
+ int udpoff = buffer->header_len + (ip->ihl * 4); /* ihl counts 32-bit words, so ihl * 4 is the IP header size in bytes */
+ struct udphdr *up = (struct udphdr *)(buffer->data + udpoff);
+
+ if (up->check == 0) /* a zero checksum field means the sender did not compute one (RFC 768) */
+ return 0;
+
+ return csum_tcpudp_magic(ip->saddr,
+ ip->daddr,
+ (buffer->data_len - (ip->ihl * 4)), /* UDP length; NOTE(review): assumes data_len counts from the IP header and has been validated by the caller - confirm */
+ IPPROTO_UDP,
+ csum_partial((buffer->data + udpoff), /* sum runs over the UDP header and payload */
+ (buffer->data_len - (ip->ihl * 4)),
+ 0));
+}
+
+/*
+ * Is a socket 'connection oriented'? Non-zero only for SOCK_SEQPACKET and SOCK_STREAM sockets.
+ */
+static inline int connection_based(struct sock *sk)
+{
+ return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
+}
+
+/* Sleep until the socket's VJ channel may have data. Returns 1 if the caller should look again (a buffer is present or we slept), <= 0 to stop waiting: 0 on receive shutdown, otherwise an error code. *timeo_p is updated with the time left. */
+static int wait_for_vj_buffer(struct sock *sk, long *timeo_p)
+{
+ int error;
+ wait_queue_head_t *wq = &udp_sk(sk)->chan->wq; /* sleepers wait on the channel's own wait queue */
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE); /* get on the queue before testing the conditions below */
+ vj_inc_wakecnt(udp_sk(sk)->chan); /* NOTE(review): presumably tells the channel a wakeup is wanted - confirm against vjchan.h */
+
+ error = sock_error(sk); /* a pending socket error ends the wait */
+ if (error)
+ goto out;
+ if (vj_peek_next_buffer(udp_sk(sk)->chan)) { /* data already there: do not sleep, let the caller pick it up */
+ error = 1;
+ goto out;
+ }
+ if (sk->sk_shutdown & RCV_SHUTDOWN) { /* receive side shut down: stop waiting without an error */
+ error = 0;
+ goto out;
+ }
+ if (connection_based(sk) && !(sk->sk_state == TCP_ESTABLISHED ||
+ sk->sk_state == TCP_LISTEN)) {
+ error = -ENOTCONN;
+ goto out;
+ }
+ if (signal_pending(current)) { /* interrupted by a signal: error code comes from sock_intr_errno() */
+ error = sock_intr_errno(*timeo_p);
+ goto out;
+ }
+
+ error = 1;
+
+ *timeo_p = schedule_timeout(*timeo_p); /* sleep; the remaining timeout is handed back to the caller */
+out:
+ finish_wait(wq, &wait);
+ return error;
+}
+
+/* Almost a direct copy of skb_recv_datagram, using a vj buffer instead of an skb. Returns the next buffer of the socket's channel WITHOUT consuming it, or NULL with *err set. The flags argument is not used in this body. */
+struct vj_buffer *vj_recv_datagram(struct sock *sk, unsigned flags,
+ int noblock, int *err)
+{
+ struct vj_buffer *buffer;
+ long timeo;
+ *err = sock_error(sk); /* report a pending socket error before looking for data */
+
+ if (*err)
+ return NULL;
+
+ timeo = sock_rcvtimeo(sk, noblock); /* NOTE(review): expected to be 0 for non-blocking callers - confirm */
+ do {
+// Always peek: the buffer is returned unconsumed either way. After copying, the caller checks MSG_PEEK and decides whether to advance to the next buffer; the buffer must be consumed, with a write barrier, before moving on to avoid a race condition.
+
+ buffer = vj_peek_next_buffer(udp_sk(sk)->chan);
+ if (buffer)
+ return buffer;
+
+ /* User doesn't want to wait */
+ *err = -EAGAIN;
+ if (!timeo) {
+ return NULL;
+ }
+ } while ((*err = wait_for_vj_buffer(sk, &timeo)) > 0); /* > 0 means look again; <= 0 is left in *err for the caller */
+
+ return NULL;
+}
+
+static int vj_copy_datagram_iovec(struct vj_buffer *buffer, int offset, /* copy len bytes starting at buffer->data + offset into the user iovec; returns 0 or -EFAULT */
+ struct iovec *to, int len)
+{
+// offset is applied to buffer->data; udp_recvmsg passes the offset just past the UDP header. NOTE(review): the original note said header_len "contains eth hdr + ip hdr", but callers here add the IP header length separately - confirm.
+ if(memcpy_toiovec(to, buffer->data + offset, len)) /* any copy failure is reported as -EFAULT */
+ return -EFAULT;
+ return 0;
+}
+
+/* Record the receive time in sk->sk_stamp and, only when the socket has SO_TIMESTAMP enabled (SOCK_RCVTSTAMP), pass it to user space as an SO_TIMESTAMP cmsg. FIXME: original code did timestamp in netif_rx */
+static __inline__ void vj_sock_recv_timestamp(struct msghdr *msg, struct sock *sk)
+{
+ do_gettimeofday(&sk->sk_stamp); /* taken at recvmsg time, not at packet arrival (see FIXME) */
+ if (sock_flag(sk, SOCK_RCVTSTAMP)) /* like sock_recv_timestamp(): no cmsg unless the user asked for one */
+ put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), &sk->sk_stamp);
+}
+
+/* Validates the IPv4 header at buffer->data + header_len. Returns offset in buffer past ip hdr, or 0 if something wrong. May shrink buffer->data_len to the IP total length. */
+static unsigned check_ip_packet(struct vj_buffer *buffer)
+{
+ struct iphdr *iph;
+
+ iph = (struct iphdr *)(buffer->data + buffer->header_len);
+
+ if (buffer->data_len < sizeof(*iph)) /* too short for a minimal IP header; tested before iph is dereferenced */
+ return 0;
+
+ if (iph->ihl < 5 || iph->version != 4) /* ihl is in 32-bit words: 5 words is the 20-byte minimum header */
+ return 0;
+
+ if (iph->ihl * 4 > ntohs(iph->tot_len)) // header longer than the whole packet (negative payload)
+ return 0;
+
+ if (ntohs(iph->tot_len) > buffer->data_len) { // truncated: packet claims more bytes than were received
+ return 0;
+ } else if (ntohs(iph->tot_len) < buffer->data_len) { // padded - trim data_len down to the IP total length
+ buffer->data_len = ntohs(iph->tot_len);
+ }
+
+ if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) /* header checksum must come out as zero */
+ return 0;
+
+ return buffer->header_len + iph->ihl*4; /* offset from buffer->data to the first byte after the IP header */
+}
+
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
-
static int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
- struct sk_buff *skb;
- int copied, err;
+ struct vj_buffer *buffer;
+ struct iphdr *ip;
+ struct udphdr *udph;
+ int copied, err, udpoff;
/*
* Check any passed addresses
@@ -794,63 +949,71 @@
return ip_recv_error(sk, msg, len);
try_again:
- skb = skb_recv_datagram(sk, flags, noblock, &err);
- if (!skb)
+ buffer = vj_recv_datagram(sk, flags, noblock, &err);
+ if (!buffer)
goto out;
-
- copied = skb->len - sizeof(struct udphdr);
+
+ ip = (struct iphdr *)(buffer->data + buffer->header_len);
+ udpoff = check_ip_packet(buffer);
+ if (udpoff == 0)
+ goto bad_packet;
+
+ udph = (struct udphdr *)(buffer->data + udpoff);
+
+ buffer->data_len = ntohs(ip->tot_len);
+
+ if (((ip->ihl * 4) + ntohs(udph->len)) > buffer->data_len)
+ goto bad_packet;
+ buffer->data_len = (ip->ihl * 4) + ntohs(udph->len);
+
+ copied = buffer->data_len - ((ip->ihl * 4) + sizeof(struct udphdr));
+
if (copied > len) {
copied = len;
msg->msg_flags |= MSG_TRUNC;
}
- if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
- copied);
- } else if (msg->msg_flags&MSG_TRUNC) {
- if (__udp_checksum_complete(skb))
- goto csum_copy_err;
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
- copied);
- } else {
- err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
-
- if (err == -EINVAL)
- goto csum_copy_err;
- }
-
- if (err)
- goto out_free;
-
- sock_recv_timestamp(msg, sk, skb);
+/* FIXME: if card is calculating csum, should be using that rather
+ * than calculating here */
+ if (vj_udp_csum(buffer) != 0) //bad checksum
+ goto bad_packet;
+
+ err = vj_copy_datagram_iovec(buffer, udpoff + sizeof(struct udphdr), msg->msg_iov, copied);
+
+ if (err) {
+ vj_done_with_buffer(udp_sk(sk)->chan);
+ return err;
+ }
+
+ vj_sock_recv_timestamp(msg, sk);
/* Copy the address. */
if (sin)
{
sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
+ sin->sin_port = udph->source;
+ sin->sin_addr.s_addr = ip->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
+
+#if 0 /* FIXME: implement this! */
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
+#endif
err = copied;
if (flags & MSG_TRUNC)
- err = skb->len - sizeof(struct udphdr);
+ err = buffer->data_len - (ip->ihl * 4) - sizeof(struct udphdr);
+ if (!(flags & MSG_PEEK))
+ vj_done_with_buffer(udp_sk(sk)->chan);
-out_free:
- skb_free_datagram(sk, skb);
out:
return err;
-csum_copy_err:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
-
- skb_kill_datagram(sk, skb, flags);
-
- if (noblock)
- return -EAGAIN;
+bad_packet:
+ vj_done_with_buffer(udp_sk(sk)->chan);
+ if(noblock)
+ return -EAGAIN;
goto try_again;
}
@@ -858,10 +1021,15 @@
int udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
+ struct udp_sock *up = udp_sk(sk);
/*
* 1003.1g - break association.
*/
-
+ if (up->vj_reg_flag) {
+ vj_unregister_chan(up->chan);
+ up->vj_reg_flag = 0;
+ }
+
sk->sk_state = TCP_CLOSE;
inet->daddr = 0;
inet->dport = 0;
@@ -879,6 +1047,14 @@
static void udp_close(struct sock *sk, long timeout)
{
+ struct udp_sock *up = udp_sk(sk); /* per-socket UDP state holding the VJ channel */
+
+ if (up->vj_reg_flag) { /* unregister only if still registered; udp_disconnect clears the flag when it has already done so */
+ vj_unregister_chan(up->chan);
+ up->vj_reg_flag = 0;
+ }
+ vj_free_chan(up->chan); /* releases the channel allocated in udp_init */
+
sk_common_release(sk);
}
@@ -1293,6 +1469,46 @@
return 0;
}
+unsigned int vj_datagram_poll(struct file *file, struct socket *sock, poll_table *wait) /* poll handler that reports readability from the socket's VJ channel; returns the POLL* event mask */
+{
+ struct sock *sk = sock->sk;
+ unsigned int mask;
+
+ poll_wait(file, &udp_sk(sk)->chan->wq, wait); /* register on the channel's wait queue */
+ vj_inc_wakecnt(udp_sk(sk)->chan); /* NOTE(review): presumably tells the channel a wakeup is wanted - confirm against vjchan.h */
+
+ mask = 0;
+
+ /* exceptional events? */
+ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ mask |= POLLERR;
+ if (sk->sk_shutdown == SHUTDOWN_MASK) /* both directions shut down */
+ mask |= POLLHUP;
+
+
+ /* readable? */
+ if (vj_peek_next_buffer(udp_sk(sk)->chan) || /* a buffer is pending in the channel, or ... */
+ (sk->sk_shutdown & RCV_SHUTDOWN)) /* ... the receive side is shut down */
+ mask |= POLLIN | POLLRDNORM;
+
+ /* Connection-based need to check for termination and startup */
+ if (connection_based(sk)) {
+ if (sk->sk_state == TCP_CLOSE)
+ mask |= POLLHUP;
+ /* connection hasn't started yet? */
+ if (sk->sk_state == TCP_SYN_SENT)
+ return mask; /* writability is not reported in this state */
+ }
+
+ /* writable? */
+ if (sock_writeable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+ else
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); /* flag that a poller found the socket not writable */
+
+ return mask;
+}
+
/**
* udp_poll - wait for a UDP event.
* @file - file struct
@@ -1308,41 +1524,47 @@
*/
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
- unsigned int mask = datagram_poll(file, sock, wait);
+ unsigned int mask = vj_datagram_poll(file, sock, wait); /* base mask now comes from the VJ channel */
struct sock *sk = sock->sk;
/* Check for false positives due to checksum errors */
if ( (mask & POLLRDNORM) &&
!(file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN)){
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
- struct sk_buff *skb;
-
- spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL) {
- if (udp_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS);
- __skb_unlink(skb, rcvq);
- kfree_skb(skb);
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ struct vj_buffer *buffer;
+
+ while ((buffer = vj_peek_next_buffer(udp_sk(sk)->chan)) != NULL) { /* discard bad packets from the head of the channel until a good one (or none) is left */
+// Stop at the first packet that has a valid IP header AND a good UDP checksum; a packet rejected by check_ip_packet() is never handed to vj_udp_csum(), which trusts the header lengths.
+ if (check_ip_packet(buffer) != 0 &&
+ vj_udp_csum(buffer) == 0)
break;
- }
- }
- spin_unlock_bh(&rcvq->lock);
+ UDP_INC_STATS_BH(UDP_MIB_INERRORS); /* count the drop as an input error */
+ vj_done_with_buffer(udp_sk(sk)->chan); /* consume the bad buffer and look at the next one */
+ }
/* nothing to see, move along */
- if (skb == NULL)
+ if (buffer == NULL) /* every pending buffer was bad: not readable after all */
mask &= ~(POLLIN | POLLRDNORM);
}
return mask;
}
+
+static int udp_init(struct sock *sk) /* protocol .init hook (see udp_prot): gives the socket its VJ channel; returns 0 or -ENOMEM */
+{
+ udp_sk(sk)->chan = vj_alloc_chan(0); /* NOTE(review): meaning of the 0 argument is not visible here - confirm against vjchan.h */
+ udp_sk(sk)->vj_reg_flag = 0; /* not registered yet; udp_v4_get_port sets the flag after registering */
+ if (!udp_sk(sk)->chan)
+ return -ENOMEM;
+ return 0;
+}
+
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
+ .init = udp_init,
.close = udp_close,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
reply other threads:[~2006-04-26 1:47 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200604261147.46969.kelly@au.ibm.com \
--to=kelly@au1.ibm.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=rusty@rustcorp.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).