Netdev List
 help / color / mirror / Atom feed
* [PATCH 2/5] Phonet: routing table backend
From: Rémi Denis-Courmont @ 2009-09-15 11:32 UTC (permalink / raw)
  To: netdev; +Cc: Rémi Denis-Courmont
In-Reply-To: <1253014343-20326-1-git-send-email-remi@remlab.net>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

The Phonet "universe" only has 64 addresses, so we keep a trivial flat
routing table.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
---
 include/net/phonet/pn_dev.h |    5 ++
 net/phonet/pn_dev.c         |  100 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 44c923c..87b5d81 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -47,6 +47,11 @@ u8 phonet_address_get(struct net_device *dev, u8 addr);
 int phonet_address_lookup(struct net *net, u8 addr);
 void phonet_address_notify(int event, struct net_device *dev, u8 addr);
 
+int phonet_route_add(struct net_device *dev, u8 daddr);
+int phonet_route_del(struct net_device *dev, u8 daddr);
+struct net_device *phonet_route_get(struct net *net, u8 daddr);
+struct net_device *phonet_route_output(struct net *net, u8 daddr);
+
 #define PN_NO_ADDR	0xff
 
 extern const struct file_operations pn_sock_seq_fops;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 5f42f30..71fffa5 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -33,8 +33,14 @@
 #include <net/netns/generic.h>
 #include <net/phonet/pn_dev.h>
 
+struct phonet_routes {
+	spinlock_t		lock;
+	struct net_device	*table[64];
+};
+
 struct phonet_net {
 	struct phonet_device_list pndevs;
+	struct phonet_routes routes;
 };
 
 int phonet_net_id;
@@ -154,10 +160,11 @@ int phonet_address_del(struct net_device *dev, u8 addr)
 }
 
 /* Gets a source address toward a destination, through a interface. */
-u8 phonet_address_get(struct net_device *dev, u8 addr)
+u8 phonet_address_get(struct net_device *dev, u8 daddr)
 {
 	struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
 	struct phonet_device *pnd;
+	u8 saddr;
 
 	spin_lock_bh(&pndevs->lock);
 	pnd = __phonet_get(dev);
@@ -165,12 +172,26 @@ u8 phonet_address_get(struct net_device *dev, u8 addr)
 		BUG_ON(bitmap_empty(pnd->addrs, 64));
 
 		/* Use same source address as destination, if possible */
-		if (!test_bit(addr >> 2, pnd->addrs))
-			addr = find_first_bit(pnd->addrs, 64) << 2;
+		if (test_bit(daddr >> 2, pnd->addrs))
+			saddr = daddr;
+		else
+			saddr = find_first_bit(pnd->addrs, 64) << 2;
 	} else
-		addr = PN_NO_ADDR;
+		saddr = PN_NO_ADDR;
 	spin_unlock_bh(&pndevs->lock);
-	return addr;
+
+	if (saddr == PN_NO_ADDR) {
+		/* Fallback to another device */
+		struct net_device *def_dev;
+
+		def_dev = phonet_device_get(dev_net(dev));
+		if (def_dev) {
+			if (def_dev != dev)
+				saddr = phonet_address_get(def_dev, daddr);
+			dev_put(def_dev);
+		}
+	}
+	return saddr;
 }
 
 int phonet_address_lookup(struct net *net, u8 addr)
@@ -246,7 +267,7 @@ static struct notifier_block phonet_device_notifier = {
 /* Per-namespace Phonet devices handling */
 static int phonet_init_net(struct net *net)
 {
-	struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL);
+	struct phonet_net *pnn = kzalloc(sizeof(*pnn), GFP_KERNEL);
 	if (!pnn)
 		return -ENOMEM;
 
@@ -257,6 +278,7 @@ static int phonet_init_net(struct net *net)
 
 	INIT_LIST_HEAD(&pnn->pndevs.list);
 	spin_lock_init(&pnn->pndevs.lock);
+	spin_lock_init(&pnn->routes.lock);
 	net_assign_generic(net, phonet_net_id, pnn);
 	return 0;
 }
@@ -300,3 +322,69 @@ void phonet_device_exit(void)
 	unregister_netdevice_notifier(&phonet_device_notifier);
 	unregister_pernet_gen_device(phonet_net_id, &phonet_net_ops);
 }
+
+int phonet_route_add(struct net_device *dev, u8 daddr)
+{
+	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_routes *routes = &pnn->routes;
+	int err = -EEXIST;
+
+	daddr = daddr >> 2;
+	spin_lock_bh(&routes->lock);
+	if (routes->table[daddr] == NULL) {
+		routes->table[daddr] = dev;
+		dev_hold(dev);
+		err = 0;
+	}
+	spin_unlock_bh(&routes->lock);
+	return err;
+}
+
+int phonet_route_del(struct net_device *dev, u8 daddr)
+{
+	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_routes *routes = &pnn->routes;
+	int err = -ENOENT;
+
+	daddr = daddr >> 2;
+	spin_lock_bh(&routes->lock);
+	if (dev == routes->table[daddr]) {
+		routes->table[daddr] = NULL;
+		dev_put(dev);
+		err = 0;
+	}
+	spin_unlock_bh(&routes->lock);
+	return err;
+}
+
+struct net_device *phonet_route_get(struct net *net, u8 daddr)
+{
+	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_routes *routes = &pnn->routes;
+	struct net_device *dev;
+
+	ASSERT_RTNL(); /* no need to hold the device */
+
+	daddr >>= 2;
+	spin_lock_bh(&routes->lock);
+	dev = routes->table[daddr];
+	spin_unlock_bh(&routes->lock);
+	return dev;
+}
+
+struct net_device *phonet_route_output(struct net *net, u8 daddr)
+{
+	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_routes *routes = &pnn->routes;
+	struct net_device *dev;
+
+	spin_lock_bh(&routes->lock);
+	dev = routes->table[daddr >> 2];
+	if (dev)
+		dev_hold(dev);
+	spin_unlock_bh(&routes->lock);
+
+	if (!dev)
+		dev = phonet_device_get(net); /* Default route */
+	return dev;
+}
-- 
1.6.0.4


^ permalink raw reply related

* [PATCH 3/5] Phonet: routing table Netlink interface
From: Rémi Denis-Courmont @ 2009-09-15 11:32 UTC (permalink / raw)
  To: netdev; +Cc: Rémi Denis-Courmont
In-Reply-To: <1253014343-20326-2-git-send-email-remi@remlab.net>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
---
 include/net/phonet/pn_dev.h |    1 +
 net/phonet/pn_dev.c         |   31 ++++++++++
 net/phonet/pn_netlink.c     |  130 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 162 insertions(+), 0 deletions(-)

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 87b5d81..afa7def 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -49,6 +49,7 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr);
 
 int phonet_route_add(struct net_device *dev, u8 daddr);
 int phonet_route_del(struct net_device *dev, u8 daddr);
+void rtm_phonet_notify(int event, struct net_device *dev, u8 dst);
 struct net_device *phonet_route_get(struct net *net, u8 daddr);
 struct net_device *phonet_route_output(struct net *net, u8 daddr);
 
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 71fffa5..6d64fda 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -240,6 +240,27 @@ static int phonet_device_autoconf(struct net_device *dev)
 	return 0;
 }
 
+static void phonet_route_autodel(struct net_device *dev)
+{
+	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	unsigned i;
+	DECLARE_BITMAP(deleted, 64);
+
+	/* Remove left-over Phonet routes */
+	bitmap_zero(deleted, 64);
+	spin_lock_bh(&pnn->routes.lock);
+	for (i = 0; i < 64; i++)
+		if (dev == pnn->routes.table[i]) {
+			set_bit(i, deleted);
+			pnn->routes.table[i] = NULL;
+			dev_put(dev);
+		}
+	spin_unlock_bh(&pnn->routes.lock);
+	for (i = find_first_bit(deleted, 64); i < 64;
+			i = find_next_bit(deleted, 64, i + 1))
+		rtm_phonet_notify(RTM_DELROUTE, dev, i);
+}
+
 /* notify Phonet of device events */
 static int phonet_device_notify(struct notifier_block *me, unsigned long what,
 				void *arg)
@@ -253,6 +274,7 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what,
 		break;
 	case NETDEV_UNREGISTER:
 		phonet_device_destroy(dev);
+		phonet_route_autodel(dev);
 		break;
 	}
 	return 0;
@@ -287,10 +309,19 @@ static void phonet_exit_net(struct net *net)
 {
 	struct phonet_net *pnn = net_generic(net, phonet_net_id);
 	struct net_device *dev;
+	unsigned i;
 
 	rtnl_lock();
 	for_each_netdev(net, dev)
 		phonet_device_destroy(dev);
+
+	for (i = 0; i < 64; i++) {
+		dev = pnn->routes.table[i];
+		if (dev) {
+			rtm_phonet_notify(RTM_DELROUTE, dev, i);
+			dev_put(dev);
+		}
+	}
 	rtnl_unlock();
 
 	proc_net_remove(net, "phonet");
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index d21fd35..d8f5d3f 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -29,6 +29,8 @@
 #include <net/sock.h>
 #include <net/phonet/pn_dev.h>
 
+/* Device address handling */
+
 static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
 		     u32 pid, u32 seq, int event);
 
@@ -160,6 +162,131 @@ out:
 	return skb->len;
 }
 
+/* Routes handling */
+
+static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
+			u32 pid, u32 seq, int event)
+{
+	struct rtmsg *rtm;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family = AF_PHONET;
+	rtm->rtm_dst_len = 6;
+	rtm->rtm_src_len = 0;
+	rtm->rtm_tos = 0;
+	rtm->rtm_table = RT_TABLE_MAIN;
+	rtm->rtm_protocol = RTPROT_STATIC;
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_type = RTN_UNICAST;
+	rtm->rtm_flags = 0;
+	NLA_PUT_U8(skb, RTA_DST, dst);
+	NLA_PUT_U32(skb, RTA_OIF, dev->ifindex);
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
+{
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+			nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+	err = fill_route(skb, dev, dst, 0, 0, event);
+	if (err < 0) {
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, dev_net(dev), 0,
+			  RTNLGRP_PHONET_ROUTE, NULL, GFP_KERNEL);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_ROUTE, err);
+}
+
+static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
+	[RTA_DST] = { .type = NLA_U8 },
+	[RTA_OIF] = { .type = NLA_U32 },
+};
+
+static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tb[RTA_MAX+1];
+	struct net_device *dev;
+	struct rtmsg *rtm;
+	int err;
+	u8 dst;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ASSERT_RTNL();
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_phonet_policy);
+	if (err < 0)
+		return err;
+
+	rtm = nlmsg_data(nlh);
+	if (rtm->rtm_table != RT_TABLE_MAIN || rtm->rtm_type != RTN_UNICAST)
+		return -EINVAL;
+	if (tb[RTA_DST] == NULL || tb[RTA_OIF] == NULL)
+		return -EINVAL;
+	dst = nla_get_u8(tb[RTA_DST]);
+	if (dst & 3) /* Phonet addresses only have 6 high-order bits */
+		return -EINVAL;
+
+	dev = __dev_get_by_index(net, nla_get_u32(tb[RTA_OIF]));
+	if (dev == NULL)
+		return -ENODEV;
+
+	if (nlh->nlmsg_type == RTM_NEWROUTE)
+		err = phonet_route_add(dev, dst);
+	else
+		err = phonet_route_del(dev, dst);
+	if (!err)
+		rtm_phonet_notify(nlh->nlmsg_type, dev, dst);
+	return err;
+}
+
+static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	u8 addr, addr_idx = 0, addr_start_idx = cb->args[0];
+
+	for (addr = 0; addr < 64; addr++) {
+		struct net_device *dev;
+
+		dev = phonet_route_get(net, addr << 2);
+		if (!dev)
+			continue;
+
+		if (addr_idx++ < addr_start_idx)
+			continue;
+		if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid,
+				cb->nlh->nlmsg_seq, RTM_NEWROUTE))
+			goto out;
+	}
+
+out:
+	cb->args[0] = addr_idx;
+	cb->args[1] = 0;
+
+	return skb->len;
+}
+
 int __init phonet_netlink_register(void)
 {
 	int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL);
@@ -169,5 +296,8 @@ int __init phonet_netlink_register(void)
 	/* Further __rtnl_register() cannot fail */
 	__rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL);
 	__rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit);
+	__rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL);
+	__rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL);
+	__rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit);
 	return 0;
 }
-- 
1.6.0.4


^ permalink raw reply related

* [PATCH 1/5] Phonet: error on broadcast sending (unimplemented)
From: Rémi Denis-Courmont @ 2009-09-15 11:32 UTC (permalink / raw)
  To: netdev; +Cc: Rémi Denis-Courmont
In-Reply-To: <a9972d3fe3d2bf09387bee7827b571c0@chewa.net>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

---
 include/linux/phonet.h |    1 +
 net/phonet/af_phonet.c |    6 ++++++
 2 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index 1ef5a07..e5126cf 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -38,6 +38,7 @@
 #define PNPIPE_IFINDEX		2
 
 #define PNADDR_ANY		0
+#define PNADDR_BROADCAST	0xFC
 #define PNPORT_RESOURCE_ROUTING	0
 
 /* Values for PNPIPE_ENCAP option */
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index a662e62..f60c0c2 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -168,6 +168,12 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 	}
 
+	/* Broadcast sending is not implemented */
+	if (pn_addr(dst) == PNADDR_BROADCAST) {
+		err = -EOPNOTSUPP;
+		goto drop;
+	}
+
 	skb_reset_transport_header(skb);
 	WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */
 	skb_push(skb, sizeof(struct phonethdr));
-- 
1.6.0.4


^ permalink raw reply related

* [PATCH 5/5] Phonet: forward incoming packets
From: Rémi Denis-Courmont @ 2009-09-15 11:32 UTC (permalink / raw)
  To: netdev; +Cc: Rémi Denis-Courmont
In-Reply-To: <1253014343-20326-4-git-send-email-remi@remlab.net>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
---
 net/phonet/af_phonet.c |   32 ++++++++++++++++++++++++++++++++
 1 files changed, 32 insertions(+), 0 deletions(-)

diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f0ef6f8..e69c915 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -394,8 +394,40 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 			send_obj_unreachable(skb);
 			send_reset_indications(skb);
 		}
+	} else if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		goto out; /* Race between address deletion and loopback */
+	else {
+		/* Phonet packet routing */
+		struct net_device *out_dev;
+
+		out_dev = phonet_route_output(net, pn_sockaddr_get_addr(&sa));
+		if (!out_dev) {
+			LIMIT_NETDEBUG(KERN_WARNING"No Phonet route to %02X\n",
+					pn_sockaddr_get_addr(&sa));
+			goto out;
+		}
+
+		__skb_push(skb, sizeof(struct phonethdr));
+		skb->dev = out_dev;
+		if (out_dev == dev) {
+			LIMIT_NETDEBUG(KERN_ERR"Phonet loop to %02X on %s\n",
+					pn_sockaddr_get_addr(&sa), dev->name);
+			goto out_dev;
+		}
+		/* Some drivers (e.g. TUN) do not allocate HW header space */
+		if (skb_cow_head(skb, out_dev->hard_header_len))
+			goto out_dev;
+
+		if (dev_hard_header(skb, out_dev, ETH_P_PHONET, NULL, NULL,
+					skb->len) < 0)
+			goto out_dev;
+		dev_queue_xmit(skb);
+		dev_put(out_dev);
+		return NET_RX_SUCCESS;
 	}
 
+out_dev:
+	dev_put(skb->dev);
 out:
 	kfree_skb(skb);
 	return NET_RX_DROP;
-- 
1.6.0.4


^ permalink raw reply related

* [PATCH 4/5] Phonet: route outgoing packets
From: Rémi Denis-Courmont @ 2009-09-15 11:32 UTC (permalink / raw)
  To: netdev; +Cc: Rémi Denis-Courmont
In-Reply-To: <1253014343-20326-3-git-send-email-remi@remlab.net>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
---
 net/phonet/af_phonet.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 013f471..f0ef6f8 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -190,9 +190,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev,
 	skb->priority = 0;
 	skb->dev = dev;
 
-	if (pn_addr(src) == pn_addr(dst)) {
+	if (skb->pkt_type == PACKET_LOOPBACK) {
 		skb_reset_mac_header(skb);
-		skb->pkt_type = PACKET_LOOPBACK;
 		skb_orphan(skb);
 		if (irq)
 			netif_rx(skb);
@@ -222,6 +221,9 @@ static int pn_raw_send(const void *data, int len, struct net_device *dev,
 	if (skb == NULL)
 		return -ENOMEM;
 
+	if (phonet_address_lookup(dev_net(dev), pn_addr(dst)) == 0)
+		skb->pkt_type = PACKET_LOOPBACK;
+
 	skb_reserve(skb, MAX_PHONET_HEADER);
 	__skb_put(skb, len);
 	skb_copy_to_linear_data(skb, data, len);
@@ -235,6 +237,7 @@ static int pn_raw_send(const void *data, int len, struct net_device *dev,
 int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 		const struct sockaddr_pn *target)
 {
+	struct net *net = sock_net(sk);
 	struct net_device *dev;
 	struct pn_sock *pn = pn_sk(sk);
 	int err;
@@ -243,9 +246,13 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 
 	err = -EHOSTUNREACH;
 	if (sk->sk_bound_dev_if)
-		dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if);
-	else
-		dev = phonet_device_get(sock_net(sk));
+		dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+	else if (phonet_address_lookup(net, daddr) == 0) {
+		dev = phonet_device_get(net);
+		skb->pkt_type = PACKET_LOOPBACK;
+	} else
+		dev = phonet_route_output(net, daddr);
+
 	if (!dev || !(dev->flags & IFF_UP))
 		goto drop;
 
-- 
1.6.0.4


^ permalink raw reply related

* [PATCH] early retransmit
From: Christian Samsel @ 2009-09-15 11:31 UTC (permalink / raw)
  To: netdev

This patch implements draft-ietf-tcpm-early-rexmt.
It prevents an RTO by lowering the dupack threshold in case
we expect fewer dupacks than the usual threshold.
Signed-off-by: Christian Samsel <christian.samsel@rwth-aachen.de>

---
 net/ipv4/tcp_input.c |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index af6d6fa..c0cc4fd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2913,6 +2913,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
 				    (tcp_fackets_out(tp) > tp->reordering));
 	int fast_rexmit = 0, mib_idx;
+	u32 in_flight;
 
 	if (WARN_ON(!tp->packets_out && tp->sacked_out))
 		tp->sacked_out = 0;
@@ -3062,6 +3063,21 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
 	tcp_cwnd_down(sk, flag);
+        
+
+	/* draft-ietf-tcpm-early-rexmt: lowers dup ack threshold to prevent rto
+	 * in case we don't expect enough dup ack. if number of outstanding 
+	 * packets is less than four and there is either no unsent data ready
+	 * for transmission or the advertised window does not permit new 
+	 * segments.
+	 */
+	in_flight = tcp_packets_in_flight(tp);
+	if ( in_flight < 4 && (skb_queue_empty(&sk->sk_write_queue) || 
+		tcp_may_send_now(sk) == 0) )
+		tp->reordering = in_flight - 1;
+	else if (tp->reordering != sysctl_tcp_reordering)
+		tp->reordering = sysctl_tcp_reordering;
+	
 	tcp_xmit_retransmit_queue(sk);
 }
 
-- 
1.6.4.1

^ permalink raw reply related

* [PATCH 0/5] Phonet: basic routing support
From: Rémi Denis-Courmont @ 2009-09-15 11:30 UTC (permalink / raw)
  To: netdev


   Hello,

This small patch series adds simplistic routing support to the Phonet
stack. Usually, there is an "upstream" interface to the Phonet modem.
However, if Linux runs on an embedded USB gadget, we have two interfaces in
the same namespace:
- an internal interface to the modem (e.g. OMAP SSI bus), and
- an external interface to the USB host (CDC Phonet).

Patches follow.
 include/linux/phonet.h      |    1
 include/net/phonet/pn_dev.h |    6 ++
 net/phonet/af_phonet.c      |   55 ++++++++++++++++--
 net/phonet/pn_dev.c         |  131
+++++++++++++++++++++++++++++++++++++++++---
 net/phonet/pn_netlink.c     |  130
+++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 312 insertions(+), 11 deletions(-)

I am not sure if feature patches are still allowed. If not, I can just
repost this at a more convenient time.

-- 
Rémi Denis-Courmont


^ permalink raw reply

* Re: [PATCH 2/2] ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface
From: Eric Dumazet @ 2009-09-15 11:10 UTC (permalink / raw)
  To: Sebastian Haas; +Cc: netdev, greg, wg, oliver, socketcan-core, linux-usb
In-Reply-To: <20090914082229.5441.16650.stgit@localhost.localdomain>

Sebastian Haas a écrit :
> This patch adds support for one channel CAN/USB interace CPC-USB/ARM7 from
> EMS Dr. Thomas Wuensche (http://www.ems-wuensche.com).
> 
> Signed-off-by: Sebastian Haas <haas@ems-wuensche.com>
> ---
> 

...

> +	netif_rx(skb);
> +
> +	dev->netdev->last_rx = jiffies;

Please don't update last_rx unless your driver *really* needs to.

drivers/net/sky2.c is an example of driver that has to update ->last_rx
for its internal use.

But most drivers don't need it anymore, since the core network code
already updates it if necessary (check skb_bond_should_drop())


> +	stats->rx_packets++;
> +	stats->rx_bytes += cf->can_dlc;
> +}
> +

^ permalink raw reply

* Re: [PATCH 2/2] ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface
From: Wolfgang Grandegger @ 2009-09-15 10:57 UTC (permalink / raw)
  To: Sebastian Haas
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	greg-U8xfFu+wG4EAvxtiuMwx3w, oliver-fJ+pQTUTwRTk1uMJSBkQmQ,
	linux-usb-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20090914082229.5441.16650.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>

Hi Sebastian,

Sebastian Haas wrote:
> This patch adds support for one channel CAN/USB interace CPC-USB/ARM7 from
> EMS Dr. Thomas Wuensche (http://www.ems-wuensche.com).
> 
> Signed-off-by: Sebastian Haas <haas-zsNKPWJ8Pib6hrUXjxyGrA@public.gmane.org>

The driver is almost OK from the Socket-CAN point of view. Just some
*final* nitpicking:

[snip]
> +static void ems_usb_read_interrupt_callback(struct urb *urb)
> +{
> +	struct ems_usb *dev = urb->context;
> +	struct net_device *netdev;
> +	int err;
> +
> +	netdev = dev->netdev;

Could be done in the declaration part above.

> +	if (!netif_device_present(netdev))
> +		return;
> +
> +	switch (urb->status) {
> +	case 0:
> +		dev->free_slots = dev->intr_in_buffer[1];
> +		break;
> +
> +	case -ECONNRESET: /* unlink */
> +	case -ENOENT:
> +	case -ESHUTDOWN:
> +		return;
> +
> +	default:
> +		dev_info(netdev->dev.parent, "Rx interrupt aborted %d\n",
> +			 urb->status);
> +		break;
> +	}
> +
> +	err = usb_submit_urb(urb, GFP_ATOMIC);
> +
> +	if (err == -ENODEV)
> +		netif_device_detach(netdev);
> +	else if (err)
> +		dev_err(netdev->dev.parent,
> +			"failed resubmitting intr urb: %d\n", err);
> +
> +	return;
> +}
> +
> +static void ems_usb_rx_can_msg(struct ems_usb *dev, struct ems_cpc_msg *msg)
> +{
> +	struct can_frame *cf;
> +	struct sk_buff *skb;
> +	int i;
> +	struct net_device_stats *stats = &dev->netdev->stats;
> +
> +	skb = dev_alloc_skb(sizeof(struct can_frame));

Please use netdev_alloc_skb() ...

> +	if (skb == NULL)
> +		return;
> +
> +	skb->dev = dev->netdev;

... making the line above obsolete.

> +	skb->protocol = htons(ETH_P_CAN);
> +
> +	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
> +
> +	cf->can_id = msg->msg.can_msg.id;
> +	cf->can_dlc = min_t(u8, msg->msg.can_msg.length, 8);
> +
> +	if (msg->type == CPC_MSG_TYPE_EXT_CAN_FRAME
> +	    || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME)
> +		cf->can_id |= CAN_EFF_FLAG;
> +
> +	if (msg->type == CPC_MSG_TYPE_RTR_FRAME
> +	    || msg->type == CPC_MSG_TYPE_EXT_RTR_FRAME) {
> +		cf->can_id |= CAN_RTR_FLAG;
> +	} else {
> +		for (i = 0; i < cf->can_dlc; i++)
> +			cf->data[i] = msg->msg.can_msg.msg[i];
> +	}
> +
> +	netif_rx(skb);
> +
> +	dev->netdev->last_rx = jiffies;
> +	stats->rx_packets++;
> +	stats->rx_bytes += cf->can_dlc;
> +}
> +
> +static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
> +{
> +	struct can_frame *cf;
> +	struct sk_buff *skb;
> +	struct net_device_stats *stats = &dev->netdev->stats;
> +
> +	skb = dev_alloc_skb(sizeof(struct can_frame));
> +	if (skb == NULL)
> +		return;
> +
> +	skb->dev = dev->netdev;

Ditto.

[snip]
> +static int ems_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev)
> +{
> +	struct ems_usb *dev = netdev_priv(netdev);
> +	struct ems_tx_urb_context *context = NULL;
> +	struct net_device_stats *stats = &netdev->stats;
> +	struct can_frame *cf = (struct can_frame *)skb->data;
> +	struct ems_cpc_msg *msg;
> +	struct urb *urb;
> +	u8 *buf;
> +	int i, err;
> +	size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN
> +			+ sizeof(struct cpc_can_msg);
> +
> +	/* create a URB, and a buffer for it, and copy the data to the URB */
> +	urb = usb_alloc_urb(0, GFP_ATOMIC);
> +	if (!urb) {
> +		dev_err(netdev->dev.parent, "No memory left for URBs\n");
> +		goto nomem;
> +	}
> +
> +	buf = usb_buffer_alloc(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
> +	if (!buf) {
> +		dev_err(netdev->dev.parent, "No memory left for USB buffer\n");
> +		usb_free_urb(urb);
> +		goto nomem;
> +	}
> +
> +	msg = (struct ems_cpc_msg *)&buf[CPC_HEADER_SIZE];
> +
> +	msg->msg.can_msg.id = cf->can_id & 0x1FFFFFFFU;

Please use CAN_ERR_MASK instead (even if the name is somewhat
misleading). See http://lxr.linux.no/#linux+v2.6.31/include/linux/can.h#L31.

[snip]
> +}
> +
> +

Remove one empty line, please.

> +static void init_params_sja1000(struct ems_cpc_msg *msg)
> +{
> +	struct cpc_sja1000_params *sja1000 =
> +		&msg->msg.can_params.cc_params.sja1000;
> +
> +	msg->type = CPC_CMD_TYPE_CAN_PARAMS;
> +	msg->length = sizeof(struct cpc_can_params);
> +	msg->msgid = 0;
> +
> +	msg->msg.can_params.cc_type = CPC_CC_TYPE_SJA1000;
> +
> +	/* Acceptance filter open */
> +	sja1000->acc_code0 = 0x00;
> +	sja1000->acc_code1 = 0x00;
> +	sja1000->acc_code2 = 0x00;
> +	sja1000->acc_code3 = 0x00;
> +
> +	/* Acceptance filter open */
> +	sja1000->acc_mask0 = 0xFF;
> +	sja1000->acc_mask1 = 0xFF;
> +	sja1000->acc_mask2 = 0xFF;
> +	sja1000->acc_mask3 = 0xFF;
> +
> +	sja1000->btr0 = 0;
> +	sja1000->btr1 = 0;
> +
> +	sja1000->outp_contr = SJA1000_DEFAULT_OUTPUT_CONTROL;
> +	sja1000->mode = SJA1000_MOD_RM;
> +}
> +
> +/*
> + * probe function for new CPC-USB devices
> + */
> +static int ems_usb_probe(struct usb_interface *intf,
> +			 const struct usb_device_id *id)
> +{
> +	struct net_device *netdev;
> +	struct ems_usb *dev;
> +	int i, err;
> +
> +	netdev = alloc_candev(sizeof(struct ems_usb));
> +	if (!netdev) {
> +		dev_err(netdev->dev.parent, "Couldn't alloc candev\n");
> +		return -ENOMEM;
> +	}
> +
> +	dev = netdev_priv(netdev);
> +
> +	dev->udev = interface_to_usbdev(intf);
> +	dev->netdev = netdev;
> +
> +	dev->can.state = CAN_STATE_STOPPED;
> +	dev->can.bittiming_const = &ems_usb_bittiming_const;
> +	dev->can.do_set_bittiming = ems_usb_set_bittiming;
> +	dev->can.do_set_mode = ems_usb_set_mode;
> +
> +	netdev->flags |= IFF_ECHO; /* we support local echo */
> +
> +	/*
> +	 * The device actually uses a 16MHz clock to generate the CAN clock
> +	 * but it expects SJA1000 bit settings based on 8MHz (is internally
> +	 * converted).
> +	 */

Should go up to the macro definition.

> +	dev->can.clock.freq = EMS_USB_ARM7_CLOCK;
> +
> +	netdev->netdev_ops = &ems_usb_netdev_ops;
> +
> +	netdev->flags |= IFF_ECHO; /* we support local echo */
> +
> +	init_usb_anchor(&dev->rx_submitted);
> +
> +	init_usb_anchor(&dev->tx_submitted);
> +	atomic_set(&dev->active_tx_urbs, 0);
> +
> +	for (i = 0; i < MAX_TX_URBS; i++)
> +		dev->tx_contexts[i].echo_index = MAX_TX_URBS;
> +
> +	dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
> +	if (!dev->intr_urb) {
> +		dev_err(netdev->dev.parent, "Couldn't alloc intr URB\n");
> +		free_candev(netdev);

Please use goto's for cleanup to avoid code duplication.

> +		return -ENOMEM;
> +	}
> +
> +	dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
> +	if (!dev->intr_in_buffer) {
> +		dev_err(netdev->dev.parent, "Couldn't alloc Intr buffer\n");
> +		free_candev(netdev);
> +		usb_free_urb(dev->intr_urb);
> +		return -ENOMEM;

Ditto.

> +	}
> +
> +	dev->tx_msg_buffer = kzalloc(CPC_HEADER_SIZE +
> +				     sizeof(struct ems_cpc_msg), GFP_KERNEL);
> +	if (!dev->tx_msg_buffer) {
> +		dev_err(netdev->dev.parent, "Couldn't alloc Tx buffer\n");
> +		free_candev(netdev);
> +		usb_free_urb(dev->intr_urb);
> +		kfree(dev->intr_in_buffer);
> +		return -ENOMEM;

Ditto.

> +	}
> +
> +	usb_set_intfdata(intf, dev);
> +
> +	SET_NETDEV_DEV(netdev, &intf->dev);
> +
> +	init_params_sja1000(&dev->active_params);
> +
> +	err = ems_usb_command_msg(dev, &dev->active_params);
> +	if (err) {
> +		dev_err(netdev->dev.parent,
> +			"couldn't initialize controller: %d\n", err);
> +		free_candev(netdev);
> +		usb_free_urb(dev->intr_urb);
> +		return err;

Ditto. Also kfree(dev->intr_in_buffer) seems to be missing.

Please add a version number to the next patch, e.g. [PATCH v3 ...].

It would also be nice if some USB gurus could have a look as well.

Thanks,

Wolfgang.
--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] RxRPC: Use uX/sX rather than uintX_t/intX_t types
From: David Howells @ 2009-09-15 10:16 UTC (permalink / raw)
  To: torvalds, akpm; +Cc: linux-afs, netdev, David Howells

Use uX rather than uintX_t types for consistency.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 include/keys/rxrpc-type.h |   20 ++++++++++----------
 net/rxrpc/ar-ack.c        |    6 +++---
 net/rxrpc/ar-internal.h   |   16 ++++++++--------
 net/rxrpc/ar-key.c        |    2 +-
 4 files changed, 22 insertions(+), 22 deletions(-)


diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
index 5eb2357..5cb86c3 100644
--- a/include/keys/rxrpc-type.h
+++ b/include/keys/rxrpc-type.h
@@ -53,8 +53,8 @@ struct krb5_tagged_data {
 	 * - KRB5_AUTHDATA_* for auth data
 	 * - 
 	 */
-	int32_t		tag;
-	uint32_t	data_len;
+	s32		tag;
+	u32		data_len;
 	u8		*data;
 };
 
@@ -62,17 +62,17 @@ struct krb5_tagged_data {
  * RxRPC key for Kerberos V (type-5 security)
  */
 struct rxk5_key {
-	uint64_t		authtime;	/* time at which auth token generated */
-	uint64_t		starttime;	/* time at which auth token starts */
-	uint64_t		endtime;	/* time at which auth token expired */
-	uint64_t		renew_till;	/* time to which auth token can be renewed */
-	int32_t			is_skey;	/* T if ticket is encrypted in another ticket's
+	u64			authtime;	/* time at which auth token generated */
+	u64			starttime;	/* time at which auth token starts */
+	u64			endtime;	/* time at which auth token expired */
+	u64			renew_till;	/* time to which auth token can be renewed */
+	s32			is_skey;	/* T if ticket is encrypted in another ticket's
 						 * skey */
-	int32_t			flags;		/* mask of TKT_FLG_* bits (krb5/krb5.h) */
+	s32			flags;		/* mask of TKT_FLG_* bits (krb5/krb5.h) */
 	struct krb5_principal	client;		/* client principal name */
 	struct krb5_principal	server;		/* server principal name */
-	uint16_t		ticket_len;	/* length of ticket */
-	uint16_t		ticket2_len;	/* length of second ticket */
+	u16			ticket_len;	/* length of ticket */
+	u16			ticket2_len;	/* length of second ticket */
 	u8			n_authdata;	/* number of authorisation data elements */
 	u8			n_addresses;	/* number of addresses */
 	struct krb5_tagged_data	session;	/* session data; tag is enctype */
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index c9f1f0a..b4a2209 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -40,7 +40,7 @@ static const s8 rxrpc_ack_priority[] = {
 /*
  * propose an ACK be sent
  */
-void __rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 			 __be32 serial, bool immediate)
 {
 	unsigned long expiry;
@@ -120,7 +120,7 @@ cancel_timer:
 /*
  * propose an ACK be sent, locking the call structure
  */
-void rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 		       __be32 serial, bool immediate)
 {
 	s8 prior = rxrpc_ack_priority[ack_reason];
@@ -520,7 +520,7 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call)
 	struct rxrpc_skb_priv *sp;
 	struct sk_buff *skb;
 	unsigned long _skb, *acks_window;
-	uint8_t winsz = call->acks_winsz;
+	u8 winsz = call->acks_winsz;
 	int tail;
 
 	acks_window = call->acks_window;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 46c6d88..7043b29 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -229,7 +229,7 @@ struct rxrpc_conn_bundle {
 	int			debug_id;	/* debug ID for printks */
 	unsigned short		num_conns;	/* number of connections in this bundle */
 	__be16			service_id;	/* service ID */
-	uint8_t			security_ix;	/* security type */
+	u8			security_ix;	/* security type */
 };
 
 /*
@@ -370,10 +370,10 @@ struct rxrpc_call {
 	u8			channel;	/* connection channel occupied by this call */
 
 	/* transmission-phase ACK management */
-	uint8_t			acks_head;	/* offset into window of first entry */
-	uint8_t			acks_tail;	/* offset into window of last entry */
-	uint8_t			acks_winsz;	/* size of un-ACK'd window */
-	uint8_t			acks_unacked;	/* lowest unacked packet in last ACK received */
+	u8			acks_head;	/* offset into window of first entry */
+	u8			acks_tail;	/* offset into window of last entry */
+	u8			acks_winsz;	/* size of un-ACK'd window */
+	u8			acks_unacked;	/* lowest unacked packet in last ACK received */
 	int			acks_latest;	/* serial number of latest ACK received */
 	rxrpc_seq_t		acks_hard;	/* highest definitively ACK'd msg seq */
 	unsigned long		*acks_window;	/* sent packet window
@@ -388,7 +388,7 @@ struct rxrpc_call {
 	rxrpc_seq_t		rx_first_oos;	/* first packet in rx_oos_queue (or 0) */
 	rxrpc_seq_t		ackr_win_top;	/* top of ACK window (rx_data_eaten is bottom) */
 	rxrpc_seq_net_t		ackr_prev_seq;	/* previous sequence number received */
-	uint8_t			ackr_reason;	/* reason to ACK */
+	u8			ackr_reason;	/* reason to ACK */
 	__be32			ackr_serial;	/* serial of packet being ACK'd */
 	atomic_t		ackr_not_idle;	/* number of packets in Rx queue */
 
@@ -434,8 +434,8 @@ extern int rxrpc_reject_call(struct rxrpc_sock *);
 /*
  * ar-ack.c
  */
-extern void __rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
-extern void rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
+extern void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
 extern void rxrpc_process_call(struct work_struct *);
 
 /*
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 44836f6..74697b2 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -360,7 +360,7 @@ static int rxrpc_krb5_decode_tagged_array(struct krb5_tagged_data **_td,
 /*
  * extract a krb5 ticket
  */
-static int rxrpc_krb5_decode_ticket(u8 **_ticket, uint16_t *_tktlen,
+static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
 				    const __be32 **_xdr, unsigned *_toklen)
 {
 	const __be32 *xdr = *_xdr;


^ permalink raw reply related

* Re: [AX25] kernel panic
From: Bernard Pidoux @ 2009-09-15 10:16 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: Ralf Baechle DL5RB, Linux Netdev List, linux-hams
In-Reply-To: <4AAAA344.4030200@gmail.com>

Hi Jarek,

I was not aware of this debug option and will turn it ON.
Thanks for the suggestion.

Bernard

Jarek Poplawski a écrit :
> Bernard Pidoux wrote, On 09/10/2009 12:28 AM:
>
>   
>> Hi Ralf,
>>
>> Here is a set of not so frequent kernel panics captured via netconsole
>> that seem related to AX25 timer. 
>>
>> Regards,
>>
>> Bernard Pidoux
>>
>>     
>
> Hi Bernard,
>
> Could/did you try to turn on this debugging option below, btw?
>
> Regards,
> Jarek P.
>
>    CONFIG_DEBUG_OBJECTS_TIMERS:
>   ?                                                                                                        ?  
>   ? If you say Y here, additional code will be inserted into the                                           ?  
>   ? timer routines to track the life time of timer objects and                                             ?  
>   ? validate the timer operations.                                                                         ?  
>   ?                                                                                                        ?  
>   ? Symbol: DEBUG_OBJECTS_TIMERS [=y]                                                                      ?  
>   ? Prompt: Debug timer objects                                                                            ?  
>   ?   Defined at lib/Kconfig.debug:247                                                                     ?  
>   ?   Depends on: DEBUG_OBJECTS                                                                            ?  
>   ?   Location:                                                                                            ?  
>   ?     -> Kernel hacking                                                                                  ?  
>   ?       -> Kernel debugging (DEBUG_KERNEL [=y])                                                          ?  
>   ?         -> Debug object operations (DEBUG_OBJECTS [=y])                                                ?  
>   ?                                                                      
>
>   

--
To unsubscribe from this list: send the line "unsubscribe linux-hams" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC] net/core: Delay neighbor only if it has been used after confirmed
From: David Miller @ 2009-09-15 10:13 UTC (permalink / raw)
  To: jens; +Cc: netdev, yoshfuji
In-Reply-To: <1253009246.8352.10.camel@fnki-nb00130>

From: Jens Rosenboom <jens@mcbone.net>
Date: Tue, 15 Sep 2009 12:07:26 +0200

> On Thu, 2009-09-10 at 18:21 +0200, Jens Rosenboom wrote:
>> On Wed, 2009-09-02 at 21:22 +0900, YOSHIFUJI Hideaki wrote:
>> [...]
>> > And, this "if" for REACHABLE->DELAY may be completely needless.
>> > Timer in REACHABLE is only for state transition for toward REACHABLE
>> > or STALE.
>> 
>> I did some testing with the following patch, which works fine for me, so
>> I propose this one now instead of my previous one. I still have no real
>> idea about the non-IPv6 implications of this, though.
>> 
>> ---
>> 
>> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
>> index e587e68..f61926f 100644
>> --- a/net/core/neighbour.c
>> +++ b/net/core/neighbour.c
>> @@ -819,13 +819,6 @@ static void neigh_timer_handler(unsigned long arg)
>>  				   neigh->confirmed + neigh->parms->reachable_time)) {
>>  			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
>>  			next = neigh->confirmed + neigh->parms->reachable_time;
>> -		} else if (time_before_eq(now,
>> -					  neigh->used + neigh->parms->delay_probe_time)) {
>> -			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
>> -			neigh->nud_state = NUD_DELAY;
>> -			neigh->updated = jiffies;
>> -			neigh_suspect(neigh);
>> -			next = now + neigh->parms->delay_probe_time;
>>  		} else {
>>  			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
>>  			neigh->nud_state = NUD_STALE;
>> 
> 
> Hi David, what are your thoughts on this one? There is IMHO a real bug
> to fix, namely sending tons of repeated neighbor solicitations when
> there is no actual traffic to be sent, so this should qualify to go into
> 2.6.32. Do you want to wait for further comments or should I submit this
> for net-2.6 so it can get some testing?

I'm waiting for Yoshifuji's feedback to your latest patch.

^ permalink raw reply

* Re: [RFC] net/core: Delay neighbor only if it has been used after confirmed
From: Jens Rosenboom @ 2009-09-15 10:07 UTC (permalink / raw)
  To: David Miller; +Cc: Linux Network Developers, YOSHIFUJI Hideaki
In-Reply-To: <1252599707.5980.13.camel@fnki-nb00130>

On Thu, 2009-09-10 at 18:21 +0200, Jens Rosenboom wrote:
> On Wed, 2009-09-02 at 21:22 +0900, YOSHIFUJI Hideaki wrote:
> [...]
> > And, this "if" for REACHABLE->DELAY may be completely needless.
> > Timer in REACHABLE is only for state transition for toward REACHABLE
> > or STALE.
> 
> I did some testing with the following patch, which works fine for me, so
> I propose this one now instead of my previous one. I still have no real
> idea about the non-IPv6 implications of this, though.
> 
> ---
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index e587e68..f61926f 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -819,13 +819,6 @@ static void neigh_timer_handler(unsigned long arg)
>  				   neigh->confirmed + neigh->parms->reachable_time)) {
>  			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
>  			next = neigh->confirmed + neigh->parms->reachable_time;
> -		} else if (time_before_eq(now,
> -					  neigh->used + neigh->parms->delay_probe_time)) {
> -			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
> -			neigh->nud_state = NUD_DELAY;
> -			neigh->updated = jiffies;
> -			neigh_suspect(neigh);
> -			next = now + neigh->parms->delay_probe_time;
>  		} else {
>  			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
>  			neigh->nud_state = NUD_STALE;
> 

Hi David, what are your thoughts on this one? There is IMHO a real bug
to fix, namely sending tons of repeated neighbor solicitations when
there is no actual traffic to be sent, so this should qualify to go into
2.6.32. Do you want to wait for further comments or should I submit this
for net-2.6 so it can get some testing?



^ permalink raw reply

* Re: [PATCH v2] pkt_sched: Fix tx queue selection in tc_modify_qdisc
From: David Miller @ 2009-09-15  9:53 UTC (permalink / raw)
  To: jarkao2; +Cc: kaber, netdev
In-Reply-To: <20090914225022.GA13363@ami.dom.local>

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Tue, 15 Sep 2009 00:50:22 +0200

> -----------------------> (take 2)
> pkt_sched: Fix tx queue selection in tc_modify_qdisc
> 
> After the recent mq change there is the new select_queue qdisc class
> method used in tc_modify_qdisc, but it works OK only for direct child
> qdiscs of mq qdisc. Grandchildren always get the first tx queue, which
> would give wrong qdisc_root etc. results (e.g. for sch_htb as child of
> sch_prio). This patch fixes it by using parent's dev_queue for such
> grandchildren qdiscs. The select_queue method's return type is changed
> BTW.
> 
> With feedback from: Patrick McHardy <kaber@trash.net>
> 
> Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>

Applied, thanks Jarek.

^ permalink raw reply

* Re: [PATCH alt] sky2: Make sure both ports initialize correctly
From: David Miller @ 2009-09-15  9:50 UTC (permalink / raw)
  To: shemminger; +Cc: mikem, netdev
In-Reply-To: <20090914092229.31490f45@nehalam>

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 14 Sep 2009 09:22:29 -0700

> Sorry Mike, I sent you off the wrong way. The following is simpler and the
> second port is different enough in setup (because of NAPI), that the
> following is simpler.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Applied.

^ permalink raw reply

* Re: [PATCH alt] sky2: transmit ring accounting
From: David Miller @ 2009-09-15  9:50 UTC (permalink / raw)
  To: shemminger; +Cc: mikem, netdev
In-Reply-To: <20090914091255.2cda7d24@nehalam>

From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 14 Sep 2009 09:12:55 -0700

> Be more accurate about number of transmit list elements required.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

Applied.

^ permalink raw reply

* Re: [PATCH 1/4] RxRPC: Declare the security index constants symbolically
From: David Miller @ 2009-09-15  9:46 UTC (permalink / raw)
  To: dhowells; +Cc: netdev


All 4 patches applied, but...

The rxrpc code seems to use a hodge-podge of uint32_t, uint16_t et
al. and the Linux kernel preferred "u8".

Please consolidate it all to use u32, u16, etc.

This looks especially weird since you use the endianness aware
fixed sized Linux types (be32, be16) as well.

^ permalink raw reply

* Re: [PATCH kernel 2.6.31] pcnet_cs: add cis of Linksys multifunction pcmcia card
From: David Miller @ 2009-09-15  9:42 UTC (permalink / raw)
  To: ken_kawasaki; +Cc: netdev
In-Reply-To: <20090913172257.6c1976ec.ken_kawasaki@spring.nifty.jp>

From: Ken Kawasaki <ken_kawasaki@spring.nifty.jp>
Date: Sun, 13 Sep 2009 17:22:57 +0900

> 
> pcnet_cs,serial_cs:
>  
> add cis of Linksys lan&modem multifunction pcmcia card
> and some modem card(MT5634ZLX, RS-COM-2P).
> 
>   
> Signed-off-by: Ken Kawasaki <ken_kawasaki@spring.nifty.jp>

Applied, thank you.

^ permalink raw reply

* Re: WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (bf438284)
From: David Miller @ 2009-09-15  9:39 UTC (permalink / raw)
  To: mingo; +Cc: eric.dumazet, penberg, vegard.nossum, linux-kernel, netdev
In-Reply-To: <20090915093440.GA7921@elte.hu>

From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 15 Sep 2009 11:34:40 +0200

> 
> * Eric Dumazet <eric.dumazet@gmail.com> wrote:
> 
>> Either we add kmemcheck annotations, or we switch sock->type from short
>> to int to avoid the hole, and possibly to speedup things...
>> 
>> [PATCH] net: kmemcheck annotation in struct socket
>> 
>> struct socket has a 16 bit hole that triggers kmemcheck warnings.
>> 
>> As suggested by Ingo, use kmemcheck annotations
>> 
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
>> ---
>>  include/linux/net.h |    5 +++++
>>  net/socket.c        |    1 +
>>  2 files changed, 6 insertions(+)
> 
> Acked-by: Ingo Molnar <mingo@elte.hu>

Applied, thanks everyone.

^ permalink raw reply

* Re: WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (bf438284)
From: Ingo Molnar @ 2009-09-15  9:34 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Pekka Enberg, Vegard Nossum, linux-kernel, Linux Netdev List,
	David S. Miller
In-Reply-To: <4AAF5757.30500@gmail.com>


* Eric Dumazet <eric.dumazet@gmail.com> wrote:

> Ingo Molnar a écrit :
> > FYI, we still have this one on latest mainline:
> > 
> > [    2.159614] NET: Registered protocol family 16
> > [    2.163109] initcall netlink_proto_init+0x0/0x1b0 returned 0 after 5859 usecs
> > [    2.164008] WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (bf438284)
> > [    2.165006] 0100000002000000000000000000000000000000ad4eaddeffffffffffffffff
> > [    2.172006]  i i i i i i u u i i i i i i i i i i i i i i i i i i i i i i i i
> > [    2.179005]          ^
> > [    2.180005] 
> > [    2.181008] Pid: 1, comm: swapper Not tainted (2.6.31-tip-02389-gc9f313c-dirty #151) 
> > [    2.182006] EIP: 0060:[<815a8101>] EFLAGS: 00010282 CPU: 0
> > [    2.183009] EIP is at sock_init_data+0xe1/0x210
> > [    2.184006] EAX: 0001b000 EBX: bf855938 ECX: 8233b614 EDX: 819ac7bf
> > [    2.185006] ESI: bf855800 EDI: bf438280 EBP: bf867f10 ESP: 81b3afcc
> > [    2.186006]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
> > [    2.187006] CR0: 8005003b CR2: bf83bdf0 CR3: 01b2c000 CR4: 000006d0
> > [    2.188006] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> > [    2.189006] DR6: ffff4ff0 DR7: 00000400
> > [    2.190005]  [<815d82b5>] __netlink_create+0x35/0xa0
> > [    2.192005]  [<815dabaa>] netlink_kernel_create+0x5a/0x150
> > [    2.194004]  [<815bc8ee>] rtnetlink_net_init+0x1e/0x40
> > [    2.196005]  [<815af381>] register_pernet_operations+0x11/0x30
> > [    2.198004]  [<815af4be>] register_pernet_subsys+0x1e/0x30
> > [    2.200004]  [<81adb49c>] rtnetlink_init+0x4c/0x100
> > [    2.202004]  [<81adbfe5>] netlink_proto_init+0x105/0x1b0
> > [    2.204004]  [<81001127>] do_one_initcall+0x27/0x190
> > [    2.206004]  [<81a9f567>] do_initcalls+0x27/0x40
> > [    2.208004]  [<81a9f5a6>] do_basic_setup+0x26/0x30
> > [    2.210004]  [<81a9f907>] kernel_init+0x57/0xa0
> > [    2.212004]  [<81004867>] kernel_thread_helper+0x7/0x30
> > [    2.214004]  [<ffffffff>] 0xffffffff
> > [    2.216021] calling  bdi_class_init+0x0/0x30 @ 1
> > [    2.217015] device class 'bdi': registering
> > [    2.218702] initcall bdi_class_init+0x0/0x30 returned 0 after 976 usecs
> > [    2.219041] calling  kobject_uevent_init+0x0/0x50 @ 1
> > 
> > config attached.
> > 
> > 	Ingo
> > 
> 
> I thought this was already discussed and fixed somehow ?

yes, it looked familiar.

> Either we add kmemcheck annotations, or we switch sock->type from short
> to int to avoid the hole, and possibly to speedup things...
> 
> [PATCH] net: kmemcheck annotation in struct socket
> 
> struct socket has a 16 bit hole that triggers kmemcheck warnings.
> 
> As suggested by Ingo, use kmemcheck annotations
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
>  include/linux/net.h |    5 +++++
>  net/socket.c        |    1 +
>  2 files changed, 6 insertions(+)

Acked-by: Ingo Molnar <mingo@elte.hu>

	Ingo

^ permalink raw reply

* Re: alloc skb based on a given data buffer
From: Zhu Yi @ 2009-09-15  9:15 UTC (permalink / raw)
  To: David Miller
  Cc: mel@csn.ul.ie, Chatre, Reinette, elendil@planet.nl,
	Larry.Finger@lwfinger.net, linville@tuxdriver.com,
	penberg@cs.helsinki.fi, linux-kernel@vger.kernel.org,
	linux-wireless@vger.kernel.org,
	ipw3945-devel@lists.sourceforge.net, akpm@linux-foundation.org,
	cl@linux-foundation.org, Krauss, Assaf, johannes@sipsolutions.net,
	Abbas, Mohamed, netdev@vger.kernel.org
In-Reply-To: <20090915.020903.93643290.davem@davemloft.net>

On Tue, 2009-09-15 at 17:09 +0800, David Miller wrote:
> From: Zhu Yi <yi.zhu@intel.com>
> Date: Tue, 15 Sep 2009 16:57:29 +0800
> 
> > Thanks. So we can put the 8K buffer into 2 skb_shinfo()->frags[] slots
> > and set nr_frags to 2, right? Is this supported all over the network code
> > already? At a first glance, I didn't find any frags handling in mac80211
> > stack.
> 
> You have to pre-pull the link level protocol headers into the
> linear area, but that's it.
> 
> Again, see niu.c for details, it does:
> 
> static void niu_rx_skb_append(struct sk_buff *skb, struct page *page,
> 			      u32 offset, u32 size)
> {
> 	int i = skb_shinfo(skb)->nr_frags;
> 	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
> 
> 	frag->page = page;
> 	frag->page_offset = offset;
> 	frag->size = size;
> 
> 	skb->len += size;
> 	skb->data_len += size;
> 	skb->truesize += size;
> 
> 	skb_shinfo(skb)->nr_frags = i + 1;
> }
> 
> to add pages to SKBs and then at the end it goes:
> 
> 	skb_reserve(skb, NET_IP_ALIGN);
> 	__pskb_pull_tail(skb, min(len, NIU_RXPULL_MAX));
> 
> Right before giving the SKB to the networking stack.  NIU_RXPULL_MAX
> should be a value that will be large enough to cover the largest
> possible link level header.

I see. Thanks for this info. I'll try implementing the same for iwlagn.

Thanks,
-yi

^ permalink raw reply

* Re: alloc skb based on a given data buffer
From: David Miller @ 2009-09-15  9:09 UTC (permalink / raw)
  To: yi.zhu
  Cc: mel, reinette.chatre, elendil, Larry.Finger, linville, penberg,
	linux-kernel, linux-wireless, ipw3945-devel, akpm, cl,
	assaf.krauss, johannes, mohamed.abbas, netdev
In-Reply-To: <1253005050.7549.58.camel@debian>

From: Zhu Yi <yi.zhu@intel.com>
Date: Tue, 15 Sep 2009 16:57:29 +0800

> Thanks. So we can put the 8K buffer into 2 skb_shinfo()->frags[] slots
> and set nr_frags to 2, right? Is this supported all over the network code
> already? At a first glance, I didn't find any frags handling in mac80211
> stack.

You have to pre-pull the link level protocol headers into the
linear area, but that's it.

Again, see niu.c for details, it does:

static void niu_rx_skb_append(struct sk_buff *skb, struct page *page,
			      u32 offset, u32 size)
{
	int i = skb_shinfo(skb)->nr_frags;
	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

	frag->page = page;
	frag->page_offset = offset;
	frag->size = size;

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;

	skb_shinfo(skb)->nr_frags = i + 1;
}

to add pages to SKBs and then at the end it goes:

	skb_reserve(skb, NET_IP_ALIGN);
	__pskb_pull_tail(skb, min(len, NIU_RXPULL_MAX));

Right before giving the SKB to the networking stack.  NIU_RXPULL_MAX
should be a value that will be large enough to cover the largest
possible link level header.

^ permalink raw reply

* Re: WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (bf438284)
From: Eric Dumazet @ 2009-09-15  8:59 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Pekka Enberg, Vegard Nossum, linux-kernel, Linux Netdev List,
	David S. Miller
In-Reply-To: <20090915080953.GA24958@elte.hu>

Ingo Molnar a écrit :
> FYI, we still have this one on latest mainline:
> 
> [    2.159614] NET: Registered protocol family 16
> [    2.163109] initcall netlink_proto_init+0x0/0x1b0 returned 0 after 5859 usecs
> [    2.164008] WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (bf438284)
> [    2.165006] 0100000002000000000000000000000000000000ad4eaddeffffffffffffffff
> [    2.172006]  i i i i i i u u i i i i i i i i i i i i i i i i i i i i i i i i
> [    2.179005]          ^
> [    2.180005] 
> [    2.181008] Pid: 1, comm: swapper Not tainted (2.6.31-tip-02389-gc9f313c-dirty #151) 
> [    2.182006] EIP: 0060:[<815a8101>] EFLAGS: 00010282 CPU: 0
> [    2.183009] EIP is at sock_init_data+0xe1/0x210
> [    2.184006] EAX: 0001b000 EBX: bf855938 ECX: 8233b614 EDX: 819ac7bf
> [    2.185006] ESI: bf855800 EDI: bf438280 EBP: bf867f10 ESP: 81b3afcc
> [    2.186006]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
> [    2.187006] CR0: 8005003b CR2: bf83bdf0 CR3: 01b2c000 CR4: 000006d0
> [    2.188006] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [    2.189006] DR6: ffff4ff0 DR7: 00000400
> [    2.190005]  [<815d82b5>] __netlink_create+0x35/0xa0
> [    2.192005]  [<815dabaa>] netlink_kernel_create+0x5a/0x150
> [    2.194004]  [<815bc8ee>] rtnetlink_net_init+0x1e/0x40
> [    2.196005]  [<815af381>] register_pernet_operations+0x11/0x30
> [    2.198004]  [<815af4be>] register_pernet_subsys+0x1e/0x30
> [    2.200004]  [<81adb49c>] rtnetlink_init+0x4c/0x100
> [    2.202004]  [<81adbfe5>] netlink_proto_init+0x105/0x1b0
> [    2.204004]  [<81001127>] do_one_initcall+0x27/0x190
> [    2.206004]  [<81a9f567>] do_initcalls+0x27/0x40
> [    2.208004]  [<81a9f5a6>] do_basic_setup+0x26/0x30
> [    2.210004]  [<81a9f907>] kernel_init+0x57/0xa0
> [    2.212004]  [<81004867>] kernel_thread_helper+0x7/0x30
> [    2.214004]  [<ffffffff>] 0xffffffff
> [    2.216021] calling  bdi_class_init+0x0/0x30 @ 1
> [    2.217015] device class 'bdi': registering
> [    2.218702] initcall bdi_class_init+0x0/0x30 returned 0 after 976 usecs
> [    2.219041] calling  kobject_uevent_init+0x0/0x50 @ 1
> 
> config attached.
> 
> 	Ingo
> 

I thought this was already discussed and fixed somehow ?

Either we add kmemcheck annotations, or we switch sock->type from short
to int to avoid the hole, and possibly to speedup things...

[PATCH] net: kmemcheck annotation in struct socket

struct socket has a 16 bit hole that triggers kmemcheck warnings.

As suggested by Ingo, use kmemcheck annotations

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/net.h |    5 +++++
 net/socket.c        |    1 +
 2 files changed, 6 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 4fc2ffd..9040a10 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -57,6 +57,7 @@ typedef enum {
 #include <linux/random.h>
 #include <linux/wait.h>
 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
+#include <linux/kmemcheck.h>
 
 struct poll_table_struct;
 struct pipe_inode_info;
@@ -127,7 +128,11 @@ enum sock_shutdown_cmd {
  */
 struct socket {
 	socket_state		state;
+
+	kmemcheck_bitfield_begin(type);
 	short			type;
+	kmemcheck_bitfield_end(type);
+
 	unsigned long		flags;
 	/*
 	 * Please keep fasync_list & wait fields in the same cache line
diff --git a/net/socket.c b/net/socket.c
index 6d47165..2a022c0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -489,6 +489,7 @@ static struct socket *sock_alloc(void)
 
 	sock = SOCKET_I(inode);
 
+	kmemcheck_annotate_bitfield(sock, type);
 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();

^ permalink raw reply related

* Re: alloc skb based on a given data buffer
From: Zhu Yi @ 2009-09-15  8:57 UTC (permalink / raw)
  To: David Miller
  Cc: mel@csn.ul.ie, Chatre, Reinette, elendil@planet.nl,
	Larry.Finger@lwfinger.net, linville@tuxdriver.com,
	penberg@cs.helsinki.fi, linux-kernel@vger.kernel.org,
	linux-wireless@vger.kernel.org,
	ipw3945-devel@lists.sourceforge.net, akpm@linux-foundation.org,
	cl@linux-foundation.org, Krauss, Assaf, johannes@sipsolutions.net,
	Abbas, Mohamed, netdev@vger.kernel.org
In-Reply-To: <20090915.013321.07006714.davem@davemloft.net>

On Tue, 2009-09-15 at 16:33 +0800, David Miller wrote:
> From: Zhu Yi <yi.zhu@intel.com>
> Date: Tue, 15 Sep 2009 16:30:20 +0800
> 
> > This way, device drivers can allocate the Rx buffers with their own size
> > and alignment requirement. i.e. do an order-1 page allocation directly
> > with free_pages() in the iwlagn driver for a 256 bytes aligned 8K Rx
> > buffer. After DMA is finished, drivers can use the above function to
> > assemble an skb based on the Rx buffer. It should resolve the problem
> > for requiring an order-2 allocation by alloc_skb() in the first place.
> 
> You can create paged RX skbs just like drivers such as niu.c
> and others already do, there is no need for special APIs for
> this.

Thanks. So we can put the 8K buffer into 2 skb_shinfo()->frags[] slots
and set nr_frags to 2, right? Is this supported all over the network code
already? At a first glance, I didn't find any frags handling in mac80211
stack.

Thanks,
-yi


^ permalink raw reply

* Re: Fwd: [RFC v3] net: Introduce recvmmsg socket syscall
From: Nir Tzachar @ 2009-09-15  8:37 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Linux Networking Development Mailing List, Ziv Ayalon
In-Reply-To: <20090914230902.GC22743@ghostprotocols.net>

On Tue, Sep 15, 2009 at 2:09 AM, Arnaldo Carvalho de
Melo<acme@ghostprotocols.net> wrote:
> Em Thu, Aug 06, 2009 at 10:15:26AM +0300, Nir Tzachar escreveu:
>> Hello.
>>
>> Is there anything new with this patch? What are the plans for merging
>> it upstream?
>
> I'm doing perf runs using a test app using recvmsg, then with the first
> patch, that introduces recvmmsg, then with the second, that locks the
> series of unlocked_recvmmsg calls just once, will try to get this posted
> here soon.
>
> I'd really appreciate if the people interested in this could try it and
> post numbers too, to get this ball rolling again.
>
> As for getting it upstream, well, posting numbers here would definitely
> help with that :-)

Ok, here are some crude results:

Setup:
linux 2.6.29.2 with the third version of the patch, running on an
Intel Xeon X3220 2.4GHz quad core, with 4Gbyte of ram, running Ubuntu
9.04

Application:
A financial application, subscribing to quotes from a stock exchange.
Typical traffic is small (around 50-100 bytes) multicast packets in
large volumes. The application just receives the quotes and passes them
along.

The test:
Run two versions of the application, head to head, one version using
recvmsg and the other recvmmsg. The data is passed to a third
application measuring the latency of the data.

Results:
In general, the recvmmsg beats the pants off the regular recvmsg by a
whole millisecond (which might not sound much, but is _really_ a lot
for us ;). The exact distribution fluctuates between half a milli and
2 millis, but the average is 1 milli.

Conclusions:
We would _really_ like to see this patch go upstream. It gives an
important performance boost in our use cases.

We are willing to perform more accurate tests if needed, and would
appreciate the feedback on how to conduct them.

Cheers,
Nir.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox