Netdev List

Netdev List
 help / color / mirror / Atom feed

* skb configured but can't get data allocated
From: DHAJOGLO @ 2007-09-12 17:31 UTC (permalink / raw)
  To: netdev; +Cc: kernelnewbies

Right,
    I managed to figure out through looking at the code how to configure and send my skb properly.  For those who say the code is well documented.. you're right.. however I feel like a blind man in an "adult book store" and just don't see what I'm missing.  Below is my code. The packets are making it back and forth but the data is absent on the return trip.  d_out is my little struct with two bytes and a char.



struct sk_buff *send_back(struct sk_buff *oldskb)
{
        struct sk_buff *skb;
        struct iphdr *iph;
        struct net_device *dev = (struct net_device *)oldskb->dev;
        struct rtable *rt;
        int iplen;
        struct ethhdr *mac;
        struct exp_packet *d_out;

        skb = alloc_skb(sizeof(struct exp_packet) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
        if(skb == NULL)
                return NULL;

        /* insert the data probably need to allocatd differently */
        d_out = (struct exp_packet *)skb->data;
        d_out->headbits = 0xAA;
        d_out->sequence = 0xBB;
        d_out->payload = 0xCCDD;

        /* from igmp... configure the rt */
        {
                struct flowi fl = { .oif = dev->ifindex,
                                        .nl_u = { .ip4_u = {
                                                .daddr = htonl(oldskb->nh.iph->saddr) } },
                                                .proto = IPPROTO_EXP };
                if (ip_route_output_key(&rt,&fl)) {
                        printk("No route?\n");
                        kfree_skb(skb);
                        return NULL;
                }
        }
        if (rt->rt_src == 0) {
                printk("No source info?\n");
                ip_rt_put(rt);
                return NULL;
        }

        skb->dst = &rt->u.dst;
        skb->dev = dev;

        skb_reserve(skb,LL_RESERVED_SPACE(dev));

        /* configure the ipheader */
 skb->nh.iph = iph = (struct iphdr *)skb_put(skb,sizeof(struct iphdr)+4);

        iph->version    = 4;
        iph->ihl        = (sizeof(struct iphdr)+4)>>2;
        iph->protocol   = IPPROTO_EXP;
        iph->saddr      = rt->rt_src;
        iph->daddr      = htonl(rt->rt_dst);
        iph->tos        = 0;
        iph->ttl        = 5;
        ip_select_ident(iph,&rt->u.dst,NULL);
        iplen = skb->tail - (unsigned char *)skb->nh.iph;
        iph->tot_len = htons(iplen);
        ip_send_check(iph);


        skb->pkt_type = PACKET_OUTGOING;

        mac = (struct ethhdr *)oldskb->mac.raw;

        /* configure the mac addresses */
        if(dev->hard_header && dev->hard_header(skb,dev,ETH_P_IP,mac->h_source,mac->h_dest,skb->len) < 0)
                printk("hard header worked?");

        return skb;
}

=== the packets look like this and they should be the same save for swapped src/dst:

00:12:3f:56:bf:1c > 00:0c:29:71:67:1e, ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl  20, id 153, offset 0, flags [DF], proto: unknown (253), length: 24) src.src.src.src > dst.dst.dst.dst:  ip-proto-253 4
        0x0000:  4500 0018 0099 4000 14fd c4ed 8cbe 414d  E.....@.......AM
        0x0010:  8cbe 4599 001b ddee 0000 0000 0000 0000  ..E.............
        0x0020:  0000 0000 0000 0000 0000 0000 0000       ..............
00:0c:29:71:67:1e > 00:12:3f:56:bf:1c, ethertype IPv4 (0x0800), length 38: (tos 0x0, ttl   5, id 0, offset 65528, flags [+, DF, rsvd], proto: unknown (253), length: 24, options ( unknown (243) len 252[|ip] )) src.src.src.src > dst.dst.dst.dst: ip-proto-253
        0x0000:  4600 0018 0000 ffff 05fd 1f8a 8cbe 4599  F.............E.
        0x0010:  8cbe 414d f3fc 0000                      ..AM....


^ permalink raw reply

* Re: [-mm patch] make tcp_splice_data_recv() static
From: Jens Axboe @ 2007-09-12 17:44 UTC (permalink / raw)
  To: David Miller; +Cc: bunk, akpm, linux-kernel, netdev
In-Reply-To: <20070912.062102.116370922.davem@davemloft.net>

On Wed, Sep 12 2007, David Miller wrote:
> From: Adrian Bunk <bunk@kernel.org>
> Date: Sun, 9 Sep 2007 22:25:58 +0200
> 
> > On Fri, Aug 31, 2007 at 09:58:22PM -0700, Andrew Morton wrote:
> > >...
> > > Changes since 2.6.23-rc3-mm1:
> > >...
> > >  git-block.patch
> > >...
> > >  git trees
> > >...
> > 
> > tcp_splice_data_recv() can become static.
> > 
> > Signed-off-by: Adrian Bunk <bunk@kernel.org>
> 
> I'll let Jens or similar pick this one up since it
> obviously won't apply to my tree.

I'll shove it in my #splice-net branch, where it originates from.

-- 
Jens Axboe


^ permalink raw reply

* [PATCH 1/3 v4] rfkill: Remove IRDA
From: Ivo van Doorn @ 2007-09-12 18:14 UTC (permalink / raw)
  To: davem; +Cc: Dmitry Torokhov, netdev, Inaky Perez-Gonzalez
In-Reply-To: <200709101954.50852.IvDoorn@gmail.com>

As Dmitry pointed out earlier, rfkill-input.c
doesn't support irda because there are no users
and we shouldn't add unrequired KEY_ defines.

However, RFKILL_TYPE_IRDA was defined in the
rfkill.h header file and would confuse people
about whether it is implemented or not.

This patch removes IRDA support completely,
so it can be added whenever a driver wants the
feature.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
CC: Dmitry Torokhov <dmitry.torokhov@gmail.com>
CC: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 include/linux/rfkill.h |    8 +++-----
 net/rfkill/Kconfig     |    2 +-
 net/rfkill/rfkill.c    |    5 +----
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index a8a6ea8..c4546e1 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -31,13 +31,11 @@
  * enum rfkill_type - type of rfkill switch.
  * RFKILL_TYPE_WLAN: switch is no a Wireless network devices.
  * RFKILL_TYPE_BlUETOOTH: switch is on a bluetooth device.
- * RFKILL_TYPE_IRDA: switch is on an infrared devices.
  */
 enum rfkill_type {
-	RFKILL_TYPE_WLAN = 0,
-	RFKILL_TYPE_BLUETOOTH = 1,
-	RFKILL_TYPE_IRDA = 2,
-	RFKILL_TYPE_MAX = 3,
+	RFKILL_TYPE_WLAN ,
+	RFKILL_TYPE_BLUETOOTH,
+	RFKILL_TYPE_MAX,
 };
 
 enum rfkill_state {
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 8b31759..d28a6d9 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -5,7 +5,7 @@ menuconfig RFKILL
 	tristate "RF switch subsystem support"
 	help
 	  Say Y here if you want to have control over RF switches
-	  found on many WiFi, Bluetooth and IRDA cards.
+	  found on many WiFi and Bluetooth cards.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rfkill.
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index db3395b..50e0102 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -106,9 +106,6 @@ static ssize_t rfkill_type_show(struct device *dev,
 	case RFKILL_TYPE_BLUETOOTH:
 		type = "bluetooth";
 		break;
-	case RFKILL_TYPE_IRDA:
-		type = "irda";
-		break;
 	default:
 		BUG();
 	}
@@ -281,7 +278,7 @@ static void rfkill_remove_switch(struct rfkill *rfkill)
 /**
  * rfkill_allocate - allocate memory for rfkill structure.
  * @parent: device that has rf switch on it
- * @type: type of the switch (wlan, bluetooth, irda)
+ * @type: type of the switch (RFKILL_TYPE_*)
  *
  * This function should be called by the network driver when it needs
  * rfkill structure. Once the structure is allocated the driver shoud
-- 
1.5.3


^ permalink raw reply related

* [PATCH 2/3 v4] rfkill: Add support for ultrawideband
From: Ivo van Doorn @ 2007-09-12 18:14 UTC (permalink / raw)
  To: davem; +Cc: Dmitry Torokhov, netdev, Inaky Perez-Gonzalez
In-Reply-To: <200709101954.50852.IvDoorn@gmail.com>

This patch will add support for UWB keys to rfkill,
support for this has been requested by Inaky.

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
CC: Dmitry Torokhov <dmitry.torokhov@gmail.com>
CC: Inaky Perez-Gonzalez <inaky@linux.intel.com>
---
 include/linux/input.h     |    1 +
 include/linux/rfkill.h    |    2 ++
 net/rfkill/rfkill-input.c |    9 +++++++++
 net/rfkill/rfkill.c       |    3 +++
 4 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/include/linux/input.h b/include/linux/input.h
index cf2b561..8e5828d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -360,6 +360,7 @@ struct input_absinfo {
 
 #define KEY_BLUETOOTH		237
 #define KEY_WLAN		238
+#define KEY_UWB			239
 
 #define KEY_UNKNOWN		240
 
diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index c4546e1..f9a50da 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -31,10 +31,12 @@
  * enum rfkill_type - type of rfkill switch.
  * RFKILL_TYPE_WLAN: switch is no a Wireless network devices.
  * RFKILL_TYPE_BlUETOOTH: switch is on a bluetooth device.
+ * RFKILL_TYPE_UWB: switch is on a Ultra wideband device.
  */
 enum rfkill_type {
 	RFKILL_TYPE_WLAN ,
 	RFKILL_TYPE_BLUETOOTH,
+	RFKILL_TYPE_UWB,
 	RFKILL_TYPE_MAX,
 };
 
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index 9f746be..8e4516a 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -81,6 +81,7 @@ static void rfkill_schedule_toggle(struct rfkill_task *task)
 
 static DEFINE_RFKILL_TASK(rfkill_wlan, RFKILL_TYPE_WLAN);
 static DEFINE_RFKILL_TASK(rfkill_bt, RFKILL_TYPE_BLUETOOTH);
+static DEFINE_RFKILL_TASK(rfkill_uwb, RFKILL_TYPE_UWB);
 
 static void rfkill_event(struct input_handle *handle, unsigned int type,
 			unsigned int code, int down)
@@ -93,6 +94,9 @@ static void rfkill_event(struct input_handle *handle, unsigned int type,
 		case KEY_BLUETOOTH:
 			rfkill_schedule_toggle(&rfkill_bt);
 			break;
+		case KEY_UWB:
+			rfkill_schedule_toggle(&rfkill_uwb);
+			break;
 		default:
 			break;
 		}
@@ -148,6 +152,11 @@ static const struct input_device_id rfkill_ids[] = {
 		.evbit = { BIT(EV_KEY) },
 		.keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) },
 	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT(EV_KEY) },
+		.keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) },
+	},
 	{ }
 };
 
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index 50e0102..03ed7fd 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -106,6 +106,9 @@ static ssize_t rfkill_type_show(struct device *dev,
 	case RFKILL_TYPE_BLUETOOTH:
 		type = "bluetooth";
 		break;
+	case RFKILL_TYPE_UWB:
+		type = "ultrawideband";
+		break;
 	default:
 		BUG();
 	}
-- 
1.5.3


^ permalink raw reply related

* [PATCH 3/3 v4] rfkill: Add rfkill documentation
From: Ivo van Doorn @ 2007-09-12 18:14 UTC (permalink / raw)
  To: davem; +Cc: Dmitry Torokhov, netdev, Inaky Perez-Gonzalez, Randy Dunlap
In-Reply-To: <200709101954.50852.IvDoorn@gmail.com>

Add a documentation file which contains
a short description about rfkill with some
notes about drivers and the userspace interface.

Changes since v1 and v2:
 - Spellchecking

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
---
 Documentation/rfkill.txt |   89 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 89 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/rfkill.txt

diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
new file mode 100644
index 0000000..a83ff23
--- /dev/null
+++ b/Documentation/rfkill.txt
@@ -0,0 +1,89 @@
+rfkill - RF switch subsystem support
+====================================
+
+1 Implementation details
+2 Driver support
+3 Userspace support
+
+===============================================================================
+1: Implementation details
+
+The rfkill switch subsystem offers support for keys often found on laptops
+to enable wireless devices like WiFi and Bluetooth.
+
+This is done by providing the user 3 possibilities:
+ 1 - The rfkill system handles all events; userspace is not aware of events.
+ 2 - The rfkill system handles all events; userspace is informed about the events.
+ 3 - The rfkill system does not handle events; userspace handles all events.
+
+The buttons to enable and disable the wireless radios are important in
+situations where the user is for example using his laptop on a location where
+wireless radios _must_ be disabled (e.g. airplanes).
+Because of this requirement, userspace support for the keys should not be
+made mandatory. Because userspace might want to perform some additional smarter
+tasks when the key is pressed, rfkill still provides userspace the possibility
+to take over the task to handle the key events.
+
+The system inside the kernel has been split into 2 separate sections:
+	1 - RFKILL
+	2 - RFKILL_INPUT
+
+The first option enables rfkill support and will make sure userspace will
+be notified of any events through the input device. It also creates several
+sysfs entries which can be used by userspace. See section "Userspace support".
+
+The second option provides an rfkill input handler. This handler will
+listen to all rfkill key events and will toggle the radio accordingly.
+With this option enabled userspace could either do nothing or simply
+perform monitoring tasks.
+
+====================================
+2: Driver support
+
+To build a driver with rfkill subsystem support, the driver should
+depend on the Kconfig symbol RFKILL; it should _not_ depend on
+RFKILL_INPUT.
+
+Unless key events trigger an interrupt to which the driver listens, polling
+will be required to determine the key state changes. For this the input
+layer provides the input-polldev handler.
+
+A driver should implement a few steps to correctly make use of the
+rfkill subsystem. First for non-polling drivers:
+
+	- rfkill_allocate()
+	- input_allocate_device()
+	- rfkill_register()
+	- input_register_device()
+
+For polling drivers:
+
+	- rfkill_allocate()
+	- input_allocate_polled_device()
+	- rfkill_register()
+	- input_register_polled_device()
+
+When a key event has been detected, the correct event should be
+sent over the input device which has been registered by the driver.
+
+====================================
+3: Userspace support
+
+For each key an input device will be created which will send out the correct
+key event when the rfkill key has been pressed.
+
+The following sysfs entries will be created:
+
+	name: Name assigned by driver to this key (interface or driver name).
+	type: Name of the key type ("wlan", "bluetooth", etc).
+	state: Current state of the key. 1: On, 0: Off.
+	claim: 1: Userspace handles events, 0: Kernel handles events
+
+Both the "state" and "claim" entries are also writable. For the "state" entry
+this means that when 1 or 0 is written all radios, not yet in the requested
+state, will be toggled accordingly.
+For the "claim" entry writing 1 to it means that the kernel no longer handles
+key events even though RFKILL_INPUT input was enabled. When "claim" has been
+set to 0, userspace should make sure that it listens for the input events or
+check the sysfs "state" entry regularly to correctly perform the required
+tasks when the rfkill key is pressed.
-- 
1.5.3


^ permalink raw reply related

* Re: [PATCH v3] Make the pr_*() family of macros in kernel.h complete
From: Jan Engelhardt @ 2007-09-12 18:04 UTC (permalink / raw)
  To: Emil Medve; +Cc: linux-kernel, netdev, i2c, linux-omap-open-source
In-Reply-To: <11896151802410-git-send-email-Emilian.Medve@Freescale.com>


On Sep 12 2007 11:39, Emil Medve wrote:
>
>Other/Some pr_*() macros are already defined in kernel.h, but pr_err() was defined
>multiple times in several other places

Note http://lkml.org/lkml/2007/8/4/30 .

^ permalink raw reply

* [ofa-general] RE: [PATCH] RDMA/CMA: Use neigh_event_send() to initiate neighbour discovery.
From: Sean Hefty @ 2007-09-12 18:13 UTC (permalink / raw)
  To: 'Steve Wise', rdreier; +Cc: netdev, linux-kernel, general
In-Reply-To: <20070912100025.3190.89259.stgit@dell3.ogc.int>

>RDMA/CMA: Use neigh_event_send() to initiate neighbour discovery.
>
>Calling arp_send() to initiate neighbour discovery (ND) doesn't do the
>full ND protocol.  Namely, it doesn't handle retransmitting the arp
>request if it is dropped. The function neigh_event_send() does all this.
>Without doing full ND, rdma address resolution fails in the presence of
>dropped arp bcast packets.
>
>Signed-off-by: Steve Wise <swise@opengridcomputing.com>

Acked-by: Sean Hefty <sean.hefty@intel.com>

Roland - can you please queue this up for 2.6.24?

^ permalink raw reply

* [PATCH] [-MM, FIX] ixgbe: incorporate napi_struct changes from net-2.6.24.git
From: Auke Kok @ 2007-09-12 18:13 UTC (permalink / raw)
  To: akpm, davem; +Cc: netdev, jeff, jesse.brandeburg

This incorporates the new napi_struct changes into ixgbe.

Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
---

 drivers/net/ixgbe/ixgbe.h      |    1 +
 drivers/net/ixgbe/ixgbe_main.c |   62 +++++++++++++++++-----------------------
 2 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h
index b24803f..c160a7d 100644
--- a/drivers/net/ixgbe/ixgbe.h
+++ b/drivers/net/ixgbe/ixgbe.h
@@ -179,6 +179,7 @@ struct ixgbe_adapter {
 
 	/* TX */
 	struct ixgbe_ring *tx_ring;	/* One per active queue */
+	struct napi_struct napi;
 	u64 restart_queue;
 	u64 lsc_int;
 	u64 hw_tso_ctxt;
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index 23fb1ed..a08a462 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -557,14 +557,15 @@ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data)
 	struct ixgbe_adapter *adapter = rxr->adapter;
 
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value);
-	netif_rx_schedule(adapter->netdev);
+	netif_rx_schedule(adapter->netdev, &adapter->napi);
 	return IRQ_HANDLED;
 }
 
-static int ixgbe_clean_rxonly(struct net_device *netdev, int *budget)
+static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget)
 {
-	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	int work_to_do = min(*budget, netdev->quota);
+	struct ixgbe_adapter *adapter = container_of(napi,
+					struct ixgbe_adapter, napi);
+	struct net_device *netdev = adapter->netdev;
 	int work_done = 0;
 	struct ixgbe_ring *rxr = adapter->rx_ring;
 
@@ -572,22 +573,18 @@ static int ixgbe_clean_rxonly(struct net_device *netdev, int *budget)
 	if (!netif_carrier_ok(netdev))
 		goto quit_polling;
 
-	ixgbe_clean_rx_irq(adapter, rxr, &work_done, work_to_do);
-
-	*budget -= work_done;
-	netdev->quota -= work_done;
+	ixgbe_clean_rx_irq(adapter, rxr, &work_done, budget);
 
 	/* If no Tx and not enough Rx work done, exit the polling mode */
-	if ((work_done == 0) || !netif_running(netdev)) {
+	if ((work_done < budget) || !netif_running(netdev)) {
 quit_polling:
-		netif_rx_complete(netdev);
+		netif_rx_complete(netdev, napi);
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
 			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS,
 					rxr->eims_value);
-		return 0;
 	}
 
-	return 1;
+	return work_done;
 }
 
 /**
@@ -669,7 +666,8 @@ static int ixgbe_setup_msix(struct ixgbe_adapter *adapter)
 		goto release_irqs;
 	}
 
-	adapter->netdev->poll = ixgbe_clean_rxonly;
+	/* FIXME: implement netif_napi_remove() instead */
+	adapter->napi.poll = ixgbe_clean_rxonly;
 	adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;
 	return 0;
 
@@ -713,12 +711,12 @@ static irqreturn_t ixgbe_intr(int irq, void *data)
 		if (!test_bit(__IXGBE_DOWN, &adapter->state))
 			mod_timer(&adapter->watchdog_timer, jiffies);
 	}
-	if (netif_rx_schedule_prep(netdev)) {
+	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
 		/* Disable interrupts and register for poll. The flush of the
 		 * posted write is intentionally left out. */
 		atomic_inc(&adapter->irq_sem);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
-		__netif_rx_schedule(netdev);
+		__netif_rx_schedule(netdev, &adapter->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -1218,7 +1216,7 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
 		ixgbe_configure_msi_and_legacy(adapter);
 
 	clear_bit(__IXGBE_DOWN, &adapter->state);
-	netif_poll_enable(netdev);
+	napi_enable(&adapter->napi);
 	ixgbe_irq_enable(adapter);
 
 	/* bring the link up in the watchdog, this could race with our first
@@ -1412,7 +1410,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 
 	ixgbe_irq_disable(adapter);
 
-	netif_poll_disable(netdev);
+	napi_disable(&adapter->napi);
 	del_timer_sync(&adapter->watchdog_timer);
 
 	netif_carrier_off(netdev);
@@ -1464,11 +1462,12 @@ static void ixgbe_shutdown(struct pci_dev *pdev)
  * ixgbe_clean - NAPI Rx polling callback
  * @adapter: board private structure
  **/
-static int ixgbe_clean(struct net_device *netdev, int *budget)
+static int ixgbe_clean(struct napi_struct *napi, int budget)
 {
-	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	int work_to_do = min(*budget, netdev->quota);
-	int tx_cleaned, work_done = 0;
+	struct ixgbe_adapter *adapter = container_of(napi,
+					struct ixgbe_adapter, napi);
+	struct net_device *netdev = adapter->netdev;
+	int tx_cleaned = 0, work_done = 0;
 
 	/* Keep link state information with original netdev */
 	if (!netif_carrier_ok(adapter->netdev))
@@ -1477,24 +1476,17 @@ static int ixgbe_clean(struct net_device *netdev, int *budget)
 	/* In non-MSIX case, there is no multi-Tx/Rx queue */
 	tx_cleaned = ixgbe_clean_tx_irq(adapter, adapter->tx_ring);
 	ixgbe_clean_rx_irq(adapter, &adapter->rx_ring[0], &work_done,
-			   work_to_do);
-
-	*budget -= work_done;
-	netdev->quota -= work_done;
+			   budget);
 
 	/* If no Tx and not enough Rx work done, exit the polling mode */
-	if ((!tx_cleaned && (work_done == 0)) ||
+	if ((!tx_cleaned && (work_done < budget)) ||
 	    !netif_running(adapter->netdev)) {
 quit_polling:
-		netif_rx_complete(netdev);
-		if (test_bit(__IXGBE_DOWN, &adapter->state))
-			atomic_dec(&adapter->irq_sem);
-		else
-			ixgbe_irq_enable(adapter);
-		return 0;
+		netif_rx_complete(netdev, napi);
+		ixgbe_irq_enable(adapter);
 	}
 
-	return 1;
+	return work_done;
 }
 
 /**
@@ -2592,8 +2584,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	ixgbe_set_ethtool_ops(netdev);
 	netdev->tx_timeout = &ixgbe_tx_timeout;
 	netdev->watchdog_timeo = 5 * HZ;
-	netdev->poll = &ixgbe_clean;
-	netdev->weight = 64;
+	netif_napi_add(netdev, &adapter->napi, ixgbe_clean, 64);
 	netdev->vlan_rx_register = ixgbe_vlan_rx_register;
 	netdev->vlan_rx_add_vid = ixgbe_vlan_rx_add_vid;
 	netdev->vlan_rx_kill_vid = ixgbe_vlan_rx_kill_vid;
@@ -2698,7 +2689,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	netif_carrier_off(netdev);
 	netif_stop_queue(netdev);
-	netif_poll_disable(netdev);
 
 	strcpy(netdev->name, "eth%d");
 	err = register_netdev(netdev);

^ permalink raw reply related

* Re: cc: skb configured but can't get data allocated
From: Macnish @ 2007-09-12 18:21 UTC (permalink / raw)
  To: DHAJOGLO; +Cc: netdev, kernelnewbies
In-Reply-To: <200709121731456120c5e4ad@mail.smumn.edu>

DHAJOGLO escreveu:
> Right,
>     I managed to figure out through looking at the code how to configure and send my skb properly.  For those who say the code is well documented.. you're right.. however I fell like a blind man in an "adult book store" and just don't see what I'm missing.  Below is my code. The packets are making it back and forth but the data is absent on the return trip.  d_out is my little struct with two bytes and a char.
>
>
>
> struct sk_buff *send_back(struct sk_buff *oldskb)
> {
>         struct sk_buff *skb;
>         struct iphdr *iph;
>         struct net_device *dev = (struct net_device *)oldskb->dev;
>         struct rtable *rt;
>         int iplen;
>         struct ethhdr *mac;
>         struct exp_packet *d_out;
>
>         skb = alloc_skb(sizeof(struct exp_packet) + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
>         if(skb == NULL)
>                 return NULL;
>
>         /* insert the data probably need to allocatd differently */
>         d_out = (struct exp_packet *)skb->data;
>         d_out->headbits = 0xAA;
>         d_out->sequence = 0xBB;
>         d_out->payload = 0xCCDD;
>
>         /* from igmp... configure the rt */
>         {
>                 struct flowi fl = { .oif = dev->ifindex,
>                                         .nl_u = { .ip4_u = {
>                                                 .daddr = htonl(oldskb->nh.iph->saddr) } },
>                                                 .proto = IPPROTO_EXP };
>                 if (ip_route_output_key(&rt,&fl)) {
>                         printk("No route?\n");
>                         kfree_skb(skb);
>                         return NULL;
>                 }
>         }
>         if (rt->rt_src == 0) {
>                 printk("No source info?\n");
>                 ip_rt_put(rt);
>                 return NULL;
>         }
>
>         skb->dst = &rt->u.dst;
>         skb->dev = dev;
>
>         skb_reserve(skb,LL_RESERVED_SPACE(dev));
>
>         /* configure the ipheader */
>  skb->nh.iph = iph = (struct iphdr *)skb_put(skb,sizeof(struct iphdr)+4);
>
>         iph->version    = 4;
>         iph->ihl        = (sizeof(struct iphdr)+4)>>2;
>         iph->protocol   = IPPROTO_EXP;
>         iph->saddr      = rt->rt_src;
>         iph->daddr      = htonl(rt->rt_dst);
>         iph->tos        = 0;
>         iph->ttl        = 5;
>         ip_select_ident(iph,&rt->u.dst,NULL);
>         iplen = skb->tail - (unsigned char *)skb->nh.iph;
>         iph->tot_len = htons(iplen);
>         ip_send_check(iph);
>
>
>         skb->pkt_type = PACKET_OUTGOING;
>
>         mac = (struct ethhdr *)oldskb->mac.raw;
>
>         /* configure the mac addresses */
>         if(dev->hard_header && dev->hard_header(skb,dev,ETH_P_IP,mac->h_source,mac->h_dest,skb->len) < 0)
>                 printk("hard header worked?");
>
>         return skb;
> }
>
> === the packets look like this and they should be the same save for swapped src/dst:
>
> 00:12:3f:56:bf:1c > 00:0c:29:71:67:1e, ethertype IPv4 (0x0800), length 60: (tos 0x0, ttl  20, id 153, offset 0, flags [DF], proto: unknown (253), length: 24) src.src.src.src > dst.dst.dst.dst:  ip-proto-253 4
>         0x0000:  4500 0018 0099 4000 14fd c4ed 8cbe 414d  E.....@.......AM
>         0x0010:  8cbe 4599 001b ddee 0000 0000 0000 0000  ..E.............
>         0x0020:  0000 0000 0000 0000 0000 0000 0000       ..............
> 00:0c:29:71:67:1e > 00:12:3f:56:bf:1c, ethertype IPv4 (0x0800), length 38: (tos 0x0, ttl   5, id 0, offset 65528, flags [+, DF, rsvd], proto: unknown (253), length: 24, options ( unknown (243) len 252[|ip] )) src.src.src.src > dst.dst.dst.dst: ip-proto-253
>         0x0000:  4600 0018 0000 ffff 05fd 1f8a 8cbe 4599  F.............E.
>         0x0010:  8cbe 414d f3fc 0000                      ..AM....
>
>
> --
> To unsubscribe from this list: send an email with
> "unsubscribe kernelnewbies" to ecartis@nl.linux.org
> Please read the FAQ at http://kernelnewbies.org/FAQ
>
>
>   
Am I missing something, or has the d_out struct never actually been put inside the skb?

--
Best Regards

Alan Menegotto


^ permalink raw reply

* Re: [PATCH] phy: implement release function
From: Andy Fleming @ 2007-09-12 18:21 UTC (permalink / raw)
  To: Anton Vorontsov; +Cc: netdev, linuxppc-dev
In-Reply-To: <20070912112601.GE15556@localhost.localdomain>


On Sep 12, 2007, at 06:26, Anton Vorontsov wrote:

> Lately I've got this nice badness on mdio bus removal:
>
> Device 'e0103120:06' does not have a release() function, it is  
> broken and must be fixed.
> ------------[ cut here ]------------
> Badness at drivers/base/core.c:107
> NIP: c015c1a8 LR: c015c1a8 CTR: c0157488
> REGS: c34bdcf0 TRAP: 0700   Not tainted  (2.6.23-rc5-g9ebadfbb-dirty)
> MSR: 00029032 <EE,ME,IR,DR>  CR: 24088422  XER: 00000000
> ...
> [c34bdda0] [c015c1a8] device_release+0x78/0x80 (unreliable)
> [c34bddb0] [c01354cc] kobject_cleanup+0x80/0xbc
> [c34bddd0] [c01365f0] kref_put+0x54/0x6c
> [c34bdde0] [c013543c] kobject_put+0x24/0x34
> [c34bddf0] [c015c384] put_device+0x1c/0x2c
> [c34bde00] [c0180e84] mdiobus_unregister+0x2c/0x58
> ...
>
> Though actually there is nothing broken, it just device
> subsystem core expects another "pattern" of resource managment.
>
> This patch implement phy device's release function, thus
> we're getting rid of this badness.
>
> Also small hidden bug fixed, hope none other introduced. ;-)
>
> Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

Acked-by: Andy Fleming <afleming@freescale.com>

Andy

^ permalink raw reply

* Re: ne driver crashes when unloaded in 2.6.22.6
From: Chris Rankin @ 2007-09-12 18:23 UTC (permalink / raw)
  To: Dan Williams; +Cc: netdev
In-Reply-To: <1189459880.15229.18.camel@xo-3E-67-34.localdomain>

--- Dan Williams <dcbw@redhat.com> wrote:
> > > Offhand question, does your ne2000 card support carrier detection?
> > 
> > Err... there is a /sys/class/net/eth0/carrier entry 
> 
> Does it read '0' when you unplug the cable?

Hmm, apparently not. The light on the card goes out though, so could this just be a lack of driver
support?

Cheers,
Chris



      ___________________________________________________________
Yahoo! Answers - Got a question? Someone out there knows the answer. Try it
now.
http://uk.answers.yahoo.com/ 

^ permalink raw reply

* Re: [PATCH] bonding: update some distro-specific documentation
From: Andy Gospodarek @ 2007-09-12 18:18 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: netdev
In-Reply-To: <21001.1189189669@death>

On Fri, Sep 07, 2007 at 11:27:49AM -0700, Jay Vosburgh wrote:
> Andy Gospodarek <andy@greyhouse.net> wrote:
> 
> 	This all looks fine except for one nit (well, request for extra
> detail, really):
> 
> >@@ -802,15 +802,20 @@ BROADCAST=192.168.1.255
> > ONBOOT=yes
> > BOOTPROTO=none
> > USERCTL=no
> >+BONDING_OPTS="mode=balance-alb miimon=100"
> >
> > 	Be sure to change the networking specific lines (IPADDR,
> > NETMASK, NETWORK and BROADCAST) to match your network configuration.
> >+You also need to set the BONDING_OPTS= line to specify the desired
> >+options for your bond0 interface.  Specifying bonding options in this
> >+way is the preferred method for configuring bonding interfaces.
> 
> 	Can you add something here that mentions that, for the
> arp_ip_target option, it has to be supplied as "arp_ip_target=+10.0.0.1"
> and not just "arp_ip_target=10.0.0.1"?  Also, multiple targets require
> multiple instances of the arp_ip_target option; it doesn't work to put
> multiple IP addresses as in the module option (i.e.,
> "arp_ip_target=10.0.0.1,10.0.0.2").
> 
> 	This is necessary because ifup-eth isn't adding the "+" when it
> translates the option for use with sysfs or parsing the multiple IP
> address syntax.
> 

Jay,

I could do that, or we could just take this as-is and get initscripts
fixed up to account for this.  Does that seem reasonable?

I'd rather go that route, and I've even got a patch that *seems* to work
already:

--- initscripts-8.45.17.EL/sysconfig/network-scripts/ifup-eth.orig
+++ initscripts-8.45.17.EL/sysconfig/network-scripts/ifup-eth
@@ -125,7 +125,16 @@ if [ "$ISALIAS" = no ] && is_bonding_dev
     for arg in $BONDING_OPTS ; do   
         key=${arg%%=*};
         value=${arg##*=};
-        echo $value > /sys/class/net/${DEVICE}/bonding/$key
+        OLDIFS=$IFS;
+        IFS=',';
+        if [ "${key}" = "arp_ip_target" ]; then
+           for arp_ip in $value; do
+             echo +$arp_ip > /sys/class/net/${DEVICE}/bonding/$key
+           done
+        else
+           echo $value > /sys/class/net/${DEVICE}/bonding/$key
+        fi
+        IFS=$OLDIFS;
     done
 
     /sbin/ip link set dev ${DEVICE} up

-andy



^ permalink raw reply

* Re: [patch] sunrpc: make closing of old temporary sockets work (was: problems with lockd in 2.6.22.6)
From: J. Bruce Fields @ 2007-09-12 18:42 UTC (permalink / raw)
  To: Neil Brown; +Cc: netdev, trond.myklebust, nfs, linux-kernel, Wolfgang Walter
In-Reply-To: <18151.62510.891210.485277@notabene.brown>

On Wed, Sep 12, 2007 at 04:14:06PM +0200, Neil Brown wrote:
> So it is in 2.6.21 and later and should probably go to .stable for .21
> and .22.
> 
> Bruce:  for you :-)

OK, thanks!  But, (as is alas often the case) I'm still confused:

>  		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
>  			continue;
> -		if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags))
> +		if (atomic_read(&svsk->sk_inuse) > 1
> +		    || test_bit(SK_BUSY, &svsk->sk_flags))
>  			continue;
>  		atomic_inc(&svsk->sk_inuse);
>  		list_move(le, &to_be_aged);

What is it that ensures svsk->sk_inuse isn't incremented or SK_BUSY set
after that test?  Not all the code that does either of those is under
the same serv->sv_lock lock that this code is.

--b.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply

* RE: [PATCH v3] Make the pr_*() family of macros in kernel.h complete
From: Medve Emilian-EMMEDVE1 @ 2007-09-12 18:44 UTC (permalink / raw)
  To: linux-kernel, netdev, i2c, linux-omap-open-source
In-Reply-To: <Pine.LNX.4.64.0709122003410.22467@fbirervta.pbzchgretzou.qr>

> -----Original Message-----
> From: Jan Engelhardt [mailto:jengelh@computergmbh.de] 
> Sent: Wednesday, September 12, 2007 1:04 PM
> To: Medve Emilian-EMMEDVE1
> Cc: linux-kernel@vger.kernel.org; netdev@vger.kernel.org; 
> i2c@lm-sensors.org; linux-omap-open-source@linux.omap.com
> Subject: Re: [PATCH v3] Make the pr_*() family of macros in 
> kernel.h complete
> 
> 
> On Sep 12 2007 11:39, Emil Medve wrote:
> >
> >Other/Some pr_*() macros are already defined in kernel.h, 
> but pr_err() was defined
> >multiple times in several other places
> 
> Note http://lkml.org/lkml/2007/8/4/30 .

Hi Jan,

I didn't see that thread before I submitted the patch... I don't want to
start the conversation from the top 'cause this is not a patch that
fixes some existing showstopper... however, I have two comments to make.
First, this patch doesn't have the trailing "\n" problem that one had.
Second, it seems that multiple people at different moments in time have
the same idea to complete and, more important, to use the pr_*() family
of macros because I guess it makes it easier to use them. In addition,
it would make it less likely for people to forget using the loglevel in
their kernel messages.

Is trying to add them a dead path? Thing is that the existing ones
suggest to, more or less, new people to the kernel that they should use
them and then discover the incompleteness/inconsistency...

Cheers,
Emil.

^ permalink raw reply

* Re: [PATCH] bonding: update some distro-specific documentation
From: Jay Vosburgh @ 2007-09-12 18:47 UTC (permalink / raw)
  To: Andy Gospodarek; +Cc: netdev
In-Reply-To: <20070912181839.GD3754@gospo.rdu.redhat.com>

Andy Gospodarek <andy@greyhouse.net> wrote:

>I could do that, or we could just take this as-is and get initscripts
>fixed up to account for this.  Does that seem reasonable?

	Changing initscripts is fine, too, but is there then going to be
some (perhaps small) installed base for which the documentation will be
incorrect?

>I'd rather go that route, and I've even got a patch that *seems* to work
>already:
>
>--- initscripts-8.45.17.EL/sysconfig/network-scripts/ifup-eth.orig
>+++ initscripts-8.45.17.EL/sysconfig/network-scripts/ifup-eth
>@@ -125,7 +125,16 @@ if [ "$ISALIAS" = no ] && is_bonding_dev
>     for arg in $BONDING_OPTS ; do   
>         key=${arg%%=*};
>         value=${arg##*=};
>-        echo $value > /sys/class/net/${DEVICE}/bonding/$key
>+        OLDIFS=$IFS;
>+        IFS=',';
>+        if [ "${key}" = "arp_ip_target" ]; then
>+           for arp_ip in $value; do
>+             echo +$arp_ip > /sys/class/net/${DEVICE}/bonding/$key
>+           done
>+        else
>+           echo $value > /sys/class/net/${DEVICE}/bonding/$key
>+        fi
>+        IFS=$OLDIFS;
>     done
>
>     /sbin/ip link set dev ${DEVICE} up

	That looks like it should do the right thing, although I didn't
actually try it.  The other bonding sysfs thingies that use the "+" type
of syntax don't appear in BONDING_OPTS.

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com



^ permalink raw reply

* Re: [PATCH] ixgbe: driver for Intel(R) 82598 PCI-Express 10GbE adapters (v4)
From: Kok, Auke @ 2007-09-12 18:47 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: David Miller, auke-jan.h.kok, netdev, ayyappan.veeraiyan, akpm,
	arjan, hch, billfink, shemminger, rick.jones2, inaky, mb, nhorman
In-Reply-To: <46E1D282.50902@garzik.org>

Jeff Garzik wrote:
> David Miller wrote:
>> From: "Kok, Auke" <auke-jan.h.kok@intel.com>
>> Date: Thu, 06 Sep 2007 11:31:47 -0700
>>
>>> Also available through git:// and http:// here:
>>>
>>>    http://foo-projects.org/~sofar/ixgbe-20070905-submission.patch
>>>    http://foo-projects.org/~sofar/ixgbe-20070905-submission.patch.bz2
>>>    (git-am formatted!)
>>>
>>>    git://lost.foo-projects.org/~ahkok/linux-2.6 ixgbe-20070905-submission
 >>
>> To be honest I have absolutely no problems with this driver and we
>> should just cut the crap and merge it in now.
> 
> AFAICS nobody objected to it, and Auke cleaned it up a la e1000e, which 
> got queued during KS.

Jeff,

as of today ixgbe is ready for a possible merge with davem's net-2.6.24 - I sent 
you the patch earlier today as well as to David and Andrew.

Since this takes away the only obstruction that I can see that would prevent a 
normal merge, I would like to know if you are thinking of merging this driver 
for 2.6.24 or not. No pressure of course, but the approaching merge window is 
looming and postponing this driver for a whole kernel version just because you 
accidentally missed or forgot it would be a shame.

Auke

^ permalink raw reply

* Re: [PATCH]: xfrm audit calls
From: Valdis.Kletnieks @ 2007-09-12 18:56 UTC (permalink / raw)
  To: Joy Latten; +Cc: netdev, linux-audit, davem
In-Reply-To: <200709120003.l8C03E4G004949@faith.austin.ibm.com>

[-- Attachment #1: Type: text/plain, Size: 292 bytes --]

On Tue, 11 Sep 2007 19:03:14 CDT, Joy Latten said:
> This patch modifies the current ipsec audit layer
> by breaking it up into purpose driven audit calls.
>
> So far, the only audit calls made are when add/delete
> an SA/policy.

What other audit calls do you envision adding in the future?

[-- Attachment #2: Type: application/pgp-signature, Size: 226 bytes --]

^ permalink raw reply

* [PATCH] CIPSO: remove duplicated code in the cipso_v4_*_getattr() functions
From: Paul Moore @ 2007-09-12 19:29 UTC (permalink / raw)
  To: netdev, selinux
In-Reply-To: <20070912192305.18318.11911.stgit@flek.americas.hpqcorp.net>

The bulk of the CIPSO option parsing/processing in the cipso_v4_sock_getattr()
and cipso_v4_skb_getattr() functions are identical, the only real difference
being where the functions obtain the CIPSO option itself.  This patch creates
a new function, cipso_v4_getattr(), which contains the common CIPSO option
parsing/processing code and modifies the existing functions to call this new
helper function.

Signed-off-by: Paul Moore <paul.moore@hp.com>
---

 net/ipv4/cipso_ipv4.c |  115 ++++++++++++++++++-------------------------------
 1 files changed, 42 insertions(+), 73 deletions(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ab56a05..805a78e 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1831,68 +1831,75 @@ socket_setattr_failure:
 }
 
 /**
- * cipso_v4_sock_getattr - Get the security attributes from a sock
- * @sk: the sock
+ * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
+ * @cipso: the CIPSO v4 option
  * @secattr: the security attributes
  *
  * Description:
- * Query @sk to see if there is a CIPSO option attached to the sock and if
- * there is return the CIPSO security attributes in @secattr.  This function
- * requires that @sk be locked, or privately held, but it does not do any
- * locking itself.  Returns zero on success and negative values on failure.
+ * Inspect @cipso and return the security attributes in @secattr.  Returns zero
+ * on success and negative values on failure.
  *
  */
-int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
+static int cipso_v4_getattr(const unsigned char *cipso,
+			    struct netlbl_lsm_secattr *secattr)
 {
 	int ret_val = -ENOMSG;
-	struct inet_sock *sk_inet;
-	unsigned char *cipso_ptr;
 	u32 doi;
 	struct cipso_v4_doi *doi_def;
 
-	sk_inet = inet_sk(sk);
-	if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0)
-		return -ENOMSG;
-	cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso -
-		sizeof(struct iphdr);
-	ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr);
-	if (ret_val == 0)
-		return ret_val;
+	if (cipso_v4_cache_check(cipso, cipso[1], secattr) == 0)
+		return 0;
 
-	doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2]));
+	doi = ntohl(get_unaligned((__be32 *)&cipso[2]));
 	rcu_read_lock();
 	doi_def = cipso_v4_doi_search(doi);
-	if (doi_def == NULL) {
-		rcu_read_unlock();
-		return -ENOMSG;
-	}
-
+	if (doi_def == NULL)
+		goto getattr_return;
 	/* XXX - This code assumes only one tag per CIPSO option which isn't
 	 * really a good assumption to make but since we only support the MAC
 	 * tags right now it is a safe assumption. */
-	switch (cipso_ptr[6]) {
+	switch (cipso[6]) {
 	case CIPSO_V4_TAG_RBITMAP:
-		ret_val = cipso_v4_parsetag_rbm(doi_def,
-						&cipso_ptr[6],
-						secattr);
+		ret_val = cipso_v4_parsetag_rbm(doi_def, &cipso[6], secattr);
 		break;
 	case CIPSO_V4_TAG_ENUM:
-		ret_val = cipso_v4_parsetag_enum(doi_def,
-						 &cipso_ptr[6],
-						 secattr);
+		ret_val = cipso_v4_parsetag_enum(doi_def, &cipso[6], secattr);
 		break;
 	case CIPSO_V4_TAG_RANGE:
-		ret_val = cipso_v4_parsetag_rng(doi_def,
-						&cipso_ptr[6],
-						secattr);
+		ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr);
 		break;
 	}
-	rcu_read_unlock();
 
+getattr_return:
+	rcu_read_unlock();
 	return ret_val;
 }
 
 /**
+ * cipso_v4_sock_getattr - Get the security attributes from a sock
+ * @sk: the sock
+ * @secattr: the security attributes
+ *
+ * Description:
+ * Query @sk to see if there is a CIPSO option attached to the sock and if
+ * there is return the CIPSO security attributes in @secattr.  This function
+ * requires that @sk be locked, or privately held, but it does not do any
+ * locking itself.  Returns zero on success and negative values on failure.
+ *
+ */
+int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
+{
+	struct ip_options *opt;
+
+	opt = inet_sk(sk)->opt;
+	if (opt == NULL || opt->cipso == 0)
+		return -ENOMSG;
+
+	return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr),
+				secattr);
+}
+
+/**
  * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option
  * @skb: the packet
  * @secattr: the security attributes
@@ -1905,45 +1912,7 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
 int cipso_v4_skbuff_getattr(const struct sk_buff *skb,
 			    struct netlbl_lsm_secattr *secattr)
 {
-	int ret_val = -ENOMSG;
-	unsigned char *cipso_ptr;
-	u32 doi;
-	struct cipso_v4_doi *doi_def;
-
-	cipso_ptr = CIPSO_V4_OPTPTR(skb);
-	if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0)
-		return 0;
-
-	doi = ntohl(get_unaligned((__be32 *)&cipso_ptr[2]));
-	rcu_read_lock();
-	doi_def = cipso_v4_doi_search(doi);
-	if (doi_def == NULL)
-		goto skbuff_getattr_return;
-
-	/* XXX - This code assumes only one tag per CIPSO option which isn't
-	 * really a good assumption to make but since we only support the MAC
-	 * tags right now it is a safe assumption. */
-	switch (cipso_ptr[6]) {
-	case CIPSO_V4_TAG_RBITMAP:
-		ret_val = cipso_v4_parsetag_rbm(doi_def,
-						&cipso_ptr[6],
-						secattr);
-		break;
-	case CIPSO_V4_TAG_ENUM:
-		ret_val = cipso_v4_parsetag_enum(doi_def,
-						 &cipso_ptr[6],
-						 secattr);
-		break;
-	case CIPSO_V4_TAG_RANGE:
-		ret_val = cipso_v4_parsetag_rng(doi_def,
-						&cipso_ptr[6],
-						secattr);
-		break;
-	}
-
-skbuff_getattr_return:
-	rcu_read_unlock();
-	return ret_val;
+	return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr);
 }
 
 /*


^ permalink raw reply related

* [PATCH] Small cleanup for 2.6.24 and a git tree test
From: Paul Moore @ 2007-09-12 19:29 UTC (permalink / raw)
  To: netdev, selinux

The cleanup patch is pretty trivial and is explained in its own email, but
essentially it is just moving similar code into a new function and then
modifying the original functions to call the newly created function.

The new and exciting thing is that I actually decided I should probably know
how to do more than just clone git trees, which means you can get this patch
by pulling from the brand spankin' new labeled networking git tree.  The tree
looks okay to me but it's possible I munged something, if that is the case let
me know and I'll try to fix it ... I'd like to get this working at some point.

 * git://git.infradead.org/users/pcmoore/lblnet-2.6

Thanks.

-- 
paul moore
linux security @ hp

^ permalink raw reply

* Re: [PATCH]: xfrm audit calls
From: Joy Latten @ 2007-09-12 19:35 UTC (permalink / raw)
  To: Valdis.Kletnieks; +Cc: netdev, linux-audit, davem
In-Reply-To: <30135.1189623401@turing-police.cc.vt.edu>

On Wed, 2007-09-12 at 14:56 -0400, Valdis.Kletnieks@vt.edu wrote:
> On Tue, 11 Sep 2007 19:03:14 CDT, Joy Latten said:
> > This patch modifies the current ipsec audit layer
> > by breaking it up into purpose driven audit calls.
> >
> > So far, the only audit calls made are when add/delete
> > an SA/policy.
> 
> What other audit calls do you envision adding in the future?

Those specified in updated RFCs for ipsec, mainly 4301,
4302 and 4303. 

Joy

^ permalink raw reply

* Re: [PATCH] bonding: update some distro-specific documentation
From: Andy Gospodarek @ 2007-09-12 19:40 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: netdev
In-Reply-To: <1085.1189622856@death>

On 9/12/07, Jay Vosburgh <fubar@us.ibm.com> wrote:
> Andy Gospodarek <andy@greyhouse.net> wrote:
>
> >I could do that, or we could just take this as-is and get initscripts
> >fixed up to account for this.  Does that seem reasonable?
>
>         Changing initscripts is fine, too, but is there then going to be
> some (perhaps small) installed base for which the documentation will be
> incorrect?
>

Sure, but it could be wrong if we updated the kernel doc again after
initscripts was fixed and some chose not to update initscripts on
their boxes.  There could be a note about running a particular version
of initscripts, but I'd rather not start down the path of turning the
kernel doc into something that looks like distro release notes.

Either way it seems like there could be a chance for something to be
out of sync, so I guess we just have to pick the one that seems 'least
bad.'  I'd vote for whatever creates the fewest kernel patches. ;-)

^ permalink raw reply

* Re: [patch] sunrpc: make closing of old temporary sockets work (was: problems with lockd in 2.6.22.6)
From: Wolfgang Walter @ 2007-09-12 19:40 UTC (permalink / raw)
  To: J. Bruce Fields; +Cc: Neil Brown, trond.myklebust, netdev, nfs, linux-kernel
In-Reply-To: <20070912184222.GG4274@fieldses.org>

On Wednesday 12 September 2007, J. Bruce Fields wrote:
> On Wed, Sep 12, 2007 at 04:14:06PM +0200, Neil Brown wrote:
> > So it is in 2.6.21 and later and should probably go to .stable for .21
> > and .22.
> > 
> > Bruce:  for you :-)
> 
> OK, thanks!  But, (as is alas often the case) I'm still confused:
> 
> >  		if (!test_and_set_bit(SK_OLD, &svsk->sk_flags))
> >  			continue;
> > -		if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags))
> > +		if (atomic_read(&svsk->sk_inuse) > 1
> > +		    || test_bit(SK_BUSY, &svsk->sk_flags))
> >  			continue;
> >  		atomic_inc(&svsk->sk_inuse);
> >  		list_move(le, &to_be_aged);
> 
> What is it that ensures svsk->sk_inuse isn't incremented or SK_BUSY set
> after that test?  Not all the code that does either of those is under
> the same serv->sv_lock lock that this code is.
> 

This should not matter - SK_CLOSED may be set at any time.

svc_age_temp_sockets only detaches the socket, sets SK_CLOSED and then 
enqueues it. If SK_BUSY is set it's already enqueued and svc_sock_enqueue 
ensures that it is not enqueued twice.

Regards,
-- 
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts

^ permalink raw reply

* [RFC v2 PATCH 0/2] Add RCU locking to SCTP address management
From: Vlad Yasevich @ 2007-09-12 19:46 UTC (permalink / raw)
  To: netdev; +Cc: lksctp-developers

Ok, this is version 2 of the patch that incorporates comments from
Sridhar Samudrala and Paul McKenney.

The changes incorporated are:
 1.  Add locking around the modification of the global sctp_local_addr_list
 when processing the notifiers.  After looking around, it is possible for
 the IPv4 and IPv6 notifiers to be called at the same time, which means that
 we need a spin lock.

 2.  After Paul's explanation of why writers would want to call
 rcu_read_lock, it's apparent that we really don't need that in our usage.
 I've removed all that I could find and consider safe.

 3. I took Paul's suggestion of passing an explicit rcu callback when
 removing entries from the list since these can be done in different
 contexts.  This made the removal code rather simple.

Things I've left behind:
 1.  The valid flag remains.  After discussing the virtues with Paul Moore
 (who used the same functionality in Netlabel code), I think that the
 valid flag slightly reduces the possibility that the reader will use
 an entry that's about to be removed.  It's a good thing in our case.
 It doesn't really harm anything if a reader used a !valid entry, but
 I'd like to reduce that chance.

I would appreciate any further comments

Thanks
-vlad

^ permalink raw reply

* [RFC v2 PATCH 2/2] SCTP: Convert bind_addr_list locking to RCU
From: Vlad Yasevich @ 2007-09-12 19:46 UTC (permalink / raw)
  To: netdev; +Cc: lksctp-developers, Vlad Yasevich
In-Reply-To: <11896263983281-git-send-email-vladislav.yasevich@hp.com>

Since the sctp_sockaddr_entry is now RCU enabled as part of
the patch to synchronize sctp_localaddr_list, it makes sense to
change all handling of these entries to RCU.  This includes the
sctp_bind_addrs structure and its list of bound addresses.

This list is currently protected by an external rw_lock and that
looks like an overkill.  There are only 2 writers to the list:
bind()/bindx() calls, and BH processing of ASCONF-ACK chunks.
These are already serialized via the socket lock, so they will
not step on each other.  These are also relatively rare, so we
should be good with RCU.

The readers are varied and they are easily converted to RCU.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h |    7 +--
 net/sctp/associola.c       |   14 +-----
 net/sctp/bind_addr.c       |   68 ++++++++++++++++++++----------
 net/sctp/endpointola.c     |   27 +++---------
 net/sctp/ipv6.c            |   12 ++---
 net/sctp/protocol.c        |   25 ++++-------
 net/sctp/sm_make_chunk.c   |   18 +++-----
 net/sctp/socket.c          |   98 ++++++++++++-------------------------------
 8 files changed, 106 insertions(+), 163 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a89e361..c2fe2dc 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1155,7 +1155,9 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
 			int flags);
 int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
 		       __u8 use_as_src, gfp_t gfp);
-int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
+int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
+			void (*rcu_call)(struct rcu_head *,
+					  void (*func)(struct rcu_head *)));
 int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
 			 struct sctp_sock *);
 union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
@@ -1226,9 +1228,6 @@ struct sctp_ep_common {
 	 * bind_addr.address_list is our set of local IP addresses.
 	 */
 	struct sctp_bind_addr bind_addr;
-
-	/* Protection during address list comparisons. */
-	rwlock_t   addr_lock;
 };
 
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2ad1caf..9bad8ba 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	/* Initialize the bind addr area.  */
 	sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
-	rwlock_init(&asoc->base.addr_lock);
 
 	asoc->state = SCTP_STATE_CLOSED;
 
@@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
 {
 	struct sctp_transport *transport;
 
-	sctp_read_lock(&asoc->base.addr_lock);
-
 	if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
 	    (htons(asoc->peer.port) == paddr->v4.sin_port)) {
 		transport = sctp_assoc_lookup_paddr(asoc, paddr);
@@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
 	transport = NULL;
 
 out:
-	sctp_read_unlock(&asoc->base.addr_lock);
 	return transport;
 }
 
@@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
 int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
 			    const union sctp_addr *laddr)
 {
-	int found;
+	int found = 0;
 
-	sctp_read_lock(&asoc->base.addr_lock);
 	if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) &&
 	    sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
-				 sctp_sk(asoc->base.sk))) {
+				 sctp_sk(asoc->base.sk)))
 		found = 1;
-		goto out;
-	}
 
-	found = 0;
-out:
-	sctp_read_unlock(&asoc->base.addr_lock);
 	return found;
 }
 
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 7fc369f..14f4c02 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -167,7 +167,11 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
 
 	INIT_LIST_HEAD(&addr->list);
 	INIT_RCU_HEAD(&addr->rcu);
-	list_add_tail(&addr->list, &bp->address_list);
+
+	/* We always hold a socket lock when calling this function,
+	 * so rcu_read_lock is not needed.
+	 */
+	list_add_tail_rcu(&addr->list, &bp->address_list);
 	SCTP_DBG_OBJCNT_INC(addr);
 
 	return 0;
@@ -176,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
 /* Delete an address from the bind address list in the SCTP_bind_addr
  * structure.
  */
-int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
+int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr,
+			void (*rcu_call)(struct rcu_head *head,
+					 void (*func)(struct rcu_head *head)))
 {
-	struct list_head *pos, *temp;
-	struct sctp_sockaddr_entry *addr;
+	struct sctp_sockaddr_entry *addr, *temp;
 
-	list_for_each_safe(pos, temp, &bp->address_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	/* We hold the socket lock when calling this function, so
+	 * rcu_read_lock is not needed.
+	 */
+	list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
 		if (sctp_cmp_addr_exact(&addr->a, del_addr)) {
 			/* Found the exact match. */
-			list_del(pos);
-			kfree(addr);
-			SCTP_DBG_OBJCNT_DEC(addr);
-
-			return 0;
+			addr->valid = 0;
+			list_del_rcu(&addr->list);
+			break;
 		}
 	}
 
+	/* Call the rcu callback provided in the args.  This function is
+	 * called by both BH packet processing and user side socket option
+	 * processing, but it works on different lists in those 2 contexts.
+	 * Each context provides it's own callback, whether call_rc_bh()
+	 * or call_rcu(), to make sure that we wait an for appropriate time.
+	 */
+	if (addr && !addr->valid) {
+		rcu_call(&addr->rcu, sctp_local_addr_free);
+		SCTP_DBG_OBJCNT_DEC(addr);
+	}
+
 	return -EINVAL;
 }
 
@@ -302,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
 			 struct sctp_sock *opt)
 {
 	struct sctp_sockaddr_entry *laddr;
-	struct list_head *pos;
-
-	list_for_each(pos, &bp->address_list) {
-		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-		if (opt->pf->cmp_addr(&laddr->a, addr, opt))
-			return 1;
+	int match = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		if (!laddr->valid)
+			continue;
+		if (opt->pf->cmp_addr(&laddr->a, addr, opt)) {
+			match = 1;
+			break;
+		}
 	}
+	rcu_read_unlock();
 
-	return 0;
+	return match;
 }
 
 /* Find the first address in the bind address list that is not present in
@@ -325,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
 	union sctp_addr			*addr;
 	void 				*addr_buf;
 	struct sctp_af			*af;
-	struct list_head		*pos;
 	int				i;
 
-	list_for_each(pos, &bp->address_list) {
-		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+	/* This is only called sctp_send_asconf_del_ip() and we hold
+	 * the socket lock in that code patch, so that address list
+	 * can't change.
+	 */
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
 		addr_buf = (union sctp_addr *)addrs;
 		for (i = 0; i < addrcnt; i++) {
 			addr = (union sctp_addr *)addr_buf;
 			af = sctp_get_af_specific(addr->v4.sin_family);
 			if (!af)
-				return NULL;
+				break;
 
 			if (opt->pf->cmp_addr(&laddr->a, addr, opt))
 				break;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1404a9e..d888332 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 
 	/* Initialize the bind addr area */
 	sctp_bind_addr_init(&ep->base.bind_addr, 0);
-	rwlock_init(&ep->base.addr_lock);
 
 	/* Remember who we are attached to.  */
 	ep->base.sk = sk;
@@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
 struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
 					       const union sctp_addr *laddr)
 {
-	struct sctp_endpoint *retval;
+	struct sctp_endpoint *retval = NULL;
 
-	sctp_read_lock(&ep->base.addr_lock);
 	if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
 		if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
-					 sctp_sk(ep->base.sk))) {
+					 sctp_sk(ep->base.sk)))
 			retval = ep;
-			goto out;
-		}
 	}
 
-	retval = NULL;
-
-out:
-	sctp_read_unlock(&ep->base.addr_lock);
 	return retval;
 }
 
@@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
 	list_for_each(pos, &ep->asocs) {
 		asoc = list_entry(pos, struct sctp_association, asocs);
 		if (rport == asoc->peer.port) {
-			sctp_read_lock(&asoc->base.addr_lock);
 			*transport = sctp_assoc_lookup_paddr(asoc, paddr);
-			sctp_read_unlock(&asoc->base.addr_lock);
 
 			if (*transport)
 				return asoc;
@@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
 int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
 				const union sctp_addr *paddr)
 {
-	struct list_head *pos;
 	struct sctp_sockaddr_entry *addr;
 	struct sctp_bind_addr *bp;
 
-	sctp_read_lock(&ep->base.addr_lock);
 	bp = &ep->base.bind_addr;
-	list_for_each(pos, &bp->address_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
-		if (sctp_has_association(&addr->a, paddr)) {
-			sctp_read_unlock(&ep->base.addr_lock);
+	/* This function is called whith the socket lock held,
+	 * so the address_list can not change.
+	 */
+	list_for_each_entry_rcu(addr, &bp->address_list, list) {
+		if (sctp_has_association(&addr->a, paddr))
 			return 1;
-		}
 	}
-	sctp_read_unlock(&ep->base.addr_lock);
 
 	return 0;
 }
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 54ff472..c8b0115 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -302,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 			      union sctp_addr *saddr)
 {
 	struct sctp_bind_addr *bp;
-	rwlock_t *addr_lock;
 	struct sctp_sockaddr_entry *laddr;
-	struct list_head *pos;
 	sctp_scope_t scope;
 	union sctp_addr *baddr = NULL;
 	__u8 matchlen = 0;
@@ -324,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 	scope = sctp_scope(daddr);
 
 	bp = &asoc->base.bind_addr;
-	addr_lock = &asoc->base.addr_lock;
 
 	/* Go through the bind address list and find the best source address
 	 * that matches the scope of the destination address.
 	 */
-	sctp_read_lock(addr_lock);
-	list_for_each(pos, &bp->address_list) {
-		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		if (!laddr->valid)
+			continue;
 		if ((laddr->use_as_src) &&
 		    (laddr->a.sa.sa_family == AF_INET6) &&
 		    (scope <= sctp_scope(&laddr->a))) {
@@ -353,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 		       __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
 	}
 
-	sctp_read_unlock(addr_lock);
+	rcu_read_unlock();
 }
 
 /* Make a copy of all potential local addresses. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4688559..35af75b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -223,7 +223,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
 			      (copy_flags & SCTP_ADDR6_ALLOWED) &&
 			      (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
 				error = sctp_add_bind_addr(bp, &addr->a, 1,
-							   GFP_ATOMIC);
+						    GFP_ATOMIC);
 				if (error)
 					goto end_copy;
 			}
@@ -427,9 +427,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 	struct rtable *rt;
 	struct flowi fl;
 	struct sctp_bind_addr *bp;
-	rwlock_t *addr_lock;
 	struct sctp_sockaddr_entry *laddr;
-	struct list_head *pos;
 	struct dst_entry *dst = NULL;
 	union sctp_addr dst_saddr;
 
@@ -458,23 +456,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 		goto out;
 
 	bp = &asoc->base.bind_addr;
-	addr_lock = &asoc->base.addr_lock;
 
 	if (dst) {
 		/* Walk through the bind address list and look for a bind
 		 * address that matches the source address of the returned dst.
 		 */
-		sctp_read_lock(addr_lock);
-		list_for_each(pos, &bp->address_list) {
-			laddr = list_entry(pos, struct sctp_sockaddr_entry,
-					   list);
-			if (!laddr->use_as_src)
+		rcu_read_lock();
+		list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+			if (!laddr->valid || !laddr->use_as_src)
 				continue;
 			sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
 			if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
 				goto out_unlock;
 		}
-		sctp_read_unlock(addr_lock);
+		rcu_read_unlock();
 
 		/* None of the bound addresses match the source address of the
 		 * dst. So release it.
@@ -486,10 +481,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 	/* Walk through the bind address list and try to get a dst that
 	 * matches a bind address as the source address.
 	 */
-	sctp_read_lock(addr_lock);
-	list_for_each(pos, &bp->address_list) {
-		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+	rcu_read_lock();
+	list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+		if (!laddr->valid)
+			continue;
 		if ((laddr->use_as_src) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
@@ -501,7 +496,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 	}
 
 out_unlock:
-	sctp_read_unlock(addr_lock);
+	rcu_read_unlock();
 out:
 	if (dst)
 		SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 79856c9..0dc965c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1531,7 +1531,7 @@ no_hmac:
 	/* Also, add the destination address. */
 	if (list_empty(&retval->base.bind_addr.address_list)) {
 		sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
-				   GFP_ATOMIC);
+				GFP_ATOMIC);
 	}
 
 	retval->next_tsn = retval->c.initial_tsn;
@@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
 
 	switch (asconf_param->param_hdr.type) {
 	case SCTP_PARAM_ADD_IP:
-		sctp_local_bh_disable();
-		sctp_write_lock(&asoc->base.addr_lock);
-		list_for_each(pos, &bp->address_list) {
-			saddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		/* This is always done in BH context with a socket lock
+		 * held, so the list can not change.
+		 */
+		list_for_each_entry_rcu(saddr, &bp->address_list, list) {
 			if (sctp_cmp_addr_exact(&saddr->a, &addr))
 				saddr->use_as_src = 1;
 		}
-		sctp_write_unlock(&asoc->base.addr_lock);
-		sctp_local_bh_enable();
 		break;
 	case SCTP_PARAM_DEL_IP:
-		sctp_local_bh_disable();
-		sctp_write_lock(&asoc->base.addr_lock);
-		retval = sctp_del_bind_addr(bp, &addr);
-		sctp_write_unlock(&asoc->base.addr_lock);
-		sctp_local_bh_enable();
+		retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh);
 		list_for_each(pos, &asoc->peer.transport_addr_list) {
 			transport = list_entry(pos, struct sctp_transport,
 						 transports);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a3acf78..cb253ab 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
 	if (!bp->port)
 		bp->port = inet_sk(sk)->num;
 
-	/* Add the address to the bind address list.  */
-	sctp_local_bh_disable();
-	sctp_write_lock(&ep->base.addr_lock);
-
-	/* Use GFP_ATOMIC since BHs are disabled.  */
+	/* Add the address to the bind address list.
+	 * Use GFP_ATOMIC since BHs will be disabled.
+	 */
 	ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
-	sctp_write_unlock(&ep->base.addr_lock);
-	sctp_local_bh_enable();
 
 	/* Copy back into socket for getsockname() use. */
 	if (!ret) {
@@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 		if (i < addrcnt)
 			continue;
 
-		/* Use the first address in bind addr list of association as
-		 * Address Parameter of ASCONF CHUNK.
+		/* Use the first valid address in bind addr list of
+		 * association as Address Parameter of ASCONF CHUNK.
 		 */
-		sctp_read_lock(&asoc->base.addr_lock);
 		bp = &asoc->base.bind_addr;
 		p = bp->address_list.next;
 		laddr = list_entry(p, struct sctp_sockaddr_entry, list);
-		sctp_read_unlock(&asoc->base.addr_lock);
-
 		chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
 						   addrcnt, SCTP_PARAM_ADD_IP);
 		if (!chunk) {
@@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 		/* Add the new addresses to the bind address list with
 		 * use_as_src set to 0.
 		 */
-		sctp_local_bh_disable();
-		sctp_write_lock(&asoc->base.addr_lock);
 		addr_buf = addrs;
 		for (i = 0; i < addrcnt; i++) {
 			addr = (union sctp_addr *)addr_buf;
@@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 						    GFP_ATOMIC);
 			addr_buf += af->sockaddr_len;
 		}
-		sctp_write_unlock(&asoc->base.addr_lock);
-		sctp_local_bh_enable();
 	}
 
 out:
@@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
 		 * socket routing and failover schemes. Refer to comments in
 		 * sctp_do_bind(). -daisy
 		 */
-		sctp_local_bh_disable();
-		sctp_write_lock(&ep->base.addr_lock);
-
-		retval = sctp_del_bind_addr(bp, sa_addr);
-
-		sctp_write_unlock(&ep->base.addr_lock);
-		sctp_local_bh_enable();
+		retval = sctp_del_bind_addr(bp, sa_addr, call_rcu);
 
 		addr_buf += af->sockaddr_len;
 err_bindx_rem:
@@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 		 * make sure that we do not delete all the addresses in the
 		 * association.
 		 */
-		sctp_read_lock(&asoc->base.addr_lock);
 		bp = &asoc->base.bind_addr;
 		laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
 					       addrcnt, sp);
-		sctp_read_unlock(&asoc->base.addr_lock);
 		if (!laddr)
 			continue;
 
+		/* We do not need RCU protection throughout this loop
+		 * because this is done under a socket lock from the
+		 * setsockopt call.
+		 */
 		chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt,
 						   SCTP_PARAM_DEL_IP);
 		if (!chunk) {
@@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 		/* Reset use_as_src flag for the addresses in the bind address
 		 * list that are to be deleted.
 		 */
-		sctp_local_bh_disable();
-		sctp_write_lock(&asoc->base.addr_lock);
 		addr_buf = addrs;
 		for (i = 0; i < addrcnt; i++) {
 			laddr = (union sctp_addr *)addr_buf;
 			af = sctp_get_af_specific(laddr->v4.sin_family);
-			list_for_each(pos1, &bp->address_list) {
-				saddr = list_entry(pos1,
-						   struct sctp_sockaddr_entry,
-						   list);
+			list_for_each_entry_rcu(saddr, &bp->address_list, list) {
 				if (sctp_cmp_addr_exact(&saddr->a, laddr))
 					saddr->use_as_src = 0;
 			}
 			addr_buf += af->sockaddr_len;
 		}
-		sctp_write_unlock(&asoc->base.addr_lock);
-		sctp_local_bh_enable();
 
 		/* Update the route and saddr entries for all the transports
 		 * as some of the addresses in the bind address list are
@@ -4057,11 +4035,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 					       int __user *optlen)
 {
 	sctp_assoc_t id;
-	struct list_head *pos;
 	struct sctp_bind_addr *bp;
 	struct sctp_association *asoc;
 	struct sctp_sockaddr_entry *addr;
-	rwlock_t *addr_lock;
 	int cnt = 0;
 
 	if (len < sizeof(sctp_assoc_t))
@@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 	 */
 	if (0 == id) {
 		bp = &sctp_sk(sk)->ep->base.bind_addr;
-		addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
 	} else {
 		asoc = sctp_id2assoc(sk, id);
 		if (!asoc)
 			return -EINVAL;
 		bp = &asoc->base.bind_addr;
-		addr_lock = &asoc->base.addr_lock;
 	}
 
-	sctp_read_lock(addr_lock);
-
 	/* If the endpoint is bound to 0.0.0.0 or ::0, count the valid
 	 * addresses from the global local address list.
 	 */
@@ -4115,12 +4087,14 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 		goto done;
 	}
 
-	list_for_each(pos, &bp->address_list) {
+	/* Protection on the bound address list is not needed,
+	 * since in the socket option context we hold the socket lock,
+	 * so there is no way that the bound address list can change.
+	 */
+	list_for_each_entry_rcu(addr, &bp->address_list, list) {
 		cnt ++;
 	}
-
 done:
-	sctp_read_unlock(addr_lock);
 	return cnt;
 }
 
@@ -4204,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 {
 	struct sctp_bind_addr *bp;
 	struct sctp_association *asoc;
-	struct list_head *pos;
 	int cnt = 0;
 	struct sctp_getaddrs_old getaddrs;
 	struct sctp_sockaddr_entry *addr;
@@ -4212,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	union sctp_addr temp;
 	struct sctp_sock *sp = sctp_sk(sk);
 	int addrlen;
-	rwlock_t *addr_lock;
 	int err = 0;
 	void *addrs;
 	void *buf;
@@ -4234,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	 */
 	if (0 == getaddrs.assoc_id) {
 		bp = &sctp_sk(sk)->ep->base.bind_addr;
-		addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
 	} else {
 		asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
 		if (!asoc)
 			return -EINVAL;
 		bp = &asoc->base.bind_addr;
-		addr_lock = &asoc->base.addr_lock;
 	}
 
 	to = getaddrs.addrs;
@@ -4254,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	if (!addrs)
 		return -ENOMEM;
 
-	sctp_read_lock(addr_lock);
-
 	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
 	 * addresses from the global local address list.
 	 */
@@ -4271,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	}
 
 	buf = addrs;
-	list_for_each(pos, &bp->address_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	/* Protection on the bound address list is not needed since
+	 * in the socket option context we hold a socket lock and
+	 * thus the bound address list can't change.
+	 */
+	list_for_each_entry_rcu(addr, &bp->address_list, list) {
 		memcpy(&temp, &addr->a, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
@@ -4284,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	}
 
 copy_getaddrs:
-	sctp_read_unlock(addr_lock);
-
 	/* copy the entire address list into the user provided space */
 	if (copy_to_user(to, addrs, bytes_copied)) {
 		err = -EFAULT;
@@ -4307,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 {
 	struct sctp_bind_addr *bp;
 	struct sctp_association *asoc;
-	struct list_head *pos;
 	int cnt = 0;
 	struct sctp_getaddrs getaddrs;
 	struct sctp_sockaddr_entry *addr;
@@ -4315,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	union sctp_addr temp;
 	struct sctp_sock *sp = sctp_sk(sk);
 	int addrlen;
-	rwlock_t *addr_lock;
 	int err = 0;
 	size_t space_left;
 	int bytes_copied = 0;
@@ -4336,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	 */
 	if (0 == getaddrs.assoc_id) {
 		bp = &sctp_sk(sk)->ep->base.bind_addr;
-		addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
 	} else {
 		asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
 		if (!asoc)
 			return -EINVAL;
 		bp = &asoc->base.bind_addr;
-		addr_lock = &asoc->base.addr_lock;
 	}
 
 	to = optval + offsetof(struct sctp_getaddrs,addrs);
@@ -4352,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	if (!addrs)
 		return -ENOMEM;
 
-	sctp_read_lock(addr_lock);
-
 	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
 	 * addresses from the global local address list.
 	 */
@@ -4365,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 						space_left, &bytes_copied);
 			if (cnt < 0) {
 				err = cnt;
-				goto error_lock;
+				goto out;
 			}
 			goto copy_getaddrs;
 		}
 	}
 
 	buf = addrs;
-	list_for_each(pos, &bp->address_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	/* Protection on the bound address list is not needed since
+	 * in the socket option context we hold a socket lock and
+	 * thus the bound address list can't change.
+	 */
+	list_for_each_entry_rcu(addr, &bp->address_list, list) {
 		memcpy(&temp, &addr->a, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
 		if (space_left < addrlen) {
 			err =  -ENOMEM; /*fixme: right error?*/
-			goto error_lock;
+			goto out;
 		}
 		memcpy(buf, &temp, addrlen);
 		buf += addrlen;
@@ -4389,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
 	}
 
 copy_getaddrs:
-	sctp_read_unlock(addr_lock);
-
 	if (copy_to_user(to, addrs, bytes_copied)) {
 		err = -EFAULT;
 		goto out;
@@ -4401,12 +4365,6 @@ copy_getaddrs:
 	}
 	if (put_user(bytes_copied, optlen))
 		err = -EFAULT;
-
-	goto out;
-
-error_lock:
-	sctp_read_unlock(addr_lock);
-
 out:
 	kfree(addrs);
 	return err;
-- 
1.5.2.4


^ permalink raw reply related

* [RFC v2 PATCH 1/2] SCTP: Add RCU synchronization around sctp_localaddr_list
From: Vlad Yasevich @ 2007-09-12 19:46 UTC (permalink / raw)
  To: netdev; +Cc: lksctp-developers, Vlad Yasevich
In-Reply-To: <11896263983281-git-send-email-vladislav.yasevich@hp.com>

sctp_localaddr_list is modified dynamically via NETDEV_UP
and NETDEV_DOWN events, but there is no synchronization
between the writer (event handler) and the readers.  As a
result, the readers can access an entry that has been freed
and crash the system.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/sctp.h    |    1 +
 include/net/sctp/structs.h |    6 +++++
 net/sctp/bind_addr.c       |    2 +
 net/sctp/ipv6.c            |   34 ++++++++++++++++++++++---------
 net/sctp/protocol.c        |   46 ++++++++++++++++++++++++++++++-------------
 net/sctp/socket.c          |   38 +++++++++++++++++++++++------------
 6 files changed, 90 insertions(+), 37 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d529045..c9cc00c 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -123,6 +123,7 @@
  * sctp/protocol.c
  */
 extern struct sock *sctp_get_ctl_sock(void);
+extern void sctp_local_addr_free(struct rcu_head *head);
 extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
 				     sctp_scope_t, gfp_t gfp,
 				     int flags);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c0d5848..a89e361 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -207,6 +207,9 @@ extern struct sctp_globals {
 	 * It is a list of sctp_sockaddr_entry.
 	 */
 	struct list_head local_addr_list;
+
+	/* Lock that protects the local_addr_list writers */
+	spinlock_t addr_list_lock;
 	
 	/* Flag to indicate if addip is enabled. */
 	int addip_enable;
@@ -242,6 +245,7 @@ extern struct sctp_globals {
 #define sctp_port_alloc_lock		(sctp_globals.port_alloc_lock)
 #define sctp_port_hashtable		(sctp_globals.port_hashtable)
 #define sctp_local_addr_list		(sctp_globals.local_addr_list)
+#define sctp_local_addr_lock		(sctp_globals.addr_list_lock)
 #define sctp_addip_enable		(sctp_globals.addip_enable)
 #define sctp_prsctp_enable		(sctp_globals.prsctp_enable)
 
@@ -737,8 +741,10 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
 /* This is a structure for holding either an IPv6 or an IPv4 address.  */
 struct sctp_sockaddr_entry {
 	struct list_head list;
+	struct rcu_head	rcu;
 	union sctp_addr a;
 	__u8 use_as_src;
+	__u8 valid;
 };
 
 typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index fdb287a..7fc369f 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -163,8 +163,10 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
 		addr->a.v4.sin_port = htons(bp->port);
 
 	addr->use_as_src = use_as_src;
+	addr->valid = 1;
 
 	INIT_LIST_HEAD(&addr->list);
+	INIT_RCU_HEAD(&addr->rcu);
 	list_add_tail(&addr->list, &bp->address_list);
 	SCTP_DBG_OBJCNT_INC(addr);
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f8aa23d..54ff472 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -77,13 +77,18 @@
 
 #include <asm/uaccess.h>
 
-/* Event handler for inet6 address addition/deletion events.  */
+/* Event handler for inet6 address addition/deletion events.
+ * This event is part of the atomic notifier call chain
+ * and thus happens atomically and can NOT sleep.  As a result
+ * we can't and really don't need to add any locks to guard the
+ * RCU.
+ */
 static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 				void *ptr)
 {
 	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
-	struct sctp_sockaddr_entry *addr;
-	struct list_head *pos, *temp;
+	struct sctp_sockaddr_entry *addr = NULL;
+	struct sctp_sockaddr_entry *temp;
 
 	switch (ev) {
 	case NETDEV_UP:
@@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 			memcpy(&addr->a.v6.sin6_addr, &ifa->addr,
 				 sizeof(struct in6_addr));
 			addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
-			list_add_tail(&addr->list, &sctp_local_addr_list);
+			addr->valid = 1;
+			spin_lock_bh(&sctp_local_addr_lock);
+			list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+			spin_unlock_bh(&sctp_local_addr_lock);
 		}
 		break;
 	case NETDEV_DOWN:
-		list_for_each_safe(pos, temp, &sctp_local_addr_list) {
-			addr = list_entry(pos, struct sctp_sockaddr_entry, list);
-			if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) {
-				list_del(pos);
-				kfree(addr);
+		spin_lock_bh(&sctp_local_addr_lock);
+		list_for_each_entry_safe(addr, temp,
+					&sctp_local_addr_list, list) {
+			if (ipv6_addr_equal(&addr->a.v6.sin6_addr,
+					     &ifa->addr)) {
+				addr->valid = 0;
+				list_del_rcu(&addr->list);
 				break;
 			}
 		}
-
+		spin_unlock_bh(&sctp_local_addr_lock);
+		if (addr && !addr->valid)
+			call_rcu(&addr->rcu, sctp_local_addr_free);
 		break;
 	}
 
@@ -367,7 +379,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 			addr->a.v6.sin6_port = 0;
 			addr->a.v6.sin6_addr = ifp->addr;
 			addr->a.v6.sin6_scope_id = dev->ifindex;
+			addr->valid = 1;
 			INIT_LIST_HEAD(&addr->list);
+			INIT_RCU_HEAD(&addr->rcu);
 			list_add_tail(&addr->list, addrlist);
 		}
 	}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e98579b..4688559 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -153,6 +153,8 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
 			addr->a.v4.sin_family = AF_INET;
 			addr->a.v4.sin_port = 0;
 			addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
+			addr->valid = 1;
+			INIT_RCU_HEAD(&addr->rcu);
 			list_add_tail(&addr->list, addrlist);
 		}
 	}
@@ -192,16 +194,24 @@ static void sctp_free_local_addr_list(void)
 	}
 }
 
+void sctp_local_addr_free(struct rcu_head *head)
+{
+	struct sctp_sockaddr_entry *e = container_of(head,
+				struct sctp_sockaddr_entry, rcu);
+	kfree(e);
+}
+
 /* Copy the local addresses which are valid for 'scope' into 'bp'.  */
 int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
 			      gfp_t gfp, int copy_flags)
 {
 	struct sctp_sockaddr_entry *addr;
 	int error = 0;
-	struct list_head *pos, *temp;
 
-	list_for_each_safe(pos, temp, &sctp_local_addr_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+		if (!addr->valid)
+			continue;
 		if (sctp_in_scope(&addr->a, scope)) {
 			/* Now that the address is in scope, check to see if
 			 * the address type is really supported by the local
@@ -221,6 +231,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
 	}
 
 end_copy:
+	rcu_read_unlock();
 	return error;
 }
 
@@ -605,8 +616,8 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 			       void *ptr)
 {
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
-	struct sctp_sockaddr_entry *addr;
-	struct list_head *pos, *temp;
+	struct sctp_sockaddr_entry *addr = NULL;
+	struct sctp_sockaddr_entry *temp;
 
 	switch (ev) {
 	case NETDEV_UP:
@@ -615,19 +626,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 			addr->a.v4.sin_family = AF_INET;
 			addr->a.v4.sin_port = 0;
 			addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
-			list_add_tail(&addr->list, &sctp_local_addr_list);
+			addr->valid = 1;
+			spin_lock_bh(&sctp_local_addr_lock);
+			list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+			spin_unlock_bh(&sctp_local_addr_lock);
 		}
 		break;
 	case NETDEV_DOWN:
-		list_for_each_safe(pos, temp, &sctp_local_addr_list) {
-			addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+		spin_lock_bh(&sctp_local_addr_lock);
+		list_for_each_entry_safe(addr, temp,
+					&sctp_local_addr_list, list) {
 			if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) {
-				list_del(pos);
-				kfree(addr);
+				addr->valid = 0;
+				list_del_rcu(&addr->list);
 				break;
 			}
 		}
-
+		spin_unlock_bh(&sctp_local_addr_lock);
+		if (addr && !addr->valid)
+			call_rcu(&addr->rcu, sctp_local_addr_free);
 		break;
 	}
 
@@ -1160,6 +1177,7 @@ SCTP_STATIC __init int sctp_init(void)
 
 	/* Initialize the local address list. */
 	INIT_LIST_HEAD(&sctp_local_addr_list);
+	spin_lock_init(&sctp_local_addr_lock);
 	sctp_get_local_addr_list();
 
 	/* Register notifier for inet address additions/deletions. */
@@ -1227,6 +1245,9 @@ SCTP_STATIC __exit void sctp_exit(void)
 	sctp_v6_del_protocol();
 	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
 
+	/* Unregister notifier for inet address additions/deletions. */
+	unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+
 	/* Free the local address list.  */
 	sctp_free_local_addr_list();
 
@@ -1240,9 +1261,6 @@ SCTP_STATIC __exit void sctp_exit(void)
 	inet_unregister_protosw(&sctp_stream_protosw);
 	inet_unregister_protosw(&sctp_seqpacket_protosw);
 
-	/* Unregister notifier for inet address additions/deletions. */
-	unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
-
 	sctp_sysctl_unregister();
 	list_del(&sctp_ipv4_specific.list);
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3335460..a3acf78 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4057,9 +4057,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 					       int __user *optlen)
 {
 	sctp_assoc_t id;
+	struct list_head *pos;
 	struct sctp_bind_addr *bp;
 	struct sctp_association *asoc;
-	struct list_head *pos, *temp;
 	struct sctp_sockaddr_entry *addr;
 	rwlock_t *addr_lock;
 	int cnt = 0;
@@ -4096,15 +4096,19 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
 		addr = list_entry(bp->address_list.next,
 				  struct sctp_sockaddr_entry, list);
 		if (sctp_is_any(&addr->a)) {
-			list_for_each_safe(pos, temp, &sctp_local_addr_list) {
-				addr = list_entry(pos,
-						  struct sctp_sockaddr_entry,
-						  list);
+			rcu_read_lock();
+			list_for_each_entry_rcu(addr,
+						&sctp_local_addr_list, list) {
+				if (!addr->valid)
+					continue;
+
 				if ((PF_INET == sk->sk_family) &&
 				    (AF_INET6 == addr->a.sa.sa_family))
 					continue;
+
 				cnt++;
 			}
+			rcu_read_unlock();
 		} else {
 			cnt = 1;
 		}
@@ -4127,14 +4131,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
 					int max_addrs, void *to,
 					int *bytes_copied)
 {
-	struct list_head *pos, *next;
 	struct sctp_sockaddr_entry *addr;
 	union sctp_addr temp;
 	int cnt = 0;
 	int addrlen;
 
-	list_for_each_safe(pos, next, &sctp_local_addr_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+		if (!addr->valid)
+			continue;
+
 		if ((PF_INET == sk->sk_family) &&
 		    (AF_INET6 == addr->a.sa.sa_family))
 			continue;
@@ -4149,6 +4155,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
 		cnt ++;
 		if (cnt >= max_addrs) break;
 	}
+	rcu_read_unlock();
 
 	return cnt;
 }
@@ -4156,14 +4163,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
 static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
 			    size_t space_left, int *bytes_copied)
 {
-	struct list_head *pos, *next;
 	struct sctp_sockaddr_entry *addr;
 	union sctp_addr temp;
 	int cnt = 0;
 	int addrlen;
 
-	list_for_each_safe(pos, next, &sctp_local_addr_list) {
-		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+	rcu_read_lock();
+	list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+		if (!addr->valid)
+			continue;
+
 		if ((PF_INET == sk->sk_family) &&
 		    (AF_INET6 == addr->a.sa.sa_family))
 			continue;
@@ -4171,8 +4180,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
 								&temp);
 		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
-		if (space_left < addrlen)
-			return -ENOMEM;
+		if (space_left < addrlen) {
+			cnt =  -ENOMEM;
+			break;
+		}
 		memcpy(to, &temp, addrlen);
 
 		to += addrlen;
@@ -4180,6 +4191,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
 		space_left -= addrlen;
 		*bytes_copied += addrlen;
 	}
+	rcu_read_unlock();
 
 	return cnt;
 }
-- 
1.5.2.4


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox