Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-2.6 PATCH 4/5] qlge: Protect reset recovery with rtnl_lock().
From: Ron Mercer @ 2009-09-29 18:39 UTC (permalink / raw)
  To: davem; +Cc: netdev, ron.mercer
In-Reply-To: <1254249565-16381-1-git-send-email-ron.mercer@qlogic.com>

Move the call to rtnl_lock() to before the internal call to
ql_adapter_down()/ql_adapter_up().  This prevents collisions that can
happen when recovering from an asic error.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
---
 drivers/net/qlge/qlge_main.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index b05300d..fbef305 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -3703,7 +3703,7 @@ static void ql_asic_reset_work(struct work_struct *work)
 	struct ql_adapter *qdev =
 	    container_of(work, struct ql_adapter, asic_reset_work.work);
 	int status;
-
+	rtnl_lock();
 	status = ql_adapter_down(qdev);
 	if (status)
 		goto error;
@@ -3711,12 +3711,12 @@ static void ql_asic_reset_work(struct work_struct *work)
 	status = ql_adapter_up(qdev);
 	if (status)
 		goto error;
-
+	rtnl_unlock();
 	return;
 error:
 	QPRINTK(qdev, IFUP, ALERT,
 		"Driver up/down cycle failed, closing device\n");
-	rtnl_lock();
+
 	set_bit(QL_ADAPTER_UP, &qdev->flags);
 	dev_close(qdev->ndev);
 	rtnl_unlock();
-- 
1.6.0.2


^ permalink raw reply related

* [net-2.6 PATCH 2/5] qlge: Fix out of sync hardware semaphore.
From: Ron Mercer @ 2009-09-29 18:39 UTC (permalink / raw)
  To: davem; +Cc: netdev, ron.mercer
In-Reply-To: <1254249565-16381-1-git-send-email-ron.mercer@qlogic.com>

ql_clear_routing_entries() takes/gives its own hardware semaphore since
it is called from more than one place.  ql_route_initialize() should
make this call and THEN take its own semaphore before doing its work.

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
---
 drivers/net/qlge/qlge_main.c |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 2205292..e4b756d 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -3142,14 +3142,14 @@ static int ql_route_initialize(struct ql_adapter *qdev)
 {
 	int status = 0;

-	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
+	/* Clear all the entries in the routing table. */
+	status = ql_clear_routing_entries(qdev);
 	if (status)
 		return status;

-	/* Clear all the entries in the routing table. */
-	status = ql_clear_routing_entries(qdev);
+	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
 	if (status)
-		goto exit;
+		return status;

 	status = ql_set_routing_reg(qdev, RT_IDX_ALL_ERR_SLOT, RT_IDX_ERR, 1);
 	if (status) {
-- 
1.6.0.2

^ permalink raw reply related

* [net-2.6 PATCH 1/5] qlge: Fix bad bit definitions.
From: Ron Mercer @ 2009-09-29 18:39 UTC (permalink / raw)
  To: davem; +Cc: netdev, ron.mercer
In-Reply-To: <1254249565-16381-1-git-send-email-ron.mercer@qlogic.com>

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
---
 drivers/net/qlge/qlge.h |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/qlge/qlge.h b/drivers/net/qlge/qlge.h
index a9845a2..30d5585 100644
--- a/drivers/net/qlge/qlge.h
+++ b/drivers/net/qlge/qlge.h
@@ -1381,15 +1381,15 @@ struct intr_context {
 
 /* adapter flags definitions. */
 enum {
-	QL_ADAPTER_UP = (1 << 0),	/* Adapter has been brought up. */
-	QL_LEGACY_ENABLED = (1 << 3),
-	QL_MSI_ENABLED = (1 << 3),
-	QL_MSIX_ENABLED = (1 << 4),
-	QL_DMA64 = (1 << 5),
-	QL_PROMISCUOUS = (1 << 6),
-	QL_ALLMULTI = (1 << 7),
-	QL_PORT_CFG = (1 << 8),
-	QL_CAM_RT_SET = (1 << 9),
+	QL_ADAPTER_UP = 0,	/* Adapter has been brought up. */
+	QL_LEGACY_ENABLED = 1,
+	QL_MSI_ENABLED = 2,
+	QL_MSIX_ENABLED = 3,
+	QL_DMA64 = 4,
+	QL_PROMISCUOUS = 5,
+	QL_ALLMULTI = 6,
+	QL_PORT_CFG = 7,
+	QL_CAM_RT_SET = 8,
 };
 
 /* link_status bit definitions */
-- 
1.6.0.2


^ permalink raw reply related

* [net-2.6 PATCH 3/5] qlge: Fix spin_lock warning.
From: Ron Mercer @ 2009-09-29 18:39 UTC (permalink / raw)
  To: davem; +Cc: netdev, ron.mercer
In-Reply-To: <1254249565-16381-1-git-send-email-ron.mercer@qlogic.com>

Remove the unnecessary locking around the call to ql_adapter_reset().

Sep 25 08:17:29 localhost kernel:    SOFTIRQ-ON-W at:
Sep 25 08:17:29 localhost kernel:                         [<c0000000000a2964>] .lock_acquire+0x10c/0x158
Sep 25 08:17:29 localhost kernel:                         [<c0000000004542e0>] ._spin_lock+0x34/0x58
Sep 25 08:17:29 localhost kernel:                         [<d000000006723070>] .ql_adapter_down+0x40c/0x4a0 [qlge]
Sep 25 08:17:29 localhost kernel:                         [<d0000000067256d8>] .qlge_close+0x38/0x58 [qlge]
Sep 25 08:17:29 localhost kernel:                         [<c0000000003ada6c>] .dev_close+0xdc/0x118
Sep 25 08:17:29 localhost kernel:                         [<c0000000003adb48>] .rollback_registered+0xa0/0x158
Sep 25 08:17:29 localhost kernel:                         [<c0000000003adc50>] .unregister_netdevice+0x50/0x7c
Sep 25 08:17:29 localhost kernel:                         [<c0000000003adca0>] .unregister_netdev+0x24/0x40
Sep 25 08:17:29 localhost kernel:                         [<d00000000672e0c0>] .qlge_remove+0x28/0x64 [qlge]
Sep 25 08:17:29 localhost kernel:                         [<c000000000253fdc>] .pci_device_remove+0x50/0x90
Sep 25 08:17:29 localhost kernel:                         [<c0000000002f5434>] .__device_release_driver+0x94/0xf8
Sep 25 08:17:29 localhost kernel:                         [<c0000000002f5560>] .driver_detach+0xc8/0xfc
Sep 25 08:17:29 localhost kernel:                         [<c0000000002f3fd8>] .bus_remove_driver+0xb4/0x114
Sep 25 08:17:29 localhost kernel:                         [<c0000000002f5d4c>] .driver_unregister+0x80/0xa4
Sep 25 08:17:29 localhost kernel:                         [<c00000000025421c>] .pci_unregister_driver+0x50/0xc8
Sep 25 08:17:29 localhost kernel:                         [<d00000000672e044>] .qlge_exit+0x1c/0x34 [qlge]
Sep 25 08:17:29 localhost kernel:                         [<c0000000000ac8b0>] .SyS_delete_module+0x234/0x2d0
Sep 25 08:17:29 localhost kernel:                         [<c000000000008554>] syscall_exit+0x0/0x40
Sep 25 08:17:29 localhost kernel:    INITIAL USE at:

Signed-off-by: Ron Mercer <ron.mercer@qlogic.com>
---
 drivers/net/qlge/qlge_main.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index e4b756d..b05300d 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -3380,12 +3380,10 @@ static int ql_adapter_down(struct ql_adapter *qdev)
 
 	ql_free_rx_buffers(qdev);
 
-	spin_lock(&qdev->hw_lock);
 	status = ql_adapter_reset(qdev);
 	if (status)
 		QPRINTK(qdev, IFDOWN, ERR, "reset(func #%d) FAILED!\n",
 			qdev->func);
-	spin_unlock(&qdev->hw_lock);
 	return status;
 }
 
-- 
1.6.0.2


^ permalink raw reply related

* [net-2.6 PATCH 0/5] qlge: Bug fixes for qlge.
From: Ron Mercer @ 2009-09-29 18:39 UTC (permalink / raw)
  To: davem; +Cc: netdev, ron.mercer



^ permalink raw reply

* Re: [PATCH] /proc/net/tcp, overhead removed
From: Yakov Lerner @ 2009-09-29 17:34 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Eric Dumazet, netdev, davem
In-Reply-To: <20090929084534.41274f66@nehalam>

On Tue, Sep 29, 2009 at 18:45, Stephen Hemminger <shemminger@vyatta.com> wrote:
> On Tue, 29 Sep 2009 11:55:18 +0300
> Yakov Lerner <iler.ml@gmail.com> wrote:
>
>> On Tue, Sep 29, 2009 at 10:56, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> >
>> > Yakov Lerner a écrit :
>> > > Take 2.
>> > >
>> > > "Sharp improvement in performance of /proc/net/tcp when number of
>> > > sockets is large and hashsize is large.
>> > > O(numsock * hashsize) time becomes O(numsock + hashsize). On slow
>> > > processors, speed difference can be x100 and more."
>> > >
>> > > I must say that I'm not fully satisfied with my choice of "st->sbucket"
>> > > for the new preserved index. The better name would be "st->snum".
>> > > Re-using "st->sbucket" saves 4 bytes, and keeps the patch to one sourcefile.
>> > > But "st->sbucket" has different meaning in OPENREQ and LISTEN states;
>> > > this can be confusing.
>> > > Maybe better add "snum" member to struct tcp_iter_state ?
>> > >
>> > > Shall I change subject when sending "take N+1", or keep the old subject ?
>> > >
>> > > Signed-off-by: Yakov Lerner <iler.ml@gmail.com>
>> > > ---
>> > >  net/ipv4/tcp_ipv4.c |   35 +++++++++++++++++++++++++++++++++--
>> > >  1 files changed, 33 insertions(+), 2 deletions(-)
>> > >
>> > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
>> > > index 7cda24b..e4c4f19 100644
>> > > --- a/net/ipv4/tcp_ipv4.c
>> > > +++ b/net/ipv4/tcp_ipv4.c
>> > > @@ -1994,13 +1994,14 @@ static inline int empty_bucket(struct tcp_iter_state *st)
>> > >               hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
>> > >  }
>> > >
>> > > -static void *established_get_first(struct seq_file *seq)
>> > > +static void *established_get_first_after(struct seq_file *seq, int bucket)
>> > >  {
>> > >       struct tcp_iter_state *st = seq->private;
>> > >       struct net *net = seq_file_net(seq);
>> > >       void *rc = NULL;
>> > >
>> > > -     for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
>> > > +     for (st->bucket = bucket; st->bucket < tcp_hashinfo.ehash_size;
>> > > +          ++st->bucket) {
>> > >               struct sock *sk;
>> > >               struct hlist_nulls_node *node;
>> > >               struct inet_timewait_sock *tw;
>> > > @@ -2010,6 +2011,8 @@ static void *established_get_first(struct seq_file *seq)
>> > >               if (empty_bucket(st))
>> > >                       continue;
>> > >
>> > > +             st->sbucket = st->num;
>> > > +
>> > >               spin_lock_bh(lock);
>> > >               sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
>> > >                       if (sk->sk_family != st->family ||
>> > > @@ -2036,6 +2039,11 @@ out:
>> > >       return rc;
>> > >  }
>> > >
>> > > +static void *established_get_first(struct seq_file *seq)
>> > > +{
>> > > +     return established_get_first_after(seq, 0);
>> > > +}
>> > > +
>> > >  static void *established_get_next(struct seq_file *seq, void *cur)
>> > >  {
>> > >       struct sock *sk = cur;
>> > > @@ -2064,6 +2072,9 @@ get_tw:
>> > >               while (++st->bucket < tcp_hashinfo.ehash_size &&
>> > >                               empty_bucket(st))
>> > >                       ;
>> > > +
>> > > +             st->sbucket = st->num;
>> > > +
>> > >               if (st->bucket >= tcp_hashinfo.ehash_size)
>> > >                       return NULL;
>> > >
>> > > @@ -2107,6 +2118,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
>> > >
>> > >       if (!rc) {
>> > >               st->state = TCP_SEQ_STATE_ESTABLISHED;
>> > > +             st->sbucket = 0;
>> > >               rc        = established_get_idx(seq, pos);
>> > >       }
>> > >
>> > > @@ -2116,6 +2128,25 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
>> > >  static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
>> > >  {
>> > >       struct tcp_iter_state *st = seq->private;
>> > > +
>> > > +     if (*pos && *pos >= st->sbucket &&
>> > > +         (st->state == TCP_SEQ_STATE_ESTABLISHED ||
>> > > +          st->state == TCP_SEQ_STATE_TIME_WAIT)) {
>> > > +             void *cur;
>> > > +             int nskip;
>> > > +
>> > > +             /* for states estab and tw, st->sbucket is index (*pos) */
>> > > +             /* corresponding to the beginning of bucket st->bucket */
>> > > +
>> > > +             st->num = st->sbucket;
>> > > +             /* jump to st->bucket, then skip (*pos - st->sbucket) items */
>> > > +             st->state = TCP_SEQ_STATE_ESTABLISHED;
>> > > +             cur = established_get_first_after(seq, st->bucket);
>> > > +             for (nskip = *pos - st->num; cur && nskip > 0; --nskip)
>> > > +                     cur = established_get_next(seq, cur);
>> > > +             return cur;
>> > > +     }
>> > > +
>> > >       st->state = TCP_SEQ_STATE_LISTENING;
>> > >       st->num = 0;
>> > >       return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
>> >
>> > Just in case you are working on "take 3" of the patch, there is a fundamental problem.
>> >
>> > All the scalability problems come from the fact that tcp_seq_start()
>> > *has* to rescan all the tables from the beginning, because of the lseek() capability
>> > on the /proc/net/tcp file.
>> >
>> > We probably could disable llseek() (on other positions than start of the file),
>> > and rely only on internal state (listening/established hashtable, hash bucket, position in chain)
>> >
>> > I cannot imagine how an application could rely on lseek() on >0 position in this file.
>>
>>
>> I thought  /proc/net/tcp  can  both  be fast and allow lseek;
>> (1) when no lseek was issued since last read
>> (we can detect this), /proc/net/tcp can jump to the
>> last known bucket (common case), vs
>> (2) switch to slow mode (scan from the beginning of hash)
>> when lseek was used , no ?
>
> If you look at fib_hash and fib_trie, they already do the same thing.
>  * fib_hash records last hash chain to avoid overhead of rescan.
>  * fib_trie records last route and does fast lookup to restart from there.

Thanks for the pointer.

^ permalink raw reply

* Re: [PATCH] [RFC] IPv4 TCP fails to send window scale option when window scale is zero
From: Eric Dumazet @ 2009-09-29 17:19 UTC (permalink / raw)
  To: Gilad Ben-Yossef; +Cc: netdev, Ori Finkalman
In-Reply-To: <4AC22250.7060301@codefidence.com>

Gilad Ben-Yossef a écrit :
> From: Ori Finkalman <ori@comsleep.com>
> 
> 
> Acknowledge TCP window scale support by inserting the proper option in
> SYN/ACK header
> even if our window scale is zero.
> 
> 
> This fixes the following observed behavior:
> 
> 
> 1. Client sends a SYN with TCP window scaling option and non zero window
> scale value to a Linux box.
> 
> 2. Linux box notes large receive window from client.
> 
> 3. Linux decides on a zero value of window scale for its part.
> 
> 4. Due to a comparison against the requested window scale size option, Linux does
> not send the window scale
> 
> TCP option header on SYN/ACK at all.
> 
> 
> Result:
> 
> 
> Client box thinks TCP window scaling is not supported, since SYN/ACK had
> no TCP window scale option,
> while Linux thinks that TCP window scaling is supported (and scale might
> be non zero), since SYN had
> 
> TCP window scale option and we have a mismatched idea between the client
> and server regarding window sizes.
> 
> 
> Please comment and/or apply.
> 
> 
> ---
> 
> 
> Bug reported and patch written by Ori Finkalman from Comsleep Ltd. I'm
> just helping mainline it.
> 
> 
> The behavior was observed with a Windows box as the client and latest
> Debian kernel but to the best
> of my understanding this can happen with latest kernel versions and
> other client OS (probably also Linux)
> 
> as well.
> 
> 
> 
> Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
> Signed-off-by: Ori Finkelman <ori@comsleep.com>
> 
> 
> Index: net/ipv4/tcp_output.c
> ===================================================================
> --- net/ipv4/tcp_output.c    (revision 46)
> +++ net/ipv4/tcp_output.c    (revision 210)
> @@ -353,6 +353,7 @@ static void tcp_init_nondata_skb(struct
> #define OPTION_SACK_ADVERTISE    (1 << 0)
> #define OPTION_TS        (1 << 1)
> #define OPTION_MD5        (1 << 2)
> +#define OPTION_WSCALE        (1 << 3)
> 
> struct tcp_out_options {
>     u8 options;        /* bit field of OPTION_* */
> @@ -417,7 +418,7 @@ static void tcp_options_write(__be32 *pt
>                    TCPOLEN_SACK_PERM);
>     }
> 
> -    if (unlikely(opts->ws)) {
> +    if (unlikely(OPTION_WSCALE & opts->options)) {
>         *ptr++ = htonl((TCPOPT_NOP << 24) |
>                    (TCPOPT_WINDOW << 16) |
>                    (TCPOLEN_WINDOW << 8) |
> @@ -530,8 +531,8 @@ static unsigned tcp_synack_options(struc
> 
>     if (likely(ireq->wscale_ok)) {
>         opts->ws = ireq->rcv_wscale;
> -        if(likely(opts->ws))
> -            size += TCPOLEN_WSCALE_ALIGNED;
> +        opts->options |= OPTION_WSCALE;
> +        size += TCPOLEN_WSCALE_ALIGNED;
>     }
>     if (likely(doing_ts)) {
>         opts->options |= OPTION_TS;
> 
> 
> 

This does not seem like the most logical place to put this logic...

How about this instead ?

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5200aab..b78c084 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -216,6 +216,11 @@ void tcp_select_initial_window(int __space, __u32 mss,
 			space >>= 1;
 			(*rcv_wscale)++;
 		}
+		/*
+		 * Set a minimum wscale of 1
+		 */
+		if (*rcv_wscale == 0)
+			*rcv_wscale = 1;
        }

        /* Set initial window to value enough for senders,

^ permalink raw reply related

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Evgeniy Polyakov @ 2009-09-29 17:08 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Christian Borntraeger, Evgeny Polyakov, Scott James Remnant,
	Linux Kernel, Matt Helsley, David S. Miller, netdev
In-Reply-To: <20090929153631.GA12699@redhat.com>

On Tue, Sep 29, 2009 at 05:36:31PM +0200, Oleg Nesterov (oleg@redhat.com) wrote:
> > Doesn't it only check pgid while patch intention was to send
> > notification about sid?
> 
> If the proposed sid already was the session id, then pgrp shouldn't
> be empty.
> 
> but this doesn't really matter, we also check ->signal->leader
> (not sure, but afaics this check is not strictly necessary because
>  of PIDTYPE_PGID check)

Ok, I see, thanks.

-- 
	Evgeniy Polyakov

^ permalink raw reply

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Evgeniy Polyakov @ 2009-09-29 17:07 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: Oleg Nesterov, Scott James Remnant, Linux Kernel, Matt Helsley,
	David S. Miller, netdev
In-Reply-To: <200909291712.33099.borntraeger@de.ibm.com>

On Tue, Sep 29, 2009 at 05:12:33PM +0200, Christian Borntraeger (borntraeger@de.ibm.com) wrote:
> just in case the discussion concludes that my patch is fine,
> here is a fixed version.
> 
> [PATCH] connector: Fix sid connector
> 
> The sid connector gives the following warning:
> Badness at kernel/softirq.c:143
> [...]
> Call Trace:
> ([<000000013fe04100>] 0x13fe04100)
>  [<000000000048a946>] sk_filter+0x9a/0xd0
>  [<000000000049d938>] netlink_broadcast+0x2c0/0x53c
>  [<00000000003ba9ae>] cn_netlink_send+0x272/0x2b0
>  [<00000000003baef0>] proc_sid_connector+0xc4/0xd4
>  [<0000000000142604>] __set_special_pids+0x58/0x90
>  [<0000000000159938>] sys_setsid+0xb4/0xd8
>  [<00000000001187fe>] sysc_noemu+0x10/0x16
>  [<00000041616cb266>] 0x41616cb266
> 
> The warning is
> --->    WARN_ON_ONCE(in_irq() || irqs_disabled());
> 
> The network code must not be called with disabled interrupts but
> sys_setsid holds the tasklist_lock with spinlock_irq while calling
> the connector. We can safely move proc_sid_connector from
> __set_special_pids to sys_setsid.
> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>

Looks good, thank you.
Ack.

-- 
	Evgeniy Polyakov

^ permalink raw reply

* Re: [PATCH] dev_alloc_skb: avoid using GFP_ATOMIC
From: Stephen Hemminger @ 2009-09-29 16:58 UTC (permalink / raw)
  To: swalter; +Cc: davem, netdev, swalter, Steven Walter
In-Reply-To: <1254242593-2279-1-git-send-email-swalter@lexmark.com>

On Tue, 29 Sep 2009 12:43:13 -0400
swalter@lexmark.com wrote:

> From: swalter <swalter@swalter-d630.(none)>
> 
> 
> Signed-off-by: Steven Walter <swalter@lpdev.prtdev.lexmark.com>
> ---
>  net/core/skbuff.c |    6 +++++-
>  1 files changed, 5 insertions(+), 1 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 9e0597d..58ec625 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -306,7 +306,11 @@ struct sk_buff *dev_alloc_skb(unsigned int length)
>  	 * There is more code here than it seems:
>  	 * __dev_alloc_skb is an inline
>  	 */
> -	return __dev_alloc_skb(length, GFP_ATOMIC);
> +	if (in_interrupt()  ||  in_atomic()  ||  irqs_disabled()) {
> +		return __dev_alloc_skb(length, GFP_ATOMIC);
> +	} else {
> +		return __dev_alloc_skb(length, GFP_KERNEL);
> +	}
>  }
>  EXPORT_SYMBOL(dev_alloc_skb);
>  

No, this should be fixed by the caller (using netdev_alloc_skb).
Also, it may break cases like swap over NFS that want to get memory
when the memory pool is low.


-- 

^ permalink raw reply

* [PATCH] dev_alloc_skb: avoid using GFP_ATOMIC
From: swalter @ 2009-09-29 16:43 UTC (permalink / raw)
  To: davem, netdev; +Cc: swalter, Steven Walter

From: swalter <swalter@swalter-d630.(none)>


Signed-off-by: Steven Walter <swalter@lpdev.prtdev.lexmark.com>
---
 net/core/skbuff.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9e0597d..58ec625 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -306,7 +306,11 @@ struct sk_buff *dev_alloc_skb(unsigned int length)
 	 * There is more code here than it seems:
 	 * __dev_alloc_skb is an inline
 	 */
-	return __dev_alloc_skb(length, GFP_ATOMIC);
+	if (in_interrupt()  ||  in_atomic()  ||  irqs_disabled()) {
+		return __dev_alloc_skb(length, GFP_ATOMIC);
+	} else {
+		return __dev_alloc_skb(length, GFP_KERNEL);
+	}
 }
 EXPORT_SYMBOL(dev_alloc_skb);
 
-- 
1.6.2.3.g5bbe6


^ permalink raw reply related

* RE: [PATCH 2.6.31-rc9] net: VMware virtual Ethernet NIC driver: vmxnet3
From: Shreyas Bhatewara @ 2009-09-29 16:37 UTC (permalink / raw)
  To: David Miller
  Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	shemminger@linux-foundation.org, jgarzik@pobox.com,
	anthony@codemonkey.ws, chrisw@sous-sol.org, greg@kroah.com,
	akpm@linux-foundation.org,
	virtualization@lists.linux-foundation.org, pv-drivers@vmware.com
In-Reply-To: <20090928.170821.124026517.davem@davemloft.net>

> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Monday, September 28, 2009 5:08 PM
> To: Shreyas Bhatewara
> Cc: linux-kernel@vger.kernel.org; netdev@vger.kernel.org;
> shemminger@linux-foundation.org; jgarzik@pobox.com;
> anthony@codemonkey.ws; chrisw@sous-sol.org; greg@kroah.com; akpm@linux-
> foundation.org; virtualization@lists.linux-foundation.org; pv-
> drivers@vmware.com
> Subject: Re: [PATCH 2.6.31-rc9] net: VMware virtual Ethernet NIC
> driver: vmxnet3
> 
> From: Shreyas Bhatewara <sbhatewara@vmware.com>
> Date: Mon, 28 Sep 2009 16:56:45 -0700
> 
> > +       uint32_t rxdIdx:12;    /* Index of the RxDesc */
> 
> Don't use uint32_t et al. sized types, use "u32" and friends
> throughout.

Sure, I will fix that.

->Shreyas

^ permalink raw reply

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Oleg Nesterov @ 2009-09-29 16:28 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: Scott James Remnant, Evgeniy Polyakov, Linux Kernel, Matt Helsley,
	David S. Miller, netdev
In-Reply-To: <200909291712.33099.borntraeger@de.ibm.com>

On 09/29, Christian Borntraeger wrote:
>
>
> The network code must not be called with disabled interrupts but
> sys_setsid holds the tasklist_lock with spinlock_irq while calling
> the connector. We can safely move proc_sid_connector from
> __set_special_pids to sys_setsid.
>
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>
> ---
>  kernel/exit.c |    4 +---
>  kernel/sys.c  |    2 ++
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> Index: linux-2.6/kernel/exit.c
> ===================================================================
> --- linux-2.6.orig/kernel/exit.c
> +++ linux-2.6/kernel/exit.c
> @@ -359,10 +359,8 @@ void __set_special_pids(struct pid *pid)
>  {
>  	struct task_struct *curr = current->group_leader;
>
> -	if (task_session(curr) != pid) {
> +	if (task_session(curr) != pid)
>  		change_pid(curr, PIDTYPE_SID, pid);
> -		proc_sid_connector(curr);
> -	}
>
>  	if (task_pgrp(curr) != pid)
>  		change_pid(curr, PIDTYPE_PGID, pid);
> Index: linux-2.6/kernel/sys.c
> ===================================================================
> --- linux-2.6.orig/kernel/sys.c
> +++ linux-2.6/kernel/sys.c
> @@ -1110,6 +1110,8 @@ SYSCALL_DEFINE0(setsid)
>  	err = session;
>  out:
>  	write_unlock_irq(&tasklist_lock);
> +	if (err > 0)
> +		proc_sid_connector(sid);
>  	return err;
>  }

Acked-by: Oleg Nesterov <oleg@redhat.com>

I'd suggest you resend this patch to Andrew. Unless you know
another way to push it into Linus's tree ;)

Perhaps it makes sense to update the changelog, it should mention
the issues with daemonize().

Oleg.

^ permalink raw reply

* Re: [PATCH] /proc/net/tcp, overhead removed
From: Stephen Hemminger @ 2009-09-29 15:45 UTC (permalink / raw)
  To: Yakov Lerner; +Cc: Eric Dumazet, netdev, davem
In-Reply-To: <f36b08ee0909290155u177b1983y2eafa8b353a143e0@mail.gmail.com>

On Tue, 29 Sep 2009 11:55:18 +0300
Yakov Lerner <iler.ml@gmail.com> wrote:

> On Tue, Sep 29, 2009 at 10:56, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > Yakov Lerner a écrit :
> > > Take 2.
> > >
> > > "Sharp improvement in performance of /proc/net/tcp when number of
> > > sockets is large and hashsize is large.
> > > O(numsock * hashsize) time becomes O(numsock + hashsize). On slow
> > > processors, speed difference can be x100 and more."
> > >
> > > I must say that I'm not fully satisfied with my choice of "st->sbucket"
> > > for the new preserved index. The better name would be "st->snum".
> > > Re-using "st->sbucket" saves 4 bytes, and keeps the patch to one sourcefile.
> > > But "st->sbucket" has different meaning in OPENREQ and LISTEN states;
> > > this can be confusing.
> > > Maybe better add "snum" member to struct tcp_iter_state ?
> > >
> > > Shall I change subject when sending "take N+1", or keep the old subject ?
> > >
> > > Signed-off-by: Yakov Lerner <iler.ml@gmail.com>
> > > ---
> > >  net/ipv4/tcp_ipv4.c |   35 +++++++++++++++++++++++++++++++++--
> > >  1 files changed, 33 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> > > index 7cda24b..e4c4f19 100644
> > > --- a/net/ipv4/tcp_ipv4.c
> > > +++ b/net/ipv4/tcp_ipv4.c
> > > @@ -1994,13 +1994,14 @@ static inline int empty_bucket(struct tcp_iter_state *st)
> > >               hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
> > >  }
> > >
> > > -static void *established_get_first(struct seq_file *seq)
> > > +static void *established_get_first_after(struct seq_file *seq, int bucket)
> > >  {
> > >       struct tcp_iter_state *st = seq->private;
> > >       struct net *net = seq_file_net(seq);
> > >       void *rc = NULL;
> > >
> > > -     for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
> > > +     for (st->bucket = bucket; st->bucket < tcp_hashinfo.ehash_size;
> > > +          ++st->bucket) {
> > >               struct sock *sk;
> > >               struct hlist_nulls_node *node;
> > >               struct inet_timewait_sock *tw;
> > > @@ -2010,6 +2011,8 @@ static void *established_get_first(struct seq_file *seq)
> > >               if (empty_bucket(st))
> > >                       continue;
> > >
> > > +             st->sbucket = st->num;
> > > +
> > >               spin_lock_bh(lock);
> > >               sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
> > >                       if (sk->sk_family != st->family ||
> > > @@ -2036,6 +2039,11 @@ out:
> > >       return rc;
> > >  }
> > >
> > > +static void *established_get_first(struct seq_file *seq)
> > > +{
> > > +     return established_get_first_after(seq, 0);
> > > +}
> > > +
> > >  static void *established_get_next(struct seq_file *seq, void *cur)
> > >  {
> > >       struct sock *sk = cur;
> > > @@ -2064,6 +2072,9 @@ get_tw:
> > >               while (++st->bucket < tcp_hashinfo.ehash_size &&
> > >                               empty_bucket(st))
> > >                       ;
> > > +
> > > +             st->sbucket = st->num;
> > > +
> > >               if (st->bucket >= tcp_hashinfo.ehash_size)
> > >                       return NULL;
> > >
> > > @@ -2107,6 +2118,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
> > >
> > >       if (!rc) {
> > >               st->state = TCP_SEQ_STATE_ESTABLISHED;
> > > +             st->sbucket = 0;
> > >               rc        = established_get_idx(seq, pos);
> > >       }
> > >
> > > @@ -2116,6 +2128,25 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
> > >  static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
> > >  {
> > >       struct tcp_iter_state *st = seq->private;
> > > +
> > > +     if (*pos && *pos >= st->sbucket &&
> > > +         (st->state == TCP_SEQ_STATE_ESTABLISHED ||
> > > +          st->state == TCP_SEQ_STATE_TIME_WAIT)) {
> > > +             void *cur;
> > > +             int nskip;
> > > +
> > > +             /* for states estab and tw, st->sbucket is index (*pos) */
> > > +             /* corresponding to the beginning of bucket st->bucket */
> > > +
> > > +             st->num = st->sbucket;
> > > +             /* jump to st->bucket, then skip (*pos - st->sbucket) items */
> > > +             st->state = TCP_SEQ_STATE_ESTABLISHED;
> > > +             cur = established_get_first_after(seq, st->bucket);
> > > +             for (nskip = *pos - st->num; cur && nskip > 0; --nskip)
> > > +                     cur = established_get_next(seq, cur);
> > > +             return cur;
> > > +     }
> > > +
> > >       st->state = TCP_SEQ_STATE_LISTENING;
> > >       st->num = 0;
> > >       return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
> >
> > Just in case you are working on "take 3" of the patch, there is a fundamental problem.
> >
> > All the scalability problems come from the fact that tcp_seq_start()
> > *has* to rescan all the tables from the beginning, because of the lseek() capability
> > on the /proc/net/tcp file.
> >
> > We probably could disable llseek() (on other positions than start of the file),
> > and rely only on internal state (listening/established hashtable, hash bucket, position in chain)
> >
> > I cannot imagine how an application could rely on lseek() on >0 position in this file.
> 
> 
> I thought  /proc/net/tcp  can  both  be fast and allow lseek;
> (1) when no lseek was issued since last read
> (we can detect this), /proc/net/tcp can jump to the
> last known bucket (common case), vs
> (2) switch to slow mode (scan from the beginning of hash)
> when lseek was used , no ?

If you look at fib_hash and fib_trie, they already do the same thing.
  * fib_hash records last hash chain to avoid overhead of rescan.
  * fib_trie records last route and does fast lookup to restart from there.


-- 

^ permalink raw reply

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Oleg Nesterov @ 2009-09-29 15:36 UTC (permalink / raw)
  To: Evgeniy Polyakov
  Cc: Christian Borntraeger, Evgeny Polyakov, Scott James Remnant,
	Linux Kernel, Matt Helsley, David S. Miller, netdev
In-Reply-To: <20090929145413.GA26327@ioremap.net>

On 09/29, Evgeniy Polyakov wrote:
>
> On Tue, Sep 29, 2009 at 04:25:38PM +0200, Oleg Nesterov (oleg@redhat.com) wrote:
> > > --- a/kernel/sys.c
> > > +++ b/kernel/sys.c
> > > @@ -1090,6 +1090,7 @@ SYSCALL_DEFINE0(setsid)
> > >  	struct pid *sid = task_pid(group_leader);
> > >  	pid_t session = pid_vnr(sid);
> > >  	int err = -EPERM;
> > > +	int send_cn = 0;
> > >
> > >  	write_lock_irq(&tasklist_lock);
> > >  	/* Fail if I am already a session leader */
> > > @@ -1104,12 +1105,18 @@ SYSCALL_DEFINE0(setsid)
> > >
> > >  	group_leader->signal->leader = 1;
> > >  	__set_special_pids(sid);
> > > +	if (task_session(group_leader) != sid)
> > > +		send_cn = 1;
> >
> > This is not right, task_session(group_leader) must be == sid after
> > __set_special_pids().
>
> Yeah, that check should be done before __set_special_pids().
>
> > And I don't think "int send_cn" is needed. sys_setsid() must not
> > succeed if the caller lived in session == task_pid(group_leader).
>
> Doesn't it only check pgid while patch intention was to send
> notification about sid?

If the proposed sid already was the session id, then pgrp shouldn't
be empty.

but this doesn't really matter, we also check ->signal->leader
(not sure, but afaics this check is not strictly necessary because
 of PIDTYPE_PGID check)

> I.e. setsid() succeeds, although nothing
> happens.

This shouldn't happen, or sys_setsid() is buggy. Look, the new session
id is task_pid(current). If sys_setsid() succeeds but we don't change
the session, this means we were already the leader. In that case we
should return -EPERM.

Oleg.


^ permalink raw reply

* [PATCH] [RFC] IPv4 TCP fails to send window scale option when window scale is zero
From: Gilad Ben-Yossef @ 2009-09-29 15:05 UTC (permalink / raw)
  To: netdev; +Cc: Ori Finkalman

From: Ori Finkalman <ori@comsleep.com>


Acknowledge TCP window scale support by inserting the proper option in 
SYN/ACK header
even if our window scale is zero.


This fixes the following observed behavior:


1. Client sends a SYN with TCP window scaling option and non zero window 
scale value to a Linux box.

2. Linux box notes large receive window from client.

3. Linux decides on a zero value of window scale for its part.

4. Due to a comparison against the requested window scale size option, Linux does
not send the window scale

TCP option header on SYN/ACK at all.


Result:


Client box thinks TCP window scaling is not supported, since SYN/ACK had 
no TCP window scale option,
while Linux thinks that TCP window scaling is supported (and scale might 
be non zero), since SYN had

TCP window scale option and we have a mismatched idea between the client 
and server regarding window sizes.


Please comment and/or apply.


---


Bug reported and patch written by Ori Finkalman from Comsleep Ltd. I'm 
just helping mainline it.


The behavior was observed with a Windows box as the client and latest 
Debian kernel but to the best
of my understanding this can happen with latest kernel versions and 
other client OS (probably also Linux)

as well.



Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
Signed-off-by: Ori Finkelman <ori@comsleep.com>


Index: net/ipv4/tcp_output.c
===================================================================
--- net/ipv4/tcp_output.c    (revision 46)
+++ net/ipv4/tcp_output.c    (revision 210)
@@ -353,6 +353,7 @@ static void tcp_init_nondata_skb(struct
 #define OPTION_SACK_ADVERTISE    (1 << 0)
 #define OPTION_TS        (1 << 1)
 #define OPTION_MD5        (1 << 2)
+#define OPTION_WSCALE        (1 << 3)
 
 struct tcp_out_options {
     u8 options;        /* bit field of OPTION_* */
@@ -417,7 +418,7 @@ static void tcp_options_write(__be32 *pt
                    TCPOLEN_SACK_PERM);
     }
 
-    if (unlikely(opts->ws)) {
+    if (unlikely(OPTION_WSCALE & opts->options)) {
         *ptr++ = htonl((TCPOPT_NOP << 24) |
                    (TCPOPT_WINDOW << 16) |
                    (TCPOLEN_WINDOW << 8) |
@@ -530,8 +531,8 @@ static unsigned tcp_synack_options(struc
 
     if (likely(ireq->wscale_ok)) {
         opts->ws = ireq->rcv_wscale;
-        if(likely(opts->ws))
-            size += TCPOLEN_WSCALE_ALIGNED;
+        opts->options |= OPTION_WSCALE;
+        size += TCPOLEN_WSCALE_ALIGNED;
     }
     if (likely(doing_ts)) {
         opts->options |= OPTION_TS;



-- 
Gilad Ben-Yossef
Chief Coffee Drinker & CTO
Codefidence Ltd.

Web:   http://codefidence.com
Cell:  +972-52-8260388
Skype: gilad_codefidence
Tel:   +972-8-9316883 ext. 201
Fax:   +972-8-9316884
Email: gilad@codefidence.com

Check out our Open Source technology and training blog - http://tuxology.net

	"Now the world has gone to bed
	 Darkness won't engulf my head
	 I can see by infra-red
	 How I hate the night."


^ permalink raw reply

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Christian Borntraeger @ 2009-09-29 15:12 UTC (permalink / raw)
  To: Oleg Nesterov, Scott James Remnant
  Cc: Evgeniy Polyakov, Linux Kernel, Matt Helsley, David S. Miller,
	netdev
In-Reply-To: <20090929144554.GA10937@redhat.com>

Am Dienstag 29 September 2009 16:45:54 schrieb Oleg Nesterov:
> I think Christian's patch only needs the small fixup.

Oleg, Evgeniy,

just in case the discussion concludes that my patch is fine,
here is a fixed version.

[PATCH] connector: Fix sid connector

The sid connector gives the following warning:
Badness at kernel/softirq.c:143
[...]
Call Trace:
([<000000013fe04100>] 0x13fe04100)
 [<000000000048a946>] sk_filter+0x9a/0xd0
 [<000000000049d938>] netlink_broadcast+0x2c0/0x53c
 [<00000000003ba9ae>] cn_netlink_send+0x272/0x2b0
 [<00000000003baef0>] proc_sid_connector+0xc4/0xd4
 [<0000000000142604>] __set_special_pids+0x58/0x90
 [<0000000000159938>] sys_setsid+0xb4/0xd8
 [<00000000001187fe>] sysc_noemu+0x10/0x16
 [<00000041616cb266>] 0x41616cb266

The warning is
--->    WARN_ON_ONCE(in_irq() || irqs_disabled());

The network code must not be called with interrupts disabled, but
sys_setsid holds the tasklist_lock (taken with write_lock_irq) while
calling the connector. We can safely move proc_sid_connector from
__set_special_pids to sys_setsid.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>

---
 kernel/exit.c |    4 +---
 kernel/sys.c  |    2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

Index: linux-2.6/kernel/exit.c
===================================================================
--- linux-2.6.orig/kernel/exit.c
+++ linux-2.6/kernel/exit.c
@@ -359,10 +359,8 @@ void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
 
-	if (task_session(curr) != pid) {
+	if (task_session(curr) != pid)
 		change_pid(curr, PIDTYPE_SID, pid);
-		proc_sid_connector(curr);
-	}
 
 	if (task_pgrp(curr) != pid)
 		change_pid(curr, PIDTYPE_PGID, pid);
Index: linux-2.6/kernel/sys.c
===================================================================
--- linux-2.6.orig/kernel/sys.c
+++ linux-2.6/kernel/sys.c
@@ -1110,6 +1110,8 @@ SYSCALL_DEFINE0(setsid)
 	err = session;
 out:
 	write_unlock_irq(&tasklist_lock);
+	if (err > 0)
+		proc_sid_connector(sid);
 	return err;
 }
 

^ permalink raw reply

* Re: [PATCH v2 0/4] IPVS full NAT support + netfilter 'ipvs' match support
From: Hannes Eder @ 2009-09-29 15:07 UTC (permalink / raw)
  To: Hannes Eder, lvs-devel, Wensong Zhang, Julius Volz, lvs-users,
	Laurent 
In-Reply-To: <20090929145156.GB19797@verge.net.au>

On Tue, Sep 29, 2009 at 16:51, Simon Horman <horms@verge.net.au> wrote:
> On Tue, Sep 29, 2009 at 02:35:15PM +0200, Hannes Eder wrote:
>> The following series implements full NAT support for IPVS.  The
>> approach is via a minimal change to IPVS (make friends with
>> nf_conntrack) and adding a netfilter matcher, kernel- and user-space
>> part, i.e. xt_ipvs and libxt_ipvs.
>
> Its a bit late in the day for me to review the code, but I have a few
> quick comments.
>
>>
>> Example usage:
>>
>> % ipvsadm -A -t 192.168.100.30:80 -s rr
>> % ipvsadm -a -t 192.168.100.30:80 -r 192.168.10.20:80 -m
>> # ...
>>
>> # Source NAT for VIP 192.168.100.30:80
>> % iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
>> > --vport 80 -j SNAT --to-source 192.168.10.10
>>
>> or SNAT-ing only a specific real server:
>>
>> % iptables -t nat -A POSTROUTING --dst 192.168.11.20 \
>> > -m ipvs --vaddr 192.168.100.30/32 -j SNAT --to-source 192.168.10.10
>
> If the iptables rule is not in place does LVS just use
> its old NAT behaviour?

Yes, without iptables rules LVS NAT does DNAT.

>> First of all, thanks for all the feedback.  This is the changelog for v2:
>>
>> - Make ip_vs_ftp work again.  Setup nf_conntrack expectations for
>>   related data connections (based on Julian's patch see
>>   http://www.ssi.bg/~ja/nfct/) and let nf_conntrack/nf_nat do the
>>   packet mangling and the TCP sequence adjusting.
>>
>>   This change rises the question how to deal with ip_vs_sync?  Does it
>>   work together with conntrackd?  Wild idea: what about getting rid of
>>   ip_vs_sync and piggy packing all on nf_conntrack and use conntrackd?
>>
>>   Any comments on this?
>
>    That sounds like a reasonable suggestion.
>
>    I think that ip_vs_sync came along before conntrackd
>    and no one has given much thought to merging the functionality.

Okay, I'll dig further in this direction.

Cheers,
-Hannes

^ permalink raw reply

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Evgeniy Polyakov @ 2009-09-29 14:54 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Christian Borntraeger, Evgeny Polyakov, Scott James Remnant,
	Linux Kernel, Matt Helsley, David S. Miller, netdev
In-Reply-To: <20090929142538.GA10180@redhat.com>

On Tue, Sep 29, 2009 at 04:25:38PM +0200, Oleg Nesterov (oleg@redhat.com) wrote:
> > --- a/kernel/sys.c
> > +++ b/kernel/sys.c
> > @@ -1090,6 +1090,7 @@ SYSCALL_DEFINE0(setsid)
> >  	struct pid *sid = task_pid(group_leader);
> >  	pid_t session = pid_vnr(sid);
> >  	int err = -EPERM;
> > +	int send_cn = 0;
> >
> >  	write_lock_irq(&tasklist_lock);
> >  	/* Fail if I am already a session leader */
> > @@ -1104,12 +1105,18 @@ SYSCALL_DEFINE0(setsid)
> >
> >  	group_leader->signal->leader = 1;
> >  	__set_special_pids(sid);
> > +	if (task_session(group_leader) != sid)
> > +		send_cn = 1;
> 
> This is not right, task_session(group_leader) must be == sid after
> __set_special_pids().

Yeah, that check should be done before __set_special_pids().

> And I don't think "int send_cn" is needed. sys_setsid() must not
> succeed if the caller lived in session == task_pid(group_leader).

Doesn't it only check the pgid, while the patch's intention was to send a
notification about the sid? I.e. setsid() succeeds, although nothing
happens.

-- 
	Evgeniy Polyakov

^ permalink raw reply

* Re: [PATCH v2 0/4] IPVS full NAT support + netfilter 'ipvs' match support
From: Simon Horman @ 2009-09-29 14:51 UTC (permalink / raw)
  To: Hannes Eder
  Cc: lvs-devel, Wensong Zhang, Julius Volz, lvs-users, Laurent Grawet,
	Jean-Luc Fortemaison, linux-kernel, Jan Engelhardt,
	Julian Anastasov, netfilter-devel, netdev, Fabien Duchêne,
	Joseph Mack NA3T, Patrick McHardy
In-Reply-To: <20090929123501.13798.84004.stgit@jazzy.zrh.corp.google.com>

On Tue, Sep 29, 2009 at 02:35:15PM +0200, Hannes Eder wrote:
> The following series implements full NAT support for IPVS.  The
> approach is via a minimal change to IPVS (make friends with
> nf_conntrack) and adding a netfilter matcher, kernel- and user-space
> part, i.e. xt_ipvs and libxt_ipvs.

It's a bit late in the day for me to review the code, but I have a few
quick comments.

> 
> Example usage:
> 
> % ipvsadm -A -t 192.168.100.30:80 -s rr
> % ipvsadm -a -t 192.168.100.30:80 -r 192.168.10.20:80 -m
> # ...
> 
> # Source NAT for VIP 192.168.100.30:80
> % iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
> > --vport 80 -j SNAT --to-source 192.168.10.10
> 
> or SNAT-ing only a specific real server:
> 
> % iptables -t nat -A POSTROUTING --dst 192.168.11.20 \
> > -m ipvs --vaddr 192.168.100.30/32 -j SNAT --to-source 192.168.10.10

If the iptables rule is not in place does LVS just use
its old NAT behaviour?

> First of all, thanks for all the feedback.  This is the changelog for v2:
> 
> - Make ip_vs_ftp work again.  Setup nf_conntrack expectations for
>   related data connections (based on Julian's patch see
>   http://www.ssi.bg/~ja/nfct/) and let nf_conntrack/nf_nat do the
>   packet mangling and the TCP sequence adjusting.
> 
>   This change rises the question how to deal with ip_vs_sync?  Does it
>   work together with conntrackd?  Wild idea: what about getting rid of
>   ip_vs_sync and piggy packing all on nf_conntrack and use conntrackd?
> 
>   Any comments on this?

    That sounds like a reasonable suggestion.

    I think that ip_vs_sync came along before conntrackd
    and no one has given much thought to merging the functionality.

> - xt_ipvs: add new rule '--vportctl port' to match the VIP port of the
>   controlling connection, e.g. port 21 for FTP.  Can be used to match
>   a related data connection for FTP:
> 
>   # SNAT FTP control connection
>   % iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
>   > --vport 21 -j SNAT --to-source 192.168.10.10
>   
>   # SNAT FTP passive data connection
>   % iptables -t nat -A POSTROUTING -m ipvs --vaddr 192.168.100.30/32 \
>   > --vportctl 21 -j SNAT --to-source 192.168.10.10
> 
> - xt_ipvs: use 'par->family' instead of 'skb->protocol'
> 
> - xt_ipvs: add ipvs_mt_check and restrict to NFPROTO_IPV4 and NFPROTO_IPV6
> 
> - Call nf_conntrack_alter_reply(), so helper lookup is performed based
>   on the changed tuple.
> 
> Changes to the linux kernel (rebased to next-20090925):
> 
> Hannes Eder (3):
>       netfilter: xt_ipvs (netfilter matcher for IPVS)
>       IPVS: make friends with nf_conntrack
>       IPVS: make FTP work with full NAT support
> 
> 
>  include/linux/netfilter/xt_ipvs.h |   25 +++++
>  include/net/ip_vs.h               |    2 
>  net/netfilter/Kconfig             |    9 ++
>  net/netfilter/Makefile            |    1 
>  net/netfilter/ipvs/Kconfig        |    4 -
>  net/netfilter/ipvs/ip_vs_app.c    |   43 ---------
>  net/netfilter/ipvs/ip_vs_core.c   |   37 -------
>  net/netfilter/ipvs/ip_vs_ftp.c    |  178 ++++++++++++++++++++++++++++++++---
>  net/netfilter/ipvs/ip_vs_proto.c  |    1 
>  net/netfilter/ipvs/ip_vs_xmit.c   |   30 ++++++
>  net/netfilter/xt_ipvs.c           |  187 +++++++++++++++++++++++++++++++++++++
>  11 files changed, 418 insertions(+), 99 deletions(-)
>  create mode 100644 include/linux/netfilter/xt_ipvs.h
>  create mode 100644 net/netfilter/xt_ipvs.c
> 
> 
> Changes to iptables (relative to 1.4.5):
> 
> Hannes Eder (1):
>       libxt_ipvs: user-space lib for netfilter matcher xt_ipvs
> 
>  configure.ac                      |   11 +
>  extensions/libxt_ipvs.c           |  365 +++++++++++++++++++++++++++++++++++++
>  extensions/libxt_ipvs.man         |   24 ++
>  include/linux/netfilter/xt_ipvs.h |   25 +++
>  4 files changed, 422 insertions(+), 3 deletions(-)
>  create mode 100644 extensions/libxt_ipvs.c
>  create mode 100644 extensions/libxt_ipvs.man
>  create mode 100644 include/linux/netfilter/xt_ipvs.h

^ permalink raw reply

* Re: [PATCH] connector: Fix sid connector (was: Badness at kernel/softirq.c:143...)
From: Oleg Nesterov @ 2009-09-29 14:45 UTC (permalink / raw)
  To: Evgeniy Polyakov
  Cc: Christian Borntraeger, Evgeny Polyakov, Scott James Remnant,
	Linux Kernel, Matt Helsley, David S. Miller, netdev
In-Reply-To: <20090929142538.GA10180@redhat.com>

On 09/29, Oleg Nesterov wrote:
>
> On 09/29, Evgeniy Polyakov wrote:
> >
> > On Tue, Sep 29, 2009 at 03:47:21PM +0200, Christian Borntraeger (borntraeger@de.ibm.com) wrote:
> > > Ok,  can confirm that this patch fixes my problem, but I am not sure if the
> > > intended behaviour is still working as expected.
> >
> > Your patch breaks assumption that task_session(current->group_leader) is
> > not equal to new session id,
>
> Afaics, no.
>
> > --- a/kernel/sys.c
> > +++ b/kernel/sys.c
> > @@ -1090,6 +1090,7 @@ SYSCALL_DEFINE0(setsid)
> >  	struct pid *sid = task_pid(group_leader);
> >  	pid_t session = pid_vnr(sid);
> >  	int err = -EPERM;
> > +	int send_cn = 0;
> >
> >  	write_lock_irq(&tasklist_lock);
> >  	/* Fail if I am already a session leader */
> > @@ -1104,12 +1105,18 @@ SYSCALL_DEFINE0(setsid)
> >
> >  	group_leader->signal->leader = 1;
> >  	__set_special_pids(sid);
> > +	if (task_session(group_leader) != sid)
> > +		send_cn = 1;
>
> This is not right, task_session(group_leader) must be == sid after
> __set_special_pids().
>
> And I don't think "int send_cn" is needed. sys_setsid() must not
> succeed if the caller lived in session == task_pid(group_leader).

IOW, if sys_setsid() succeeds, we know it creates a new unique session,
and we should report this change.

Note this check

	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

before we actually change pids.

I think Christian's patch only needs the small fixup.

Oleg.


^ permalink raw reply

* [PATCH] connector: Provide the sender's credentials to the callback
From: Philipp Reisner @ 2009-09-29 14:48 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-kernel, netdev, Lars Ellenberg, Philipp Reisner
In-Reply-To: <1254235692-1631-2-git-send-email-philipp.reisner@linbit.com>

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 Documentation/connector/cn_test.c      |    2 +-
 Documentation/connector/connector.txt  |    8 ++++----
 drivers/connector/cn_queue.c           |    7 ++++---
 drivers/connector/connector.c          |    4 ++--
 drivers/md/dm-log-userspace-transfer.c |    2 +-
 drivers/staging/dst/dcore.c            |    2 +-
 drivers/staging/pohmelfs/config.c      |    2 +-
 drivers/video/uvesafb.c                |    2 +-
 drivers/w1/w1_netlink.c                |    2 +-
 include/linux/connector.h              |    6 +++---
 10 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index 1711adc..b07add3 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -34,7 +34,7 @@ static char cn_test_name[] = "cn_test";
 static struct sock *nls;
 static struct timer_list cn_test_timer;
 
-static void cn_test_callback(struct cn_msg *msg)
+static void cn_test_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	pr_info("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n",
 	        __func__, jiffies, msg->id.idx, msg->id.val,
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
index 81e6bf6..78c9466 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -23,7 +23,7 @@ handling, etc...  The Connector driver allows any kernelspace agents to use
 netlink based networking for inter-process communication in a significantly
 easier way:
 
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
 void cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask);
 
 struct cb_id
@@ -53,15 +53,15 @@ struct cn_msg
 Connector interfaces.
 /*****************************************/
 
-int cn_add_callback(struct cb_id *id, char *name, void (*callback) (void *));
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
 
  Registers new callback with connector core.
 
  struct cb_id *id		- unique connector's user identifier.
 				  It must be registered in connector.h for legal in-kernel users.
  char *name			- connector's callback symbolic name.
- void (*callback) (void *)	- connector's callback.
-				  Argument must be dereferenced to struct cn_msg *.
+ void (*callback) (struct cn..)	- connector's callback.
+				  cn_msg and the sender's credentials
 
 
 void cn_del_callback(struct cb_id *id);
diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index b4cfac9..163c3e3 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -79,8 +79,9 @@ void cn_queue_wrapper(struct work_struct *work)
 		container_of(work, struct cn_callback_entry, work);
 	struct cn_callback_data *d = &cbq->data;
 	struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(d->skb));
+	struct netlink_skb_parms *nsp = &NETLINK_CB(d->skb);
 
-	d->callback(msg);
+	d->callback(msg, nsp);
 
 	d->destruct_data(d->ddata);
 	d->ddata = NULL;
@@ -90,7 +91,7 @@ void cn_queue_wrapper(struct work_struct *work)
 
 static struct cn_callback_entry *
 cn_queue_alloc_callback_entry(char *name, struct cb_id *id,
-			      void (*callback)(struct cn_msg *))
+			      void (*callback)(struct cn_msg *, struct netlink_skb_parms *))
 {
 	struct cn_callback_entry *cbq;
 
@@ -124,7 +125,7 @@ int cn_cb_equal(struct cb_id *i1, struct cb_id *i2)
 }
 
 int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id,
-			  void (*callback)(struct cn_msg *))
+			  void (*callback)(struct cn_msg *, struct netlink_skb_parms *))
 {
 	struct cn_callback_entry *cbq, *__cbq;
 	int found = 0;
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index fc9887f..e59f0ab 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -269,7 +269,7 @@ static void cn_notify(struct cb_id *id, u32 notify_event)
  * May sleep.
  */
 int cn_add_callback(struct cb_id *id, char *name,
-		    void (*callback)(struct cn_msg *))
+		    void (*callback)(struct cn_msg *, struct netlink_skb_parms *))
 {
 	int err;
 	struct cn_dev *dev = &cdev;
@@ -351,7 +351,7 @@ static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2)
  *
  * Used for notification of a request's processing.
  */
-static void cn_callback(struct cn_msg *msg)
+static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	struct cn_ctl_msg *ctl;
 	struct cn_ctl_entry *ent;
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index ba0edad..556131f 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -129,7 +129,7 @@ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
  * This is the connector callback that delivers data
  * that was sent from userspace.
  */
-static void cn_ulog_callback(void *data)
+static void cn_ulog_callback(void *data, struct netlink_skb_parms *nsp)
 {
 	struct cn_msg *msg = (struct cn_msg *)data;
 	struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index ac85773..3943c91 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -847,7 +847,7 @@ static dst_command_func dst_commands[] = {
 /*
  * Configuration parser.
  */
-static void cn_dst_callback(struct cn_msg *msg)
+static void cn_dst_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	struct dst_ctl *ctl;
 	int err;
diff --git a/drivers/staging/pohmelfs/config.c b/drivers/staging/pohmelfs/config.c
index 90f962e..c9162b3 100644
--- a/drivers/staging/pohmelfs/config.c
+++ b/drivers/staging/pohmelfs/config.c
@@ -527,7 +527,7 @@ out_unlock:
 	return err;
 }
 
-static void pohmelfs_cn_callback(struct cn_msg *msg)
+static void pohmelfs_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	int err;
 
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index e98baf6..aa7cd95 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -67,7 +67,7 @@ static DEFINE_MUTEX(uvfb_lock);
  * find the kernel part of the task struct, copy the registers and
  * the buffer contents and then complete the task.
  */
-static void uvesafb_cn_callback(struct cn_msg *msg)
+static void uvesafb_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	struct uvesafb_task *utask;
 	struct uvesafb_ktask *task;
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 52ccb3d..45c126f 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -306,7 +306,7 @@ static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rm
 	return error;
 }
 
-static void w1_cn_callback(struct cn_msg *msg)
+static void w1_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
 	struct w1_netlink_msg *m = (struct w1_netlink_msg *)(msg + 1);
 	struct w1_netlink_cmd *cmd;
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 05a7a14..545728e 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -136,7 +136,7 @@ struct cn_callback_data {
 	void *ddata;
 
 	struct sk_buff *skb;
-	void (*callback) (struct cn_msg *);
+	void (*callback) (struct cn_msg *, struct netlink_skb_parms *);
 
 	void *free;
 };
@@ -167,11 +167,11 @@ struct cn_dev {
 	struct cn_queue_dev *cbdev;
 };
 
-int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *));
+int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
 void cn_del_callback(struct cb_id *);
 int cn_netlink_send(struct cn_msg *, u32, gfp_t);
 
-int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *));
+int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
 void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id);
 
 int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work);
-- 
1.6.0.4


^ permalink raw reply related

* [PATCH] connector: Removed the destruct_data callback since it is always kfree_skb()
From: Philipp Reisner @ 2009-09-29 14:48 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-kernel, netdev, Lars Ellenberg, Philipp Reisner
In-Reply-To: <1254235692-1631-4-git-send-email-philipp.reisner@linbit.com>

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/connector/cn_queue.c  |    4 ++--
 drivers/connector/connector.c |   11 +++--------
 include/linux/connector.h     |    3 ---
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index 163c3e3..210338e 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -83,8 +83,8 @@ void cn_queue_wrapper(struct work_struct *work)
 
 	d->callback(msg, nsp);
 
-	d->destruct_data(d->ddata);
-	d->ddata = NULL;
+	kfree_skb(d->skb);
+	d->skb = NULL;
 
 	kfree(d->free);
 }
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index e59f0ab..f060246 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -129,7 +129,7 @@ EXPORT_SYMBOL_GPL(cn_netlink_send);
 /*
  * Callback helper - queues work and setup destructor for given data.
  */
-static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *), void *data)
+static int cn_call_callback(struct sk_buff *skb)
 {
 	struct cn_callback_entry *__cbq, *__new_cbq;
 	struct cn_dev *dev = &cdev;
@@ -140,12 +140,9 @@ static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *),
 	list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) {
 		if (cn_cb_equal(&__cbq->id.id, &msg->id)) {
 			if (likely(!work_pending(&__cbq->work) &&
-					__cbq->data.ddata == NULL)) {
+					__cbq->data.skb == NULL)) {
 				__cbq->data.skb = skb;
 
-				__cbq->data.ddata = data;
-				__cbq->data.destruct_data = destruct_data;
-
 				if (queue_cn_work(__cbq, &__cbq->work))
 					err = 0;
 				else
@@ -159,8 +156,6 @@ static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *),
 					d = &__new_cbq->data;
 					d->skb = skb;
 					d->callback = __cbq->data.callback;
-					d->ddata = data;
-					d->destruct_data = destruct_data;
 					d->free = __new_cbq;
 
 					__new_cbq->pdev = __cbq->pdev;
@@ -208,7 +203,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
 			return;
 		}
 
-		err = cn_call_callback(skb, (void (*)(void *))kfree_skb, skb);
+		err = cn_call_callback(skb);
 		if (err < 0)
 			kfree_skb(skb);
 	}
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 545728e..3a14615 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -132,9 +132,6 @@ struct cn_callback_id {
 };
 
 struct cn_callback_data {
-	void (*destruct_data) (void *);
-	void *ddata;
-
 	struct sk_buff *skb;
 	void (*callback) (struct cn_msg *, struct netlink_skb_parms *);
 
-- 
1.6.0.4

^ permalink raw reply related

* [PATCH] connector/dm: Fixed a compilation warning
From: Philipp Reisner @ 2009-09-29 14:48 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-kernel, netdev, Lars Ellenberg, Philipp Reisner
In-Reply-To: <1254235692-1631-3-git-send-email-philipp.reisner@linbit.com>

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/md/dm-log-userspace-transfer.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 556131f..1327e1a 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -129,9 +129,8 @@ static int fill_pkg(struct cn_msg *msg, struct dm_ulog_request *tfr)
  * This is the connector callback that delivers data
  * that was sent from userspace.
  */
-static void cn_ulog_callback(void *data, struct netlink_skb_parms *nsp)
+static void cn_ulog_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
 {
-	struct cn_msg *msg = (struct cn_msg *)data;
 	struct dm_ulog_request *tfr = (struct dm_ulog_request *)(msg + 1);
 
 	spin_lock(&receiving_list_lock);
-- 
1.6.0.4

^ permalink raw reply related

* [PATCH] connector: Keep the skb in cn_callback_data
From: Philipp Reisner @ 2009-09-29 14:48 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-kernel, netdev, Lars Ellenberg, Philipp Reisner
In-Reply-To: <1254235692-1631-1-git-send-email-philipp.reisner@linbit.com>

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Acked-by: Lars Ellenberg <lars.ellenberg@linbit.com>
---
 drivers/connector/cn_queue.c  |    3 ++-
 drivers/connector/connector.c |   11 +++++------
 include/linux/connector.h     |    4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index 4a1dfe1..b4cfac9 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -78,8 +78,9 @@ void cn_queue_wrapper(struct work_struct *work)
 	struct cn_callback_entry *cbq =
 		container_of(work, struct cn_callback_entry, work);
 	struct cn_callback_data *d = &cbq->data;
+	struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(d->skb));
 
-	d->callback(d->callback_priv);
+	d->callback(msg);
 
 	d->destruct_data(d->ddata);
 	d->ddata = NULL;
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 74f52af..fc9887f 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -129,10 +129,11 @@ EXPORT_SYMBOL_GPL(cn_netlink_send);
 /*
  * Callback helper - queues work and setup destructor for given data.
  */
-static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), void *data)
+static int cn_call_callback(struct sk_buff *skb, void (*destruct_data)(void *), void *data)
 {
 	struct cn_callback_entry *__cbq, *__new_cbq;
 	struct cn_dev *dev = &cdev;
+	struct cn_msg *msg = NLMSG_DATA(nlmsg_hdr(skb));
 	int err = -ENODEV;
 
 	spin_lock_bh(&dev->cbdev->queue_lock);
@@ -140,7 +141,7 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
 		if (cn_cb_equal(&__cbq->id.id, &msg->id)) {
 			if (likely(!work_pending(&__cbq->work) &&
 					__cbq->data.ddata == NULL)) {
-				__cbq->data.callback_priv = msg;
+				__cbq->data.skb = skb;
 
 				__cbq->data.ddata = data;
 				__cbq->data.destruct_data = destruct_data;
@@ -156,7 +157,7 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
 				__new_cbq = kzalloc(sizeof(struct cn_callback_entry), GFP_ATOMIC);
 				if (__new_cbq) {
 					d = &__new_cbq->data;
-					d->callback_priv = msg;
+					d->skb = skb;
 					d->callback = __cbq->data.callback;
 					d->ddata = data;
 					d->destruct_data = destruct_data;
@@ -191,7 +192,6 @@ static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), v
  */
 static void cn_rx_skb(struct sk_buff *__skb)
 {
-	struct cn_msg *msg;
 	struct nlmsghdr *nlh;
 	int err;
 	struct sk_buff *skb;
@@ -208,8 +208,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
 			return;
 		}
 
-		msg = NLMSG_DATA(nlh);
-		err = cn_call_callback(msg, (void (*)(void *))kfree_skb, skb);
+		err = cn_call_callback(skb, (void (*)(void *))kfree_skb, skb);
 		if (err < 0)
 			kfree_skb(skb);
 	}
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 47ebf41..05a7a14 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -134,8 +134,8 @@ struct cn_callback_id {
 struct cn_callback_data {
 	void (*destruct_data) (void *);
 	void *ddata;
-	
-	void *callback_priv;
+
+	struct sk_buff *skb;
 	void (*callback) (struct cn_msg *);
 
 	void *free;
-- 
1.6.0.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox