* [PATCH] xfrm: SAD entries do not expire after suspend-resume
@ 2009-05-11 18:21 Yury Polyanskiy
2009-05-11 18:30 ` Peter Zijlstra
0 siblings, 1 reply; 7+ messages in thread
From: Yury Polyanskiy @ 2009-05-11 18:21 UTC (permalink / raw)
To: netdev, davem, kuznet, yoshfuji, peterz, mingo
[-- Attachment #1: Type: text/plain, Size: 6298 bytes --]
This fixes the following bug in the current implementation of
net/xfrm: if SAD entry expires in 8 hr and the kernel is put into
suspend for 10hr then upon wakeup SAD will not expire until 8 more
hours pass. This, of course, leads to connectivity problems because
the corresponding entry has already expired on the remote end.
The patch is against 2.6.26.8 (applies to 2.6.26 cleanly) for two
reasons:
(1) that is what I am testing it on;
(2) it can not be ported to anything post 2.6.28-rc7 because hrtimer's
callbacks are run in hardirq context ever since. This would require a
major rewrite of xfrm_state's locking (i.e. replacing spin_lock_bh()
with spin_lock_irqsave() at least) which is (a) outside of my competence
and (b) will introduce excessive irq-disabled codepaths.
Due to (2) I am copying the authors of the hrtimer's patch. Unless
there is an alternative (to hrtimer_start) way of requesting a
CLOCK_REALTIME softirq callback the only solution I could think of is
to hook into PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the
timers on xfrm_state_all list to go off after resume.
I hope this is still useful for some.
Best,
Yury Polyanskiy.
--
include/net/xfrm.h | 2 +-
net/xfrm/xfrm_state.c | 43
++++++++++++++++++++++++++++++------------- 2 files changed, 31
insertions(+), 14 deletions(-)
diff -urpd linux-2.6.26.8/include/net/xfrm.h linux-2.6.26.8up1/include/net/xfrm.h
--- linux-2.6.26.8/include/net/xfrm.h 2008-08-20 14:11:37.000000000 -0400
+++ linux-2.6.26.8up1/include/net/xfrm.h 2009-05-10 21:24:12.000000000 -0400
@@ -196,7 +196,7 @@ struct xfrm_state
struct xfrm_stats stats;
struct xfrm_lifetime_cur curlft;
- struct timer_list timer;
+ struct hrtimer mtimer;
/* Last used time */
unsigned long lastused;
diff -urpd linux-2.6.26.8/net/xfrm/xfrm_state.c linux-2.6.26.8up1/net/xfrm/xfrm_state.c
--- linux-2.6.26.8/net/xfrm/xfrm_state.c 2009-05-10 22:40:51.000000000 -0400
+++ linux-2.6.26.8up1/net/xfrm/xfrm_state.c 2009-05-11 01:28:56.000000000 -0400
@@ -21,6 +21,9 @@
#include <linux/cache.h>
#include <linux/audit.h>
#include <asm/uaccess.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
#include "xfrm_hash.h"
@@ -380,7 +383,7 @@ static void xfrm_put_mode(struct xfrm_mo
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
- del_timer_sync(&x->timer);
+ hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
kfree(x->aalg);
kfree(x->ealg);
@@ -426,9 +429,9 @@ static inline unsigned long make_jiffies
return secs*HZ;
}
-static void xfrm_timer_handler(unsigned long data)
+static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
{
- struct xfrm_state *x = (struct xfrm_state*)data;
+ struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
@@ -478,8 +481,16 @@ static void xfrm_timer_handler(unsigned
if (warn)
km_state_expired(x, 0, 0);
resched:
- if (next != LONG_MAX)
- mod_timer(&x->timer, jiffies + make_jiffies(next));
+ if (next != LONG_MAX){
+ /* Note: here we could use a less expensive:
+ *
+ * hrtimer_start(&x->mtimer, ktime_set(now + next, 0), HRTIMER_MODE_ABS);
+ *
+ * but this would depend on ktime_get_real() and get_seconds() using
+ * the same clock source and might break (?) in the future.
+ */
+ hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
+ }
goto out;
@@ -501,6 +512,7 @@ expired:
out:
spin_unlock(&x->lock);
+ return HRTIMER_NORESTART;
}
static void xfrm_replay_timer_handler(unsigned long data);
@@ -518,7 +530,9 @@ struct xfrm_state *xfrm_state_alloc(void
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
- setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
+ hrtimer_init(&x->mtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ x->mtimer.function = xfrm_timer_handler;
+ x->mtimer.cb_mode = HRTIMER_CB_SOFTIRQ;
setup_timer(&x->rtimer, xfrm_replay_timer_handler,
(unsigned long)x);
x->curlft.add_time = get_seconds();
@@ -866,8 +880,7 @@ xfrm_state_find(xfrm_address_t *daddr, x
hlist_add_head(&x->byspi, xfrm_state_byspi+h);
}
x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
- x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
- add_timer(&x->timer);
+ hrtimer_start(&x->mtimer, ktime_set(sysctl_xfrm_acq_expires, 0), HRTIMER_MODE_REL);
xfrm_state_num++;
xfrm_hash_grow_check(x->bydst.next != NULL);
} else {
@@ -942,7 +955,7 @@ static void __xfrm_state_insert(struct x
hlist_add_head(&x->byspi, xfrm_state_byspi+h);
}
- mod_timer(&x->timer, jiffies + HZ);
+ hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1053,8 +1066,7 @@ static struct xfrm_state *__find_acq_cor
x->props.reqid = reqid;
x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
xfrm_state_hold(x);
- x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
- add_timer(&x->timer);
+ hrtimer_start(&x->mtimer, ktime_set(sysctl_xfrm_acq_expires, 0), HRTIMER_MODE_REL);
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
h = xfrm_src_hash(daddr, saddr, family);
hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
@@ -1330,8 +1342,8 @@ out:
memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
x1->km.dying = 0;
-
- mod_timer(&x1->timer, jiffies + HZ);
+
+ hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
if (x1->curlft.use_time)
xfrm_state_check_expire(x1);
@@ -1356,7 +1368,10 @@ int xfrm_state_check_expire(struct xfrm_
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
x->km.state = XFRM_STATE_EXPIRED;
- mod_timer(&x->timer, jiffies);
+ /* TODO: analyze if dropping x->lock is possible,
+ * in which case we might do hrtimer_cancel(); x->mtimer.function();
+ */
+ hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL);
return -EINVAL;
}
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfrm: SAD entries do not expire after suspend-resume
2009-05-11 18:21 [PATCH] xfrm: SAD entries do not expire after suspend-resume Yury Polyanskiy
@ 2009-05-11 18:30 ` Peter Zijlstra
2009-05-11 20:07 ` Yury Polyanskiy
0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2009-05-11 18:30 UTC (permalink / raw)
To: Yury Polyanskiy; +Cc: netdev, davem, kuznet, yoshfuji, mingo, Rafael J. Wysocki
On Mon, 2009-05-11 at 14:21 -0400, Yury Polyanskiy wrote:
> This fixes the following bug in the current implementation of
> net/xfrm: if SAD entry expires in 8 hr and the kernel is put into
> suspend for 10hr then upon wakeup SAD will not expire until 8 more
> hours pass. This, of course, leads to connectivity problems because
> the corresponding entry has already expired on the remote end.
>
> The patch is against 2.6.26.8 (applies to 2.6.26 cleanly) for two
> reasons:
>
> (1) that is what I am testing it on;
>
> (2) it can not be ported to anything post 2.6.28-rc7 because hrtimer's
> callbacks are run in hardirq context ever since. This would require a
> major rewrite of xfrm_state's locking (i.e. replacing spin_lock_bh()
> with spin_lock_irqsave() at least) which is (a) outside of my competence
> and (b) will introduce excessive irq-disabled codepaths.
>
> Due to (2) I am copying the authors of the hrtimer's patch. Unless
> there is an alternative (to hrtimer_start) way of requesting a
> CLOCK_REALTIME softirq callback the only solution I could think of is
> to hook into PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the
> timers on xfrm_state_all list to go off after resume.
Given that the whole problem is suspend related, this last option sounds
like the best thing.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfrm: SAD entries do not expire after suspend-resume
2009-05-11 18:30 ` Peter Zijlstra
@ 2009-05-11 20:07 ` Yury Polyanskiy
2009-05-11 20:25 ` Peter Zijlstra
0 siblings, 1 reply; 7+ messages in thread
From: Yury Polyanskiy @ 2009-05-11 20:07 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: netdev, davem, kuznet, yoshfuji, mingo, Rafael J. Wysocki
On Mon, May 11, 2009 at 2:30 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Mon, 2009-05-11 at 14:21 -0400, Yury Polyanskiy wrote:
>> (2) it can not be ported to anything post 2.6.28-rc7 because hrtimer's
>> callbacks are run in hardirq context ever since. This would require a
>> major rewrite of xfrm_state's locking (i.e. replacing spin_lock_bh()
>> with spin_lock_irqsave() at least) which is (a) outside of my competence
>> and (b) will introduce excessive irq-disabled codepaths.
>>
>> Due to (2) I am copying the authors of the hrtimer's patch. Unless
>> there is an alternative (to hrtimer_start) way of requesting a
>> CLOCK_REALTIME softirq callback the only solution I could think of is
>> to hook into PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the
>> timers on xfrm_state_all list to go off after resume.
>
> Given that the whole problem is suspend related, this last option sounds
> like the best thing.
>
Can somebody from the Networking Team please confirm that the other
sources of time leaps can indeed be neglected? (such as ntp
corrections e.g.)
Best,
YP
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfrm: SAD entries do not expire after suspend-resume
2009-05-11 20:07 ` Yury Polyanskiy
@ 2009-05-11 20:25 ` Peter Zijlstra
2009-05-11 20:50 ` Yury Polyanskiy
0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2009-05-11 20:25 UTC (permalink / raw)
To: Yury Polyanskiy
Cc: netdev, davem, kuznet, yoshfuji, mingo, Rafael J. Wysocki,
Thomas Gleixner, john stultz
On Mon, 2009-05-11 at 16:07 -0400, Yury Polyanskiy wrote:
> On Mon, May 11, 2009 at 2:30 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Mon, 2009-05-11 at 14:21 -0400, Yury Polyanskiy wrote:
>
> >> (2) it can not be ported to anything post 2.6.28-rc7 because hrtimer's
> >> callbacks are run in hardirq context ever since. This would require a
> >> major rewrite of xfrm_state's locking (i.e. replacing spin_lock_bh()
> >> with spin_lock_irqsave() at least) which is (a) outside of my competence
> >> and (b) will introduce excessive irq-disabled codepaths.
> >>
> >> Due to (2) I am copying the authors of the hrtimer's patch. Unless
> >> there is an alternative (to hrtimer_start) way of requesting a
> >> CLOCK_REALTIME softirq callback the only solution I could think of is
> >> to hook into PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the
> >> timers on xfrm_state_all list to go off after resume.
> >
> > Given that the whole problem is suspend related, this last option sounds
> > like the best thing.
> >
>
> Can somebody from the Networking Team please confirm that the other
> sources of time leaps can indeed be neglected? (such as ntp
> corrections e.g.)
ntp time adjustments are very fine grained and should not distort time.
Setting the system clock otoh might screw you over though.
What I'm not quite getting is though, if we have a real-time timer 8h in
the future, and we suspend for 10h, the timer should fire the moment we
resume and readjust the clock, finding this -2h expired timer.
Since they're realtime timers and we don't know what they're used for,
we cannot handle this time lapse in the generic code, hence its users
are stuck with dealing with this.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfrm: SAD entries do not expire after suspend-resume
2009-05-11 20:25 ` Peter Zijlstra
@ 2009-05-11 20:50 ` Yury Polyanskiy
2009-05-11 21:12 ` Peter Zijlstra
0 siblings, 1 reply; 7+ messages in thread
From: Yury Polyanskiy @ 2009-05-11 20:50 UTC (permalink / raw)
To: Peter Zijlstra
Cc: netdev, davem, kuznet, yoshfuji, mingo, Rafael J. Wysocki,
Thomas Gleixner, john stultz
[-- Attachment #1: Type: text/plain, Size: 1896 bytes --]
On Mon, 11 May 2009 22:25:57 +0200
Peter Zijlstra <peterz@infradead.org> wrote:
> > >> Due to (2) I am copying the authors of the hrtimer's patch.
> > >> Unless there is an alternative (to hrtimer_start) way of
> > >> requesting a CLOCK_REALTIME softirq callback the only solution I
> > >> could think of is to hook into
> > >> PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the timers
> > >> on xfrm_state_all list to go off after resume.
> > >
> > > Given that the whole problem is suspend related, this last option
> > > sounds like the best thing.
> > >
> >
> > Can somebody from the Networking Team please confirm that the other
> > sources of time leaps can indeed be neglected? (such as ntp
> > corrections e.g.)
>
> ntp time adjustments are very fine grained and should not distort
> time. Setting the system clock otoh might screw you over though.
Isn't ntp sometimes used for initial clock setting? (i.e. host boots
with sysclock set to 1971 and then ntp makes it to the correct 2009).
Another reason for not doing it pm_notify()-wise is to reduce the
amount of resume code running in the system: why run a user-specific
O(N) post-suspend code if there is already an O(1) one in the
hres_timers_resume()?
>
> What I'm not quite getting is though, if we have a real-time timer 8h
> in the future, and we suspend for 10h, the timer should fire the
> moment we resume and readjust the clock, finding this -2h expired
> timer.
>
> Since they're realtime timers and we don't know what they're used for,
> we cannot handle this time lapse in the generic code, hence its users
> are stuck with dealing with this.
>
The problem is that the current code simply arms a usual timer_list
timer, which remains stopped during suspend. So how do you set up a
CLOCK_REALTIME timer w/o using hrtimer_start()?
Thanks in advance,
Yury.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfrm: SAD entries do not expire after suspend-resume
2009-05-11 20:50 ` Yury Polyanskiy
@ 2009-05-11 21:12 ` Peter Zijlstra
2009-05-11 21:31 ` john stultz
0 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2009-05-11 21:12 UTC (permalink / raw)
To: Yury Polyanskiy
Cc: netdev, davem, kuznet, yoshfuji, mingo, Rafael J. Wysocki,
Thomas Gleixner, john stultz
On Mon, 2009-05-11 at 16:50 -0400, Yury Polyanskiy wrote:
> On Mon, 11 May 2009 22:25:57 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
>
> > > >> Due to (2) I am copying the authors of the hrtimer's patch.
> > > >> Unless there is an alternative (to hrtimer_start) way of
> > > >> requesting a CLOCK_REALTIME softirq callback the only solution I
> > > >> could think of is to hook into
> > > >> PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the timers
> > > >> on xfrm_state_all list to go off after resume.
> > > >
> > > > Given that the whole problem is suspend related, this last option
> > > > sounds like the best thing.
> > > >
> > >
> > > Can somebody from the Networking Team please confirm that the other
> > > sources of time leaps can indeed be neglected? (such as ntp
> > > corrections e.g.)
> >
> > ntp time adjustments are very fine grained and should not distort
> > time. Setting the system clock otoh might screw you over though.
>
> Isn't ntp sometimes used for initial clock setting? (i.e. host boots
> with sysclock set to 1971 and then ntp makes it to the correct 2009).
ntp (as in the userspace software bits) would use something like
sys_settimeofday() to move the clock, after that they use sys_adjtimex()
to keep in sync.
> Another reason for not doing it pm_notify()-wise is to reduce the
> amount of resume code running in the system: why run a user-specific
> O(N) post-suspend code if there is already an O(1) one in the
> hres_timers_resume()?
>
> >
> > What I'm not quite getting is though, if we have a real-time timer 8h
> > in the future, and we suspend for 10h, the timer should fire the
> > moment we resume and readjust the clock, finding this -2h expired
> > timer.
> >
> > Since they're realtime timers and we don't know what they're used for,
> > we cannot handle this time lapse in the generic code, hence its users
> > are stuck with dealing with this.
> >
>
>
> The problem is that the current code simply arms a usual timer_list
> timer, which remains stopped during suspend. So how do you set up a
> CLOCK_REALTIME timer w/o using hrtimer_start()?
Ah, right. Well use hrtimers, just don't run all that stuff you used to
do in softirq context in it. Use it to queue a worklet somewhere or wake
a thread.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] xfrm: SAD entries do not expire after suspend-resume
2009-05-11 21:12 ` Peter Zijlstra
@ 2009-05-11 21:31 ` john stultz
0 siblings, 0 replies; 7+ messages in thread
From: john stultz @ 2009-05-11 21:31 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Yury Polyanskiy, netdev, davem, kuznet, yoshfuji, mingo,
Rafael J. Wysocki, Thomas Gleixner
On Mon, 2009-05-11 at 23:12 +0200, Peter Zijlstra wrote:
> On Mon, 2009-05-11 at 16:50 -0400, Yury Polyanskiy wrote:
> > On Mon, 11 May 2009 22:25:57 +0200
> > Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > > > >> Due to (2) I am copying the authors of the hrtimer's patch.
> > > > >> Unless there is an alternative (to hrtimer_start) way of
> > > > >> requesting a CLOCK_REALTIME softirq callback the only solution I
> > > > >> could think of is to hook into
> > > > >> PM_POST_HIBERNATION+PM_POST_SUSPEND and force all of the timers
> > > > >> on xfrm_state_all list to go off after resume.
> > > > >
> > > > > Given that the whole problem is suspend related, this last option
> > > > > sounds like the best thing.
> > > > >
> > > >
> > > > Can somebody from the Networking Team please confirm that the other
> > > > sources of time leaps can indeed be neglected? (such as ntp
> > > > corrections e.g.)
> > >
> > > ntp time adjustments are very fine grained and should not distort
> > > time. Setting the system clock otoh might screw you over though.
> >
> > Isn't ntp sometimes used for initial clock setting? (i.e. host boots
> > with sysclock set to 1971 and then ntp makes it to the correct 2009).
>
> ntp (as in the userspace software bits) would use something like
> sys_settimeofday() to move the clock, after that they use sys_adjtimex()
> to keep in sync.
ntp will use settimeofday() to set the clock in the following
conditions:
1) If the step-tickers option is used, it will set the clock at bootup.
2) If the time offset becomes greater then the slew boundary (0.128s)
Otherwise it will slew the clock by making a freq adjustments via
adjtimex().
thanks
-john
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2009-05-11 21:31 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-05-11 18:21 [PATCH] xfrm: SAD entries do not expire after suspend-resume Yury Polyanskiy
2009-05-11 18:30 ` Peter Zijlstra
2009-05-11 20:07 ` Yury Polyanskiy
2009-05-11 20:25 ` Peter Zijlstra
2009-05-11 20:50 ` Yury Polyanskiy
2009-05-11 21:12 ` Peter Zijlstra
2009-05-11 21:31 ` john stultz
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).