* [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field
@ 2025-06-27 10:50 Sebastian Andrzej Siewior
2025-07-03 7:55 ` Paolo Abeni
0 siblings, 1 reply; 5+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-06-27 10:50 UTC (permalink / raw)
To: linux-ppp, netdev, linux-rt-devel
Cc: David S. Miller, Andrew Lunn, Clark Williams, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Steven Rostedt, Thomas Gleixner,
Gao Feng, Guillaume Nault
The per-CPU variable ppp::xmit_recursion is protecting against recursion
due to wrong configuration of the ppp channels. The per-CPU variable
relies on disabled BH for its locking. Without per-CPU locking in
local_bh_disable() on PREEMPT_RT this data structure requires explicit
locking.
The ppp::xmit_recursion is used as a per-CPU boolean. The counter is
checked early in the send routine and the transmit path is only entered
if the counter is zero. Then the counter is incremented to avoid
recursion. It is used to detect recursion on channel::downl and
ppp::wlock.
Replace the per-CPU ppp:xmit_recursion counter with an explicit owner
field for both structs.
pch_downl_lock() is a helper to check for recursion on channel::downl
and assign the owner field if there is no recursion.
__ppp_channel_push() is moved into ppp_channel_push() and gets the
recursion check unconditionally because it is based on the lock now.
The recursion check in ppp_xmit_process() is based on ppp::wlock which
is acquired by ppp_xmit_lock(). The locking is moved from
__ppp_xmit_process() into ppp_xmit_lock() to check the owner, lock and
then assign the owner in one spot.
The local_bh_disable() in ppp_xmit_process() can be removed because
ppp_xmit_lock() disables BH as part of the locking.
Cc: Gao Feng <gfree.wind@vip.163.com>
Cc: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/net/ppp/ppp_generic.c | 94 ++++++++++++++++-------------------
1 file changed, 44 insertions(+), 50 deletions(-)
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index def84e87e05b2..d7b10d60c5d08 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -132,7 +132,7 @@ struct ppp {
int n_channels; /* how many channels are attached 54 */
spinlock_t rlock; /* lock for receive side 58 */
spinlock_t wlock; /* lock for transmit side 5c */
- int __percpu *xmit_recursion; /* xmit recursion detect */
+ struct task_struct *wlock_owner;/* xmit recursion detect */
int mru; /* max receive unit 60 */
unsigned int flags; /* control bits 64 */
unsigned int xstate; /* transmit state bits 68 */
@@ -186,6 +186,7 @@ struct channel {
struct ppp_channel *chan; /* public channel data structure */
struct rw_semaphore chan_sem; /* protects `chan' during chan ioctl */
spinlock_t downl; /* protects `chan', file.xq dequeue */
+ struct task_struct *downl_owner;/* xmit recursion detect */
struct ppp *ppp; /* ppp unit we're connected to */
struct net *chan_net; /* the net channel belongs to */
netns_tracker ns_tracker;
@@ -391,6 +392,24 @@ static const int npindex_to_ethertype[NUM_NP] = {
#define ppp_unlock(ppp) do { ppp_recv_unlock(ppp); \
ppp_xmit_unlock(ppp); } while (0)
+static bool pch_downl_lock(struct channel *pch, struct ppp *ppp)
+{
+ if (pch->downl_owner == current) {
+ if (net_ratelimit())
+ netdev_err(ppp->dev, "recursion detected\n");
+ return false;
+ }
+ spin_lock(&pch->downl);
+ pch->downl_owner = current;
+ return true;
+}
+
+static void pch_downl_unlock(struct channel *pch)
+{
+ pch->downl_owner = NULL;
+ spin_unlock(&pch->downl);
+}
+
/*
* /dev/ppp device routines.
* The /dev/ppp device is used by pppd to control the ppp unit.
@@ -1246,7 +1265,6 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
struct ppp *ppp = netdev_priv(dev);
int indx;
int err;
- int cpu;
ppp->dev = dev;
ppp->ppp_net = src_net;
@@ -1262,14 +1280,6 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
spin_lock_init(&ppp->rlock);
spin_lock_init(&ppp->wlock);
- ppp->xmit_recursion = alloc_percpu(int);
- if (!ppp->xmit_recursion) {
- err = -ENOMEM;
- goto err1;
- }
- for_each_possible_cpu(cpu)
- (*per_cpu_ptr(ppp->xmit_recursion, cpu)) = 0;
-
#ifdef CONFIG_PPP_MULTILINK
ppp->minseq = -1;
skb_queue_head_init(&ppp->mrq);
@@ -1281,15 +1291,11 @@ static int ppp_dev_configure(struct net *src_net, struct net_device *dev,
err = ppp_unit_register(ppp, conf->unit, conf->ifname_is_set);
if (err < 0)
- goto err2;
+ return err;
conf->file->private_data = &ppp->file;
return 0;
-err2:
- free_percpu(ppp->xmit_recursion);
-err1:
- return err;
}
static const struct nla_policy ppp_nl_policy[IFLA_PPP_MAX + 1] = {
@@ -1660,7 +1666,6 @@ static void ppp_setup(struct net_device *dev)
/* Called to do any work queued up on the transmit side that can now be done */
static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
{
- ppp_xmit_lock(ppp);
if (!ppp->closing) {
ppp_push(ppp);
@@ -1678,27 +1683,21 @@ static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
} else {
kfree_skb(skb);
}
- ppp_xmit_unlock(ppp);
}
static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
{
- local_bh_disable();
-
- if (unlikely(*this_cpu_ptr(ppp->xmit_recursion)))
+ if (ppp->wlock_owner == current)
goto err;
- (*this_cpu_ptr(ppp->xmit_recursion))++;
+ ppp_xmit_lock(ppp);
+ ppp->wlock_owner = current;
__ppp_xmit_process(ppp, skb);
- (*this_cpu_ptr(ppp->xmit_recursion))--;
-
- local_bh_enable();
-
+ ppp->wlock_owner = NULL;
+ ppp_xmit_unlock(ppp);
return;
err:
- local_bh_enable();
-
kfree_skb(skb);
if (net_ratelimit())
@@ -1903,7 +1902,9 @@ ppp_push(struct ppp *ppp)
list = list->next;
pch = list_entry(list, struct channel, clist);
- spin_lock(&pch->downl);
+ if (!pch_downl_lock(pch, ppp))
+ goto free_out;
+
if (pch->chan) {
if (pch->chan->ops->start_xmit(pch->chan, skb))
ppp->xmit_pending = NULL;
@@ -1912,7 +1913,7 @@ ppp_push(struct ppp *ppp)
kfree_skb(skb);
ppp->xmit_pending = NULL;
}
- spin_unlock(&pch->downl);
+ pch_downl_unlock(pch);
return;
}
@@ -1923,6 +1924,7 @@ ppp_push(struct ppp *ppp)
return;
#endif /* CONFIG_PPP_MULTILINK */
+free_out:
ppp->xmit_pending = NULL;
kfree_skb(skb);
}
@@ -2041,8 +2043,10 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
pch->avail = 1;
}
+ if (!pch_downl_lock(pch, ppp))
+ continue;
+
/* check the channel's mtu and whether it is still attached. */
- spin_lock(&pch->downl);
if (pch->chan == NULL) {
/* can't use this channel, it's being deregistered */
if (pch->speed == 0)
@@ -2050,7 +2054,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
else
totspeed -= pch->speed;
- spin_unlock(&pch->downl);
+ pch_downl_unlock(pch);
pch->avail = 0;
totlen = len;
totfree--;
@@ -2101,7 +2105,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
*/
if (flen <= 0) {
pch->avail = 2;
- spin_unlock(&pch->downl);
+ pch_downl_unlock(pch);
continue;
}
@@ -2146,14 +2150,14 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
len -= flen;
++ppp->nxseq;
bits = 0;
- spin_unlock(&pch->downl);
+ pch_downl_unlock(pch);
}
ppp->nxchan = i;
return 1;
noskb:
- spin_unlock(&pch->downl);
+ pch_downl_unlock(pch);
if (ppp->debug & 1)
netdev_err(ppp->dev, "PPP: no memory (fragment)\n");
++ppp->dev->stats.tx_errors;
@@ -2163,12 +2167,15 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
#endif /* CONFIG_PPP_MULTILINK */
/* Try to send data out on a channel */
-static void __ppp_channel_push(struct channel *pch)
+static void ppp_channel_push(struct channel *pch)
{
struct sk_buff *skb;
struct ppp *ppp;
+ read_lock_bh(&pch->upl);
spin_lock(&pch->downl);
+ pch->downl_owner = current;
+
if (pch->chan) {
while (!skb_queue_empty(&pch->file.xq)) {
skb = skb_dequeue(&pch->file.xq);
@@ -2182,24 +2189,13 @@ static void __ppp_channel_push(struct channel *pch)
/* channel got deregistered */
skb_queue_purge(&pch->file.xq);
}
+ pch->downl_owner = NULL;
spin_unlock(&pch->downl);
/* see if there is anything from the attached unit to be sent */
if (skb_queue_empty(&pch->file.xq)) {
ppp = pch->ppp;
if (ppp)
- __ppp_xmit_process(ppp, NULL);
- }
-}
-
-static void ppp_channel_push(struct channel *pch)
-{
- read_lock_bh(&pch->upl);
- if (pch->ppp) {
- (*this_cpu_ptr(pch->ppp->xmit_recursion))++;
- __ppp_channel_push(pch);
- (*this_cpu_ptr(pch->ppp->xmit_recursion))--;
- } else {
- __ppp_channel_push(pch);
+ ppp_xmit_process(ppp, NULL);
}
read_unlock_bh(&pch->upl);
}
@@ -3424,8 +3420,6 @@ static void ppp_destroy_interface(struct ppp *ppp)
#endif /* CONFIG_PPP_FILTER */
kfree_skb(ppp->xmit_pending);
- free_percpu(ppp->xmit_recursion);
-
free_netdev(ppp->dev);
}
--
2.50.0
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field
2025-06-27 10:50 [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field Sebastian Andrzej Siewior
@ 2025-07-03 7:55 ` Paolo Abeni
2025-07-04 15:48 ` Sebastian Andrzej Siewior
0 siblings, 1 reply; 5+ messages in thread
From: Paolo Abeni @ 2025-07-03 7:55 UTC (permalink / raw)
To: Sebastian Andrzej Siewior, linux-ppp, netdev, linux-rt-devel
Cc: David S. Miller, Andrew Lunn, Clark Williams, Eric Dumazet,
Jakub Kicinski, Steven Rostedt, Thomas Gleixner, Gao Feng,
Guillaume Nault
On 6/27/25 12:50 PM, Sebastian Andrzej Siewior wrote:
> The per-CPU variable ppp::xmit_recursion is protecting against recursion
> due to wrong configuration of the ppp channels. The per-CPU variable
> relies on disabled BH for its locking. Without per-CPU locking in
> local_bh_disable() on PREEMPT_RT this data structure requires explicit
> locking.
>
> The ppp::xmit_recursion is used as a per-CPU boolean. The counter is
> checked early in the send routing and the transmit path is only entered
> if the counter is zero. Then the counter is incremented to avoid
> recursion. It used to detect recursion on channel::downl and
> ppp::wlock.
>
> Replace the per-CPU ppp:xmit_recursion counter with an explicit owner
> field for both structs.
> pch_downl_lock() is helper to check for recursion on channel::downl and
> either assign the owner field if there is no recursion.
> __ppp_channel_push() is moved into ppp_channel_push() and gets the
> recursion check unconditionally because it is based on the lock now.
> The recursion check in ppp_xmit_process() is based on ppp::wlock which
> is acquired by ppp_xmit_lock(). The locking is moved from
> __ppp_xmit_process() into ppp_xmit_lock() to check the owner, lock and
> then assign the owner in one spot.
> The local_bh_disable() in ppp_xmit_lock() can be removed because
> ppp_xmit_lock() disables BH as part of the locking.
>
> Cc: Gao Feng <gfree.wind@vip.163.com>
> Cc: Guillaume Nault <g.nault@alphalink.fr>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Is there any special reason to not use local_lock here? I find this
patch quite hard to read and follow, as opposed to the local_lock usage
pattern. Also the fact that the code change does not affect RT enabled
build only is IMHO a negative thing.
/P
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field
2025-07-03 7:55 ` Paolo Abeni
@ 2025-07-04 15:48 ` Sebastian Andrzej Siewior
2025-07-08 8:28 ` Paolo Abeni
0 siblings, 1 reply; 5+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-07-04 15:48 UTC (permalink / raw)
To: Paolo Abeni
Cc: linux-ppp, netdev, linux-rt-devel, David S. Miller, Andrew Lunn,
Clark Williams, Eric Dumazet, Jakub Kicinski, Steven Rostedt,
Thomas Gleixner, Gao Feng, Guillaume Nault
On 2025-07-03 09:55:21 [+0200], Paolo Abeni wrote:
> Is there any special reason to not use local_lock here? I find this
> patch quite hard to read and follow, as opposed to the local_lock usage
> pattern. Also the fact that the code change does not affect RT enabled
> build only is IMHO a negative thing.
Adding a local_lock_t to "protect" the counter isn't that simple. I
still have to check for the owner of the lock before the lock is
acquired to avoid recursion on that local_lock_t. I need to acquire the
lock before checking the counter because another task might have
incremented the counter (so acquiring the lock would not deadlock). This
is similar to the recursion detection in openvswitch. That means I would
need to add the local_lock_t and an owner field next to the recursion
counter.
I've been looking at the counter and how it is used and it did not look
right. The recursion, it should detect, was described in commit
55454a565836e ("ppp: avoid dealock on recursive xmit"). There are two
locks that can be acquired due to recursion and that one counter is
supposed to catch both cases based on current code flow.
It is also not obvious why ppp_channel_push() makes the difference
depending on pch->ppp while ->start_xmit callback is invoked based on
pch->chan.
It looked more natural to avoid the per-CPU usage and detect the
recursion based on the lock that might be acquired recursively. I hope
this makes it easier to understand what is going on here.
While looking through the code I wasn't sure if
ppp_channel_bridge_input() requires the same kind of check for recursion
but adding it based on the lock, that is about to be acquired, would be
easier.
> /P
Sebastian
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field
2025-07-04 15:48 ` Sebastian Andrzej Siewior
@ 2025-07-08 8:28 ` Paolo Abeni
2025-07-08 9:49 ` Sebastian Andrzej Siewior
0 siblings, 1 reply; 5+ messages in thread
From: Paolo Abeni @ 2025-07-08 8:28 UTC (permalink / raw)
To: Sebastian Andrzej Siewior
Cc: linux-ppp, netdev, linux-rt-devel, David S. Miller, Andrew Lunn,
Clark Williams, Eric Dumazet, Jakub Kicinski, Steven Rostedt,
Thomas Gleixner, Gao Feng, Guillaume Nault
Hi,
I'm sorry for the latency, OoO here in between.
On 7/4/25 5:48 PM, Sebastian Andrzej Siewior wrote:
> On 2025-07-03 09:55:21 [+0200], Paolo Abeni wrote:
>> Is there any special reason to not use local_lock here? I find this
>> patch quite hard to read and follow, as opposed to the local_lock usage
>> pattern. Also the fact that the code change does not affect RT enabled
>> build only is IMHO a negative thing.
>
> Adding a local_lock_t to "protect" the counter isn't that simple. I
> still have to check for the owner of the lock before the lock is
> acquired to avoid recursion on that local_lock_t. I need to acquire the
> lock before checking the counter because another task might have
> incremented the counter (so acquiring the lock would not deadlock). This
> is similar to the recursion detection in openvswitch. That means I would
> need to add the local_lock_t and an owner field next to the recursion
> counter.
IMHO using a similar approach to something already implemented is a
plus, and the OVS code did not look that scaring. Also it had the IMHO
significant advantage of keeping the changes constrained to the RT build.
> I've been looking at the counter and how it is used and it did not look
> right. The recursion, it should detect, was described in commit
> 55454a565836e ("ppp: avoid dealock on recursive xmit"). There are two
> locks that can be acquired due to recursion and that one counter is
> supposed to catch both cases based on current code flow.
>
> It is also not obvious why ppp_channel_push() makes the difference
> depending on pch->ppp while ->start_xmit callback is invoked based on
> pch->chan.
> It looked more natural to avoid the per-CPU usage and detect the
> recursion based on the lock that might be acquired recursively. I hope
> this makes it easier to understand what is going on here.
Actually I'm a bit lost. According to 55454a565836e a single recursion
check in ppp_xmit_process() should be enough, and I think that keeping
the complexity constrained there would be better.
> While looking through the code I wasn't sure if
> ppp_channel_bridge_input() requires the same kind of check for recursion
> but adding it based on the lock, that is about to be acquired, would be
> easier.
(still lost in PPP, but) The xmit -> input path transition should have
already broken the recursion (via the backlog). Recursion check in tx
should be sufficient.
All in all I think the local lock based approach would be safer.
Thanks,
Paolo
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field
2025-07-08 8:28 ` Paolo Abeni
@ 2025-07-08 9:49 ` Sebastian Andrzej Siewior
0 siblings, 0 replies; 5+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-07-08 9:49 UTC (permalink / raw)
To: Paolo Abeni
Cc: linux-ppp, netdev, linux-rt-devel, David S. Miller, Andrew Lunn,
Clark Williams, Eric Dumazet, Jakub Kicinski, Steven Rostedt,
Thomas Gleixner, Gao Feng, Guillaume Nault
On 2025-07-08 10:28:24 [+0200], Paolo Abeni wrote:
> Hi,
Hi,
> I'm sorry for the latency, OoO here in between.
All good. I appreciate someone looking at it.
> On 7/4/25 5:48 PM, Sebastian Andrzej Siewior wrote:
> > On 2025-07-03 09:55:21 [+0200], Paolo Abeni wrote:
> >> Is there any special reason to not use local_lock here? I find this
> >> patch quite hard to read and follow, as opposed to the local_lock usage
> >> pattern. Also the fact that the code change does not affect RT enabled
> >> build only is IMHO a negative thing.
> >
> > Adding a local_lock_t to "protect" the counter isn't that simple. I
> > still have to check for the owner of the lock before the lock is
> > acquired to avoid recursion on that local_lock_t. I need to acquire the
> > lock before checking the counter because another task might have
> > incremented the counter (so acquiring the lock would not deadlock). This
> > is similar to the recursion detection in openvswitch. That means I would
> > need to add the local_lock_t and an owner field next to the recursion
> > counter.
>
> IMHO using a similar approach to something already implemented is a
> plus, and the OVS code did not look that scaring. Also it had the IMHO
> significant advantage of keeping the changes constrained to the RT build.
I intended to improve the code and making it more understandable of what
happens here and why. Additionally it would also fit with RT and not
just make this change to fit with RT.
> > I've been looking at the counter and how it is used and it did not look
> > right. The recursion, it should detect, was described in commit
> > 55454a565836e ("ppp: avoid dealock on recursive xmit"). There are two
> > locks that can be acquired due to recursion and that one counter is
> > supposed to catch both cases based on current code flow.
> >
> > It is also not obvious why ppp_channel_push() makes the difference
> > depending on pch->ppp while ->start_xmit callback is invoked based on
> > pch->chan.
> > It looked more natural to avoid the per-CPU usage and detect the
> > recursion based on the lock that might be acquired recursively. I hope
> > this makes it easier to understand what is going on here.
>
> Actually I'm a bit lost. According to 55454a565836e a single recursion
> check in ppp_xmit_process() should be enough, and I think that keeping
> the complexity constraint there be better.
Okay. I didn't think that this complicated the code flow.
> > While looking through the code I wasn't sure if
> > ppp_channel_bridge_input() requires the same kind of check for recursion
> > but adding it based on the lock, that is about to be acquired, would be
> > easier.
>
> (still lost in PPP, but) The xmit -> input path transition should have
> already break the recursion (via the backlog). Recursion check in tx
> should be sufficient.
>
> All in all I think it would be safer the local lock based approach.
Okay. I disagree but let me do as you suggested.
Thank you.
> Thanks,
>
> Paolo
Sebastian
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-07-08 9:50 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-27 10:50 [PATCH net-next] ppp: Replace per-CPU recursion counter with lock-owner field Sebastian Andrzej Siewior
2025-07-03 7:55 ` Paolo Abeni
2025-07-04 15:48 ` Sebastian Andrzej Siewior
2025-07-08 8:28 ` Paolo Abeni
2025-07-08 9:49 ` Sebastian Andrzej Siewior
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).