* Re: [PATCH net-next] Add Mellanox BlueField Gigabit Ethernet driver
From: Andrew Lunn @ 2020-07-31 17:42 UTC (permalink / raw)
To: David Thompson; +Cc: netdev, davem, kuba, jiri, Asmaa Mnebhi
In-Reply-To: <1596047355-28777-1-git-send-email-dthompson@mellanox.com>
Hi David
>> +static int mlxbf_gige_mdio_poll_bit(struct mlxbf_gige *priv, u32 bit_mask)
> +{
> + unsigned long timeout;
> + u32 val;
> +
> + timeout = jiffies + msecs_to_jiffies(MLXBF_GIGE_MDIO_POLL_BUSY_TIMEOUT);
> + do {
> + val = readl(priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET);
> + if (!(val & bit_mask))
> + return 0;
> + udelay(MLXBF_GIGE_MDIO_POLL_DELAY_USEC);
> + } while (time_before(jiffies, timeout));
Please use one of the include/linux/iopoll.h macros.
> +
> + return -ETIME;
ETIMEDOUT, not ETIME. But that will automatically be fixed when you
use iopoll.h. Core code has less bugs, which is why you should use it.
> +}
> +
> +static int mlxbf_gige_mdio_read(struct mii_bus *bus, int phy_add, int phy_reg)
> +{
> + struct mlxbf_gige *priv = bus->priv;
> + u32 cmd;
> + u32 ret;
> +
> + /* If the lock is held by something else, drop the request.
> + * If the lock is cleared, that means the busy bit was cleared.
> + */
How can this happen? The mdio core has a mutex which prevents parallel
access?
> + ret = mlxbf_gige_mdio_poll_bit(priv, MLXBF_GIGE_MDIO_GW_LOCK_MASK);
> + if (ret)
> + return -EBUSY;
PHY drivers are not going to like that. They are not going to
retry. What is likely to happen is that phylib moves into the ERROR
state, and the PHY driver grinds to a halt.
> +static void mlxbf_gige_mdio_disable_phy_int(struct mlxbf_gige *priv)
> +{
> + unsigned long flags;
> + u32 val;
> +
> + spin_lock_irqsave(&priv->gpio_lock, flags);
> + val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> + val &= ~priv->phy_int_gpio_mask;
> + writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> + spin_unlock_irqrestore(&priv->gpio_lock, flags);
> +}
> +
> +static void mlxbf_gige_mdio_enable_phy_int(struct mlxbf_gige *priv)
> +{
> + unsigned long flags;
> + u32 val;
> +
> + spin_lock_irqsave(&priv->gpio_lock, flags);
> + /* The INT_N interrupt level is active low.
> + * So enable cause fall bit to detect when GPIO
> + * state goes low.
> + */
> + val = readl(priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_FALL_EN);
> + val |= priv->phy_int_gpio_mask;
> + writel(val, priv->gpio_io + MLXBF_GIGE_GPIO_CAUSE_FALL_EN);
> +
> + /* Enable PHY interrupt by setting the priority level */
> + val = readl(priv->gpio_io +
> + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> + val |= priv->phy_int_gpio_mask;
> + writel(val, priv->gpio_io +
> + MLXBF_GIGE_GPIO_CAUSE_OR_EVTEN0);
> + spin_unlock_irqrestore(&priv->gpio_lock, flags);
> +}
> +
> +/* Interrupt handler is called from mlxbf_gige_main.c
> + * driver whenever a phy interrupt is received.
> + */
> +irqreturn_t mlxbf_gige_mdio_handle_phy_interrupt(struct mlxbf_gige *priv)
> +{
> + u32 val;
> +
> + /* The YU interrupt is shared between SMBus and GPIOs.
> + * So first, determine whether this is a GPIO interrupt.
> + */
> + val = readl(priv->cause_rsh_coalesce0_io);
> + if (!MLXBF_GIGE_GPIO_CAUSE_IRQ_IS_SET(val)) {
> + /* Nothing to do here, not a GPIO interrupt */
> + return IRQ_NONE;
> + }
> + /* Then determine which gpio register this interrupt is for.
> + * Return if the interrupt is not for gpio block 0.
> + */
> + val = readl(priv->cause_gpio_arm_coalesce0_io);
> + if (!(val & MLXBF_GIGE_GPIO_BLOCK0_MASK))
> + return IRQ_NONE;
> +
> + /* Finally check if this interrupt is from PHY device.
> + * Return if it is not.
> + */
> + val = readl(priv->gpio_io +
> + MLXBF_GIGE_GPIO_CAUSE_OR_CAUSE_EVTEN0);
> + if (!(val & priv->phy_int_gpio_mask))
> + return IRQ_NONE;
> +
> + /* Clear interrupt when done, otherwise, no further interrupt
> + * will be triggered.
> + * Writing 0x1 to the clear cause register also clears the
> + * following registers:
> + * cause_gpio_arm_coalesce0
> + * cause_rsh_coalesce0
> + */
> + val = readl(priv->gpio_io +
> + MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
> + val |= priv->phy_int_gpio_mask;
> + writel(val, priv->gpio_io +
> + MLXBF_GIGE_GPIO_CAUSE_OR_CLRCAUSE);
Shoudn't there be a call into the PHY driver at this point?
> +
> + return IRQ_HANDLED;
> +}
So these last three functions seem to be an interrupt controller? So
why not model it as a Linux interrupt controller?
> +static void mlxbf_gige_mdio_init_config(struct mlxbf_gige *priv)
> +{
> + struct device *dev = priv->dev;
> + u32 mdio_full_drive;
> + u32 mdio_out_sample;
> + u32 mdio_in_sample;
> + u32 mdio_voltage;
> + u32 mdc_period;
> + u32 mdio_mode;
> + u32 mdio_cfg;
> + int ret;
> +
> + ret = device_property_read_u32(dev, "mdio-mode", &mdio_mode);
> + if (ret < 0)
> + mdio_mode = MLXBF_GIGE_MDIO_MODE_MASTER;
> +
> + ret = device_property_read_u32(dev, "mdio-voltage", &mdio_voltage);
> + if (ret < 0)
> + mdio_voltage = MLXBF_GIGE_MDIO3_3;
> +
> + ret = device_property_read_u32(dev, "mdio-full-drive", &mdio_full_drive);
> + if (ret < 0)
> + mdio_full_drive = MLXBF_GIGE_MDIO_FULL_DRIVE;
> +
> + ret = device_property_read_u32(dev, "mdc-period", &mdc_period);
> + if (ret < 0)
> + mdc_period = MLXBF_GIGE_MDIO_PERIOD;
> +
> + ret = device_property_read_u32(dev, "mdio-in-sample", &mdio_in_sample);
> + if (ret < 0)
> + mdio_in_sample = MLXBF_GIGE_MDIO_IN_SAMP;
> +
> + ret = device_property_read_u32(dev, "mdio-out-sample", &mdio_out_sample);
> + if (ret < 0)
> + mdio_out_sample = MLXBF_GIGE_MDIO_OUT_SAMP;
Please see the discussion going on in the thread:
https://lore.kernel.org/linux-acpi/20200715090400.4733-1-calvin.johnson@oss.nxp.com/T/#t
and in particular
https://lore.kernel.org/linux-acpi/20200715090400.4733-1-calvin.johnson@oss.nxp.com/T/#t
My reading of this is you need to provide a specification of these
properties, and show they really are being used. Please join in on
that thread. Until we make progress on how ACPI should be used, you
might want to drop all these properties and just hard code it as a
standard 2.5Mhz MDIO bus.
> +int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv)
> +{
> +
> + ret = device_property_read_u32(dev, "phy-addr", &phy_addr);
> + if (ret < 0)
> + phy_addr = MLXBF_GIGE_MDIO_DEFAULT_PHY_ADDR;
This is going to be problematic. See above.
> +
> + priv->mdiobus->irq[phy_addr] = PHY_POLL;
That is the default anyway. You can skip this. But why do you have
interrupt handling code, and then poll it? Maybe just delete all the
interrupt code?
> +
> + /* Auto probe PHY at the corresponding address */
> + priv->mdiobus->phy_mask = ~(1 << phy_addr);
> + ret = mdiobus_register(priv->mdiobus);
> + if (ret)
> + dev_err(dev, "Failed to register MDIO bus\n");
Does it break if you scan the whole bus? It would allow you to avoid
some of the ACPI issues.
> +
> + return ret;
> +}
> +
Andrew
^ permalink raw reply
* Re: [PATCH v2] ravb: Fixed the problem that rmmod can not be done
From: Sergei Shtylyov @ 2020-07-31 17:45 UTC (permalink / raw)
To: Yoshihiro Shimoda, Yuusuke Ashizuka
Cc: netdev@vger.kernel.org, linux-renesas-soc@vger.kernel.org
In-Reply-To: <TY2PR01MB3692A94CD6479F2976458B0FD84E0@TY2PR01MB3692.jpnprd01.prod.outlook.com>
Hello!
On 7/31/20 9:43 AM, Yoshihiro Shimoda wrote:
>>>> From: Yuusuke Ashizuka, Sent: Thursday, July 30, 2020 7:02 PM
>>>> Subject: [PATCH v2] ravb: Fixed the problem that rmmod can not be done
>>>
>>> Thank you for the patch! I found a similar patch for another driver [1].
>>
>> It's not the same case -- that driver hadn't had the MDIO release code at all
>> before that patch.
>
> You're correct. I didn't realized it...
The patch description was somewhat incomplete there...
>>> So, we should apply this patch to the ravb driver.
>>
>> I believe the driver is innocent. :-)
>
> I hope so :)
Looks like I was wrong in this case. It's very fortunate that the MDIO bitbang
is not as popular as I thought.
> <snip>
>>>> $ lsmod
>>>> Module Size Used by
>>>> ravb 40960 1
>>>> $ rmmod ravb
>>>> rmmod: ERROR: Module ravb is in use
>>
>> Shouldn't the driver core call the remove() method for the affected devices
>> first, before checking the refcount?
>
> In this case, an mii bus of "mdiobb_ops bb_ops" is affected "device" by the ravb driver.
> And the ravb driver sets the owner of mii bus as THIS_MODULE like below:
>
> static struct mdiobb_ops bb_ops = {
> .owner = THIS_MODULE,
> .set_mdc = ravb_set_mdc,
> .set_mdio_dir = ravb_set_mdio_dir,
> .set_mdio_data = ravb_set_mdio_data,
> .get_mdio_data = ravb_get_mdio_data,
> };
>
> So, I don't think the driver core can call the remove() method for the mii bus
> because it's a part of the ravb driver...
And because the MDIO module just doesn't have the usual method! :-)
(I meant the EtherAVB driver's remove() method, and that one would be called after
a successful reference count check...)
> By the way, about the mdio-gpio driver, I'm wondering if the mdio-gpio
> driver cannot be removed by rmmod too. (perhaps, we need "rmmod -f" to remove it.)
You're on your own here. It's fortunate for this patch that I'm not currently loaded
at work! :-)
>>> By the way, I think you have to send this patch to the following maintainers too:
>>> # We can get it by using scripts/get_maintainers.pl.
>>> David S. Miller <davem@davemloft.net> (maintainer:NETWORKING DRIVERS,commit_signer:8/8=100%)
>>> Jakub Kicinski <kuba@kernel.org> (maintainer:NETWORKING DRIVERS)
Not critical, as DaveM uses the patchwork anyway. He started to be CC'ed on netdev patches
only recently. :-)
[...]
> Best regards,
> Yoshihiro Shimoda
MBR, Sergei
^ permalink raw reply
* Re: [RFC PATCH bpf-next 3/3] samples/bpf: Add a simple bridge example accelerated with XDP
From: Andrii Nakryiko @ 2020-07-31 17:48 UTC (permalink / raw)
To: Yoshiki Komachi
Cc: David S. Miller, Alexei Starovoitov, Daniel Borkmann,
Jesper Dangaard Brouer, John Fastabend, Jakub Kicinski,
Martin KaFai Lau, Song Liu, Yonghong Song, Andrii Nakryiko,
KP Singh, Roopa Prabhu, Nikolay Aleksandrov, David Ahern,
Networking, bridge, bpf
In-Reply-To: <1596170660-5582-4-git-send-email-komachi.yoshiki@gmail.com>
On Thu, Jul 30, 2020 at 9:45 PM Yoshiki Komachi
<komachi.yoshiki@gmail.com> wrote:
>
> This patch adds a simple example of XDP-based bridge with the new
> bpf_fdb_lookup helper. This program simply forwards packets based
> on the destination port given by FDB in the kernel. Note that both
> vlan filtering and learning features are currently unsupported in
> this example.
>
> There is another plan to recreate a userspace application
> (xdp_bridge_user.c) as a daemon process, which helps to automate
> not only detection of status changes in bridge port but also
> handling vlan protocol updates.
>
> Note: David Ahern suggested a new bpf helper [1] to get master
> vlan/bonding devices in XDP programs attached to their slaves
> when the master vlan/bonding devices are bridge ports. If this
> idea is accepted and the helper is introduced in the future, we
> can handle interfaces slaved to vlan/bonding devices in this
> sample by calling the suggested bpf helper (I guess it can get
> vlan/bonding ifindex from their slave ifindex). Notice that we
> don't need to change bpf_fdb_lookup() API to use such a feature,
> but we just need to modify bpf programs like this sample.
>
> [1]: http://vger.kernel.org/lpc-networking2018.html#session-1
>
> Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
> ---
Have you tried using a BPF skeleton for this? It could have saved a
bunch of mechanical code for your example. Also libbpf supports map
pinning out of the box now, I wonder if it would just work in your
case. Also it would be nice if you tried using BPF link-based approach
for this example, to show how it can be used. Thanks!
> samples/bpf/Makefile | 3 +
> samples/bpf/xdp_bridge_kern.c | 129 ++++++++++++++++++
> samples/bpf/xdp_bridge_user.c | 239 ++++++++++++++++++++++++++++++++++
> 3 files changed, 371 insertions(+)
> create mode 100644 samples/bpf/xdp_bridge_kern.c
> create mode 100644 samples/bpf/xdp_bridge_user.c
>
[...]
^ permalink raw reply
* Re: [PATCH 1/1] netfilter: nat: add range checks for access to nf_nat_l[34]protos[]
From: Pablo Neira Ayuso @ 2020-07-31 17:51 UTC (permalink / raw)
To: William Mcvicker
Cc: security, Jozsef Kadlecsik, Florian Westphal, David S. Miller,
Alexey Kuznetsov, Hideaki YOSHIFUJI, netfilter-devel, coreteam,
netdev, linux-kernel, kernel-team
In-Reply-To: <20200731002611.GA1035680@google.com>
Hi William,
On Fri, Jul 31, 2020 at 12:26:11AM +0000, William Mcvicker wrote:
> Hi Pablo,
>
> Yes, I believe this oops is only triggered by userspace when the user
> specifically passes in an invalid nf_nat_l3protos index. I'm happy to re-work
> the patch to check for this in ctnetlink_create_conntrack().
Great.
Note that this code does not exist in the tree anymore. I'm not sure
if this problem still exists upstream, this patch does not apply to
nf.git. This fix should only go for -stable maintainers.
> > BTW, do you have a Fixes: tag for this? This will be useful for
> > -stable maintainer to pick up this fix.
>
> Regarding the Fixes: tag, I don't have one offhand since this bug was reported
> to me, but I can search through the code history to find the commit that
> exposed this vulnerability.
That would be great.
Thank you.
^ permalink raw reply
* Re: [PATCH net] net: bridge: clear bridge's private skb space on xmit
From: Nikolay Aleksandrov @ 2020-07-31 17:51 UTC (permalink / raw)
To: David Ahern, netdev; +Cc: bridge, roopa, davem
In-Reply-To: <181931fb-dc60-7db6-60ac-b8ff1402efec@cumulusnetworks.com>
On 31/07/2020 20:37, Nikolay Aleksandrov wrote:
> On 31/07/2020 20:27, David Ahern wrote:
>> On 7/31/20 10:26 AM, Nikolay Aleksandrov wrote:
>>> We need to clear all of the bridge private skb variables as they can be
>>> stale due to the packet being recirculated through the stack and then
>>> transmitted through the bridge device. Similar memset is already done on
>>> bridge's input. We've seen cases where proxyarp_replied was 1 on routed
>>> multicast packets transmitted through the bridge to ports with neigh
>>> suppress which were getting dropped. Same thing can in theory happen with
>>> the port isolation bit as well.
>>>
>>> Fixes: 821f1b21cabb ("bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood")
>>> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
>>> ---
>>> net/bridge/br_device.c | 2 ++
>>> 1 file changed, 2 insertions(+)
>>>
>>> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
>>> index 8c7b78f8bc23..9a2fb4aa1a10 100644
>>> --- a/net/bridge/br_device.c
>>> +++ b/net/bridge/br_device.c
>>> @@ -36,6 +36,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>>> const unsigned char *dest;
>>> u16 vid = 0;
>>>
>>> + memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
>>> +
>>> rcu_read_lock();
>>> nf_ops = rcu_dereference(nf_br_ops);
>>> if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
>>>
>>
>> What's the performance hit of doing this on every packet?
>>
>> Can you just set a flag that tells the code to reset on recirculation?
>> Seems like br_input_skb_cb has space for that.
>>
>
> Virtually non-existent, we had a patch that turned that field into a 16 byte
> field so that is really 2 8 byte stores. It is already cache hot, we could
> initialize each individual field separately as br_input does.
>
> I don't want to waste flags on such thing, this makes it future-proof
> and I'll remove the individual field zeroing later which will alleviate
> the cost further.
>
Also note that we already do this on input for each packet since the
struct was reduced to 16 bytes. It's the safest way since every different
sub-part of the bridge uses some set of these private variables and
we've had many similar bugs where they were used stale or unintentionally
were not initialized for some path.
^ permalink raw reply
* Re: [PATCH v3 bpf-next 6/9] bpf: tcp: Allow bpf prog to write and parse TCP header option
From: Martin KaFai Lau @ 2020-07-31 17:59 UTC (permalink / raw)
To: Eric Dumazet
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, kernel-team,
Lawrence Brakmo, Neal Cardwell, netdev, Yuchung Cheng
In-Reply-To: <CANn89i+5RKTcBFqueEs48HUadC+dO54eR7Yp5pBJ6zgbosTDCQ@mail.gmail.com>
On Fri, Jul 31, 2020 at 09:06:57AM -0700, Eric Dumazet wrote:
> On Thu, Jul 30, 2020 at 1:57 PM Martin KaFai Lau <kafai@fb.com> wrote:
> >
> > The earlier effort in BPF-TCP-CC allows the TCP Congestion Control
> > algorithm to be written in BPF. It opens up opportunities to allow
> > a faster turnaround time in testing/releasing new congestion control
> > ideas to production environment.
> >
> > The same flexibility can be extended to writing TCP header option.
> > It is not uncommon that people want to test new TCP header option
> > to improve the TCP performance. Another use case is for data-center
> > that has a more controlled environment and has more flexibility in
> > putting header options for internal only use.
> >
> > For example, we want to test the idea in putting maximum delay
> > ACK in TCP header option which is similar to a draft RFC proposal [1].
> >
> > This patch introduces the necessary BPF API and use them in the
> > TCP stack to allow BPF_PROG_TYPE_SOCK_OPS program to parse
> > and write TCP header options. It currently supports most of
> > the TCP packet except RST.
> >
> > Supported TCP header option:
> > ───────────────────────────
> > This patch allows the bpf-prog to write any option kind.
> > Different bpf-progs can write its own option by calling the new helper
> > bpf_store_hdr_opt(). The helper will ensure there is no duplicated
> > option in the header.
> >
> > By allowing bpf-prog to write any option kind, this gives a lot of
> > flexibility to the bpf-prog. Different bpf-prog can write its
> > own option kind. It could also allow the bpf-prog to support a
> > recently standardized option on an older kernel.
> >
> > Sockops Callback Flags:
> > ──────────────────────
> > The header parsing and writing callback can be turned on
> > by enabling a few newly added callback flags:
> >
> > BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG:
> > Call bpf when kernel has received a header option that
> > the kernel cannot handle. It is useful when the peer doesn't
> > send bpf-options very often.
> >
> > The bpf-prog can inspect the received header by sock_ops->skb_data
> > which covers the whole header (including the fixed fields like
> > ports, flags...etc) or
> > use the new bpf_load_hdr_opt() to search for a particular TCP
> > header option.
> >
> >
> >
> >
>
> > [1]: draft-wang-tcpm-low-latency-opt-00
> > https://urldefense.proofpoint.com/v2/url?u=https-3A__tools.ietf.org_html_draft-2Dwang-2Dtcpm-2Dlow-2Dlatency-2Dopt-2D00&d=DwIFaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=VQnoQ7LvghIj0gVEaiQSUw&m=Z-syoz304fodO8xPKCcJh0QYhXbb7_XVuRgTINFba2U&s=Ad66Zb5r0utWgnrB-QuDXBft6G1HXW2C_aBV9fTMxoo&e=
> >
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
> > include/linux/bpf-cgroup.h | 25 +++
> > include/linux/filter.h | 4 +
> > include/net/tcp.h | 53 ++++-
> > include/uapi/linux/bpf.h | 231 ++++++++++++++++++++-
> > net/core/filter.c | 365 +++++++++++++++++++++++++++++++++
> > net/ipv4/tcp_fastopen.c | 2 +-
> > net/ipv4/tcp_input.c | 86 +++++++-
> > net/ipv4/tcp_ipv4.c | 3 +-
> > net/ipv4/tcp_minisocks.c | 1 +
> > net/ipv4/tcp_output.c | 194 ++++++++++++++++--
> > net/ipv6/tcp_ipv6.c | 3 +-
> > tools/include/uapi/linux/bpf.h | 231 ++++++++++++++++++++-
> > 12 files changed, 1171 insertions(+), 27 deletions(-)
>
> This is a truly gigantic patch.
>
> Could you split it in maybe two parts ?
Yes.
Most of the code changes in TCP are calling out the bpf prog to parse and
write header. Thus, they are all in this one patch.
I will put those callout changes (and a few func arg changes) in TCP
to a separate patch but leave the bpf callout function empty.
Then the next bpf specific patch will fill out those empty bpf
callout functions.
>
> This way I could focus on the TCP changes, and let eBPF experts focus
> on BPF changes.
Thanks for the review!
^ permalink raw reply
* Re: [PATCH net-next] cxgb4: Add support to flash firmware config image
From: Jakub Kicinski @ 2020-07-31 18:00 UTC (permalink / raw)
To: Ganji Aravind; +Cc: netdev, davem, vishal, rahul.lakkireddy
In-Reply-To: <20200731110904.GA1571@chelsio.com>
On Fri, 31 Jul 2020 16:39:04 +0530 Ganji Aravind wrote:
> On Thursday, July 07/30/20, 2020 at 16:23:35 -0700, Jakub Kicinski wrote:
> > On Thu, 30 Jul 2020 20:41:38 +0530 Ganji Aravind wrote:
> > > Update set_flash to flash firmware configuration image
> > > to flash region.
> >
> > And the reason why you need to flash some .ini files separately is?
>
> Hi Jakub,
>
> The firmware config file contains information on how the firmware
> should distribute the hardware resources among NIC and
> Upper Layer Drivers(ULD), like iWARP, crypto, filtering, etc.
>
> The firmware image comes with an in-built default config file that
> distributes resources among the NIC and all the ULDs. However, in
> some cases, where we don't want to run a particular ULD, or if we
> want to redistribute the resources, then we'd modify the firmware
> config file and then firmware will redistribute those resources
> according to the new configuration. So, if firmware finds this
> custom config file in flash, it reads this first. Otherwise, it'll
> continue initializing the adapter with its own in-built default
> config file.
Sounds like something devlink could be extended to do.
Firmware update interface is not for configuration.
^ permalink raw reply
* Re: pull-request: bpf 2020-07-31
From: Jiri Olsa @ 2020-07-31 18:08 UTC (permalink / raw)
To: Daniel Borkmann; +Cc: davem, kuba, ast, jolsa, netdev, bpf
In-Reply-To: <03545f38-c01a-faeb-adab-a0a471ff9fc3@iogearbox.net>
On Fri, Jul 31, 2020 at 06:12:48PM +0200, Daniel Borkmann wrote:
SNIP
> > > return -EINVAL;
> > > return id;
> > > }
> > >
> > > Let me know if you run into any others issues (CC'ing Jiri Olsa so he's in
> > > the loop with regards to merge conflict resolution).
> >
> > we'll loose the bpf_log message, but I'm fine with that ;-) looks good
>
> Checking again on the fix, even though it was only triggered by syzkaller
> so far, I think it's also possible if users don't have BTF debug data set
> in the Kconfig but use a helper that expects it, so agree, lets re-add the
> log in this case:
>
> int btf_resolve_helper_id(struct bpf_verifier_log *log,
> const struct bpf_func_proto *fn, int arg)
> {
> int id;
>
> if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
> return -EINVAL;
> if (!btf_vmlinux) {
> bpf_log(log, "btf_vmlinux doesn't exist\n");
> return -EINVAL;
> }
> id = fn->btf_id[arg];
> if (!id || id > btf_vmlinux->nr_types)
> return -EINVAL;
> return id;
> }
ok, looks good
jirka
^ permalink raw reply
* Re: [PATCH net] net: bridge: clear bridge's private skb space on xmit
From: Nikolay Aleksandrov @ 2020-07-31 18:10 UTC (permalink / raw)
To: David Ahern, netdev; +Cc: bridge, roopa, davem
In-Reply-To: <2bdc90a2-834f-941d-fea7-04e3c8924f7b@cumulusnetworks.com>
On 31/07/2020 20:51, Nikolay Aleksandrov wrote:
> On 31/07/2020 20:37, Nikolay Aleksandrov wrote:
>> On 31/07/2020 20:27, David Ahern wrote:
>>> On 7/31/20 10:26 AM, Nikolay Aleksandrov wrote:
>>>> We need to clear all of the bridge private skb variables as they can be
>>>> stale due to the packet being recirculated through the stack and then
>>>> transmitted through the bridge device. Similar memset is already done on
>>>> bridge's input. We've seen cases where proxyarp_replied was 1 on routed
>>>> multicast packets transmitted through the bridge to ports with neigh
>>>> suppress which were getting dropped. Same thing can in theory happen with
>>>> the port isolation bit as well.
>>>>
>>>> Fixes: 821f1b21cabb ("bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood")
>>>> Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
>>>> ---
>>>> net/bridge/br_device.c | 2 ++
>>>> 1 file changed, 2 insertions(+)
>>>>
>>>> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
>>>> index 8c7b78f8bc23..9a2fb4aa1a10 100644
>>>> --- a/net/bridge/br_device.c
>>>> +++ b/net/bridge/br_device.c
>>>> @@ -36,6 +36,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
>>>> const unsigned char *dest;
>>>> u16 vid = 0;
>>>>
>>>> + memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
>>>> +
>>>> rcu_read_lock();
>>>> nf_ops = rcu_dereference(nf_br_ops);
>>>> if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
>>>>
>>>
>>> What's the performance hit of doing this on every packet?
>>>
>>> Can you just set a flag that tells the code to reset on recirculation?
>>> Seems like br_input_skb_cb has space for that.
>>>
>>
>> Virtually non-existent, we had a patch that turned that field into a 16 byte
>> field so that is really 2 8 byte stores. It is already cache hot, we could
>> initialize each individual field separately as br_input does.
>>
>> I don't want to waste flags on such thing, this makes it future-proof
>> and I'll remove the individual field zeroing later which will alleviate
>> the cost further.
>>
>
> Also note that we already do this on input for each packet since the
> struct was reduced to 16 bytes. It's the safest way since every different
> sub-part of the bridge uses some set of these private variables and
> we've had many similar bugs where they were used stale or unintentionally
> were not initialized for some path.
>
In addition this doesn't need to be a recirculation, in theory it could happen
by a routed packet to svi on the bridge which got its skb->cb initialized before
hitting the bridge's xmit function. So a flag can't catch all possible cases.
^ permalink raw reply
* [PATCH net] net: gre: recompute gre csum for sctp over gre tunnels
From: Lorenzo Bianconi @ 2020-07-31 18:12 UTC (permalink / raw)
To: netdev; +Cc: davem, lorenzo.bianconi, tom
The GRE tunnel can be used to transport traffic that does not rely on a
Internet checksum (e.g. SCTP). The issue can be triggered creating a GRE
or GRETAP tunnel and transmitting SCTP traffic ontop of it where CRC
offload has been disabled. In order to fix the issue we need to
recompute the GRE csum in gre_gso_segment() not relying on the inner
checksum.
The issue is still present when we have the CRC offload enabled.
In this case we need to disable the CRC offload if we require GRE
checksum since otherwise skb_checksum() will report a wrong value.
Fixes: 4749c09c37030 ("gre: Call gso_make_checksum")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
net/ipv4/gre_offload.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 2e6d1b7a7bc9..e0a246575887 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -15,12 +15,12 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+ bool need_csum, need_recompute_csum, gso_partial;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
__be16 protocol = skb->protocol;
u16 mac_len = skb->mac_len;
int gre_offset, outer_hlen;
- bool need_csum, gso_partial;
if (!skb->encapsulation)
goto out;
@@ -41,6 +41,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
skb->protocol = skb->inner_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
+ need_recompute_csum = skb->csum_not_inet;
skb->encap_hdr_csum = need_csum;
features &= skb->dev->hw_enc_features;
@@ -98,7 +99,15 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
}
*(pcsum + 1) = 0;
- *pcsum = gso_make_checksum(skb, 0);
+ if (need_recompute_csum && !skb_is_gso(skb)) {
+ __wsum csum;
+
+ csum = skb_checksum(skb, gre_offset,
+ skb->len - gre_offset, 0);
+ *pcsum = csum_fold(csum);
+ } else {
+ *pcsum = gso_make_checksum(skb, 0);
+ }
} while ((skb = skb->next));
out:
return segs;
--
2.26.2
^ permalink raw reply related
* Re: [PATCH 1/1] netfilter: nat: add range checks for access to nf_nat_l[34]protos[]
From: William Mcvicker @ 2020-07-31 18:16 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: security, Jozsef Kadlecsik, Florian Westphal, David S. Miller,
Alexey Kuznetsov, Hideaki YOSHIFUJI, netfilter-devel, coreteam,
netdev, linux-kernel, kernel-team
In-Reply-To: <20200731175115.GA16982@salvia>
Hi Pablo,
> Note that this code does not exist in the tree anymore. I'm not sure
> if this problem still exists upstream, this patch does not apply to
> nf.git. This fix should only go for -stable maintainers.
Right, the vulnerability has been fixed by the refactor commit fe2d0020994cd
("netfilter: nat: remove l4proto->in_range"), but this patch is a part of
a full re-work of the code and doesn't backport very cleanly to the LTS
branches. So this fix is only applicable to the 4.19, 4.14, 4.9, and 4.4 LTS
branches. I missed the -stable email, but will re-add it to this thread with
the re-worked patch.
Thanks,
Will
On 07/31/2020, Pablo Neira Ayuso wrote:
> Hi William,
>
> On Fri, Jul 31, 2020 at 12:26:11AM +0000, William Mcvicker wrote:
> > Hi Pablo,
> >
> > Yes, I believe this oops is only triggered by userspace when the user
> > specifically passes in an invalid nf_nat_l3protos index. I'm happy to re-work
> > the patch to check for this in ctnetlink_create_conntrack().
>
> Great.
>
> Note that this code does not exist in the tree anymore. I'm not sure
> if this problem still exists upstream, this patch does not apply to
> nf.git. This fix should only go for -stable maintainers.
>
> > > BTW, do you have a Fixes: tag for this? This will be useful for
> > > -stable maintainer to pick up this fix.
> >
> > Regarding the Fixes: tag, I don't have one offhand since this bug was reported
> > to me, but I can search through the code history to find the commit that
> > exposed this vulnerability.
>
> That would be great.
>
> Thank you.
^ permalink raw reply
* Re: [Linux-kernel-mentees] [PATCH net] rds: Prevent kernel-infoleak in rds_notify_queue_get()
From: Jason Gunthorpe @ 2020-07-31 18:27 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: Leon Romanovsky, Peilin Ye, Santosh Shilimkar, David S. Miller,
Jakub Kicinski, Dan Carpenter, Arnd Bergmann,
linux-kernel-mentees, netdev, linux-rdma, rds-devel, linux-kernel
In-Reply-To: <20200731171924.GA2014207@kroah.com>
On Fri, Jul 31, 2020 at 07:19:24PM +0200, Greg Kroah-Hartman wrote:
> > I tried for a bit and didn't find a way to get even old gcc 4.4 to not
> > initialize the holes.
>
> Odd, so it is just the "= {0};" that does not zero out the holes?
Nope, it seems to work fine too. I tried a number of situations and I
could not get the compiler to not zero holes, even back to gcc 4.4
It is not just accidental either, take this:
struct rds_rdma_notify {
unsigned long user_token;
unsigned char status;
unsigned long user_token1 __attribute__((aligned(32)));
} foo = {0};
Which has quite a big hole, clang generates:
movq $0, 56(%rdi)
movq $0, 48(%rdi)
movq $0, 40(%rdi)
movq $0, 32(%rdi)
movq $0, 24(%rdi)
movq $0, 16(%rdi)
movq $0, 8(%rdi)
movq $0, (%rdi)
Deliberate extra instructions to fill both holes. gcc 10 does the
same, older gcc's do create a rep stosq over the whole thing.
Some fiddling with godbolt shows quite a variety of output, but I
didn't see anything that looks like a compiler not filling
padding. Even godbolt's gcc 4.1 filled the padding, which is super old.
In several cases it seems the aggregate initializer produced better
code than memset, in other cases it didn't
Without an actual example where this doesn't work right it is hard to
say anything more..
Jason
^ permalink raw reply
* [PATCH v2 2/3] ath10k: Add module param to enable history
From: Rakesh Pillai @ 2020-07-31 18:27 UTC (permalink / raw)
To: ath10k
Cc: linux-wireless, linux-kernel, kvalo, davem, kuba, netdev,
Rakesh Pillai
In-Reply-To: <1596220042-2778-1-git-send-email-pillair@codeaurora.org>
Add a module param to enable recording history
of certain debug events. This module parameter
is a mask of the different history recording to
be enabled.
The memory for recording the history will not be
allocated if its not enabled via module parameter.
This is to avoid unnecessary memory allocation when
recording the history is not needed.
To enable all the history recording, the driver
should be loaded as below
"insmod ath10k_core.ko history_enable_mask=0xf"
Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1
Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
---
drivers/net/wireless/ath/ath10k/core.c | 3 +++
drivers/net/wireless/ath/ath10k/core.h | 8 ++++++++
drivers/net/wireless/ath/ath10k/debug.c | 26 ++++++++++++++++++++++----
drivers/net/wireless/ath/ath10k/debug.h | 1 +
4 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 9104496..f91a9d0 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -29,6 +29,7 @@
unsigned int ath10k_debug_mask;
EXPORT_SYMBOL(ath10k_debug_mask);
+unsigned long ath10k_history_enable_mask;
static unsigned int ath10k_cryptmode_param;
static bool uart_print;
static bool skip_otp;
@@ -46,6 +47,7 @@ module_param(skip_otp, bool, 0644);
module_param(rawmode, bool, 0644);
module_param(fw_diag_log, bool, 0644);
module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444);
+module_param_named(history_enable_mask, ath10k_history_enable_mask, ulong, 0444);
MODULE_PARM_DESC(debug_mask, "Debugging mask");
MODULE_PARM_DESC(uart_print, "Uart target debugging");
@@ -54,6 +56,7 @@ MODULE_PARM_DESC(cryptmode, "Crypto mode: 0-hardware, 1-software");
MODULE_PARM_DESC(rawmode, "Use raw 802.11 frame datapath");
MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file");
MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging");
+MODULE_PARM_DESC(history_enable_mask, "Enable events history recording");
static const struct ath10k_hw_params ath10k_hw_params_list[] = {
{
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 46bd5aa..ce429df 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -977,6 +977,14 @@ struct ath10k_bus_params {
#define ATH10K_WMI_DATA_LEN 16
+enum ath10k_history {
+ ATH10K_REG_ACCESS_HISTORY,
+ ATH10K_CE_EVENTS_HISTORY,
+ ATH10K_WMI_CMD_HISTORY,
+ ATH10K_WMI_EVENTS_HISTORY,
+ ATH10K_HISTORY_MAX,
+};
+
enum ath10k_ce_event {
ATH10K_IRQ_TRIGGER,
ATH10K_NAPI_POLL,
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 9105b0b..5d08652 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -2724,6 +2724,9 @@ EXPORT_SYMBOL(ath10k_dbg_dump);
int ath10k_core_reg_access_history_init(struct ath10k *ar, u32 max_entries)
{
+ if (!test_bit(ATH10K_REG_ACCESS_HISTORY, &ath10k_history_enable_mask))
+ return 0;
+
ar->reg_access_history.record = vzalloc(max_entries *
sizeof(struct ath10k_reg_access_entry));
if (!ar->reg_access_history.record)
@@ -2739,6 +2742,9 @@ EXPORT_SYMBOL(ath10k_core_reg_access_history_init);
int ath10k_core_wmi_cmd_history_init(struct ath10k *ar, u32 max_entries)
{
+ if (!test_bit(ATH10K_WMI_CMD_HISTORY, &ath10k_history_enable_mask))
+ return 0;
+
ar->wmi_cmd_history.record = vzalloc(max_entries *
sizeof(struct ath10k_wmi_event_entry));
if (!ar->wmi_cmd_history.record)
@@ -2754,6 +2760,9 @@ EXPORT_SYMBOL(ath10k_core_wmi_cmd_history_init);
int ath10k_core_wmi_event_history_init(struct ath10k *ar, u32 max_entries)
{
+ if (!test_bit(ATH10K_WMI_EVENTS_HISTORY, &ath10k_history_enable_mask))
+ return 0;
+
ar->wmi_event_history.record = vzalloc(max_entries *
sizeof(struct ath10k_wmi_event_entry));
if (!ar->wmi_event_history.record)
@@ -2769,6 +2778,9 @@ EXPORT_SYMBOL(ath10k_core_wmi_event_history_init);
int ath10k_core_ce_event_history_init(struct ath10k *ar, u32 max_entries)
{
+ if (!test_bit(ATH10K_CE_EVENTS_HISTORY, &ath10k_history_enable_mask))
+ return 0;
+
ar->ce_event_history.record = vzalloc(max_entries *
sizeof(struct ath10k_ce_event_entry));
if (!ar->ce_event_history.record)
@@ -2787,7 +2799,8 @@ void ath10k_record_reg_access(struct ath10k *ar, u32 offset, u32 val, bool write
struct ath10k_reg_access_entry *entry;
u32 idx;
- if (!ar->reg_access_history.record)
+ if (!test_bit(ATH10K_REG_ACCESS_HISTORY, &ath10k_history_enable_mask) ||
+ !ar->reg_access_history.record)
return;
idx = ath10k_core_get_next_idx(&ar->reg_access_history.index,
@@ -2808,7 +2821,9 @@ void ath10k_record_wmi_event(struct ath10k *ar, enum ath10k_wmi_type type,
u32 idx;
if (type == ATH10K_WMI_EVENT) {
- if (!ar->wmi_event_history.record)
+ if (!test_bit(ATH10K_WMI_EVENTS_HISTORY,
+ &ath10k_history_enable_mask) ||
+ !ar->wmi_event_history.record)
return;
spin_lock_bh(&ar->wmi_event_history.hist_lock);
@@ -2817,7 +2832,9 @@ void ath10k_record_wmi_event(struct ath10k *ar, enum ath10k_wmi_type type,
spin_unlock_bh(&ar->wmi_event_history.hist_lock);
entry = &ar->wmi_event_history.record[idx];
} else {
- if (!ar->wmi_cmd_history.record)
+ if (!test_bit(ATH10K_WMI_CMD_HISTORY,
+ &ath10k_history_enable_mask) ||
+ !ar->wmi_event_history.record)
return;
spin_lock_bh(&ar->wmi_cmd_history.hist_lock);
@@ -2841,7 +2858,8 @@ void ath10k_record_ce_event(struct ath10k *ar, enum ath10k_ce_event event_type,
struct ath10k_ce_event_entry *entry;
u32 idx;
- if (!ar->ce_event_history.record)
+ if (!test_bit(ATH10K_CE_EVENTS_HISTORY, &ath10k_history_enable_mask) ||
+ !ar->ce_event_history.record)
return;
idx = ath10k_core_get_next_idx(&ar->ce_event_history.index,
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index c28aeb1..7799b89 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -75,6 +75,7 @@ struct ath10k_pktlog_hdr {
#define ATH10K_TX_POWER_MIN_VAL 0
extern unsigned int ath10k_debug_mask;
+extern unsigned long ath10k_history_enable_mask;
__printf(2, 3) void ath10k_info(struct ath10k *ar, const char *fmt, ...);
__printf(2, 3) void ath10k_err(struct ath10k *ar, const char *fmt, ...);
--
2.7.4
^ permalink raw reply related
* [PATCH v2 1/3] ath10k: Add history for tracking certain events
From: Rakesh Pillai @ 2020-07-31 18:27 UTC (permalink / raw)
To: ath10k
Cc: linux-wireless, linux-kernel, kvalo, davem, kuba, netdev,
Rakesh Pillai
In-Reply-To: <1596220042-2778-1-git-send-email-pillair@codeaurora.org>
Add history for tracking the below events
- register read
- register write
- IRQ trigger
- NAPI poll
- CE service
- WMI cmd
- WMI event
- WMI tx completion
This will help in debugging any crash or any
improper behaviour.
Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1
Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
---
drivers/net/wireless/ath/ath10k/ce.c | 1 +
drivers/net/wireless/ath/ath10k/core.h | 74 +++++++++++++++++
drivers/net/wireless/ath/ath10k/debug.c | 133 ++++++++++++++++++++++++++++++
drivers/net/wireless/ath/ath10k/debug.h | 74 +++++++++++++++++
drivers/net/wireless/ath/ath10k/snoc.c | 15 +++-
drivers/net/wireless/ath/ath10k/wmi-tlv.c | 1 +
drivers/net/wireless/ath/ath10k/wmi.c | 10 +++
7 files changed, 307 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c
index 84ec80c..0f541de 100644
--- a/drivers/net/wireless/ath/ath10k/ce.c
+++ b/drivers/net/wireless/ath/ath10k/ce.c
@@ -1299,6 +1299,7 @@ void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
u32 ctrl_addr = ce_state->ctrl_addr;
+ ath10k_record_ce_event(ar, ATH10K_CE_SERVICE, ce_id);
/*
* Clear before handling
*
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 5c18f6c..46bd5aa 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -970,6 +970,75 @@ struct ath10k_bus_params {
bool hl_msdu_ids;
};
+#define ATH10K_REG_ACCESS_HISTORY_MAX 512
+#define ATH10K_CE_EVENT_HISTORY_MAX 1024
+#define ATH10K_WMI_EVENT_HISTORY_MAX 512
+#define ATH10K_WMI_CMD_HISTORY_MAX 256
+
+#define ATH10K_WMI_DATA_LEN 16
+
+enum ath10k_ce_event {
+ ATH10K_IRQ_TRIGGER,
+ ATH10K_NAPI_POLL,
+ ATH10K_CE_SERVICE,
+ ATH10K_NAPI_COMPLETE,
+ ATH10K_NAPI_RESCHED,
+ ATH10K_IRQ_SUMMARY,
+};
+
+enum ath10k_wmi_type {
+ ATH10K_WMI_EVENT,
+ ATH10K_WMI_CMD,
+ ATH10K_WMI_TX_COMPL,
+};
+
+struct ath10k_reg_access_entry {
+ u32 cpu_id;
+ bool write;
+ u32 offset;
+ u32 val;
+ u64 timestamp;
+};
+
+struct ath10k_wmi_event_entry {
+ u32 cpu_id;
+ enum ath10k_wmi_type type;
+ u32 id;
+ u64 timestamp;
+ unsigned char data[ATH10K_WMI_DATA_LEN];
+};
+
+struct ath10k_ce_event_entry {
+ u32 cpu_id;
+ enum ath10k_ce_event event_type;
+ u32 ce_id;
+ u64 timestamp;
+};
+
+struct ath10k_wmi_event_history {
+ struct ath10k_wmi_event_entry *record;
+ u32 max_entries;
+ atomic_t index;
+ /* lock for accessing wmi event history */
+ spinlock_t hist_lock;
+};
+
+struct ath10k_ce_event_history {
+ struct ath10k_ce_event_entry *record;
+ u32 max_entries;
+ atomic_t index;
+ /* lock for accessing ce event history */
+ spinlock_t hist_lock;
+};
+
+struct ath10k_reg_access_history {
+ struct ath10k_reg_access_entry *record;
+ u32 max_entries;
+ atomic_t index;
+ /* lock for accessing register access history */
+ spinlock_t hist_lock;
+};
+
struct ath10k {
struct ath_common ath_common;
struct ieee80211_hw *hw;
@@ -1261,6 +1330,11 @@ struct ath10k {
bool coex_support;
int coex_gpio_pin;
+ struct ath10k_reg_access_history reg_access_history;
+ struct ath10k_ce_event_history ce_event_history;
+ struct ath10k_wmi_event_history wmi_event_history;
+ struct ath10k_wmi_event_history wmi_cmd_history;
+
/* must be last */
u8 drv_priv[] __aligned(sizeof(void *));
};
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index e8250a6..9105b0b 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -2722,4 +2722,137 @@ void ath10k_dbg_dump(struct ath10k *ar,
}
EXPORT_SYMBOL(ath10k_dbg_dump);
+int ath10k_core_reg_access_history_init(struct ath10k *ar, u32 max_entries)
+{
+ ar->reg_access_history.record = vzalloc(max_entries *
+ sizeof(struct ath10k_reg_access_entry));
+ if (!ar->reg_access_history.record)
+ return -ENOMEM;
+
+ ar->reg_access_history.max_entries = max_entries;
+ atomic_set(&ar->reg_access_history.index, 0);
+ spin_lock_init(&ar->reg_access_history.hist_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ath10k_core_reg_access_history_init);
+
+int ath10k_core_wmi_cmd_history_init(struct ath10k *ar, u32 max_entries)
+{
+ ar->wmi_cmd_history.record = vzalloc(max_entries *
+ sizeof(struct ath10k_wmi_event_entry));
+ if (!ar->wmi_cmd_history.record)
+ return -ENOMEM;
+
+ ar->wmi_cmd_history.max_entries = max_entries;
+ atomic_set(&ar->wmi_cmd_history.index, 0);
+ spin_lock_init(&ar->wmi_cmd_history.hist_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ath10k_core_wmi_cmd_history_init);
+
+int ath10k_core_wmi_event_history_init(struct ath10k *ar, u32 max_entries)
+{
+ ar->wmi_event_history.record = vzalloc(max_entries *
+ sizeof(struct ath10k_wmi_event_entry));
+ if (!ar->wmi_event_history.record)
+ return -ENOMEM;
+
+ ar->wmi_event_history.max_entries = max_entries;
+ atomic_set(&ar->wmi_event_history.index, 0);
+ spin_lock_init(&ar->wmi_event_history.hist_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ath10k_core_wmi_event_history_init);
+
+int ath10k_core_ce_event_history_init(struct ath10k *ar, u32 max_entries)
+{
+ ar->ce_event_history.record = vzalloc(max_entries *
+ sizeof(struct ath10k_ce_event_entry));
+ if (!ar->ce_event_history.record)
+ return -ENOMEM;
+
+ ar->ce_event_history.max_entries = max_entries;
+ atomic_set(&ar->ce_event_history.index, 0);
+ spin_lock_init(&ar->ce_event_history.hist_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(ath10k_core_ce_event_history_init);
+
+void ath10k_record_reg_access(struct ath10k *ar, u32 offset, u32 val, bool write)
+{
+ struct ath10k_reg_access_entry *entry;
+ u32 idx;
+
+ if (!ar->reg_access_history.record)
+ return;
+
+ idx = ath10k_core_get_next_idx(&ar->reg_access_history.index,
+ ar->reg_access_history.max_entries);
+ entry = &ar->reg_access_history.record[idx];
+
+ entry->timestamp = ath10k_core_get_timestamp();
+ entry->write = write;
+ entry->offset = offset;
+ entry->val = val;
+}
+EXPORT_SYMBOL(ath10k_record_reg_access);
+
+void ath10k_record_wmi_event(struct ath10k *ar, enum ath10k_wmi_type type,
+ u32 id, unsigned char *data)
+{
+ struct ath10k_wmi_event_entry *entry;
+ u32 idx;
+
+ if (type == ATH10K_WMI_EVENT) {
+ if (!ar->wmi_event_history.record)
+ return;
+
+ spin_lock_bh(&ar->wmi_event_history.hist_lock);
+ idx = ath10k_core_get_next_idx(&ar->reg_access_history.index,
+ ar->wmi_event_history.max_entries);
+ spin_unlock_bh(&ar->wmi_event_history.hist_lock);
+ entry = &ar->wmi_event_history.record[idx];
+ } else {
+ if (!ar->wmi_cmd_history.record)
+ return;
+
+ spin_lock_bh(&ar->wmi_cmd_history.hist_lock);
+ idx = ath10k_core_get_next_idx(&ar->reg_access_history.index,
+ ar->wmi_cmd_history.max_entries);
+ spin_unlock_bh(&ar->wmi_cmd_history.hist_lock);
+ entry = &ar->wmi_cmd_history.record[idx];
+ }
+
+ entry->timestamp = ath10k_core_get_timestamp();
+ entry->cpu_id = smp_processor_id();
+ entry->type = type;
+ entry->id = id;
+ memcpy(&entry->data, data + 4, ATH10K_WMI_DATA_LEN);
+}
+EXPORT_SYMBOL(ath10k_record_wmi_event);
+
+void ath10k_record_ce_event(struct ath10k *ar, enum ath10k_ce_event event_type,
+ int ce_id)
+{
+ struct ath10k_ce_event_entry *entry;
+ u32 idx;
+
+ if (!ar->ce_event_history.record)
+ return;
+
+ idx = ath10k_core_get_next_idx(&ar->ce_event_history.index,
+ ar->ce_event_history.max_entries);
+ entry = &ar->ce_event_history.record[idx];
+
+ entry->timestamp = ath10k_core_get_timestamp();
+ entry->cpu_id = smp_processor_id();
+ entry->event_type = event_type;
+ entry->ce_id = ce_id;
+}
+EXPORT_SYMBOL(ath10k_record_ce_event);
+
#endif /* CONFIG_ATH10K_DEBUG */
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index 997c1c8..c28aeb1 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -258,6 +258,38 @@ void ath10k_dbg_dump(struct ath10k *ar,
enum ath10k_debug_mask mask,
const char *msg, const char *prefix,
const void *buf, size_t len);
+
+/* ========== History init APIs =========== */
+int ath10k_core_reg_access_history_init(struct ath10k *ar, u32 max_entries);
+int ath10k_core_wmi_cmd_history_init(struct ath10k *ar, u32 max_entries);
+int ath10k_core_wmi_event_history_init(struct ath10k *ar, u32 max_entries);
+int ath10k_core_ce_event_history_init(struct ath10k *ar, u32 max_entries);
+
+/* ========== History record APIs =========== */
+void ath10k_record_reg_access(struct ath10k *ar, u32 offset, u32 val,
+ bool write);
+void ath10k_record_wmi_event(struct ath10k *ar, enum ath10k_wmi_type type,
+ u32 id, unsigned char *data);
+void ath10k_record_ce_event(struct ath10k *ar,
+ enum ath10k_ce_event event_type,
+ int ce_id);
+
+static inline u64 ath10k_core_get_timestamp(void)
+{
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+ return ((u64)ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);
+}
+
+static inline int ath10k_core_get_next_idx(atomic_t *index, u32 max_entries)
+{
+ u32 curr_idx;
+
+ curr_idx = atomic_fetch_inc(index);
+ return (curr_idx & (max_entries - 1));
+}
+
#else /* CONFIG_ATH10K_DEBUG */
static inline int __ath10k_dbg(struct ath10k *ar,
@@ -273,6 +305,48 @@ static inline void ath10k_dbg_dump(struct ath10k *ar,
const void *buf, size_t len)
{
}
+
+static inline int ath10k_core_reg_access_history_init(struct ath10k *ar,
+ u32 max_entries)
+{
+ return 0;
+}
+
+static inline int ath10k_core_wmi_cmd_history_init(struct ath10k *ar,
+ u32 max_entries)
+{
+ return 0;
+}
+
+static inline int ath10k_core_wmi_event_history_init(struct ath10k *ar,
+ u32 max_entries)
+{
+ return 0;
+}
+
+static inline int ath10k_core_ce_event_history_init(struct ath10k *ar,
+ u32 max_entries)
+{
+ return 0;
+}
+
+static inline void ath10k_record_reg_access(struct ath10k *ar, u32 offset,
+ u32 val, bool write)
+{
+}
+
+static inline void ath10k_record_wmi_event(struct ath10k *ar,
+ enum ath10k_wmi_type type,
+ u32 id, unsigned char *data)
+{
+}
+
+static inline void ath10k_record_ce_event(struct ath10k *ar,
+ enum ath10k_ce_event event_type,
+ int ce_id)
+{
+}
+
#endif /* CONFIG_ATH10K_DEBUG */
/* Avoid calling __ath10k_dbg() if debug_mask is not set and tracing
diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index 1ef5fdb..aa7ee32 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -473,6 +473,7 @@ static void ath10k_snoc_write32(struct ath10k *ar, u32 offset, u32 value)
{
struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
+ ath10k_record_reg_access(ar, offset, value, true);
iowrite32(value, ar_snoc->mem + offset);
}
@@ -482,6 +483,7 @@ static u32 ath10k_snoc_read32(struct ath10k *ar, u32 offset)
u32 val;
val = ioread32(ar_snoc->mem + offset);
+ ath10k_record_reg_access(ar, offset, val, false);
return val;
}
@@ -1159,6 +1161,7 @@ static irqreturn_t ath10k_snoc_per_engine_handler(int irq, void *arg)
ce_id);
return IRQ_HANDLED;
}
+ ath10k_record_ce_event(ar, ATH10K_IRQ_TRIGGER, ce_id);
ath10k_ce_disable_interrupt(ar, ce_id);
set_bit(ce_id, ar_snoc->pending_ce_irqs);
@@ -1175,6 +1178,7 @@ static int ath10k_snoc_napi_poll(struct napi_struct *ctx, int budget)
int done = 0;
int ce_id;
+ ath10k_record_ce_event(ar, ATH10K_NAPI_POLL, 0);
if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) {
napi_complete(ctx);
return done;
@@ -1188,8 +1192,12 @@ static int ath10k_snoc_napi_poll(struct napi_struct *ctx, int budget)
done = ath10k_htt_txrx_compl_task(ar, budget);
- if (done < budget)
+ if (done < budget) {
napi_complete(ctx);
+ ath10k_record_ce_event(ar, ATH10K_NAPI_COMPLETE, 0);
+ } else {
+ ath10k_record_ce_event(ar, ATH10K_NAPI_RESCHED, 0);
+ }
return done;
}
@@ -1660,6 +1668,11 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
ar->ce_priv = &ar_snoc->ce;
msa_size = drv_data->msa_size;
+ ath10k_core_reg_access_history_init(ar, ATH10K_REG_ACCESS_HISTORY_MAX);
+ ath10k_core_wmi_event_history_init(ar, ATH10K_WMI_EVENT_HISTORY_MAX);
+ ath10k_core_wmi_cmd_history_init(ar, ATH10K_WMI_CMD_HISTORY_MAX);
+ ath10k_core_ce_event_history_init(ar, ATH10K_CE_EVENT_HISTORY_MAX);
+
ath10k_snoc_quirks_init(ar);
ret = ath10k_snoc_resource_init(ar);
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 932266d..9df5748 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -627,6 +627,7 @@ static void ath10k_wmi_tlv_op_rx(struct ath10k *ar, struct sk_buff *skb)
if (skb_pull(skb, sizeof(struct wmi_cmd_hdr)) == NULL)
goto out;
+ ath10k_record_wmi_event(ar, ATH10K_WMI_EVENT, id, skb->data);
trace_ath10k_wmi_event(ar, id, skb->data, skb->len);
consumed = ath10k_tm_event_wmi(ar, id, skb);
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index a81a1ab..8ebd05c 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1802,6 +1802,15 @@ struct sk_buff *ath10k_wmi_alloc_skb(struct ath10k *ar, u32 len)
static void ath10k_wmi_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb)
{
+ struct wmi_cmd_hdr *cmd_hdr;
+ enum wmi_tlv_event_id id;
+
+ cmd_hdr = (struct wmi_cmd_hdr *)skb->data;
+ id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID);
+
+ ath10k_record_wmi_event(ar, ATH10K_WMI_TX_COMPL, id,
+ skb->data + sizeof(struct wmi_cmd_hdr));
+
dev_kfree_skb(skb);
}
@@ -1912,6 +1921,7 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id)
might_sleep();
+ ath10k_record_wmi_event(ar, ATH10K_WMI_CMD, cmd_id, skb->data);
if (cmd_id == WMI_CMD_UNSUPPORTED) {
ath10k_warn(ar, "wmi command %d is not supported by firmware\n",
cmd_id);
--
2.7.4
^ permalink raw reply related
* [PATCH v2 0/3]
From: Rakesh Pillai @ 2020-07-31 18:27 UTC (permalink / raw)
To: ath10k
Cc: linux-wireless, linux-kernel, kvalo, davem, kuba, netdev,
Rakesh Pillai
The history recording will be compiled only if
ATH10K_DEBUG is enabled, and also enabled via
the module parameter. Once the history recording
is enabled via module parameter, it can be enabled
or disabled runtime via debugfs.
---
Changes from v1:
- Add module param and debugfs to enable/disable history recording.
Rakesh Pillai (3):
ath10k: Add history for tracking certain events
ath10k: Add module param to enable history
ath10k: Add debugfs support to enable event history
drivers/net/wireless/ath/ath10k/ce.c | 1 +
drivers/net/wireless/ath/ath10k/core.c | 3 +
drivers/net/wireless/ath/ath10k/core.h | 82 ++++++++++++
drivers/net/wireless/ath/ath10k/debug.c | 207 ++++++++++++++++++++++++++++++
drivers/net/wireless/ath/ath10k/debug.h | 75 +++++++++++
drivers/net/wireless/ath/ath10k/snoc.c | 15 ++-
drivers/net/wireless/ath/ath10k/wmi-tlv.c | 1 +
drivers/net/wireless/ath/ath10k/wmi.c | 10 ++
8 files changed, 393 insertions(+), 1 deletion(-)
--
2.7.4
^ permalink raw reply
* [PATCH v2 3/3] ath10k: Add debugfs support to enable event history
From: Rakesh Pillai @ 2020-07-31 18:27 UTC (permalink / raw)
To: ath10k
Cc: linux-wireless, linux-kernel, kvalo, davem, kuba, netdev,
Rakesh Pillai
In-Reply-To: <1596220042-2778-1-git-send-email-pillair@codeaurora.org>
Add the support to enable/disable the recording of
debug events history.
The enable/disable of the history from debugfs will
not make any affect if its not enabled via module
parameter.
Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1
Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
---
drivers/net/wireless/ath/ath10k/debug.c | 56 +++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 5d08652..6785fae 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -610,6 +610,59 @@ static const struct file_operations fops_simulate_fw_crash = {
.llseek = default_llseek,
};
+static ssize_t ath10k_read_history_enable(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ const char buf[] =
+ "To enable recording of certain event history, write to this file with the enable mask\n"
+ "BIT(0): Enable Reg Access history\n"
+ " - Register write events\n"
+ " - Register read events\n"
+ "BIT(1): Enable CE events history\n"
+ " - ATH10K_IRQ_TRIGGER event\n"
+ " - ATH10K_NAPI_POLL event\n"
+ " - ATH10K_CE_SERVICE event\n"
+ " - ATH10K_NAPI_COMPLETE event\n"
+ " - ATH10K_NAPI_RESCHED event\n"
+ " - ATH10K_IRQ_SUMMARY event\n"
+ "BIT(2): Enable WMI CMD history\n"
+ " - WMI CMD event\n"
+ " - WMI CMD TX completion event\n"
+ "BIT(3): Enable WMI events history\n"
+ " - WMI Events event\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
+}
+
+static ssize_t ath10k_write_history_enable(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ u32 history_enable_mask;
+ int i, ret;
+
+ ret = kstrtou32_from_user(user_buf, count, 0, &history_enable_mask);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < ATH10K_HISTORY_MAX; i++)
+ if (history_enable_mask & BIT(i))
+ set_bit(i, &ath10k_history_enable_mask);
+ else
+ clear_bit(i, &ath10k_history_enable_mask);
+
+ return count;
+}
+
+static const struct file_operations fops_history_enable = {
+ .read = ath10k_read_history_enable,
+ .write = ath10k_write_history_enable,
+ .open = simple_open,
+ .owner = THIS_MODULE,
+ .llseek = default_llseek,
+};
+
static ssize_t ath10k_read_chip_id(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
@@ -2658,6 +2711,9 @@ int ath10k_debug_register(struct ath10k *ar)
debugfs_create_file("reset_htt_stats", 0200, ar->debug.debugfs_phy, ar,
&fops_reset_htt_stats);
+ debugfs_create_file("history_enable", 0644, ar->debug.debugfs_phy, ar,
+ &fops_history_enable);
+
return 0;
}
--
2.7.4
^ permalink raw reply related
* [PATCH v2 bpf-next 0/5] BPF link force-detach support
From: Andrii Nakryiko @ 2020-07-31 18:28 UTC (permalink / raw)
To: bpf, netdev, ast, daniel; +Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko
This patch set adds new BPF link operation, LINK_DETACH, allowing processes
with BPF link FD to force-detach it from respective BPF hook, similarly how
BPF link is auto-detached when such BPF hook (e.g., cgroup, net_device, netns,
etc) is removed. This facility allows admin to forcefully undo BPF link
attachment, while process that created BPF link in the first place is left
intact.
Once force-detached, BPF link stays valid in the kernel as long as there is at
least one FD open against it. It goes into defunct state, just like
auto-detached BPF link.
bpftool also got `link detach` command to allow triggering this in
non-programmatic fashion.
v1->v2:
- improve error reporting in `bpftool link detach` (Song).
Andrii Nakryiko (5):
bpf: add support for forced LINK_DETACH command
libbpf: add bpf_link detach APIs
selftests/bpf: add link detach tests for cgroup, netns, and xdp
bpf_links
tools/bpftool: add `link detach` subcommand
tools/bpftool: add documentation and bash-completion for `link detach`
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 5 ++
kernel/bpf/cgroup.c | 15 +++++-
kernel/bpf/net_namespace.c | 8 +++
kernel/bpf/syscall.c | 26 ++++++++++
net/core/dev.c | 11 +++-
.../bpftool/Documentation/bpftool-link.rst | 8 +++
tools/bpf/bpftool/bash-completion/bpftool | 4 +-
tools/bpf/bpftool/link.c | 37 +++++++++++++-
tools/include/uapi/linux/bpf.h | 5 ++
tools/lib/bpf/bpf.c | 10 ++++
tools/lib/bpf/bpf.h | 2 +
tools/lib/bpf/libbpf.c | 5 ++
tools/lib/bpf/libbpf.h | 1 +
tools/lib/bpf/libbpf.map | 2 +
.../selftests/bpf/prog_tests/cgroup_link.c | 20 +++++++-
.../selftests/bpf/prog_tests/sk_lookup.c | 51 +++++++++----------
.../selftests/bpf/prog_tests/xdp_link.c | 14 +++++
tools/testing/selftests/bpf/testing_helpers.c | 14 +++++
tools/testing/selftests/bpf/testing_helpers.h | 3 ++
20 files changed, 208 insertions(+), 34 deletions(-)
--
2.24.1
^ permalink raw reply
* [PATCH v2 bpf-next 2/5] libbpf: add bpf_link detach APIs
From: Andrii Nakryiko @ 2020-07-31 18:28 UTC (permalink / raw)
To: bpf, netdev, ast, daniel
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Song Liu
In-Reply-To: <20200731182830.286260-1-andriin@fb.com>
Add low-level bpf_link_detach() API. Also add higher-level bpf_link__detach()
one.
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
tools/include/uapi/linux/bpf.h | 5 +++++
tools/lib/bpf/bpf.c | 10 ++++++++++
tools/lib/bpf/bpf.h | 2 ++
tools/lib/bpf/libbpf.c | 5 +++++
tools/lib/bpf/libbpf.h | 1 +
tools/lib/bpf/libbpf.map | 2 ++
6 files changed, 25 insertions(+)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index eb5e0c38eb2c..b134e679e9db 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_cmd {
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
+ BPF_LINK_DETACH,
};
enum bpf_map_type {
@@ -634,6 +635,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct {
+ __u32 link_fd;
+ } link_detach;
+
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e1bdf214f75f..eab14c97c15d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -603,6 +603,16 @@ int bpf_link_create(int prog_fd, int target_fd,
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
+int bpf_link_detach(int link_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_detach.link_fd = link_fd;
+
+ return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+}
+
int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts)
{
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6d367e01d05e..28855fd5b5f4 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -178,6 +178,8 @@ LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts);
+LIBBPF_API int bpf_link_detach(int link_fd);
+
struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b9f11f854985..7be04e45d29c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7748,6 +7748,11 @@ struct bpf_link *bpf_link__open(const char *path)
return link;
}
+int bpf_link__detach(struct bpf_link *link)
+{
+ return bpf_link_detach(link->fd) ? -errno : 0;
+}
+
int bpf_link__pin(struct bpf_link *link, const char *path)
{
int err;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 9924385462ab..3ed1399bfbbc 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -229,6 +229,7 @@ LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
+LIBBPF_API int bpf_link__detach(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index ca49a6a7e5b2..099863411f7d 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -273,6 +273,8 @@ LIBBPF_0.0.9 {
LIBBPF_0.1.0 {
global:
+ bpf_link__detach;
+ bpf_link_detach;
bpf_map__ifindex;
bpf_map__key_size;
bpf_map__map_flags;
--
2.24.1
^ permalink raw reply related
* [PATCH v2 bpf-next 4/5] tools/bpftool: add `link detach` subcommand
From: Andrii Nakryiko @ 2020-07-31 18:28 UTC (permalink / raw)
To: bpf, netdev, ast, daniel
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Song Liu
In-Reply-To: <20200731182830.286260-1-andriin@fb.com>
Add ability to force-detach BPF link. Also add missing error message, if
specified link ID is wrong.
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
tools/bpf/bpftool/link.c | 37 ++++++++++++++++++++++++++++++++++++-
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 326b8fdf0243..1b793759170e 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -22,6 +22,8 @@ static const char * const link_type_name[] = {
static int link_parse_fd(int *argc, char ***argv)
{
+ int fd;
+
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@@ -35,7 +37,10 @@ static int link_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
- return bpf_link_get_fd_by_id(id);
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("failed to get link with ID %d: %s", id, strerror(errno));
+ return fd;
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -316,6 +321,34 @@ static int do_pin(int argc, char **argv)
return err;
}
+static int do_detach(int argc, char **argv)
+{
+ int err, fd;
+
+ if (argc != 2) {
+ p_err("link specifier is invalid or missing\n");
+ return 1;
+ }
+
+ fd = link_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return 1;
+
+ err = bpf_link_detach(fd);
+ if (err)
+ err = -errno;
+ close(fd);
+ if (err) {
+ p_err("failed link detach: %s", strerror(-err));
+ return 1;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -326,6 +359,7 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s { show | list } [LINK]\n"
" %1$s %2$s pin LINK FILE\n"
+ " %1$s %2$s detach LINK\n"
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
@@ -341,6 +375,7 @@ static const struct cmd cmds[] = {
{ "list", do_show },
{ "help", do_help },
{ "pin", do_pin },
+ { "detach", do_detach },
{ 0 }
};
--
2.24.1
^ permalink raw reply related
* [PATCH v2 bpf-next 3/5] selftests/bpf: add link detach tests for cgroup, netns, and xdp bpf_links
From: Andrii Nakryiko @ 2020-07-31 18:28 UTC (permalink / raw)
To: bpf, netdev, ast, daniel
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Song Liu
In-Reply-To: <20200731182830.286260-1-andriin@fb.com>
Add bpf_link__detach() testing to selftests for cgroup, netns, and xdp
bpf_links.
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
.../selftests/bpf/prog_tests/cgroup_link.c | 20 +++++++-
.../selftests/bpf/prog_tests/sk_lookup.c | 51 +++++++++----------
.../selftests/bpf/prog_tests/xdp_link.c | 14 +++++
tools/testing/selftests/bpf/testing_helpers.c | 14 +++++
tools/testing/selftests/bpf/testing_helpers.h | 3 ++
5 files changed, 73 insertions(+), 29 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 6e04f8d1d15b..4d9b514b3fd9 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "test_cgroup_link.skel.h"
static __u32 duration = 0;
@@ -37,7 +38,8 @@ void test_cgroup_link(void)
int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
- __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id;
+ struct bpf_link_info info;
int i = 0, err, prog_fd;
bool detach_legacy = false;
@@ -219,6 +221,22 @@ void test_cgroup_link(void)
/* BPF programs should still get called */
ping_and_check(0, cg_nr);
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ err = bpf_link__detach(links[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ /* cgroup_id should be zero in link_info */
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ /* First BPF program shouldn't be called anymore */
+ ping_and_check(0, cg_nr - 1);
+
/* leave cgroup and remove them, don't detach programs */
cleanup_cgroup_environment();
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 379da6f10ee9..c571584c00f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -34,6 +34,7 @@
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
+#include "testing_helpers.h"
#include "test_sk_lookup.skel.h"
/* External (address, port) pairs the client sends packets to. */
@@ -469,34 +470,10 @@ static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
return 0;
}
-static __u32 link_info_prog_id(struct bpf_link *link)
-{
- struct bpf_link_info info = {};
- __u32 info_len = sizeof(info);
- int link_fd, err;
-
- link_fd = bpf_link__fd(link);
- if (CHECK(link_fd < 0, "bpf_link__fd", "failed\n")) {
- errno = -link_fd;
- log_err("bpf_link__fd failed");
- return 0;
- }
-
- err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
- if (CHECK(err, "bpf_obj_get_info_by_fd", "failed\n")) {
- log_err("bpf_obj_get_info_by_fd");
- return 0;
- }
- if (CHECK(info_len != sizeof(info), "bpf_obj_get_info_by_fd",
- "unexpected info len %u\n", info_len))
- return 0;
-
- return info.prog_id;
-}
-
static void query_lookup_prog(struct test_sk_lookup *skel)
{
struct bpf_link *link[3] = {};
+ struct bpf_link_info info;
__u32 attach_flags = 0;
__u32 prog_ids[3] = {};
__u32 prog_cnt = 3;
@@ -534,18 +511,36 @@ static void query_lookup_prog(struct test_sk_lookup *skel)
if (CHECK(prog_cnt != 3, "bpf_prog_query",
"wrong program count on query: %u", prog_cnt))
goto detach;
- prog_id = link_info_prog_id(link[0]);
+ prog_id = link_info_prog_id(link[0], &info);
CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
"invalid program #0 id on query: %u != %u\n",
prog_ids[0], prog_id);
- prog_id = link_info_prog_id(link[1]);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[1], &info);
CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
"invalid program #1 id on query: %u != %u\n",
prog_ids[1], prog_id);
- prog_id = link_info_prog_id(link[2]);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[2], &info);
CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
"invalid program #2 id on query: %u != %u\n",
prog_ids[2], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+ err = bpf_link__detach(link[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto detach;
+
+ /* prog id is still there, but netns_ino is zeroed out */
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino != 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
detach:
if (link[2])
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 52cba6795d40..6f814999b395 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -131,6 +131,20 @@ void test_xdp_link(void)
CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
"got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+ err = bpf_link__detach(link);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len);
+ if (CHECK(err, "link_info", "failed: %d\n", err))
+ goto cleanup;
+ CHECK(link_info.prog_id != id1, "link_prog_id",
+ "got %u != exp %u\n", link_info.prog_id, id1);
+ /* ifindex should be zeroed out */
+ CHECK(link_info.xdp.ifindex != 0, "link_ifindex",
+ "got %u != exp %u\n", link_info.xdp.ifindex, 0);
+
cleanup:
test_xdp_link__destroy(skel1);
test_xdp_link__destroy(skel2);
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 0af6337a8962..800d503e5cb4 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -64,3 +64,17 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
+
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
+{
+ __u32 info_len = sizeof(*info);
+ int err;
+
+ memset(info, 0, sizeof(*info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+ if (err) {
+ printf("failed to get link info: %d\n", -errno);
+ return 0;
+ }
+ return info->prog_id;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 923b51762759..d4f8e749611b 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,5 +1,8 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (C) 2020 Facebook, Inc. */
#include <stdbool.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
int parse_num_list(const char *s, bool **set, int *set_len);
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
--
2.24.1
^ permalink raw reply related
* [PATCH v2 bpf-next 5/5] tools/bpftool: add documentation and bash-completion for `link detach`
From: Andrii Nakryiko @ 2020-07-31 18:28 UTC (permalink / raw)
To: bpf, netdev, ast, daniel
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Song Liu
In-Reply-To: <20200731182830.286260-1-andriin@fb.com>
Add info on link detach sub-command to man page. Add detach to bash-completion
as well.
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
tools/bpf/bpftool/Documentation/bpftool-link.rst | 8 ++++++++
tools/bpf/bpftool/bash-completion/bpftool | 4 ++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 38b0949a185b..4a52e7a93339 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -21,6 +21,7 @@ LINK COMMANDS
| **bpftool** **link { show | list }** [*LINK*]
| **bpftool** **link pin** *LINK* *FILE*
+| **bpftool** **link detach *LINK*
| **bpftool** **link help**
|
| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
@@ -49,6 +50,13 @@ DESCRIPTION
contain a dot character ('.'), which is reserved for future
extensions of *bpffs*.
+ **bpftool link detach** *LINK*
+ Force-detach link *LINK*. BPF link and its underlying BPF
+ program will stay valid, but they will be detached from the
+ respective BPF hook and BPF link will transition into
+ a defunct state until last open file descriptor for that
+ link is closed.
+
**bpftool link help**
Print short help message.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 257fa310ea2b..f53ed2f1a4aa 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1122,7 +1122,7 @@ _bpftool()
;;
link)
case $command in
- show|list|pin)
+ show|list|pin|detach)
case $prev in
id)
_bpftool_get_link_ids
@@ -1139,7 +1139,7 @@ _bpftool()
COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
return 0
;;
- pin)
+ pin|detach)
if [[ $prev == "$command" ]]; then
COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
else
--
2.24.1
^ permalink raw reply related
* [PATCH v2 bpf-next 1/5] bpf: add support for forced LINK_DETACH command
From: Andrii Nakryiko @ 2020-07-31 18:28 UTC (permalink / raw)
To: bpf, netdev, ast, daniel
Cc: andrii.nakryiko, kernel-team, Andrii Nakryiko, Song Liu
In-Reply-To: <20200731182830.286260-1-andriin@fb.com>
Add LINK_DETACH command to force-detach bpf_link without destroying it. It has
the same behavior as auto-detaching of bpf_link due to cgroup dying for
bpf_cgroup_link or net_device being destroyed for bpf_xdp_link. In such case,
bpf_link is still a valid kernel object, but is defuncts and doesn't hold BPF
program attached to corresponding BPF hook. This functionality allows users
with enough access rights to manually force-detach attached bpf_link without
killing respective owner process.
This patch implements LINK_DETACH for cgroup, xdp, and netns links, mostly
re-using existing link release handling code.
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 5 +++++
kernel/bpf/cgroup.c | 15 ++++++++++++++-
kernel/bpf/net_namespace.c | 8 ++++++++
kernel/bpf/syscall.c | 26 ++++++++++++++++++++++++++
net/core/dev.c | 11 ++++++++++-
6 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 40c5e206ecf2..cef4ef0d2b4e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -793,6 +793,7 @@ struct bpf_link {
struct bpf_link_ops {
void (*release)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link);
+ int (*detach)(struct bpf_link *link);
int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
struct bpf_prog *old_prog);
void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index eb5e0c38eb2c..b134e679e9db 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -117,6 +117,7 @@ enum bpf_cmd {
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
+ BPF_LINK_DETACH,
};
enum bpf_map_type {
@@ -634,6 +635,10 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct {
+ __u32 link_fd;
+ } link_detach;
+
struct { /* struct used by BPF_ENABLE_STATS command */
__u32 type;
} enable_stats;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 957cce1d5168..83ff127ef7ae 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -814,6 +814,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
{
struct bpf_cgroup_link *cg_link =
container_of(link, struct bpf_cgroup_link, link);
+ struct cgroup *cg;
/* link might have been auto-detached by dying cgroup already,
* in that case our work is done here
@@ -832,8 +833,12 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
cg_link->type));
+ cg = cg_link->cgroup;
+ cg_link->cgroup = NULL;
+
mutex_unlock(&cgroup_mutex);
- cgroup_put(cg_link->cgroup);
+
+ cgroup_put(cg);
}
static void bpf_cgroup_link_dealloc(struct bpf_link *link)
@@ -844,6 +849,13 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link)
kfree(cg_link);
}
+static int bpf_cgroup_link_detach(struct bpf_link *link)
+{
+ bpf_cgroup_link_release(link);
+
+ return 0;
+}
+
static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
@@ -883,6 +895,7 @@ static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
static const struct bpf_link_ops bpf_cgroup_link_lops = {
.release = bpf_cgroup_link_release,
.dealloc = bpf_cgroup_link_dealloc,
+ .detach = bpf_cgroup_link_detach,
.update_prog = cgroup_bpf_replace,
.show_fdinfo = bpf_cgroup_link_show_fdinfo,
.fill_link_info = bpf_cgroup_link_fill_link_info,
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index 71405edd667c..542f275bf252 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -142,9 +142,16 @@ static void bpf_netns_link_release(struct bpf_link *link)
bpf_prog_array_free(old_array);
out_unlock:
+ net_link->net = NULL;
mutex_unlock(&netns_bpf_mutex);
}
+static int bpf_netns_link_detach(struct bpf_link *link)
+{
+ bpf_netns_link_release(link);
+ return 0;
+}
+
static void bpf_netns_link_dealloc(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
@@ -228,6 +235,7 @@ static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
static const struct bpf_link_ops bpf_netns_link_ops = {
.release = bpf_netns_link_release,
.dealloc = bpf_netns_link_dealloc,
+ .detach = bpf_netns_link_detach,
.update_prog = bpf_netns_link_update_prog,
.fill_link_info = bpf_netns_link_fill_info,
.show_fdinfo = bpf_netns_link_show_fdinfo,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cd3d599e9e90..2f343ce15747 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3991,6 +3991,29 @@ static int link_update(union bpf_attr *attr)
return ret;
}
+#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
+
+static int link_detach(union bpf_attr *attr)
+{
+ struct bpf_link *link;
+ int ret;
+
+ if (CHECK_ATTR(BPF_LINK_DETACH))
+ return -EINVAL;
+
+ link = bpf_link_get_from_fd(attr->link_detach.link_fd);
+ if (IS_ERR(link))
+ return PTR_ERR(link);
+
+ if (link->ops->detach)
+ ret = link->ops->detach(link);
+ else
+ ret = -EOPNOTSUPP;
+
+ bpf_link_put(link);
+ return ret;
+}
+
static int bpf_link_inc_not_zero(struct bpf_link *link)
{
return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT;
@@ -4240,6 +4263,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_ITER_CREATE:
err = bpf_iter_create(&attr);
break;
+ case BPF_LINK_DETACH:
+ err = link_detach(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/net/core/dev.c b/net/core/dev.c
index a2a57988880a..c8b911b10187 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8979,12 +8979,20 @@ static void bpf_xdp_link_release(struct bpf_link *link)
/* if racing with net_device's tear down, xdp_link->dev might be
* already NULL, in which case link was already auto-detached
*/
- if (xdp_link->dev)
+ if (xdp_link->dev) {
WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+ xdp_link->dev = NULL;
+ }
rtnl_unlock();
}
+static int bpf_xdp_link_detach(struct bpf_link *link)
+{
+ bpf_xdp_link_release(link);
+ return 0;
+}
+
static void bpf_xdp_link_dealloc(struct bpf_link *link)
{
struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
@@ -9066,6 +9074,7 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
static const struct bpf_link_ops bpf_xdp_link_lops = {
.release = bpf_xdp_link_release,
.dealloc = bpf_xdp_link_dealloc,
+ .detach = bpf_xdp_link_detach,
.show_fdinfo = bpf_xdp_link_show_fdinfo,
.fill_link_info = bpf_xdp_link_fill_link_info,
.update_prog = bpf_xdp_link_update,
--
2.24.1
^ permalink raw reply related
* Re: [PATCH v2] ravb: Fixed the problem that rmmod can not be done
From: Sergei Shtylyov @ 2020-07-31 18:32 UTC (permalink / raw)
To: Yuusuke Ashizuka; +Cc: netdev, linux-renesas-soc, David S. Miller
In-Reply-To: <20200730100151.7490-1-ashiduka@fujitsu.com>
On 7/30/20 1:01 PM, Yuusuke Ashizuka wrote:
CCing DaveM (as you should have done from the start)...
> ravb is a module driver, but I cannot rmmod it after insmod it.
> ravb does mdio_init() at the time of probe, and module->refcnt is incremented
> by alloc_mdio_bitbang() called after that.
> Therefore, even if ifup is not performed, the driver is in use and rmmod cannot
> be performed.
>
> $ lsmod
> Module Size Used by
Did you also build mdio-bitbang.c as a module? For the in-kernal driver, not
being able to rmmod the 'ravb' one sounds logical. :-)
> ravb 40960 1
> $ rmmod ravb
> rmmod: ERROR: Module ravb is in use
>
> Fixed to execute mdio_init() at open and free_mdio() at close, thereby rmmod is
Call ravb_mdio_init() at open and free_mdio_bitbang() at close.
> possible in the ifdown state.
Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
> Signed-off-by: Yuusuke Ashizuka <ashiduka@fujitsu.com>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
[...]
> diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
> index 99f7aae102ce..df89d09b253e 100644
> --- a/drivers/net/ethernet/renesas/ravb_main.c
> +++ b/drivers/net/ethernet/renesas/ravb_main.c
> @@ -1342,6 +1342,51 @@ static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
> return error;
> }
>
> +/* MDIO bus init function */
> +static int ravb_mdio_init(struct ravb_private *priv)
> +{
> + struct platform_device *pdev = priv->pdev;
> + struct device *dev = &pdev->dev;
> + int error;
> +
> + /* Bitbang init */
> + priv->mdiobb.ops = &bb_ops;
> +
> + /* MII controller setting */
> + priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb);
> + if (!priv->mii_bus)
> + return -ENOMEM;
> +
> + /* Hook up MII support for ethtool */
> + priv->mii_bus->name = "ravb_mii";
> + priv->mii_bus->parent = dev;
> + snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
> + pdev->name, pdev->id);
> +
> + /* Register MDIO bus */
> + error = of_mdiobus_register(priv->mii_bus, dev->of_node);
> + if (error)
> + goto out_free_bus;
> +
> + return 0;
> +
> +out_free_bus:
> + free_mdio_bitbang(priv->mii_bus);
> + return error;
> +}
> +
> +/* MDIO bus release function */
> +static int ravb_mdio_release(struct ravb_private *priv)
> +{
> + /* Unregister mdio bus */
> + mdiobus_unregister(priv->mii_bus);
> +
> + /* Free bitbang info */
> + free_mdio_bitbang(priv->mii_bus);
> +
> + return 0;
> +}
> +
[...]
> @@ -1887,51 +1942,6 @@ static const struct net_device_ops ravb_netdev_ops = {
> .ndo_set_features = ravb_set_features,
> };
>
> -/* MDIO bus init function */
> -static int ravb_mdio_init(struct ravb_private *priv)
> -{
> - struct platform_device *pdev = priv->pdev;
> - struct device *dev = &pdev->dev;
> - int error;
> -
> - /* Bitbang init */
> - priv->mdiobb.ops = &bb_ops;
> -
> - /* MII controller setting */
> - priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb);
> - if (!priv->mii_bus)
> - return -ENOMEM;
> -
> - /* Hook up MII support for ethtool */
> - priv->mii_bus->name = "ravb_mii";
> - priv->mii_bus->parent = dev;
> - snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
> - pdev->name, pdev->id);
> -
> - /* Register MDIO bus */
> - error = of_mdiobus_register(priv->mii_bus, dev->of_node);
> - if (error)
> - goto out_free_bus;
> -
> - return 0;
> -
> -out_free_bus:
> - free_mdio_bitbang(priv->mii_bus);
> - return error;
> -}
> -
> -/* MDIO bus release function */
> -static int ravb_mdio_release(struct ravb_private *priv)
> -{
> - /* Unregister mdio bus */
> - mdiobus_unregister(priv->mii_bus);
> -
> - /* Free bitbang info */
> - free_mdio_bitbang(priv->mii_bus);
> -
> - return 0;
> -}
> -
Dave, would you tolerate the forward declarations here instead (to avoid the function moves, to be later
done in the net-next tree)?
[...]
MBR, Sergei
^ permalink raw reply
* Re: [PATCH net-next] Add Mellanox BlueField Gigabit Ethernet driver
From: Andrew Lunn @ 2020-07-31 18:37 UTC (permalink / raw)
To: David Thompson; +Cc: netdev, davem, kuba, jiri, Asmaa Mnebhi
In-Reply-To: <1596047355-28777-1-git-send-email-dthompson@mellanox.com>
On Wed, Jul 29, 2020 at 02:29:15PM -0400, David Thompson wrote:
Hi David
> +static void mlxbf_gige_get_pauseparam(struct net_device *netdev,
> + struct ethtool_pauseparam *pause)
> +{
> + pause->autoneg = AUTONEG_ENABLE;
> + pause->rx_pause = 1;
> + pause->tx_pause = 1;
This is incorrect. You say autoneg is supported. So you should be
returning the result of the autoneg. But what is also wrong is you
don't appear to be programming the MAC with the result of the autoneg.
mlxbf_gige_handle_link_change() should be doing this.
> +}
> +
> +static int mlxbf_gige_get_link_ksettings(struct net_device *netdev,
> + struct ethtool_link_ksettings *link_ksettings)
> +{
> + struct phy_device *phydev = netdev->phydev;
> + u32 supported, advertising;
> + u32 lp_advertising = 0;
> + int status;
phy_ethtool_ksettings_get() and maybe phy_ethtool_ksettings_set().
> +
> + supported = SUPPORTED_TP | SUPPORTED_1000baseT_Full |
> + SUPPORTED_Autoneg | SUPPORTED_Pause;
> +
> + advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
> + ADVERTISED_Pause;
> +
> + status = phy_read(phydev, MII_LPA);
> + if (status >= 0)
> + lp_advertising = mii_lpa_to_ethtool_lpa_t(status & 0xffff);
> +
> + status = phy_read(phydev, MII_STAT1000);
> + if (status >= 0)
> + lp_advertising |= mii_stat1000_to_ethtool_lpa_t(status & 0xffff);
> +
The MAC driver has no business poking around in PHY registers. Call
into phylib.
> +static void mlxbf_gige_handle_link_change(struct net_device *netdev)
> +{
> + struct mlxbf_gige *priv = netdev_priv(netdev);
> + struct phy_device *phydev = netdev->phydev;
> + irqreturn_t ret;
> +
> + ret = mlxbf_gige_mdio_handle_phy_interrupt(priv);
You are polling the PHY. I don't see anywhere you link the interrupt
to phylib.
> + if (ret != IRQ_HANDLED)
> + return;
> +
> + /* print new link status only if the interrupt came from the PHY */
> + phy_print_status(phydev);
> +}
> +static int mlxbf_gige_open(struct net_device *netdev)
> +{
> + struct mlxbf_gige *priv = netdev_priv(netdev);
> + struct phy_device *phydev = netdev->phydev;
> + u64 int_en;
> + int err;
> +
> + mlxbf_gige_cache_stats(priv);
> + mlxbf_gige_clean_port(priv);
> + mlxbf_gige_rx_init(priv);
> + mlxbf_gige_tx_init(priv);
> + netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll, NAPI_POLL_WEIGHT);
> + napi_enable(&priv->napi);
> + netif_start_queue(netdev);
> +
> + err = mlxbf_gige_request_irqs(priv);
> + if (err)
> + return err;
> +
> + phy_start(phydev);
> +
> + /* Set bits in INT_EN that we care about */
> + int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR |
> + MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS |
> + MLXBF_GIGE_INT_EN_TX_SMALL_FRAME_SIZE |
> + MLXBF_GIGE_INT_EN_TX_PI_CI_EXCEED_WQ_SIZE |
> + MLXBF_GIGE_INT_EN_SW_CONFIG_ERROR |
> + MLXBF_GIGE_INT_EN_SW_ACCESS_ERROR |
> + MLXBF_GIGE_INT_EN_RX_RECEIVE_PACKET;
> + writeq(int_en, priv->base + MLXBF_GIGE_INT_EN);
> +
> + return 0;
> +}
> +
> +static int mlxbf_gige_stop(struct net_device *netdev)
> +{
> + struct mlxbf_gige *priv = netdev_priv(netdev);
> +
> + writeq(0, priv->base + MLXBF_GIGE_INT_EN);
> + netif_stop_queue(netdev);
> + napi_disable(&priv->napi);
> + netif_napi_del(&priv->napi);
> + mlxbf_gige_free_irqs(priv);
> +
> + if (netdev->phydev)
> + phy_stop(netdev->phydev);
In open() you unconditionally start the phy. Do you expect the PHY to
disappear between open and stop?
> +static int mlxbf_gige_probe(struct platform_device *pdev)
> +{
> + phydev = phy_find_first(priv->mdiobus);
> + if (!phydev)
> + return -EIO;
-ENODEV would seem more appropriate.
Andrew
^ permalink raw reply
* Re: [PATCH v2 1/3] ath10k: Add history for tracking certain events
From: Ben Greear @ 2020-07-31 18:38 UTC (permalink / raw)
To: Rakesh Pillai, ath10k
Cc: linux-wireless, linux-kernel, kvalo, davem, kuba, netdev
In-Reply-To: <1596220042-2778-2-git-send-email-pillair@codeaurora.org>
On 7/31/20 11:27 AM, Rakesh Pillai wrote:
> Add history for tracking the below events
> - register read
> - register write
> - IRQ trigger
> - NAPI poll
> - CE service
> - WMI cmd
> - WMI event
> - WMI tx completion
>
> This will help in debugging any crash or any
> improper behaviour.
>
> Tested-on: WCN3990 hw1.0 SNOC WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1
>
> Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
> ---
> drivers/net/wireless/ath/ath10k/ce.c | 1 +
> drivers/net/wireless/ath/ath10k/core.h | 74 +++++++++++++++++
> drivers/net/wireless/ath/ath10k/debug.c | 133 ++++++++++++++++++++++++++++++
> drivers/net/wireless/ath/ath10k/debug.h | 74 +++++++++++++++++
> drivers/net/wireless/ath/ath10k/snoc.c | 15 +++-
> drivers/net/wireless/ath/ath10k/wmi-tlv.c | 1 +
> drivers/net/wireless/ath/ath10k/wmi.c | 10 +++
> 7 files changed, 307 insertions(+), 1 deletion(-)
>
> +void ath10k_record_wmi_event(struct ath10k *ar, enum ath10k_wmi_type type,
> + u32 id, unsigned char *data)
> +{
> + struct ath10k_wmi_event_entry *entry;
> + u32 idx;
> +
> + if (type == ATH10K_WMI_EVENT) {
> + if (!ar->wmi_event_history.record)
> + return;
This check above is duplicated below, add it once at top of the method
instead.
> +
> + spin_lock_bh(&ar->wmi_event_history.hist_lock);
> + idx = ath10k_core_get_next_idx(&ar->reg_access_history.index,
> + ar->wmi_event_history.max_entries);
> + spin_unlock_bh(&ar->wmi_event_history.hist_lock);
> + entry = &ar->wmi_event_history.record[idx];
> + } else {
> + if (!ar->wmi_cmd_history.record)
> + return;
> +
> + spin_lock_bh(&ar->wmi_cmd_history.hist_lock);
> + idx = ath10k_core_get_next_idx(&ar->reg_access_history.index,
> + ar->wmi_cmd_history.max_entries);
> + spin_unlock_bh(&ar->wmi_cmd_history.hist_lock);
> + entry = &ar->wmi_cmd_history.record[idx];
> + }
> +
> + entry->timestamp = ath10k_core_get_timestamp();
> + entry->cpu_id = smp_processor_id();
> + entry->type = type;
> + entry->id = id;
> + memcpy(&entry->data, data + 4, ATH10K_WMI_DATA_LEN);
> +}
> +EXPORT_SYMBOL(ath10k_record_wmi_event);
> @@ -1660,6 +1668,11 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
> ar->ce_priv = &ar_snoc->ce;
> msa_size = drv_data->msa_size;
>
> + ath10k_core_reg_access_history_init(ar, ATH10K_REG_ACCESS_HISTORY_MAX);
> + ath10k_core_wmi_event_history_init(ar, ATH10K_WMI_EVENT_HISTORY_MAX);
> + ath10k_core_wmi_cmd_history_init(ar, ATH10K_WMI_CMD_HISTORY_MAX);
> + ath10k_core_ce_event_history_init(ar, ATH10K_CE_EVENT_HISTORY_MAX);
Maybe only enable this once user turns it on? It sucks up a bit of memory?
> +
> ath10k_snoc_quirks_init(ar);
>
> ret = ath10k_snoc_resource_init(ar);
> diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
> index 932266d..9df5748 100644
> --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
> +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
> @@ -627,6 +627,7 @@ static void ath10k_wmi_tlv_op_rx(struct ath10k *ar, struct sk_buff *skb)
> if (skb_pull(skb, sizeof(struct wmi_cmd_hdr)) == NULL)
> goto out;
>
> + ath10k_record_wmi_event(ar, ATH10K_WMI_EVENT, id, skb->data);
> trace_ath10k_wmi_event(ar, id, skb->data, skb->len);
>
> consumed = ath10k_tm_event_wmi(ar, id, skb);
> diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
> index a81a1ab..8ebd05c 100644
> --- a/drivers/net/wireless/ath/ath10k/wmi.c
> +++ b/drivers/net/wireless/ath/ath10k/wmi.c
> @@ -1802,6 +1802,15 @@ struct sk_buff *ath10k_wmi_alloc_skb(struct ath10k *ar, u32 len)
>
> static void ath10k_wmi_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb)
> {
> + struct wmi_cmd_hdr *cmd_hdr;
> + enum wmi_tlv_event_id id;
> +
> + cmd_hdr = (struct wmi_cmd_hdr *)skb->data;
> + id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID);
> +
> + ath10k_record_wmi_event(ar, ATH10K_WMI_TX_COMPL, id,
> + skb->data + sizeof(struct wmi_cmd_hdr));
> +
> dev_kfree_skb(skb);
> }
I think guard the above new code with if (unlikely(ar->ce_event_history.record)) { ... }
All in all, I think I'd want to compile this out (while leaving other debug compiled
in) since it seems this stuff would be rarely used and it adds method calls to hot
paths.
That is a decision for Kalle though, so see what he says...
Thanks,
Ben
--
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc http://www.candelatech.com
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox