Netdev List
 help / color / mirror / Atom feed
* [PATCH] net: usb: Merge cpu_to_le32s + memcpy to put_unaligned_le32
From: Chuhong Yuan @ 2019-07-22  7:41 UTC (permalink / raw)
  Cc: David S . Miller, Woojung Huh, Microchip Linux Driver Support,
	Steve Glendinning, linux-usb, netdev, linux-kernel, Chuhong Yuan

Merge the combo uses of cpu_to_le32s and memcpy.
Use put_unaligned_le32 instead.
This simplifies the code.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
---
 drivers/net/usb/asix_common.c  |  9 ++++-----
 drivers/net/usb/ax88179_178a.c | 11 ++++-------
 drivers/net/usb/lan78xx.c      | 11 ++++-------
 drivers/net/usb/smsc75xx.c     | 11 ++++-------
 drivers/net/usb/sr9800.c       |  9 ++++-----
 5 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index b39ee714fb01..e39f41efda3e 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -221,6 +221,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	int tailroom = skb_tailroom(skb);
 	u32 packet_len;
 	u32 padbytes = 0xffff0000;
+	void *ptr;
 
 	padlen = ((skb->len + 4) & (dev->maxpacket - 1)) ? 0 : 4;
 
@@ -256,13 +257,11 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	}
 
 	packet_len = ((skb->len ^ 0x0000ffff) << 16) + skb->len;
-	skb_push(skb, 4);
-	cpu_to_le32s(&packet_len);
-	skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len));
+	ptr = skb_push(skb, 4);
+	put_unaligned_le32(packet_len, ptr);
 
 	if (padlen) {
-		cpu_to_le32s(&padbytes);
-		memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
+		put_unaligned_le32(padbytes, skb_tail_pointer(skb));
 		skb_put(skb, sizeof(padbytes));
 	}
 
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 72d165114b67..daa54486ab09 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1421,6 +1421,7 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 	int frame_size = dev->maxpacket;
 	int mss = skb_shinfo(skb)->gso_size;
 	int headroom;
+	void *ptr;
 
 	tx_hdr1 = skb->len;
 	tx_hdr2 = mss;
@@ -1435,13 +1436,9 @@ ax88179_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		return NULL;
 	}
 
-	skb_push(skb, 4);
-	cpu_to_le32s(&tx_hdr2);
-	skb_copy_to_linear_data(skb, &tx_hdr2, 4);
-
-	skb_push(skb, 4);
-	cpu_to_le32s(&tx_hdr1);
-	skb_copy_to_linear_data(skb, &tx_hdr1, 4);
+	ptr = skb_push(skb, 8);
+	put_unaligned_le32(tx_hdr1, ptr);
+	put_unaligned_le32(tx_hdr2, ptr + 4);
 
 	return skb;
 }
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 9c33b35bd155..769bb262fbec 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2729,6 +2729,7 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
 				       struct sk_buff *skb, gfp_t flags)
 {
 	u32 tx_cmd_a, tx_cmd_b;
+	void *ptr;
 
 	if (skb_cow_head(skb, TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
@@ -2757,13 +2758,9 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
 		tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_;
 	}
 
-	skb_push(skb, 4);
-	cpu_to_le32s(&tx_cmd_b);
-	memcpy(skb->data, &tx_cmd_b, 4);
-
-	skb_push(skb, 4);
-	cpu_to_le32s(&tx_cmd_a);
-	memcpy(skb->data, &tx_cmd_a, 4);
+	ptr = skb_push(skb, 8);
+	put_unaligned_le32(tx_cmd_a, ptr);
+	put_unaligned_le32(tx_cmd_b, ptr + 4);
 
 	return skb;
 }
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 7fac9db5380d..9556d431885f 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2255,6 +2255,7 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
 					 struct sk_buff *skb, gfp_t flags)
 {
 	u32 tx_cmd_a, tx_cmd_b;
+	void *ptr;
 
 	if (skb_cow_head(skb, SMSC75XX_TX_OVERHEAD)) {
 		dev_kfree_skb_any(skb);
@@ -2275,13 +2276,9 @@ static struct sk_buff *smsc75xx_tx_fixup(struct usbnet *dev,
 		tx_cmd_b = 0;
 	}
 
-	skb_push(skb, 4);
-	cpu_to_le32s(&tx_cmd_b);
-	memcpy(skb->data, &tx_cmd_b, 4);
-
-	skb_push(skb, 4);
-	cpu_to_le32s(&tx_cmd_a);
-	memcpy(skb->data, &tx_cmd_a, 4);
+	ptr = skb_push(skb, 8);
+	put_unaligned_le32(tx_cmd_a, ptr);
+	put_unaligned_le32(tx_cmd_b, ptr + 4);
 
 	return skb;
 }
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index 35f39f23d881..c5d4a0060124 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -115,6 +115,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	u32 padbytes = 0xffff0000;
 	u32 packet_len;
 	int padlen;
+	void *ptr;
 
 	padlen = ((skb->len + 4) % (dev->maxpacket - 1)) ? 0 : 4;
 
@@ -133,14 +134,12 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 			return NULL;
 	}
 
-	skb_push(skb, 4);
+	ptr = skb_push(skb, 4);
 	packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4);
-	cpu_to_le32s(&packet_len);
-	skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len));
+	put_unaligned_le32(packet_len, ptr);
 
 	if (padlen) {
-		cpu_to_le32s(&padbytes);
-		memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
+		put_unaligned_le32(padbytes, skb_tail_pointer(skb));
 		skb_put(skb, sizeof(padbytes));
 	}
 
-- 
2.20.1


^ permalink raw reply related

* RE: [PATCH net-next 3/3] net: stmmac: Introducing support for Page Pool
From: Jose Abreu @ 2019-07-22  7:23 UTC (permalink / raw)
  To: Jon Hunter, Jose Abreu, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, linux-stm32@st-md-mailman.stormreply.com,
	linux-arm-kernel@lists.infradead.org
  Cc: Joao Pinto, David S . Miller, Giuseppe Cavallaro,
	Alexandre Torgue, Maxime Coquelin, Maxime Ripard, Chen-Yu Tsai,
	linux-tegra
In-Reply-To: <25512348-5b98-aeb7-a6fb-f90376e66a84@nvidia.com>

From: Jon Hunter <jonathanh@nvidia.com>
Date: Jul/19/2019, 14:35:52 (UTC+00:00)

> 
> On 19/07/2019 13:32, Jose Abreu wrote:
> > From: Jon Hunter <jonathanh@nvidia.com>
> > Date: Jul/19/2019, 13:30:10 (UTC+00:00)
> > 
> >> I booted the board without using NFS and then started used dhclient to
> >> bring up the network interface and it appears to be working fine. I can
> >> even mount the NFS share fine. So it does appear to be particular to
> >> using NFS to mount the rootfs.
> > 
> > Damn. Can you send me your .config ?
> 
> Yes no problem. Attached.

Can you compile your image without modules (i.e. all built-in) and let 
me know if the error still happens ?

---
Thanks,
Jose Miguel Abreu

^ permalink raw reply

* [PATCH V2 1/1] can: sja1000: f81601: add Fintek F81601 support
From: Ji-Ze Hong (Peter Hong) @ 2019-07-22  6:22 UTC (permalink / raw)
  To: wg, mkl, peter_hong
  Cc: davem, f.suligoi, linux-kernel, linux-can, netdev,
	Ji-Ze Hong (Peter Hong)

This patch add support for Fintek PCIE to 2 CAN controller support

Signed-off-by: Ji-Ze Hong (Peter Hong) <hpeter+linux_kernel@gmail.com>
---
Changelog:
v2:
	1: Fix comment on the spinlock with write access.
	2: Use ARRAY_SIZE instead of F81601_PCI_MAX_CHAN.
	3: Check the strap pin outside the loop.
	4: Fix the cleanup issue in f81601_pci_add_card().
	5: Remove unused "channels" in struct f81601_pci_card.

 drivers/net/can/sja1000/Kconfig  |   8 ++
 drivers/net/can/sja1000/Makefile |   1 +
 drivers/net/can/sja1000/f81601.c | 215 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 224 insertions(+)
 create mode 100644 drivers/net/can/sja1000/f81601.c

diff --git a/drivers/net/can/sja1000/Kconfig b/drivers/net/can/sja1000/Kconfig
index f6dc89927ece..8588323c5138 100644
--- a/drivers/net/can/sja1000/Kconfig
+++ b/drivers/net/can/sja1000/Kconfig
@@ -101,4 +101,12 @@ config CAN_TSCAN1
 	  IRQ numbers are read from jumpers JP4 and JP5,
 	  SJA1000 IO base addresses are chosen heuristically (first that works).
 
+config CAN_F81601
+	tristate "Fintek F81601 PCIE to 2 CAN Controller"
+	depends on PCI
+	help
+	  This driver adds support for Fintek F81601 PCIE to 2 CAN Controller.
+	  It had internal 24MHz clock source, but it can be changed by
+	  manufacturer. We can use modinfo to get usage for parameters.
+	  Visit http://www.fintek.com.tw to get more information.
 endif
diff --git a/drivers/net/can/sja1000/Makefile b/drivers/net/can/sja1000/Makefile
index 9253aaf9e739..6f6268543bd9 100644
--- a/drivers/net/can/sja1000/Makefile
+++ b/drivers/net/can/sja1000/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_CAN_PEAK_PCMCIA) += peak_pcmcia.o
 obj-$(CONFIG_CAN_PEAK_PCI) += peak_pci.o
 obj-$(CONFIG_CAN_PLX_PCI) += plx_pci.o
 obj-$(CONFIG_CAN_TSCAN1) += tscan1.o
+obj-$(CONFIG_CAN_F81601) += f81601.o
diff --git a/drivers/net/can/sja1000/f81601.c b/drivers/net/can/sja1000/f81601.c
new file mode 100644
index 000000000000..3c378de8764d
--- /dev/null
+++ b/drivers/net/can/sja1000/f81601.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Fintek F81601 PCIE to 2 CAN controller driver
+ *
+ * Copyright (C) 2019 Peter Hong <peter_hong@fintek.com.tw>
+ * Copyright (C) 2019 Linux Foundation
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/can/dev.h>
+#include <linux/io.h>
+#include <linux/version.h>
+
+#include "sja1000.h"
+
+#define F81601_PCI_MAX_CHAN		2
+
+#define F81601_DECODE_REG		0x209
+#define F81601_IO_MODE			BIT(7)
+#define F81601_MEM_MODE			BIT(6)
+#define F81601_CFG_MODE			BIT(5)
+#define F81601_CAN2_INTERNAL_CLK	BIT(3)
+#define F81601_CAN1_INTERNAL_CLK	BIT(2)
+#define F81601_CAN2_EN			BIT(1)
+#define F81601_CAN1_EN			BIT(0)
+
+#define F81601_TRAP_REG			0x20a
+#define F81601_CAN2_HAS_EN		BIT(4)
+
+struct f81601_pci_card {
+	void __iomem *addr;
+	spinlock_t lock;	/* use this spin lock only for write access */
+	struct pci_dev *dev;
+	struct net_device *net_dev[F81601_PCI_MAX_CHAN];
+};
+
+static const struct pci_device_id f81601_pci_tbl[] = {
+	{ PCI_DEVICE(0x1c29, 0x1703) },
+	{},
+};
+
+MODULE_DEVICE_TABLE(pci, f81601_pci_tbl);
+
+static bool internal_clk = 1;
+module_param(internal_clk, bool, 0444);
+MODULE_PARM_DESC(internal_clk, "Use internal clock, default 1 (24MHz)");
+
+static unsigned int external_clk;
+module_param(external_clk, uint, 0444);
+MODULE_PARM_DESC(external_clk, "External Clock, must spec when internal_clk = 0");
+
+static u8 f81601_pci_read_reg(const struct sja1000_priv *priv, int port)
+{
+	return readb(priv->reg_base + port);
+}
+
+static void f81601_pci_write_reg(const struct sja1000_priv *priv, int port,
+				 u8 val)
+{
+	struct f81601_pci_card *card = priv->priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&card->lock, flags);
+	writeb(val, priv->reg_base + port);
+	readb(priv->reg_base);
+	spin_unlock_irqrestore(&card->lock, flags);
+}
+
+static void f81601_pci_del_card(struct pci_dev *pdev)
+{
+	struct f81601_pci_card *card = pci_get_drvdata(pdev);
+	struct net_device *dev;
+	int i = 0;
+
+	for (i = 0; i < ARRAY_SIZE(card->net_dev); i++) {
+		dev = card->net_dev[i];
+		if (!dev)
+			continue;
+
+		dev_info(&pdev->dev, "%s: Removing %s\n", __func__, dev->name);
+
+		unregister_sja1000dev(dev);
+		free_sja1000dev(dev);
+	}
+
+	pcim_iounmap(pdev, card->addr);
+}
+
+/* Probe F81601 based device for the SJA1000 chips and register each
+ * available CAN channel to SJA1000 Socket-CAN subsystem.
+ */
+static int f81601_pci_add_card(struct pci_dev *pdev,
+			       const struct pci_device_id *ent)
+{
+	struct sja1000_priv *priv;
+	struct net_device *dev;
+	struct f81601_pci_card *card;
+	int err, i, count;
+	u8 tmp;
+
+	if (pcim_enable_device(pdev) < 0) {
+		dev_err(&pdev->dev, "Failed to enable PCI device\n");
+		return -ENODEV;
+	}
+
+	dev_info(&pdev->dev, "Detected card at slot #%i\n",
+		 PCI_SLOT(pdev->devfn));
+
+	card = devm_kzalloc(&pdev->dev, sizeof(*card), GFP_KERNEL);
+	if (!card)
+		return -ENOMEM;
+
+	card->dev = pdev;
+	spin_lock_init(&card->lock);
+
+	pci_set_drvdata(pdev, card);
+
+	tmp = F81601_IO_MODE | F81601_MEM_MODE | F81601_CFG_MODE |
+		F81601_CAN2_EN | F81601_CAN1_EN;
+
+	if (internal_clk) {
+		tmp |= F81601_CAN2_INTERNAL_CLK | F81601_CAN1_INTERNAL_CLK;
+
+		dev_info(&pdev->dev,
+			 "F81601 running with internal clock: 24Mhz\n");
+	} else {
+		dev_info(&pdev->dev,
+			 "F81601 running with external clock: %dMhz\n",
+			 external_clk / 1000000);
+	}
+
+	pci_write_config_byte(pdev, F81601_DECODE_REG, tmp);
+
+	card->addr = pcim_iomap(pdev, 0, pci_resource_len(pdev, 0));
+
+	if (!card->addr) {
+		err = -ENOMEM;
+		dev_err(&pdev->dev, "%s: Failed to remap BAR\n", __func__);
+		goto failure_cleanup;
+	}
+
+	/* read CAN2_HW_EN strap pin to detect how many CANBUS do we have */
+	count = ARRAY_SIZE(card->net_dev);
+	pci_read_config_byte(pdev, F81601_TRAP_REG, &tmp);
+	if (!(tmp & F81601_CAN2_HAS_EN))
+		count = 1;
+
+	/* Detect available channels */
+	for (i = 0; i < count; i++) {
+		dev = alloc_sja1000dev(0);
+		if (!dev) {
+			err = -ENOMEM;
+			goto failure_cleanup;
+		}
+
+		priv = netdev_priv(dev);
+		priv->priv = card;
+		priv->irq_flags = IRQF_SHARED;
+		priv->reg_base = card->addr + 0x80 * i;
+		priv->read_reg = f81601_pci_read_reg;
+		priv->write_reg = f81601_pci_write_reg;
+
+		if (internal_clk)
+			priv->can.clock.freq = 24000000 / 2;
+		else
+			priv->can.clock.freq = external_clk / 2;
+
+		priv->ocr = OCR_TX0_PUSHPULL | OCR_TX1_PUSHPULL;
+		priv->cdr = CDR_CBP;
+
+		SET_NETDEV_DEV(dev, &pdev->dev);
+		dev->dev_id = i;
+		dev->irq = pdev->irq;
+
+		/* Register SJA1000 device */
+		err = register_sja1000dev(dev);
+		if (err) {
+			dev_err(&pdev->dev,
+				"%s: Registering device failed: %x\n", __func__,
+				err);
+			free_sja1000dev(dev);
+			goto failure_cleanup;
+		}
+
+		card->net_dev[i] = dev;
+		dev_info(&pdev->dev, "Channel #%d, %s at 0x%p, irq %d\n", i,
+			 dev->name, priv->reg_base, dev->irq);
+	}
+
+	return 0;
+
+failure_cleanup:
+	dev_err(&pdev->dev, "%s: failed: %d. Cleaning Up.\n", __func__, err);
+	f81601_pci_del_card(pdev);
+
+	return err;
+}
+
+static struct pci_driver f81601_pci_driver = {
+	.name =		"f81601",
+	.id_table =	f81601_pci_tbl,
+	.probe =	f81601_pci_add_card,
+	.remove =	f81601_pci_del_card,
+};
+
+MODULE_DESCRIPTION("Fintek F81601 PCIE to 2 CANBUS adaptor driver");
+MODULE_AUTHOR("Peter Hong <peter_hong@fintek.com.tw>");
+MODULE_LICENSE("GPL v2");
+
+module_pci_driver(f81601_pci_driver);
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH] unaligned: delete 1-byte accessors
From: James Bottomley @ 2019-07-22  6:20 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: akpm, linux-kernel, netdev, axboe, kvalo, john.johansen,
	linux-arch
In-Reply-To: <20190722060744.GA24253@avx2>

On Mon, 2019-07-22 at 09:07 +0300, Alexey Dobriyan wrote:
> On Mon, Jul 22, 2019 at 02:48:46PM +0900, James Bottomley wrote:
> > On Mon, 2019-07-22 at 08:22 +0300, Alexey Dobriyan wrote:
> > > On Mon, Jul 22, 2019 at 08:08:33AM +0900, James Bottomley wrote:
> > > > On Mon, 2019-07-22 at 00:52 +0300, Alexey Dobriyan wrote:
> > > > > Each and every 1-byte access is aligned!
> > > > 
> > > > The design idea of this is for parsing descriptors.  We simply
> > > > chunk up the describing structure using get_unaligned for
> > > > everything.  The reason is because a lot of these structures
> > > > come
> > > > with reserved areas which we may make use of later.  If we're
> > > > using
> > > > get_unaligned for everything we can simply change a u8 to a u16
> > > > in
> > > > the structure absorbing the reserved padding.  With your change
> > > > now
> > > > I'd have to chase down every byte access and replace it with
> > > > get_unaligned instead of simply changing the structure.
> > > > 
> > > > What's the significant advantage of this change that
> > > > compensates
> > > > for the problems the above causes?
> > > 
> > > HW descriptors have fixed endianness, you're supposed to use
> > > get_unaligned_be32() and friends.
> > 
> > Not if this is an internal descriptor format, which is what this is
> > mostly used for.
> 
> Maybe, but developer is supposed to look at all struct member usages
> while changing types, right?
> 
> > > For that matter, drivers/scsi/ has exactly 2 get_unaligned()
> > > calls
> > > one of which can be changed to get_unaligned_be32().
> > 
> > You haven't answered the "what is the benefit of this change"
> > question.
> >  I mean sure we can do it, but it won't make anything more
> > efficient
> > and it does help with the descriptor format to treat every
> > structure
> > field the same.
> 
> The benefit is less code, come on.
> 
> Another benefit is that typoing
> 
> 	get_unaligned((u16*)p)
> 
> for
> 	get_unaligned((u8*)p)
> 
> will get detected.

Well, that's not the way it's supposed to be used.  It's supposed to be
used as

struct desc {
u8 something;
u8 pad 1;
u16 another;
} __packed;

something = get_unaligned[_le/be](&struct.something);

So that the sizes are encoded in the descriptor structure.  If you
think it's badly documented, then please update that, I just don't see
a benefit to a coding change that removes the u8 version of this
because it makes our descriptor structure handling inconsistent.

Even if we allow people are hard coding the typedef, then making u8 not
work just looks inconsistent ... you could easily have typoed u32 for
u16 in the example above and there would be no detection.

James


^ permalink raw reply

* Re: [PATCH] unaligned: delete 1-byte accessors
From: Alexey Dobriyan @ 2019-07-22  6:07 UTC (permalink / raw)
  To: James Bottomley
  Cc: akpm, linux-kernel, netdev, axboe, kvalo, john.johansen,
	linux-arch
In-Reply-To: <1563774526.3223.2.camel@HansenPartnership.com>

On Mon, Jul 22, 2019 at 02:48:46PM +0900, James Bottomley wrote:
> On Mon, 2019-07-22 at 08:22 +0300, Alexey Dobriyan wrote:
> > On Mon, Jul 22, 2019 at 08:08:33AM +0900, James Bottomley wrote:
> > > On Mon, 2019-07-22 at 00:52 +0300, Alexey Dobriyan wrote:
> > > > Each and every 1-byte access is aligned!
> > > 
> > > The design idea of this is for parsing descriptors.  We simply
> > > chunk up the describing structure using get_unaligned for
> > > everything.  The reason is because a lot of these structures come
> > > with reserved areas which we may make use of later.  If we're using
> > > get_unaligned for everything we can simply change a u8 to a u16 in
> > > the structure absorbing the reserved padding.  With your change now
> > > I'd have to chase down every byte access and replace it with
> > > get_unaligned instead of simply changing the structure.
> > > 
> > > What's the significant advantage of this change that compensates
> > > for the problems the above causes?
> > 
> > HW descriptors have fixed endianness, you're supposed to use
> > get_unaligned_be32() and friends.
> 
> Not if this is an internal descriptor format, which is what this is
> mostly used for.

Maybe, but developer is supposed to look at all struct member usages
while changing types, right?

> > For that matter, drivers/scsi/ has exactly 2 get_unaligned() calls
> > one of which can be changed to get_unaligned_be32().
> 
> You haven't answered the "what is the benefit of this change" question.
>  I mean sure we can do it, but it won't make anything more efficient
> and it does help with the descriptor format to treat every structure
> field the same.

The benefit is less code, come on.

Another benefit is that typoing

	get_unaligned((u16*)p)

for
	get_unaligned((u8*)p)

will get detected.

^ permalink raw reply

* [PATCH net] net: hns: fix LED configuration for marvell phy
From: Yonglong Liu @ 2019-07-22  5:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, yisen.zhuang,
	shiju.jose

Since commit(net: phy: marvell: change default m88e1510 LED configuration),
the active LED of Hip07 devices is always off, because Hip07 just
use 2 LEDs.
This patch adds a phy_register_fixup_for_uid() for m88e1510 to
correct the LED configuration.

Fixes: 077772468ec1 ("net: phy: marvell: change default m88e1510 LED configuration")
Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Reviewed-by: linyunsheng <linyunsheng@huawei.com>
---
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 2235dd5..5b213eb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/marvell_phy.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
@@ -1149,6 +1150,13 @@ static void hns_nic_adjust_link(struct net_device *ndev)
 	}
 }
 
+static int hns_phy_marvell_fixup(struct phy_device *phydev)
+{
+	phydev->dev_flags |= MARVELL_PHY_LED0_LINK_LED1_ACTIVE;
+
+	return 0;
+}
+
 /**
  *hns_nic_init_phy - init phy
  *@ndev: net device
@@ -1174,6 +1182,16 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h)
 	if (h->phy_if != PHY_INTERFACE_MODE_XGMII) {
 		phy_dev->dev_flags = 0;
 
+		/* register the PHY fixup (for Marvell 88E1510) */
+		ret = phy_register_fixup_for_uid(MARVELL_PHY_ID_88E1510,
+						 MARVELL_PHY_ID_MASK,
+						 hns_phy_marvell_fixup);
+		/* we can live without it, so just issue a warning */
+		if (ret)
+			netdev_warn(ndev,
+				    "Cannot register PHY fixup, ret=%d\n",
+				    ret);
+
 		ret = phy_connect_direct(ndev, phy_dev, hns_nic_adjust_link,
 					 h->phy_if);
 	} else {
@@ -2430,8 +2448,11 @@ static int hns_nic_dev_remove(struct platform_device *pdev)
 		hns_nic_uninit_ring_data(priv);
 	priv->ring_data = NULL;
 
-	if (ndev->phydev)
+	if (ndev->phydev) {
+		phy_unregister_fixup_for_uid(MARVELL_PHY_ID_88E1510,
+					     MARVELL_PHY_ID_MASK);
 		phy_disconnect(ndev->phydev);
+	}
 
 	if (!IS_ERR_OR_NULL(priv->ae_handle))
 		hnae_put_handle(priv->ae_handle);
-- 
2.8.1


^ permalink raw reply related

* Re: [PATCH] unaligned: delete 1-byte accessors
From: James Bottomley @ 2019-07-22  5:48 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: akpm, linux-kernel, netdev, axboe, kvalo, john.johansen,
	linux-arch
In-Reply-To: <20190722052244.GA4235@avx2>

On Mon, 2019-07-22 at 08:22 +0300, Alexey Dobriyan wrote:
> On Mon, Jul 22, 2019 at 08:08:33AM +0900, James Bottomley wrote:
> > On Mon, 2019-07-22 at 00:52 +0300, Alexey Dobriyan wrote:
> > > Each and every 1-byte access is aligned!
> > 
> > The design idea of this is for parsing descriptors.  We simply
> > chunk up the describing structure using get_unaligned for
> > everything.  The reason is because a lot of these structures come
> > with reserved areas which we may make use of later.  If we're using
> > get_unaligned for everything we can simply change a u8 to a u16 in
> > the structure absorbing the reserved padding.  With your change now
> > I'd have to chase down every byte access and replace it with
> > get_unaligned instead of simply changing the structure.
> > 
> > What's the significant advantage of this change that compensates
> > for the problems the above causes?
> 
> HW descriptors have fixed endianness, you're supposed to use
> get_unaligned_be32() and friends.

Not if this is an internal descriptor format, which is what this is
mostly used for.

> For that matter, drivers/scsi/ has exactly 2 get_unaligned() calls
> one of which can be changed to get_unaligned_be32().

You haven't answered the "what is the benefit of this change" question.
 I mean sure we can do it, but it won't make anything more efficient
and it does help with the descriptor format to treat every structure
field the same.

James


^ permalink raw reply

* Re: [PATCH] unaligned: delete 1-byte accessors
From: Alexey Dobriyan @ 2019-07-22  5:22 UTC (permalink / raw)
  To: James Bottomley
  Cc: akpm, linux-kernel, netdev, axboe, kvalo, john.johansen,
	linux-arch
In-Reply-To: <1563750513.2898.4.camel@HansenPartnership.com>

On Mon, Jul 22, 2019 at 08:08:33AM +0900, James Bottomley wrote:
> On Mon, 2019-07-22 at 00:52 +0300, Alexey Dobriyan wrote:
> > Each and every 1-byte access is aligned!
> 
> The design idea of this is for parsing descriptors.  We simply chunk up
> the describing structure using get_unaligned for everything.  The
> reason is because a lot of these structures come with reserved areas
> which we may make use of later.  If we're using get_unaligned for
> everything we can simply change a u8 to a u16 in the structure
> absorbing the reserved padding.  With your change now I'd have to chase
> down every byte access and replace it with get_unaligned instead of
> simply changing the structure.
> 
> What's the significant advantage of this change that compensates for
> the problems the above causes?

HW descriptors have fixed endianness, you're supposed to use
get_unaligned_be32() and friends.

For that matter, drivers/scsi/ has exactly 2 get_unaligned() calls one of
which can be changed to get_unaligned_be32().

^ permalink raw reply

* Re: [PATCH 2/3] macb: Update compatibility string for SiFive FU540-C000
From: Yash Shah @ 2019-07-22  4:39 UTC (permalink / raw)
  To: Nicolas Ferre
  Cc: David Miller, Rob Herring, Paul Walmsley, netdev, devicetree,
	linux-kernel@vger.kernel.org List, linux-riscv, Mark Rutland,
	Palmer Dabbelt, Albert Ou, Petr Štetiar, Sachin Ghadi
In-Reply-To: <4075b955-a187-6fd7-a2e6-deb82b5d4fb6@microchip.com>

On Fri, Jul 19, 2019 at 5:36 PM <Nicolas.Ferre@microchip.com> wrote:
>
> On 19/07/2019 at 13:10, Yash Shah wrote:
> > Update the compatibility string for SiFive FU540-C000 as per the new
> > string updated in the binding doc.
> > Reference: https://lkml.org/lkml/2019/7/17/200
>
> Maybe referring to lore.kernel.org is better:
> https://lore.kernel.org/netdev/CAJ2_jOFEVZQat0Yprg4hem4jRrqkB72FKSeQj4p8P5KA-+rgww@mail.gmail.com/

Sure. Will keep that in mind for future reference.

>
> > Signed-off-by: Yash Shah <yash.shah@sifive.com>
>
> Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>

Thanks.

- Yash

^ permalink raw reply

* [PATCH 0/4] put_user_page: new put_user_page_dirty*() helpers
From: john.hubbard @ 2019-07-22  4:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard

From: John Hubbard <jhubbard@nvidia.com>

Hi,

Here is the first small batch of call site conversions for put_page()
to put_user_page().

This batch includes some, but not all of the places that benefit from the
two new put_user_page_dirty*() helper functions. (The ordering of call site
conversion patch submission makes it better to wait until later, to convert
the rest.)

There are about 50+ patches in my tree [1], and I'll be sending out the
remaining ones in a few more groups:

    * The block/bio related changes (Jerome mostly wrote those, but I've
      had to move stuff around extensively, and add a little code)

    * mm/ changes

    * other subsystem patches

    * an RFC that shows the current state of the tracking patch set. That
      can only be applied after all call sites are converted, but it's
      good to get an early look at it.

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

[1] https://github.com/johnhubbard/linux/tree/gup_dma_core

John Hubbard (4):
  drivers/gpu/drm/via: convert put_page() to put_user_page*()
  net/xdp: convert put_page() to put_user_page*()
  net/rds: convert put_page() to put_user_page*()
  gup: new put_user_page_dirty*() helpers

 drivers/gpu/drm/via/via_dmablit.c        |  5 +++--
 drivers/infiniband/core/umem.c           |  2 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c |  2 +-
 include/linux/mm.h                       | 10 ++++++++++
 net/rds/info.c                           |  5 ++---
 net/rds/message.c                        |  2 +-
 net/rds/rdma.c                           | 15 +++++++--------
 net/xdp/xdp_umem.c                       |  3 +--
 8 files changed, 26 insertions(+), 18 deletions(-)

-- 
2.22.0


^ permalink raw reply

* [PATCH 1/3] drivers/gpu/drm/via: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-07-22  4:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard
In-Reply-To: <20190722043012.22945-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/drm/via/via_dmablit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index 062067438f1d..219827ae114f 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -189,8 +189,9 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 		for (i = 0; i < vsg->num_pages; ++i) {
 			if (NULL != (page = vsg->pages[i])) {
 				if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
-					SetPageDirty(page);
-				put_page(page);
+					put_user_pages_dirty(&page, 1);
+				else
+					put_user_page(page);
 			}
 		}
 		/* fall through */
-- 
2.22.0


^ permalink raw reply related

* [PATCH 3/3] gup: new put_user_page_dirty*() helpers
From: john.hubbard @ 2019-07-22  4:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard
In-Reply-To: <20190722043012.22945-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

While converting call sites to use put_user_page*() [1], quite a few
places ended up needing a single-page routine to put and dirty a
page.

Provide put_user_page_dirty() and put_user_page_dirty_lock(),
and use them in a few places: net/xdp, drm/via/, drivers/infiniband.

Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 drivers/gpu/drm/via/via_dmablit.c        |  2 +-
 drivers/infiniband/core/umem.c           |  2 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c |  2 +-
 include/linux/mm.h                       | 10 ++++++++++
 net/xdp/xdp_umem.c                       |  2 +-
 5 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index 219827ae114f..d30b2d75599f 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -189,7 +189,7 @@ via_free_sg_info(struct pci_dev *pdev, drm_via_sg_info_t *vsg)
 		for (i = 0; i < vsg->num_pages; ++i) {
 			if (NULL != (page = vsg->pages[i])) {
 				if (!PageReserved(page) && (DMA_FROM_DEVICE == vsg->direction))
-					put_user_pages_dirty(&page, 1);
+					put_user_page_dirty(page);
 				else
 					put_user_page(page);
 			}
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 08da840ed7ee..a7337cc3ca20 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -55,7 +55,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
 		page = sg_page_iter_page(&sg_iter);
 		if (umem->writable && dirty)
-			put_user_pages_dirty_lock(&page, 1);
+			put_user_page_dirty_lock(page);
 		else
 			put_user_page(page);
 	}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 0b0237d41613..d2ded624fb2a 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -76,7 +76,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
 			page = sg_page(sg);
 			pa = sg_phys(sg);
 			if (dirty)
-				put_user_pages_dirty_lock(&page, 1);
+				put_user_page_dirty_lock(page);
 			else
 				put_user_page(page);
 			usnic_dbg("pa: %pa\n", &pa);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..c0584c6d9d78 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1061,6 +1061,16 @@ void put_user_pages_dirty(struct page **pages, unsigned long npages);
 void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
 void put_user_pages(struct page **pages, unsigned long npages);
 
+static inline void put_user_page_dirty(struct page *page)
+{
+	put_user_pages_dirty(&page, 1);
+}
+
+static inline void put_user_page_dirty_lock(struct page *page)
+{
+	put_user_pages_dirty_lock(&page, 1);
+}
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 9cbbb96c2a32..1d122e52c6de 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -171,7 +171,7 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 	for (i = 0; i < umem->npgs; i++) {
 		struct page *page = umem->pgs[i];
 
-		put_user_pages_dirty_lock(&page, 1);
+		put_user_page_dirty_lock(page);
 	}
 
 	kfree(umem->pgs);
-- 
2.22.0


^ permalink raw reply related

* [PATCH 2/3] net/xdp: convert put_page() to put_user_page*()
From: john.hubbard @ 2019-07-22  4:30 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Alexander Viro, Björn Töpel, Boaz Harrosh,
	Christoph Hellwig, Daniel Vetter, Dan Williams, Dave Chinner,
	David Airlie, David S . Miller, Ilya Dryomov, Jan Kara,
	Jason Gunthorpe, Jens Axboe, Jérôme Glisse,
	Johannes Thumshirn, Magnus Karlsson, Matthew Wilcox,
	Miklos Szeredi, Ming Lei, Sage Weil, Santosh Shilimkar, Yan Zheng,
	netdev, dri-devel, linux-mm, linux-rdma, bpf, LKML, John Hubbard
In-Reply-To: <20190722043012.22945-1-jhubbard@nvidia.com>

From: John Hubbard <jhubbard@nvidia.com>

For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page() or
release_pages().

This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").

Cc: Björn Töpel <bjorn.topel@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 net/xdp/xdp_umem.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 83de74ca729a..9cbbb96c2a32 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -171,8 +171,7 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 	for (i = 0; i < umem->npgs; i++) {
 		struct page *page = umem->pgs[i];
 
-		set_page_dirty_lock(page);
-		put_page(page);
+		put_user_pages_dirty_lock(&page, 1);
 	}
 
 	kfree(umem->pgs);
-- 
2.22.0


^ permalink raw reply related

* [GIT] Networking
From: David Miller @ 2019-07-22  4:13 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


1) Several netfilter fixes including a nfnetlink deadlock fix from
   Florian Westphal and fix for dropping VRF packets from Miaohe Lin.

2) Flow offload fixes from Pablo Neira Ayuso including a fix to
   restore proper block sharing.

3) Fix r8169 PHY init from Thomas Voegtle.

4) Fix memory leak in mac80211, from Lorenzo Bianconi.

5) Missing NULL check on object allocation in cxgb4, from Navid
   Emamdoost.

6) Fix scaling of RX power in sfp phy driver, from Andrew Lunn.

7) Check that there is actually an ip header to access in skb->data in
   VRF, from Peter Kosyh.

8) Remove spurious rcu unlock in hv_netvsc, from Haiyang Zhang.

9) One more tweak the the TCP fragmentation memory limit changes, to
   be less harmful to applications setting small SO_SNDBUF values.
   From Eric Dumazet.

Please pull, thanks a lot!

The following changes since commit 31cc088a4f5d83481c6f5041bd6eb06115b974af:

  Merge tag 'drm-next-2019-07-19' of git://anongit.freedesktop.org/drm/drm (2019-07-19 12:29:43 -0700)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git 

for you to fetch changes up to b617158dc096709d8600c53b6052144d12b89fab:

  tcp: be more careful in tcp_fragment() (2019-07-21 20:41:24 -0700)

----------------------------------------------------------------
Andrew Lunn (1):
      net: phy: sfp: hwmon: Fix scaling of RX power

Arnd Bergmann (1):
      netfilter: bridge: make NF_TABLES_BRIDGE tristate

Benjamin Poirier (1):
      be2net: Synchronize be_update_queues with dev_watchdog

Brian King (1):
      bnx2x: Prevent load reordering in tx completion processing

Brian Norris (1):
      mac80211: don't warn about CW params when not using them

Christian Hesse (1):
      netfilter: nf_tables: fix module autoload for redir

Christophe JAILLET (3):
      tipc: Fix a typo
      net: hns3: typo in the name of a constant
      chelsio: Fix a typo in a function name

David S. Miller (3):
      Merge git://git.kernel.org/.../pablo/nf
      Merge branch 'flow_offload-fixes'
      Merge tag 'mac80211-for-davem-2019-07-20' of git://git.kernel.org/.../jberg/mac80211

Eric Dumazet (1):
      tcp: be more careful in tcp_fragment()

Fernando Fernandez Mancera (2):
      netfilter: synproxy: fix erroneous tcp mss option
      netfilter: synproxy: fix rst sequence number mismatch

Florian Westphal (3):
      netfilter: nfnetlink: avoid deadlock due to synchronous request_module
      netfilter: conntrack: always store window size un-scaled
      netfilter: nf_tables: don't fail when updating base chain policy

Frederick Lawler (3):
      cxgb4: Prefer pcie_capability_read_word()
      igc: Prefer pcie_capability_read_word()
      qed: Prefer pcie_capability_read_word()

Haiyang Zhang (1):
      hv_netvsc: Fix extra rcu_read_unlock in netvsc_recv_callback()

Jeremy Sowden (1):
      kbuild: add net/netfilter/nf_tables_offload.h to header-test blacklist.

Johannes Berg (2):
      wireless: fix nl80211 vendor commands
      nl80211: fix VENDOR_CMD_RAW_DATA

John Crispin (1):
      nl80211: fix NL80211_HE_MAX_CAPABILITY_LEN

Laura Garcia Liebana (1):
      netfilter: nft_hash: fix symhash with modulus one

Lorenzo Bianconi (1):
      mac80211: fix possible memory leak in ieee80211_assign_beacon

Miaohe Lin (1):
      netfilter: Fix rpfilter dropping vrf packets by mistake

Navid Emamdoost (1):
      allocate_flower_entry: should check for null deref

Pablo Neira Ayuso (6):
      netfilter: nft_meta: skip EAGAIN if nft_meta_bridge is not a module
      netfilter: bridge: NF_CONNTRACK_BRIDGE does not depend on NF_TABLES_BRIDGE
      net: openvswitch: rename flow_stats to sw_flow_stats
      net: flow_offload: remove netns parameter from flow_block_cb_alloc()
      net: flow_offload: rename tc_setup_cb_t to flow_setup_cb_t
      net: flow_offload: add flow_block structure and use it

Peter Kosyh (1):
      vrf: make sure skb->data contains ip header to make routing

Phil Sutter (1):
      netfilter: nf_tables: Support auto-loading for inet nat

Thomas Voegtle (1):
      r8169: fix RTL8168g PHY init

Vasily Averin (1):
      connector: remove redundant input callback from cn_dev

Vlad Buslov (1):
      net: sched: verify that q!=NULL before setting q->flags

Yonatan Goldschmidt (1):
      netfilter: Update obsolete comments referring to ip_conntrack

xiao ruizhu (1):
      netfilter: nf_conntrack_sip: fix expectation clash

 drivers/connector/connector.c                             |  6 +-----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c           |  3 +++
 drivers/net/ethernet/chelsio/cxgb/my3126.c                |  4 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c           |  6 ++----
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c      |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c                |  9 +++------
 drivers/net/ethernet/emulex/benet/be_main.c               |  5 +++++
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h           |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c    |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c  |  4 ++--
 drivers/net/ethernet/intel/igc/igc_main.c                 | 12 ++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c          |  5 ++---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c            | 15 ++++++++-------
 drivers/net/ethernet/mscc/ocelot_flower.c                 | 11 +++++------
 drivers/net/ethernet/mscc/ocelot_tc.c                     |  6 +++---
 drivers/net/ethernet/netronome/nfp/flower/offload.c       | 11 +++++------
 drivers/net/ethernet/qlogic/qed/qed_rdma.c                |  5 ++---
 drivers/net/ethernet/realtek/r8169_main.c                 |  4 ++--
 drivers/net/hyperv/netvsc_drv.c                           |  1 -
 drivers/net/phy/sfp.c                                     |  2 +-
 drivers/net/vrf.c                                         | 58 +++++++++++++++++++++++++++++++++++-----------------------
 drivers/net/wireless/ath/wil6210/cfg80211.c               |  4 ++++
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/vendor.c |  1 +
 drivers/net/wireless/ti/wlcore/vendor_cmd.c               |  3 +++
 include/Kbuild                                            |  1 +
 include/linux/connector.h                                 |  1 -
 include/linux/netfilter/nf_conntrack_h323_asn1.h          |  3 +--
 include/net/cfg80211.h                                    |  2 +-
 include/net/flow_offload.h                                | 30 ++++++++++++++++++++++--------
 include/net/netfilter/nf_conntrack_expect.h               | 12 +++++++++---
 include/net/netfilter/nf_conntrack_synproxy.h             |  1 +
 include/net/netfilter/nf_tables.h                         |  5 +++--
 include/net/pkt_cls.h                                     |  5 ++---
 include/net/sch_generic.h                                 |  8 +++-----
 include/net/tcp.h                                         |  5 +++++
 include/uapi/linux/nl80211.h                              |  2 +-
 net/bridge/netfilter/Kconfig                              |  6 +++---
 net/core/flow_offload.c                                   | 22 ++++++++++------------
 net/dsa/slave.c                                           |  6 +++---
 net/ipv4/netfilter/ipt_CLUSTERIP.c                        |  4 ++--
 net/ipv4/netfilter/ipt_SYNPROXY.c                         |  2 ++
 net/ipv4/netfilter/ipt_rpfilter.c                         |  1 +
 net/ipv4/netfilter/nf_nat_h323.c                          | 12 ++++++------
 net/ipv4/tcp_output.c                                     | 13 +++++++++++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c                        |  2 ++
 net/ipv6/netfilter/ip6t_rpfilter.c                        |  8 ++++++--
 net/mac80211/cfg.c                                        |  8 ++++++--
 net/mac80211/driver-ops.c                                 | 13 +++++++++----
 net/netfilter/Kconfig                                     |  6 ++----
 net/netfilter/ipvs/ip_vs_nfct.c                           |  2 +-
 net/netfilter/nf_conntrack_amanda.c                       |  2 +-
 net/netfilter/nf_conntrack_broadcast.c                    |  2 +-
 net/netfilter/nf_conntrack_core.c                         |  4 +---
 net/netfilter/nf_conntrack_expect.c                       | 26 +++++++++++++++++++-------
 net/netfilter/nf_conntrack_ftp.c                          |  2 +-
 net/netfilter/nf_conntrack_h323_asn1.c                    |  5 ++---
 net/netfilter/nf_conntrack_h323_main.c                    | 18 +++++++++---------
 net/netfilter/nf_conntrack_irc.c                          |  2 +-
 net/netfilter/nf_conntrack_netlink.c                      |  4 ++--
 net/netfilter/nf_conntrack_pptp.c                         |  4 ++--
 net/netfilter/nf_conntrack_proto_gre.c                    |  2 --
 net/netfilter/nf_conntrack_proto_icmp.c                   |  2 +-
 net/netfilter/nf_conntrack_proto_tcp.c                    |  8 +++++---
 net/netfilter/nf_conntrack_sane.c                         |  2 +-
 net/netfilter/nf_conntrack_sip.c                          | 10 +++++++---
 net/netfilter/nf_conntrack_tftp.c                         |  2 +-
 net/netfilter/nf_nat_amanda.c                             |  2 +-
 net/netfilter/nf_nat_core.c                               |  2 +-
 net/netfilter/nf_nat_ftp.c                                |  2 +-
 net/netfilter/nf_nat_irc.c                                |  2 +-
 net/netfilter/nf_nat_sip.c                                |  8 +++++---
 net/netfilter/nf_nat_tftp.c                               |  2 +-
 net/netfilter/nf_synproxy_core.c                          |  8 ++++----
 net/netfilter/nf_tables_api.c                             |  4 +++-
 net/netfilter/nf_tables_offload.c                         |  5 +++--
 net/netfilter/nfnetlink.c                                 |  2 +-
 net/netfilter/nft_chain_filter.c                          |  2 +-
 net/netfilter/nft_chain_nat.c                             |  3 +++
 net/netfilter/nft_ct.c                                    |  2 +-
 net/netfilter/nft_hash.c                                  |  2 +-
 net/netfilter/nft_meta.c                                  |  2 +-
 net/netfilter/nft_redir.c                                 |  2 +-
 net/netfilter/nft_synproxy.c                              |  2 ++
 net/openvswitch/flow.c                                    |  8 ++++----
 net/openvswitch/flow.h                                    |  4 ++--
 net/openvswitch/flow_table.c                              |  8 ++++----
 net/sched/cls_api.c                                       | 16 +++++++++++-----
 net/sched/cls_bpf.c                                       |  2 +-
 net/sched/cls_flower.c                                    |  2 +-
 net/sched/cls_matchall.c                                  |  2 +-
 net/sched/cls_u32.c                                       |  6 +++---
 net/tipc/topsrv.c                                         |  2 +-
 92 files changed, 323 insertions(+), 236 deletions(-)

^ permalink raw reply

* Re: [PATCH net] hv_netvsc: Fix extra rcu_read_unlock in netvsc_recv_callback()
From: David Miller @ 2019-07-22  3:41 UTC (permalink / raw)
  To: haiyangz
  Cc: sashal, linux-hyperv, netdev, kys, sthemmin, olaf, vkuznets,
	linux-kernel
In-Reply-To: <1563557581-17669-1-git-send-email-haiyangz@microsoft.com>

From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Fri, 19 Jul 2019 17:33:51 +0000

> There is an extra rcu_read_unlock left in netvsc_recv_callback(),
> after a previous patch that removes RCU from this function.
> This patch removes the extra RCU unlock.
> 
> Fixes: 345ac08990b8 ("hv_netvsc: pass netvsc_device to receive callback")
> Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH net] tcp: be more careful in tcp_fragment()
From: David Miller @ 2019-07-22  3:42 UTC (permalink / raw)
  To: edumazet
  Cc: netdev, eric.dumazet, aprout, jonathan.lemon, mkubecek, ncardwell,
	ycheng, cpaasch, jtl
In-Reply-To: <20190719185233.242049-1-edumazet@google.com>

From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Jul 2019 11:52:33 -0700

> Some applications set tiny SO_SNDBUF values and expect
> TCP to just work. Recent patches to address CVE-2019-11478
> broke them in case of losses, since retransmits might
> be prevented.
> 
> We should allow these flows to make progress.
> 
> This patch allows the first and last skb in retransmit queue
> to be split even if memory limits are hit.
> 
> It also adds the some room due to the fact that tcp_sendmsg()
> and tcp_sendpage() might overshoot sk_wmem_queued by about one full
> TSO skb (64KB size). Note this allowance was already present
> in stable backports for kernels < 4.15
> 
> Note for < 4.15 backports :
>  tcp_rtx_queue_tail() will probably look like :
> 
> static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
> {
> 	struct sk_buff *skb = tcp_send_head(sk);
> 
> 	return skb ? tcp_write_queue_prev(sk, skb) : tcp_write_queue_tail(sk);
> }
> 
> Fixes: f070ef2ac667 ("tcp: tcp_fragment() should apply sane memory limits")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Andrew Prout <aprout@ll.mit.edu>
> Tested-by: Andrew Prout <aprout@ll.mit.edu>
> Tested-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> Tested-by: Michal Kubecek <mkubecek@suse.cz>
> Acked-by: Neal Cardwell <ncardwell@google.com>
> Acked-by: Yuchung Cheng <ycheng@google.com>
> Acked-by: Christoph Paasch <cpaasch@apple.com>
> Cc: Jonathan Looney <jtl@netflix.com>

Applied and queued up for -stable.

^ permalink raw reply

* [PATCH] drivers: net: xgene: Remove acpi_has_method() calls
From: Kelsey Skunberg @ 2019-07-22  3:04 UTC (permalink / raw)
  To: iyappan, keyur, quan, davem, netdev, linux-kernel
  Cc: bjorn, rjw, skhan, linux-kernel-mentees, skunberg.kelsey

acpi_evaluate_object will already return an error if the needed method
does not exist. Remove unnecessary acpi_has_method() calls and check the
returned acpi_status for failure instead.

Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
---
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c    |  7 +++----
 drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c | 10 +++++-----
 drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c |  9 ++++-----
 3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 61a465097cb8..ef75a09069a8 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -694,6 +694,7 @@ bool xgene_ring_mgr_init(struct xgene_enet_pdata *p)
 static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
 {
 	struct device *dev = &pdata->pdev->dev;
+	acpi_status status;
 
 	if (!xgene_ring_mgr_init(pdata))
 		return -ENODEV;
@@ -712,11 +713,9 @@ static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
 		udelay(5);
 	} else {
 #ifdef CONFIG_ACPI
-		if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev), "_RST")) {
-			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
+		status = acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
 					     "_RST", NULL, NULL);
-		} else if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev),
-					 "_INI")) {
+		if (ACPI_FAILURE(status)) {
 			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
 					     "_INI", NULL, NULL);
 		}
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index 6453fc2ebb1f..6237a2cfd703 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -437,6 +437,7 @@ static void xgene_sgmac_tx_disable(struct xgene_enet_pdata *p)
 static int xgene_enet_reset(struct xgene_enet_pdata *p)
 {
 	struct device *dev = &p->pdev->dev;
+	acpi_status status;
 
 	if (!xgene_ring_mgr_init(p))
 		return -ENODEV;
@@ -460,14 +461,13 @@ static int xgene_enet_reset(struct xgene_enet_pdata *p)
 		}
 	} else {
 #ifdef CONFIG_ACPI
-		if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_RST"))
-			acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
-					     "_RST", NULL, NULL);
-		else if (acpi_has_method(ACPI_HANDLE(&p->pdev->dev), "_INI"))
+		status = acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
+			 		      "_RST", NULL, NULL);
+		if (ACPI_FAILURE(status)) {
 			acpi_evaluate_object(ACPI_HANDLE(&p->pdev->dev),
 					     "_INI", NULL, NULL);
+		}
 #endif
-	}
 
 	if (!p->port_id) {
 		xgene_enet_ecc_init(p);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index 133eb91c542e..fede3bfe4d68 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -380,6 +380,7 @@ static void xgene_xgmac_tx_disable(struct xgene_enet_pdata *pdata)
 static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
 {
 	struct device *dev = &pdata->pdev->dev;
+	acpi_status status;
 
 	if (!xgene_ring_mgr_init(pdata))
 		return -ENODEV;
@@ -393,11 +394,9 @@ static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
 		udelay(5);
 	} else {
 #ifdef CONFIG_ACPI
-		if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev), "_RST")) {
-			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
-					     "_RST", NULL, NULL);
-		} else if (acpi_has_method(ACPI_HANDLE(&pdata->pdev->dev),
-					   "_INI")) {
+		status = acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
+				 	      "_RST", NULL, NULL);
+		if (ACPI_FAILURE(status)) {
 			acpi_evaluate_object(ACPI_HANDLE(&pdata->pdev->dev),
 					     "_INI", NULL, NULL);
 		}
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH v2 net-next 03/11] net/ipv4: Plumb support for filtering route dumps
From: Hangbin Liu @ 2019-07-22  3:00 UTC (permalink / raw)
  To: David Ahern; +Cc: David Ahern, netdev, Jianlin Shi
In-Reply-To: <147df36b-75df-5e71-3d74-9454db676bce@gmail.com>

On Fri, Jul 19, 2019 at 10:55:49AM -0600, David Ahern wrote:
> Hi:
> 
> On 7/18/19 10:17 PM, Hangbin Liu wrote:
> > Hi David,
> > 
> > Before commit 18a8021a7be3 ("net/ipv4: Plumb support for filtering route
> > dumps"), when we dump a non-exist table, ip cmd exits silently.
> > 
> > # ip -4 route list table 1
> > # echo $?
> > 0
> > 
> > After commit 18a8021a7be3 ("net/ipv4: Plumb support for filtering route
> > dumps"). When we dump a non-exist table, as we returned -ENOENT, ip route
> > shows:
> > 
> > # ip -4 route show table 1
> > Error: ipv4: FIB table does not exist.
> > Dump terminated
> > # echo $?
> > 2
> > 
> > For me it looks make sense to return -ENOENT if we do not have the route
> > table. But this changes the userspace behavior. Do you think if we need to
> > keep backward compatible or just let it do as it is right now?
> > 
> 
> It is not change in userspace behavior; ip opted into the strict
> checking. The impact is to 'ip' users.
> 
> A couple of people have asked about this, and I am curious as to why
> people run a route dump for a table that does not exist and do not like
> being told that it does not exist.

Hi David,

Thanks for the reply. We have some route function tests and the new behavior
break the test. I just want to make sure this is expected so we can change our
tests to match the new behavior.

Cheers
Hangbin

^ permalink raw reply

* linux-next: manual merge of the net tree with Linus' tree
From: Stephen Rothwell @ 2019-07-21 23:41 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux Next Mailing List, Linux Kernel Mailing List,
	Masahiro Yamada, Jeremy Sowden

[-- Attachment #1: Type: text/plain, Size: 761 bytes --]

Hi all,

Today's linux-next merge of the net tree got a conflict in:

  include/Kbuild

between commit:

  67bf47452ea0 ("kbuild: update compile-test header list for v5.3-rc1")

from Linus' tree and commit:

  408d2bbbfd46 ("kbuild: add net/netfilter/nf_tables_offload.h to header-test blacklist.")

from the net tree.

I fixed it up (I used the former version) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH] unaligned: delete 1-byte accessors
From: James Bottomley @ 2019-07-21 23:08 UTC (permalink / raw)
  To: Alexey Dobriyan, akpm
  Cc: linux-kernel, netdev, axboe, kvalo, john.johansen, linux-arch
In-Reply-To: <20190721215253.GA18177@avx2>

On Mon, 2019-07-22 at 00:52 +0300, Alexey Dobriyan wrote:
> Each and every 1-byte access is aligned!

The design idea of this is for parsing descriptors.  We simply chunk up
the describing structure using get_unaligned for everything.  The
reason is because a lot of these structures come with reserved areas
which we may make use of later.  If we're using get_unaligned for
everything we can simply change a u8 to a u16 in the structure
absorbing the reserved padding.  With your change now I'd have to chase
down every byte access and replace it with get_unaligned instead of
simply changing the structure.

What's the significant advantage of this change that compensates for
the problems the above causes?

James


^ permalink raw reply

* [PATCH] unaligned: delete 1-byte accessors
From: Alexey Dobriyan @ 2019-07-21 21:52 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, netdev, axboe, kvalo, john.johansen, linux-arch

Each and every 1-byte access is aligned!

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

	There may be more unaligned stuff in arch/.

 block/partitions/ldm.h                      |    2 +-
 block/partitions/msdos.c                    |    2 +-
 drivers/net/wireless/marvell/mwifiex/pcie.c |    2 +-
 include/linux/unaligned/generic.h           |   12 ++----------
 net/core/netpoll.c                          |    4 ++--
 security/apparmor/policy_unpack.c           |    2 +-
 6 files changed, 8 insertions(+), 16 deletions(-)

--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -85,7 +85,7 @@ struct parsed_partitions;
 #define TOC_BITMAP2		"log"		/* bitmaps in the TOCBLOCK. */
 
 /* Borrowed from msdos.c */
-#define SYS_IND(p)		(get_unaligned(&(p)->sys_ind))
+#define SYS_IND(p)		((p)->sys_ind)
 
 struct frag {				/* VBLK Fragment handling */
 	struct list_head list;
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -33,7 +33,7 @@
  */
 #include <asm/unaligned.h>
 
-#define SYS_IND(p)	get_unaligned(&p->sys_ind)
+#define SYS_IND(p)	((p)->sys_ind)
 
 static inline sector_t nr_sects(struct partition *p)
 {
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -1090,7 +1090,7 @@ static int mwifiex_pcie_alloc_sleep_cookie_buf(struct mwifiex_adapter *adapter)
 
 	mwifiex_dbg(adapter, INFO,
 		    "alloc_scook: sleep cookie=0x%x\n",
-		    get_unaligned(card->sleep_cookie_vbase));
+		    *card->sleep_cookie_vbase);
 
 	return 0;
 }
--- a/include/linux/unaligned/generic.h
+++ b/include/linux/unaligned/generic.h
@@ -9,27 +9,22 @@
 extern void __bad_unaligned_access_size(void);
 
 #define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({			\
-	__builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr),			\
 	__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)),	\
 	__builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)),	\
 	__builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)),	\
-	__bad_unaligned_access_size()))));					\
+	__bad_unaligned_access_size())));					\
 	}))
 
 #define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({			\
-	__builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr),			\
 	__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)),	\
 	__builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)),	\
 	__builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)),	\
-	__bad_unaligned_access_size()))));					\
+	__bad_unaligned_access_size())));					\
 	}))
 
 #define __put_unaligned_le(val, ptr) ({					\
 	void *__gu_p = (ptr);						\
 	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		*(u8 *)__gu_p = (__force u8)(val);			\
-		break;							\
 	case 2:								\
 		put_unaligned_le16((__force u16)(val), __gu_p);		\
 		break;							\
@@ -48,9 +43,6 @@ extern void __bad_unaligned_access_size(void);
 #define __put_unaligned_be(val, ptr) ({					\
 	void *__gu_p = (ptr);						\
 	switch (sizeof(*(ptr))) {					\
-	case 1:								\
-		*(u8 *)__gu_p = (__force u8)(val);			\
-		break;							\
 	case 2:								\
 		put_unaligned_be16((__force u16)(val), __gu_p);		\
 		break;							\
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -408,7 +408,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 		ip6h = ipv6_hdr(skb);
 
 		/* ip6h->version = 6; ip6h->priority = 0; */
-		put_unaligned(0x60, (unsigned char *)ip6h);
+		*(unsigned char *)ip6h = 0x60;
 		ip6h->flow_lbl[0] = 0;
 		ip6h->flow_lbl[1] = 0;
 		ip6h->flow_lbl[2] = 0;
@@ -436,7 +436,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 		iph = ip_hdr(skb);
 
 		/* iph->version = 4; iph->ihl = 5; */
-		put_unaligned(0x45, (unsigned char *)iph);
+		*(unsigned char *)iph = 0x45;
 		iph->tos      = 0;
 		put_unaligned(htons(ip_len), &(iph->tot_len));
 		iph->id       = htons(atomic_inc_return(&ip_ident));
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -301,7 +301,7 @@ static bool unpack_u8(struct aa_ext *e, u8 *data, const char *name)
 		if (!inbounds(e, sizeof(u8)))
 			goto fail;
 		if (data)
-			*data = get_unaligned((u8 *)e->pos);
+			*data = *(u8 *)e->pos;
 		e->pos += sizeof(u8);
 		return 1;
 	}

^ permalink raw reply

* [PATCH bpf-next v10 06/10] bpf,landlock: Add a new map type: inode
From: Mickaël Salaün @ 2019-07-21 21:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Alexander Viro, Alexei Starovoitov,
	Andrew Morton, Andy Lutomirski, Arnaldo Carvalho de Melo,
	Casey Schaufler, Daniel Borkmann, David Drysdale,
	David S . Miller, Eric W . Biederman, James Morris, Jann Horn,
	John Johansen, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Paul Moore, Sargun Dhillon,
	Serge E . Hallyn, Shuah Khan, Stephen Smalley, Tejun Heo,
	Tetsuo Handa, Thomas Graf, Tycho Andersen, Will Drewry,
	kernel-hardening, linux-api, linux-fsdevel, linux-security-module,
	netdev
In-Reply-To: <20190721213116.23476-1-mic@digikod.net>

FIXME: 64-bits in the doc

This new map store arbitrary values referenced by inode keys.  The map
can be updated from user space with file descriptor pointing to inodes
tied to a file system.  From an eBPF (Landlock) program point of view,
such a map is read-only and can only be used to retrieved a value tied
to a given inode.  This is useful to recognize an inode tagged by user
space, without access right to this inode (i.e. no need to have a write
access to this inode).

Add dedicated BPF functions to handle this type of map:
* bpf_inode_htab_map_update_elem()
* bpf_inode_htab_map_lookup_elem()
* bpf_inode_htab_map_delete_elem()

This new map require a dedicated helper inode_map_lookup_elem() because
of the key which is a pointer to an opaque data (only provided by the
kernel).  This act like a (physical or cryptographic) key, which is why
it is also not allowed to get the next key.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Jann Horn <jann@thejh.net>
---

Changes since v9:
* use a hash map for the inode map: integrate inodemap.c into hashtab.c
  * add map_put_key() to struct bpf_map_ops to enable to put an inode
    reference used as key
  * allow arbitrary value size instead of 64-bits
* handle inode and map lifetime with LSM hooks
* check access for inode lookup via syscall: similar to adding xattr,
  except it does not touch the file system (which is handy for read-only
  ones)
* force read-only inode map for Landlock programs
* rename inode_map_lookup() into inode_map_lookup_elem()
* fix inode and mnt checks (suggested by Al Viro)

Changes since v8:
* remove prog chaining and object tagging to ease review
* use bpf_map_init_from_attr()

Changes since v7:
* new design with a dedicated map and a BPF function to tie a value to
  an inode
* add the ability to set or get a tag on an inode from a Landlock
  program

Changes since v6:
* remove WARN_ON() for missing dentry->d_inode
* refactor bpf_landlock_func_proto() (suggested by Kees Cook)

Changes since v5:
* cosmetic fixes and rebase

Changes since v4:
* use a file abstraction (handle) to wrap inode, dentry, path and file
  structs
* remove bpf_landlock_cmp_fs_beneath()
* rename the BPF helper and move it to kernel/bpf/
* tighten helpers accessible by a Landlock rule

Changes since v3:
* remove bpf_landlock_cmp_fs_prop() (suggested by Alexei Starovoitov)
* add hooks dealing with struct inode and struct path pointers:
  inode_permission and inode_getattr
* add abstraction over eBPF helper arguments thanks to wrapping structs
* add bpf_landlock_get_fs_mode() helper to check file type and mode
* merge WARN_ON() (suggested by Kees Cook)
* fix and update bpf_helpers.h
* use BPF_CALL_* for eBPF helpers (suggested by Alexei Starovoitov)
* make handle arraymap safe (RCU) and remove buggy synchronize_rcu()
* factor out the arraymay walk
* use size_t to index array (suggested by Jann Horn)

Changes since v2:
* add MNT_INTERNAL check to only add file handle from user-visible FS
  (e.g. no anonymous inode)
* replace struct file* with struct path* in map_landlock_handle
* add BPF protos
* fix bpf_landlock_cmp_fs_prop_with_struct_file()
---
 include/linux/bpf.h            |  16 +++
 include/linux/bpf_types.h      |   3 +
 include/linux/landlock.h       |   4 +
 include/uapi/linux/bpf.h       |  12 +-
 kernel/bpf/core.c              |   2 +
 kernel/bpf/hashtab.c           | 253 +++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  27 +++-
 kernel/bpf/verifier.c          |  14 ++
 security/landlock/common.h     |  14 ++
 security/landlock/hooks_fs.c   |  85 +++++++++++
 security/landlock/init.c       |  13 ++
 tools/include/uapi/linux/bpf.h |  12 +-
 tools/lib/bpf/libbpf_probes.c  |   1 +
 13 files changed, 453 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6d9c7a08713e..c507438e56b5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -47,6 +47,7 @@ struct bpf_map_ops {
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
 				int fd);
 	void (*map_fd_put_ptr)(void *ptr);
+	void (*map_put_key)(void *key);
 	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
@@ -208,6 +209,8 @@ enum bpf_arg_type {
 	ARG_PTR_TO_INT,		/* pointer to int */
 	ARG_PTR_TO_LONG,	/* pointer to long */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
+
+	ARG_PTR_TO_INODE,	/* pointer to a struct inode */
 };
 
 /* type of values returned from helper functions */
@@ -278,6 +281,7 @@ enum bpf_reg_type {
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
+	PTR_TO_INODE,		 /* reg points to struct inode */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -479,6 +483,7 @@ struct bpf_event_entry {
 	struct rcu_head rcu;
 };
 
+
 bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
@@ -684,6 +689,16 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 				void *key, void *value, u64 map_flags);
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_inode_fd_htab_map_lookup_elem(struct bpf_map *map, int *key, void *value);
+int bpf_inode_fd_htab_map_delete_elem(struct bpf_map *map, int *key);
+int bpf_inode_ptr_unlocked_htab_map_delete_elem(struct bpf_map *map,
+						struct inode **key,
+						bool remove_in_inode);
+int bpf_inode_ptr_locked_htab_map_delete_elem(struct bpf_map *map,
+					      struct inode **key,
+					      bool remove_in_inode);
+int bpf_inode_fd_htab_map_update_elem(struct bpf_map *map, int *key,
+				      void *value, u64 map_flags);
 
 int bpf_get_file_flag(int flags);
 int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
@@ -1055,6 +1070,7 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
 extern const struct bpf_func_proto bpf_strtol_proto;
 extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_inode_map_lookup_elem_proto;
 
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2ab647323f3a..ea177818d67e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -80,3 +80,6 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
+#ifdef CONFIG_SECURITY_LANDLOCK
+BPF_MAP_TYPE(BPF_MAP_TYPE_INODE, htab_inode_ops)
+#endif
diff --git a/include/linux/landlock.h b/include/linux/landlock.h
index 8ac7942f50fc..731b89cdf977 100644
--- a/include/linux/landlock.h
+++ b/include/linux/landlock.h
@@ -9,6 +9,7 @@
 #ifndef _LINUX_LANDLOCK_H
 #define _LINUX_LANDLOCK_H
 
+#include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/sched.h> /* task_struct */
 
@@ -31,4 +32,7 @@ static inline void get_seccomp_landlock(struct task_struct *tsk)
 }
 #endif /* CONFIG_SECCOMP_FILTER && CONFIG_SECURITY_LANDLOCK */
 
+int landlock_inode_add_map(struct inode *inode, struct bpf_map *map);
+void landlock_inode_remove_map(struct inode *inode, const struct bpf_map *map);
+
 #endif /* _LINUX_LANDLOCK_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d68613f737f3..2da054ca9c8b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -134,6 +134,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
+	BPF_MAP_TYPE_INODE,
 };
 
 /* Note that tracing related programs such as
@@ -2717,6 +2718,14 @@ union bpf_attr {
  *		**-EPERM** if no permission to send the *sig*.
  *
  *		**-EAGAIN** if bpf program can try again.
+ *
+ * void *bpf_inode_map_lookup_elem(struct bpf_map *map, const void *key)
+ *	Description
+ *		Perform a lookup in *map* for an entry associated to an inode
+ *		*key*.
+ *	Return
+ *		Map value associated to *key*, or **NULL** if no entry was
+ *		found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2828,7 +2837,8 @@ union bpf_attr {
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
-	FN(send_signal),
+	FN(send_signal),		\
+	FN(inode_map_lookup_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 16079550db6d..4177c818e5cd 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2040,6 +2040,8 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 
+const struct bpf_func_proto bpf_inode_map_update_proto __weak;
+
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
 	return NULL;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 22066a62c8c9..4fc7755042f0 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1,13 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
+ * Copyright (c) 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright (c) 2019 ANSSI
  */
+#include <asm/resource.h> /* RLIMIT_NOFILE */
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/err.h>
 #include <linux/jhash.h>
+#include <linux/fs.h> /* iput() */
 #include <linux/filter.h>
+#include <linux/landlock.h>
+#include <linux/mount.h> /* MNT_INTERNAL */
 #include <linux/rculist_nulls.h>
 #include <linux/random.h>
+#include <linux/sched/signal.h> /* rlimit() */
 #include <uapi/linux/btf.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
@@ -684,6 +692,8 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 		map->ops->map_fd_put_ptr(ptr);
 	}
+	if (map->ops->map_put_key)
+		map->ops->map_put_key(l->key);
 
 	if (htab_is_prealloc(htab)) {
 		__pcpu_freelist_push(&htab->freelist, &l->fnode);
@@ -1514,3 +1524,246 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_gen_lookup = htab_of_map_gen_lookup,
 	.map_check_btf = map_check_no_btf,
 };
+
+/* inode_htab */
+
+static int inode_htab_map_alloc_check(union bpf_attr *attr)
+{
+	/* only allow root to create this type of map (for now), should be
+	 * removed when Landlock will be usable by unprivileged users */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* the key is a file descriptor */
+	if (attr->max_entries == 0 || attr->key_size != sizeof(int) ||
+	    (attr->map_flags & ~(BPF_F_RDONLY | BPF_F_WRONLY |
+				 BPF_F_RDONLY_PROG)) ||
+	    /* for now, force read-only map for eBPF programs because only
+	     * bpf_inode_map_lookup_elem() enable to access them */
+	    !(attr->map_flags & BPF_F_RDONLY_PROG) ||
+	    bpf_map_attr_numa_node(attr) != NUMA_NO_NODE)
+		return -EINVAL;
+
+	/*
+	 * Limit number of entries in an inode map to the maximum number of
+	 * open files for the current process. The maximum number of file
+	 * references (including all inode maps) for a process is then
+	 * (RLIMIT_NOFILE - 1) * RLIMIT_NOFILE. If the process' RLIMIT_NOFILE
+	 * is 0, then any entry update is forbidden.
+	 *
+	 * An eBPF program can inherit all the inode map FD. The worse case is
+	 * to fill a bunch of arraymaps, create an eBPF program, close the
+	 * inode map FDs, and start again. The maximum number of inode map
+	 * entries can then be close to RLIMIT_NOFILE^3.
+	 */
+	if (attr->max_entries > rlimit(RLIMIT_NOFILE))
+		return -EMFILE;
+
+	/* decorelate UAPI from kernel API */
+	attr->key_size = sizeof(struct inode *);
+
+	return htab_map_alloc_check(attr);
+}
+
+static void inode_htab_put_key(void *key)
+{
+	struct inode **inode = key;
+
+	if ((*inode)->i_state & I_FREEING)
+		return;
+	iput(*inode);
+}
+
+/* called from syscall or (never) from eBPF program */
+static int map_get_next_no_key(struct bpf_map *map, void *key, void *next_key)
+{
+	/* do not leak a file descriptor */
+	return -ENOTSUPP;
+}
+
+/* must call iput(inode) after this call */
+static struct inode *inode_from_fd(int ufd, bool check_access)
+{
+	struct inode *ret;
+	struct fd f;
+	int deny;
+
+	f = fdget(ufd);
+	if (unlikely(!f.file))
+		return ERR_PTR(-EBADF);
+	/* TODO?: add this check when called from an eBPF program too (already
+	* checked by the LSM parent hooks anyway) */
+	if (unlikely(IS_PRIVATE(file_inode(f.file)))) {
+		ret = ERR_PTR(-EINVAL);
+		goto put_fd;
+	}
+	/* check if the FD is tied to a mount point */
+	/* TODO?: add this check when called from an eBPF program too */
+	if (unlikely(f.file->f_path.mnt->mnt_flags & MNT_INTERNAL)) {
+		ret = ERR_PTR(-EINVAL);
+		goto put_fd;
+	}
+	if (check_access) {
+		/*
+		* must be allowed to access attributes from this file to then
+		* be able to compare an inode to its map entry
+		*/
+		deny = security_inode_getattr(&f.file->f_path);
+		if (deny) {
+			ret = ERR_PTR(deny);
+			goto put_fd;
+		}
+	}
+	ret = file_inode(f.file);
+	ihold(ret);
+
+put_fd:
+	fdput(f);
+	return ret;
+}
+
+/*
+ * The key is a FD when called from a syscall, but an inode address when called
+ * from an eBPF program.
+ */
+
+/* called from syscall */
+int bpf_inode_fd_htab_map_lookup_elem(struct bpf_map *map, int *key, void *value)
+{
+	void *ptr;
+	struct inode *inode;
+	int ret;
+
+	/* check inode access */
+	inode = inode_from_fd(*key, true);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	rcu_read_lock();
+	ptr = htab_map_lookup_elem(map, &inode);
+	iput(inode);
+	if (IS_ERR(ptr)) {
+		ret = PTR_ERR(ptr);
+	} else if (!ptr) {
+		ret = -ENOENT;
+	} else {
+		ret = 0;
+		copy_map_value(map, value, ptr);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/* called from kernel */
+int bpf_inode_ptr_locked_htab_map_delete_elem(struct bpf_map *map,
+		struct inode **key, bool remove_in_inode)
+{
+	if (remove_in_inode)
+		landlock_inode_remove_map(*key, map);
+	return htab_map_delete_elem(map, key);
+}
+
+/* called from syscall */
+int bpf_inode_fd_htab_map_delete_elem(struct bpf_map *map, int *key)
+{
+	struct inode *inode;
+	int ret;
+
+	/* do not check inode access (similar to directory check) */
+	inode = inode_from_fd(*key, false);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	ret = bpf_inode_ptr_locked_htab_map_delete_elem(map, &inode, true);
+	iput(inode);
+	return ret;
+}
+
+/* called from syscall */
+int bpf_inode_fd_htab_map_update_elem(struct bpf_map *map, int *key, void *value,
+		u64 map_flags)
+{
+	struct inode *inode;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* check inode access */
+	inode = inode_from_fd(*key, true);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+	ret = htab_map_update_elem(map, &inode, value, map_flags);
+	if (!ret)
+		ret = landlock_inode_add_map(inode, map);
+	iput(inode);
+	return ret;
+}
+
+static void inode_htab_map_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_node *n;
+	struct hlist_nulls_head *head;
+	struct htab_elem *l;
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		head = select_bucket(htab, i);
+		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+			landlock_inode_remove_map(*((struct inode **)l->key), map);
+		}
+	}
+	htab_map_free(map);
+}
+
+/* use the map_inode_lookup_elem() helper instead */
+static void *map_lookup_no_elem(struct bpf_map *map, void *key)
+{
+	WARN_ON_ONCE(1);
+	return NULL;
+}
+
+static int map_delete_no_elem(struct bpf_map *map, void *key)
+{
+	WARN_ON_ONCE(1);
+	return -ENOTSUPP;
+}
+
+static int map_update_no_elem(struct bpf_map *map, void *key, void *value,
+		u64 flags)
+{
+	WARN_ON_ONCE(1);
+	return -ENOTSUPP;
+}
+
+const struct bpf_map_ops htab_inode_ops = {
+	.map_alloc_check = inode_htab_map_alloc_check,
+	.map_alloc = htab_map_alloc,
+	.map_free = inode_htab_map_free,
+	.map_put_key = inode_htab_put_key,
+	.map_get_next_key = map_get_next_no_key,
+	.map_lookup_elem = map_lookup_no_elem,
+	.map_delete_elem = map_delete_no_elem,
+	.map_update_elem = map_update_no_elem,
+	.map_check_btf = map_check_no_btf,
+};
+
+/*
+ * We need a dedicated helper to deal with inode maps because the key is a
+ * pointer to an opaque data, only provided by the kernel.  This really act
+ * like a (physical or cryptographic) key, which is why it is also not allowed
+ * to get the next key with map_get_next_key().
+ */
+BPF_CALL_2(bpf_inode_map_lookup_elem, struct bpf_map *, map, void *, key)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return (unsigned long)htab_map_lookup_elem(map, &key);
+}
+
+const struct bpf_func_proto bpf_inode_map_lookup_elem_proto = {
+	.func		= bpf_inode_map_lookup_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_INODE,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b2a8cb14f28e..e46441c42b68 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -801,6 +801,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 		   map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_peek_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_INODE) {
+		err = bpf_inode_fd_htab_map_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		if (map->ops->map_lookup_elem_sys_only)
@@ -951,6 +953,10 @@ static int map_update_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 		   map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_push_elem(map, value, attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_INODE) {
+		rcu_read_lock();
+		err = bpf_inode_fd_htab_map_update_elem(map, key, value, attr->flags);
+		rcu_read_unlock();
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -1006,7 +1012,10 @@ static int map_delete_elem(union bpf_attr *attr)
 	preempt_disable();
 	__this_cpu_inc(bpf_prog_active);
 	rcu_read_lock();
-	err = map->ops->map_delete_elem(map, key);
+	if (map->map_type == BPF_MAP_TYPE_INODE)
+		err = bpf_inode_fd_htab_map_delete_elem(map, key);
+	else
+		err = map->ops->map_delete_elem(map, key);
 	rcu_read_unlock();
 	__this_cpu_dec(bpf_prog_active);
 	preempt_enable();
@@ -1018,6 +1027,22 @@ static int map_delete_elem(union bpf_attr *attr)
 	return err;
 }
 
+int bpf_inode_ptr_unlocked_htab_map_delete_elem(struct bpf_map *map,
+						struct inode **key, bool remove_in_inode)
+{
+	int err;
+
+	preempt_disable();
+	__this_cpu_inc(bpf_prog_active);
+	rcu_read_lock();
+	err = bpf_inode_ptr_locked_htab_map_delete_elem(map, key, remove_in_inode);
+	rcu_read_unlock();
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+	maybe_wait_bpf_programs(map);
+	return err;
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 026c68cb9116..3972b9f02dac 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -400,6 +400,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
+	[PTR_TO_INODE]		= "inode",
 };
 
 static char slot_type_char[] = {
@@ -1846,6 +1847,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK:
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
+	case PTR_TO_INODE:
 		return true;
 	default:
 		return false;
@@ -3306,6 +3308,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			verbose(env, "verifier internal error\n");
 			return -EFAULT;
 		}
+	} else if (arg_type == ARG_PTR_TO_INODE) {
+		expected_type = PTR_TO_INODE;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -3511,6 +3517,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_sk_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_INODE:
+		if (func_id != BPF_FUNC_inode_map_lookup_elem)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -3579,6 +3589,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_inode_map_lookup_elem:
+		if (map->map_type != BPF_MAP_TYPE_INODE)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/security/landlock/common.h b/security/landlock/common.h
index b2ee018eb6fc..b0ba3f31ac7d 100644
--- a/security/landlock/common.h
+++ b/security/landlock/common.h
@@ -11,6 +11,7 @@
 
 #include <linux/bpf.h> /* enum bpf_attach_type */
 #include <linux/filter.h> /* bpf_prog */
+#include <linux/lsm_hooks.h> /* lsm_blob_sizes */
 #include <linux/refcount.h> /* refcount_t */
 #include <uapi/linux/landlock.h> /* LANDLOCK_TRIGGER_* */
 
@@ -23,6 +24,8 @@
 #define _LANDLOCK_TRIGGER_FS_PICK_LAST	LANDLOCK_TRIGGER_FS_PICK_WRITE
 #define _LANDLOCK_TRIGGER_FS_PICK_MASK	((_LANDLOCK_TRIGGER_FS_PICK_LAST << 1ULL) - 1)
 
+extern struct lsm_blob_sizes landlock_blob_sizes;
+
 enum landlock_hook_type {
 	LANDLOCK_HOOK_FS_PICK = 1,
 	LANDLOCK_HOOK_FS_WALK,
@@ -55,6 +58,17 @@ struct landlock_prog_set {
 	refcount_t usage;
 };
 
+struct landlock_inode_map {
+	struct list_head list;
+	struct rcu_head rcu_put;
+	struct bpf_map *map;
+	/*
+	 * It would be nice to remove the inode field, but it is necessary for
+	 * call_rcu() .
+	 */
+	struct inode *inode;
+};
+
 /**
  * get_hook_index - get an index for the programs of struct landlock_prog_set
  *
diff --git a/security/landlock/hooks_fs.c b/security/landlock/hooks_fs.c
index 3f81b7fc2938..8c9d6a333111 100644
--- a/security/landlock/hooks_fs.c
+++ b/security/landlock/hooks_fs.c
@@ -46,6 +46,12 @@ bool landlock_is_valid_access_fs_pick(int off, enum bpf_access_type type,
 		enum bpf_reg_type *reg_type, int *max_size)
 {
 	switch (off) {
+	case offsetof(struct landlock_ctx_fs_pick, inode):
+		if (type != BPF_READ)
+			return false;
+		*reg_type = PTR_TO_INODE;
+		*max_size = sizeof(u64);
+		return true;
 	default:
 		return false;
 	}
@@ -55,6 +61,12 @@ bool landlock_is_valid_access_fs_walk(int off, enum bpf_access_type type,
 		enum bpf_reg_type *reg_type, int *max_size)
 {
 	switch (off) {
+	case offsetof(struct landlock_ctx_fs_walk, inode):
+		if (type != BPF_READ)
+			return false;
+		*reg_type = PTR_TO_INODE;
+		*max_size = sizeof(u64);
+		return true;
 	default:
 		return false;
 	}
@@ -237,8 +249,79 @@ static int hook_sb_pivotroot(const struct path *old_path,
 			new_path->dentry->d_inode);
 }
 
+/* inode helpers */
+
+static inline struct list_head *inode_landlock(const struct inode *inode)
+{
+	return inode->i_security + landlock_blob_sizes.lbs_inode;
+}
+
+int landlock_inode_add_map(struct inode *inode, struct bpf_map *map)
+{
+	struct landlock_inode_map *inode_map;
+
+	inode_map = kzalloc(sizeof(*inode_map), GFP_ATOMIC);
+	if (!inode_map)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&inode_map->list);
+	inode_map->map = map;
+	inode_map->inode = inode;
+	list_add_tail(&inode_map->list, inode_landlock(inode));
+	return 0;
+}
+
+static void put_landlock_inode_map(struct rcu_head *head)
+{
+	struct landlock_inode_map *inode_map;
+	int err;
+
+	inode_map = container_of(head, struct landlock_inode_map, rcu_put);
+	err = bpf_inode_ptr_unlocked_htab_map_delete_elem(inode_map->map,
+			&inode_map->inode, false);
+	bpf_map_put(inode_map->map);
+	kfree(inode_map);
+}
+
+void landlock_inode_remove_map(struct inode *inode, const struct bpf_map *map)
+{
+	struct landlock_inode_map *inode_map;
+	bool found = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(inode_map, inode_landlock(inode), list) {
+		if (inode_map->map == map) {
+			found = true;
+			list_del_rcu(&inode_map->list);
+			kfree_rcu(inode_map, rcu_put);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	WARN_ON(!found);
+}
+
 /* inode hooks */
 
+static int hook_inode_alloc_security(struct inode *inode)
+{
+	struct list_head *ll_inode = inode_landlock(inode);
+
+	INIT_LIST_HEAD(ll_inode);
+	return 0;
+}
+
+static void hook_inode_free_security(struct inode *inode)
+{
+	struct landlock_inode_map *inode_map;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(inode_map, inode_landlock(inode), list) {
+		list_del_rcu(&inode_map->list);
+		call_rcu(&inode_map->rcu_put, put_landlock_inode_map);
+	}
+	rcu_read_unlock();
+}
+
 /* a directory inode contains only one dentry */
 static int hook_inode_create(struct inode *dir, struct dentry *dentry,
 		umode_t mode)
@@ -517,6 +600,8 @@ static struct security_hook_list landlock_hooks[] = {
 	LSM_HOOK_INIT(sb_mount, hook_sb_mount),
 	LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
 
+	LSM_HOOK_INIT(inode_alloc_security, hook_inode_alloc_security),
+	LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
 	LSM_HOOK_INIT(inode_create, hook_inode_create),
 	LSM_HOOK_INIT(inode_link, hook_inode_link),
 	LSM_HOOK_INIT(inode_unlink, hook_inode_unlink),
diff --git a/security/landlock/init.c b/security/landlock/init.c
index 391e88bd4d3a..eec4467cb5ee 100644
--- a/security/landlock/init.c
+++ b/security/landlock/init.c
@@ -104,6 +104,18 @@ static const struct bpf_func_proto *bpf_landlock_func_proto(
 	default:
 		break;
 	}
+
+	switch (get_hook_type(prog)) {
+	case LANDLOCK_HOOK_FS_WALK:
+	case LANDLOCK_HOOK_FS_PICK:
+		switch (func_id) {
+		case BPF_FUNC_inode_map_lookup_elem:
+			return &bpf_inode_map_lookup_elem_proto;
+		default:
+			break;
+		}
+		break;
+	}
 	return NULL;
 }
 
@@ -123,6 +135,7 @@ static int __init landlock_init(void)
 }
 
 struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {
+	.lbs_inode = sizeof(struct list_head),
 };
 
 DEFINE_LSM(LANDLOCK_NAME) = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7b7a4f6c3104..7a55535f5dc1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -134,6 +134,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
+	BPF_MAP_TYPE_INODE,
 };
 
 /* Note that tracing related programs such as
@@ -2714,6 +2715,14 @@ union bpf_attr {
  *		**-EPERM** if no permission to send the *sig*.
  *
  *		**-EAGAIN** if bpf program can try again.
+ *
+ * void *bpf_inode_map_lookup_elem(struct bpf_map *map, const void *key)
+ *	Description
+ *		Perform a lookup in *map* for an entry associated to an inode
+ *		*key*.
+ *	Return
+ *		Map value associated to *key*, or **NULL** if no entry was
+ *		found.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2825,7 +2834,8 @@ union bpf_attr {
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
-	FN(send_signal),
+	FN(send_signal),		\
+	FN(inode_map_lookup_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 03c910d1f84c..98875221310d 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -250,6 +250,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 	case BPF_MAP_TYPE_XSKMAP:
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+	case BPF_MAP_TYPE_INODE:
 	default:
 		break;
 	}
-- 
2.22.0


^ permalink raw reply related

* [PATCH bpf-next v10 05/10] landlock: Handle filesystem access control
From: Mickaël Salaün @ 2019-07-21 21:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Alexander Viro, Alexei Starovoitov,
	Andrew Morton, Andy Lutomirski, Arnaldo Carvalho de Melo,
	Casey Schaufler, Daniel Borkmann, David Drysdale,
	David S . Miller, Eric W . Biederman, James Morris, Jann Horn,
	John Johansen, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Paul Moore, Sargun Dhillon,
	Serge E . Hallyn, Shuah Khan, Stephen Smalley, Tejun Heo,
	Tetsuo Handa, Thomas Graf, Tycho Andersen, Will Drewry,
	kernel-hardening, linux-api, linux-fsdevel, linux-security-module,
	netdev
In-Reply-To: <20190721213116.23476-1-mic@digikod.net>

This add two Landlock hooks: FS_WALK and FS_PICK.

The FS_WALK hook is used to walk through a file path. A program tied to
this hook will be evaluated for each directory traversal except the last
one if it is the leaf of the path.  It is important to differentiate
this hook from FS_PICK to enable more powerful path evaluation in the
future (cf. Landlock patch v8).

The FS_PICK hook is used to validate a set of actions requested on a
file. This actions are defined with triggers (e.g. read, write, open,
append...).

The Landlock LSM hook registration is done after other LSM to only run
actions from user-space, via eBPF programs, if the access was granted by
major (privileged) LSMs.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
---

Changes since v9:
* replace subtype with expected_attach_type and expected_attach_triggers

Changes since v8:
* add a new LSM_ORDER_LAST, cf. commit e2bc445b66ca ("LSM: Introduce
  enum lsm_order")
* add WARN_ON() for pointer dereferencement
* remove the FS_GET subtype which rely on program chaining
* remove the subtype option which was only used for chaining (with the
  "previous" field)
* remove inode_lookup which depends on the (removed) nameidata security
  blob
* remove eBPF helpers to get and set Landlock inode tags
* do not use task LSM credentials (for now)

Changes since v7:
* major rewrite with clean Landlock hooks able to deal with file paths

Changes since v6:
* add 3 more sub-events: IOCTL, LOCK, FCNTL
  https://lkml.kernel.org/r/2fbc99a6-f190-f335-bd14-04bdeed35571@digikod.net
* use the new security_add_hooks()
* explain the -Werror=unused-function
* constify pointers
* cleanup headers

Changes since v5:
* split hooks.[ch] into hooks.[ch] and hooks_fs.[ch]
* add more documentation
* cosmetic fixes
* rebase (SCALAR_VALUE)

Changes since v4:
* add LSM hook abstraction called Landlock event
  * use the compiler type checking to verify hooks use by an event
  * handle all filesystem related LSM hooks (e.g. file_permission,
    mmap_file, sb_mount...)
* register BPF programs for Landlock just after LSM hooks registration
* move hooks registration after other LSMs
* add failsafes to check if a hook is not used by the kernel
* allow partial raw value access form the context (needed for programs
  generated by LLVM)

Changes since v3:
* split commit
* add hooks dealing with struct inode and struct path pointers:
  inode_permission and inode_getattr
* add abstraction over eBPF helper arguments thanks to wrapping structs
---
 include/linux/lsm_hooks.h    |   1 +
 security/landlock/Makefile   |   3 +-
 security/landlock/common.h   |   9 +
 security/landlock/hooks.c    |  94 ++++++
 security/landlock/hooks.h    |  31 ++
 security/landlock/hooks_fs.c | 554 +++++++++++++++++++++++++++++++++++
 security/landlock/hooks_fs.h |  31 ++
 security/landlock/init.c     |  33 +++
 security/security.c          |  15 +
 9 files changed, 770 insertions(+), 1 deletion(-)
 create mode 100644 security/landlock/hooks.c
 create mode 100644 security/landlock/hooks.h
 create mode 100644 security/landlock/hooks_fs.c
 create mode 100644 security/landlock/hooks_fs.h

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index df1318d85f7d..c06ad8a1d424 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2092,6 +2092,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count,
 enum lsm_order {
 	LSM_ORDER_FIRST = -1,	/* This is only for capabilities. */
 	LSM_ORDER_MUTABLE = 0,
+	LSM_ORDER_LAST = 1,	/* potentially-unprivileged LSM */
 };
 
 struct lsm_info {
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
index 2a1a7082a365..270ece5d93de 100644
--- a/security/landlock/Makefile
+++ b/security/landlock/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
 landlock-y := init.o \
-	enforce.o enforce_seccomp.o
+	enforce.o enforce_seccomp.o \
+	hooks.o hooks_fs.o
diff --git a/security/landlock/common.h b/security/landlock/common.h
index 2cf36dbf4560..b2ee018eb6fc 100644
--- a/security/landlock/common.h
+++ b/security/landlock/common.h
@@ -79,4 +79,13 @@ static inline enum landlock_hook_type get_hook_type(const struct bpf_prog *prog)
 	}
 }
 
+__maybe_unused
+static bool current_has_prog_type(enum landlock_hook_type hook_type)
+{
+	struct landlock_prog_set *prog_set;
+
+	prog_set = current->seccomp.landlock_prog_set;
+	return (prog_set && prog_set->programs[get_hook_index(hook_type)]);
+}
+
 #endif /* _SECURITY_LANDLOCK_COMMON_H */
diff --git a/security/landlock/hooks.c b/security/landlock/hooks.c
new file mode 100644
index 000000000000..97c54957f17b
--- /dev/null
+++ b/security/landlock/hooks.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - hook helpers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/bpf.h> /* enum bpf_prog_aux */
+#include <linux/errno.h>
+#include <linux/filter.h> /* BPF_PROG_RUN() */
+#include <linux/rculist.h> /* list_add_tail_rcu */
+#include <uapi/linux/landlock.h> /* struct landlock_context */
+
+#include "common.h" /* struct landlock_rule, get_hook_index() */
+#include "hooks.h" /* landlock_hook_ctx */
+
+#include "hooks_fs.h"
+
+/* return a Landlock program context (e.g. hook_ctx->fs_walk.prog_ctx) */
+static const void *get_ctx(enum landlock_hook_type hook_type,
+		struct landlock_hook_ctx *hook_ctx)
+{
+	switch (hook_type) {
+	case LANDLOCK_HOOK_FS_WALK:
+		return landlock_get_ctx_fs_walk(hook_ctx->fs_walk);
+	case LANDLOCK_HOOK_FS_PICK:
+		return landlock_get_ctx_fs_pick(hook_ctx->fs_pick);
+	}
+	WARN_ON(1);
+	return NULL;
+}
+
+/**
+ * landlock_access_deny - run Landlock programs tied to a hook
+ *
+ * @hook_idx: hook index in the programs array
+ * @ctx: non-NULL valid eBPF context
+ * @prog_set: Landlock program set pointer
+ * @triggers: a bitmask to check if a program should be run
+ *
+ * Return true if at least one program return deny.
+ */
+static bool landlock_access_deny(enum landlock_hook_type hook_type,
+		struct landlock_hook_ctx *hook_ctx,
+		struct landlock_prog_set *prog_set, u64 triggers)
+{
+	struct landlock_prog_list *prog_list, *prev_list = NULL;
+	u32 hook_idx = get_hook_index(hook_type);
+
+	if (!prog_set)
+		return false;
+
+	for (prog_list = prog_set->programs[hook_idx];
+			prog_list; prog_list = prog_list->prev) {
+		u32 ret;
+		const void *prog_ctx;
+
+		/* check if @prog expect at least one of this triggers */
+		if (triggers && !(triggers & prog_list->prog->aux->
+					expected_attach_triggers))
+			continue;
+		prog_ctx = get_ctx(hook_type, hook_ctx);
+		if (!prog_ctx || WARN_ON(IS_ERR(prog_ctx)))
+			return true;
+		rcu_read_lock();
+		ret = BPF_PROG_RUN(prog_list->prog, prog_ctx);
+		rcu_read_unlock();
+		/* deny access if a program returns a value different than 0 */
+		if (ret)
+			return true;
+		if (prev_list && prog_list->prev && prog_list->prev->prog->
+				expected_attach_type ==
+				prev_list->prog->expected_attach_type)
+			WARN_ON(prog_list->prev != prev_list);
+		prev_list = prog_list;
+	}
+	return false;
+}
+
+int landlock_decide(enum landlock_hook_type hook_type,
+		struct landlock_hook_ctx *hook_ctx, u64 triggers)
+{
+	bool deny = false;
+
+#ifdef CONFIG_SECCOMP_FILTER
+	deny = landlock_access_deny(hook_type, hook_ctx,
+			current->seccomp.landlock_prog_set, triggers);
+#endif /* CONFIG_SECCOMP_FILTER */
+
+	/* should we use -EPERM or -EACCES? */
+	return deny ? -EACCES : 0;
+}
diff --git a/security/landlock/hooks.h b/security/landlock/hooks.h
new file mode 100644
index 000000000000..31446e6629fb
--- /dev/null
+++ b/security/landlock/hooks.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - hooks helpers
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/sched.h> /* struct task_struct */
+#include <linux/seccomp.h>
+
+#include "hooks_fs.h"
+
+struct landlock_hook_ctx {
+	union {
+		struct landlock_hook_ctx_fs_walk *fs_walk;
+		struct landlock_hook_ctx_fs_pick *fs_pick;
+	};
+};
+
+static inline bool landlocked(const struct task_struct *task)
+{
+#ifdef CONFIG_SECCOMP_FILTER
+	return !!(task->seccomp.landlock_prog_set);
+#else
+	return false;
+#endif /* CONFIG_SECCOMP_FILTER */
+}
+
+int landlock_decide(enum landlock_hook_type, struct landlock_hook_ctx *, u64);
diff --git a/security/landlock/hooks_fs.c b/security/landlock/hooks_fs.c
new file mode 100644
index 000000000000..3f81b7fc2938
--- /dev/null
+++ b/security/landlock/hooks_fs.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - filesystem hooks
+ *
+ * Copyright © 2016-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <linux/bpf.h> /* enum bpf_access_type */
+#include <linux/kernel.h> /* ARRAY_SIZE */
+#include <linux/lsm_hooks.h>
+#include <linux/rcupdate.h> /* synchronize_rcu() */
+#include <linux/stat.h> /* S_ISDIR */
+#include <linux/stddef.h> /* offsetof */
+#include <linux/types.h> /* uintptr_t */
+#include <linux/workqueue.h> /* INIT_WORK() */
+
+/* permissions translation */
+#include <linux/fs.h> /* MAY_* */
+#include <linux/mman.h> /* PROT_* */
+#include <linux/namei.h>
+
+/* hook arguments */
+#include <linux/dcache.h> /* struct dentry */
+#include <linux/fs.h> /* struct inode, struct iattr */
+#include <linux/mm_types.h> /* struct vm_area_struct */
+#include <linux/mount.h> /* struct vfsmount */
+#include <linux/path.h> /* struct path */
+#include <linux/sched.h> /* struct task_struct */
+#include <linux/time.h> /* struct timespec */
+
+#include "common.h"
+#include "hooks_fs.h"
+#include "hooks.h"
+
+/* fs_pick */
+
+#include <asm/page.h> /* PAGE_SIZE */
+#include <asm/syscall.h>
+#include <linux/dcache.h> /* d_path, dentry_path_raw */
+#include <linux/err.h> /* *_ERR */
+#include <linux/gfp.h> /* __get_free_page, GFP_KERNEL */
+#include <linux/path.h> /* struct path */
+
+bool landlock_is_valid_access_fs_pick(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size)
+{
+	switch (off) {
+	default:
+		return false;
+	}
+}
+
+bool landlock_is_valid_access_fs_walk(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size)
+{
+	switch (off) {
+	default:
+		return false;
+	}
+}
+
+/* fs_walk */
+
+struct landlock_hook_ctx_fs_walk {
+	struct landlock_ctx_fs_walk prog_ctx;
+};
+
+const struct landlock_ctx_fs_walk *landlock_get_ctx_fs_walk(
+		const struct landlock_hook_ctx_fs_walk *hook_ctx)
+{
+	if (WARN_ON(!hook_ctx))
+		return NULL;
+
+	return &hook_ctx->prog_ctx;
+}
+
+static int decide_fs_walk(int may_mask, struct inode *inode)
+{
+	struct landlock_hook_ctx_fs_walk fs_walk = {};
+	struct landlock_hook_ctx hook_ctx = {
+		.fs_walk = &fs_walk,
+	};
+	const enum landlock_hook_type hook_type = LANDLOCK_HOOK_FS_WALK;
+
+	if (!current_has_prog_type(hook_type))
+		/* no fs_walk */
+		return 0;
+	if (WARN_ON(!inode))
+		return -EFAULT;
+
+	/* init common data: inode */
+	fs_walk.prog_ctx.inode = (uintptr_t)inode;
+	return landlock_decide(hook_type, &hook_ctx, 0);
+}
+
+/* fs_pick */
+
+struct landlock_hook_ctx_fs_pick {
+	__u64 triggers;
+	struct landlock_ctx_fs_pick prog_ctx;
+};
+
+const struct landlock_ctx_fs_pick *landlock_get_ctx_fs_pick(
+		const struct landlock_hook_ctx_fs_pick *hook_ctx)
+{
+	if (WARN_ON(!hook_ctx))
+		return NULL;
+
+	return &hook_ctx->prog_ctx;
+}
+
+static int decide_fs_pick(__u64 triggers, struct inode *inode)
+{
+	struct landlock_hook_ctx_fs_pick fs_pick = {};
+	struct landlock_hook_ctx hook_ctx = {
+		.fs_pick = &fs_pick,
+	};
+	const enum landlock_hook_type hook_type = LANDLOCK_HOOK_FS_PICK;
+
+	if (WARN_ON(!triggers))
+		return 0;
+	if (!current_has_prog_type(hook_type))
+		/* no fs_pick */
+		return 0;
+	if (WARN_ON(!inode))
+		return -EFAULT;
+
+	fs_pick.triggers = triggers,
+	/* init common data: inode */
+	fs_pick.prog_ctx.inode = (uintptr_t)inode;
+	return landlock_decide(hook_type, &hook_ctx, fs_pick.triggers);
+}
+
+/* helpers */
+
+static u64 fs_may_to_triggers(int may_mask, umode_t mode)
+{
+	u64 ret = 0;
+
+	if (may_mask & MAY_EXEC)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_EXECUTE;
+	if (may_mask & MAY_READ) {
+		if (S_ISDIR(mode))
+			ret |= LANDLOCK_TRIGGER_FS_PICK_READDIR;
+		else
+			ret |= LANDLOCK_TRIGGER_FS_PICK_READ;
+	}
+	if (may_mask & MAY_WRITE)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_WRITE;
+	if (may_mask & MAY_APPEND)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_APPEND;
+	if (may_mask & MAY_OPEN)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_OPEN;
+	if (may_mask & MAY_CHROOT)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_CHROOT;
+	else if (may_mask & MAY_CHDIR)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_CHDIR;
+	/* XXX: ignore MAY_ACCESS */
+	WARN_ON(!ret);
+	return ret;
+}
+
+static inline u64 mem_prot_to_triggers(unsigned long prot, bool private)
+{
+	u64 ret = LANDLOCK_TRIGGER_FS_PICK_MAP;
+
+	/* private mapping do not write to files */
+	if (!private && (prot & PROT_WRITE))
+		ret |= LANDLOCK_TRIGGER_FS_PICK_WRITE;
+	if (prot & PROT_READ)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_READ;
+	if (prot & PROT_EXEC)
+		ret |= LANDLOCK_TRIGGER_FS_PICK_EXECUTE;
+	WARN_ON(!ret);
+	return ret;
+}
+
+/* binder hooks */
+
+static int hook_binder_transfer_file(struct task_struct *from,
+		struct task_struct *to, struct file *file)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_TRANSFER,
+			file_inode(file));
+}
+
+/* sb hooks */
+
+static int hook_sb_statfs(struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			dentry->d_inode);
+}
+
+/* TODO: handle mount source and remount */
+static int hook_sb_mount(const char *dev_name, const struct path *path,
+		const char *type, unsigned long flags, void *data)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!path))
+		return 0;
+	if (WARN_ON(!path->dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_MOUNTON,
+			path->dentry->d_inode);
+}
+
+/*
+ * The @old_path is similar to a destination mount point.
+ */
+static int hook_sb_pivotroot(const struct path *old_path,
+		const struct path *new_path)
+{
+	int err;
+
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!old_path))
+		return 0;
+	if (WARN_ON(!old_path->dentry))
+		return 0;
+	err = decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_MOUNTON,
+			old_path->dentry->d_inode);
+	if (err)
+		return err;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CHROOT,
+			new_path->dentry->d_inode);
+}
+
+/* inode hooks */
+
+/* a directory inode contains only one dentry */
+static int hook_inode_create(struct inode *dir, struct dentry *dentry,
+		umode_t mode)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_link(struct dentry *old_dentry, struct inode *dir,
+		struct dentry *new_dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!old_dentry)) {
+		int ret = decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_LINK,
+				old_dentry->d_inode);
+		if (ret)
+			return ret;
+	}
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_LINKTO, dir);
+}
+
+static int hook_inode_unlink(struct inode *dir, struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_UNLINK,
+			dentry->d_inode);
+}
+
+static int hook_inode_symlink(struct inode *dir, struct dentry *dentry,
+		const char *old_name)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_mkdir(struct inode *dir, struct dentry *dentry,
+		umode_t mode)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RMDIR, dentry->d_inode);
+}
+
+static int hook_inode_mknod(struct inode *dir, struct dentry *dentry,
+		umode_t mode, dev_t dev)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_CREATE, dir);
+}
+
+static int hook_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
+		struct inode *new_dir, struct dentry *new_dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (!WARN_ON(!old_dentry)) {
+		int ret = decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RENAME,
+				old_dentry->d_inode);
+		if (ret)
+			return ret;
+	}
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RENAMETO, new_dir);
+}
+
+static int hook_inode_readlink(struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_READ, dentry->d_inode);
+}
+
+/*
+ * ignore the inode_follow_link hook (could set is_symlink in the fs_walk
+ * context)
+ */
+
+static int hook_inode_permission(struct inode *inode, int mask)
+{
+	u64 triggers;
+
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!inode))
+		return 0;
+
+	triggers = fs_may_to_triggers(mask, inode->i_mode);
+	/*
+	 * decide_fs_walk() is exclusive with decide_fs_pick(): in a path walk,
+	 * ignore execute-only access on directory for any fs_pick program
+	 */
+	if (triggers == LANDLOCK_TRIGGER_FS_PICK_EXECUTE &&
+			S_ISDIR(inode->i_mode))
+		return decide_fs_walk(mask, inode);
+
+	return decide_fs_pick(triggers, inode);
+}
+
+static int hook_inode_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_getattr(const struct path *path)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!path))
+		return 0;
+	if (WARN_ON(!path->dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			path->dentry->d_inode);
+}
+
+static int hook_inode_setxattr(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_getxattr(struct dentry *dentry, const char *name)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_listxattr(struct dentry *dentry)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_removexattr(struct dentry *dentry, const char *name)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!dentry))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR,
+			dentry->d_inode);
+}
+
+static int hook_inode_getsecurity(struct inode *inode, const char *name,
+		void **buffer, bool alloc)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR, inode);
+}
+
+static int hook_inode_setsecurity(struct inode *inode, const char *name,
+		const void *value, size_t size, int flag)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_SETATTR, inode);
+}
+
+static int hook_inode_listsecurity(struct inode *inode, char *buffer,
+		size_t buffer_size)
+{
+	if (!landlocked(current))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_GETATTR, inode);
+}
+
+/* file hooks */
+
+static int hook_file_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_IOCTL,
+			file_inode(file));
+}
+
+static int hook_file_lock(struct file *file, unsigned int cmd)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_LOCK, file_inode(file));
+}
+
+static int hook_file_fcntl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_FCNTL,
+			file_inode(file));
+}
+
+static int hook_mmap_file(struct file *file, unsigned long reqprot,
+		unsigned long prot, unsigned long flags)
+{
+	if (!landlocked(current))
+		return 0;
+	/* file can be null for anonymous mmap */
+	if (!file)
+		return 0;
+	return decide_fs_pick(mem_prot_to_triggers(prot, flags & MAP_PRIVATE),
+			file_inode(file));
+}
+
+static int hook_file_mprotect(struct vm_area_struct *vma,
+		unsigned long reqprot, unsigned long prot)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!vma))
+		return 0;
+	if (!vma->vm_file)
+		return 0;
+	return decide_fs_pick(mem_prot_to_triggers(prot,
+				!(vma->vm_flags & VM_SHARED)),
+			file_inode(vma->vm_file));
+}
+
+static int hook_file_receive(struct file *file)
+{
+	if (!landlocked(current))
+		return 0;
+	if (WARN_ON(!file))
+		return 0;
+	return decide_fs_pick(LANDLOCK_TRIGGER_FS_PICK_RECEIVE,
+			file_inode(file));
+}
+
+static struct security_hook_list landlock_hooks[] = {
+	LSM_HOOK_INIT(binder_transfer_file, hook_binder_transfer_file),
+
+	LSM_HOOK_INIT(sb_statfs, hook_sb_statfs),
+	LSM_HOOK_INIT(sb_mount, hook_sb_mount),
+	LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
+
+	LSM_HOOK_INIT(inode_create, hook_inode_create),
+	LSM_HOOK_INIT(inode_link, hook_inode_link),
+	LSM_HOOK_INIT(inode_unlink, hook_inode_unlink),
+	LSM_HOOK_INIT(inode_symlink, hook_inode_symlink),
+	LSM_HOOK_INIT(inode_mkdir, hook_inode_mkdir),
+	LSM_HOOK_INIT(inode_rmdir, hook_inode_rmdir),
+	LSM_HOOK_INIT(inode_mknod, hook_inode_mknod),
+	LSM_HOOK_INIT(inode_rename, hook_inode_rename),
+	LSM_HOOK_INIT(inode_readlink, hook_inode_readlink),
+	LSM_HOOK_INIT(inode_permission, hook_inode_permission),
+	LSM_HOOK_INIT(inode_setattr, hook_inode_setattr),
+	LSM_HOOK_INIT(inode_getattr, hook_inode_getattr),
+	LSM_HOOK_INIT(inode_setxattr, hook_inode_setxattr),
+	LSM_HOOK_INIT(inode_getxattr, hook_inode_getxattr),
+	LSM_HOOK_INIT(inode_listxattr, hook_inode_listxattr),
+	LSM_HOOK_INIT(inode_removexattr, hook_inode_removexattr),
+	LSM_HOOK_INIT(inode_getsecurity, hook_inode_getsecurity),
+	LSM_HOOK_INIT(inode_setsecurity, hook_inode_setsecurity),
+	LSM_HOOK_INIT(inode_listsecurity, hook_inode_listsecurity),
+
+	/* do not handle file_permission for now */
+	LSM_HOOK_INIT(file_ioctl, hook_file_ioctl),
+	LSM_HOOK_INIT(file_lock, hook_file_lock),
+	LSM_HOOK_INIT(file_fcntl, hook_file_fcntl),
+	LSM_HOOK_INIT(mmap_file, hook_mmap_file),
+	LSM_HOOK_INIT(file_mprotect, hook_file_mprotect),
+	LSM_HOOK_INIT(file_receive, hook_file_receive),
+	/* file_open is not handled, use inode_permission instead */
+};
+
+__init void landlock_add_hooks_fs(void)
+{
+	security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+			LANDLOCK_NAME);
+}
diff --git a/security/landlock/hooks_fs.h b/security/landlock/hooks_fs.h
new file mode 100644
index 000000000000..eeae4dcd842f
--- /dev/null
+++ b/security/landlock/hooks_fs.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - filesystem hooks
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2019 ANSSI
+ */
+
+#include <linux/bpf.h> /* enum bpf_access_type */
+
+__init void landlock_add_hooks_fs(void);
+
+/* fs_pick */
+
+struct landlock_hook_ctx_fs_pick;
+
+bool landlock_is_valid_access_fs_pick(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size);
+
+const struct landlock_ctx_fs_pick *landlock_get_ctx_fs_pick(
+		const struct landlock_hook_ctx_fs_pick *hook_ctx);
+
+/* fs_walk */
+
+struct landlock_hook_ctx_fs_walk;
+
+bool landlock_is_valid_access_fs_walk(int off, enum bpf_access_type type,
+		enum bpf_reg_type *reg_type, int *max_size);
+
+const struct landlock_ctx_fs_walk *landlock_get_ctx_fs_walk(
+		const struct landlock_hook_ctx_fs_walk *hook_ctx);
diff --git a/security/landlock/init.c b/security/landlock/init.c
index 8dfd5fea3c1f..391e88bd4d3a 100644
--- a/security/landlock/init.c
+++ b/security/landlock/init.c
@@ -9,8 +9,10 @@
 #include <linux/bpf.h> /* enum bpf_access_type */
 #include <linux/capability.h> /* capable */
 #include <linux/filter.h> /* struct bpf_prog */
+#include <linux/lsm_hooks.h>
 
 #include "common.h" /* LANDLOCK_* */
+#include "hooks_fs.h"
 
 static bool bpf_landlock_is_valid_access(int off, int size,
 		enum bpf_access_type type, const struct bpf_prog *prog,
@@ -27,6 +29,20 @@ static bool bpf_landlock_is_valid_access(int off, int size,
 	if (size <= 0 || size > sizeof(__u64))
 		return false;
 
+	/* set register type and max size */
+	switch (get_hook_type(prog)) {
+	case LANDLOCK_HOOK_FS_PICK:
+		if (!landlock_is_valid_access_fs_pick(off, type, &reg_type,
+					&max_size))
+			return false;
+		break;
+	case LANDLOCK_HOOK_FS_WALK:
+		if (!landlock_is_valid_access_fs_walk(off, type, &reg_type,
+					&max_size))
+			return false;
+		break;
+	}
+
 	/* check memory range access */
 	switch (reg_type) {
 	case NOT_INIT:
@@ -98,3 +114,20 @@ const struct bpf_verifier_ops landlock_verifier_ops = {
 };
 
 const struct bpf_prog_ops landlock_prog_ops = {};
+
+static int __init landlock_init(void)
+{
+	pr_info(LANDLOCK_NAME ": Initializing (sandbox with seccomp)\n");
+	landlock_add_hooks_fs();
+	return 0;
+}
+
+struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {
+};
+
+DEFINE_LSM(LANDLOCK_NAME) = {
+	.name = LANDLOCK_NAME,
+	.order = LSM_ORDER_LAST,
+	.blobs = &landlock_blob_sizes,
+	.init = landlock_init,
+};
diff --git a/security/security.c b/security/security.c
index 250ee2d76406..e694e5fe7021 100644
--- a/security/security.c
+++ b/security/security.c
@@ -263,6 +263,21 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 		}
 	}
 
+	/*
+	 * In case of an unprivileged access-control, we don't want to give the
+	 * ability to any process to do some checks (e.g. through an eBPF
+	 * program) on kernel objects (e.g. files) if a privileged security
+	 * policy forbid their access.  We must then load
+	 * potentially-unprivileged security modules after all other LSMs.
+	 *
+	 * LSM_ORDER_LAST is always last and does not appear in the modifiable
+	 * ordered list of enabled LSMs.
+	 */
+	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
+		if (lsm->order == LSM_ORDER_LAST)
+			append_ordered_lsm(lsm, "last");
+	}
+
 	/* Disable all LSMs not in the ordered list. */
 	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
 		if (exists_ordered_lsm(lsm))
-- 
2.22.0


^ permalink raw reply related

* [PATCH bpf-next v10 09/10] bpf,landlock: Add tests for Landlock
From: Mickaël Salaün @ 2019-07-21 21:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Alexander Viro, Alexei Starovoitov,
	Andrew Morton, Andy Lutomirski, Arnaldo Carvalho de Melo,
	Casey Schaufler, Daniel Borkmann, David Drysdale,
	David S . Miller, Eric W . Biederman, James Morris, Jann Horn,
	John Johansen, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Paul Moore, Sargun Dhillon,
	Serge E . Hallyn, Shuah Khan, Stephen Smalley, Tejun Heo,
	Tetsuo Handa, Thomas Graf, Tycho Andersen, Will Drewry,
	kernel-hardening, linux-api, linux-fsdevel, linux-security-module,
	netdev
In-Reply-To: <20190721213116.23476-1-mic@digikod.net>

Test basic context access, ptrace protection and filesystem hooks.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: James Morris <jmorris@namei.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Will Drewry <wad@chromium.org>
---

Changes since v9:
* replace subtype with expected_attach_type and expected_attach_triggers
* rename inode_map_lookup() into inode_map_lookup_elem()
* check for inode map entry without value (which is now possible thanks
  to the pointer null check)
* use read-only inode map for Landlock programs

Changes since v8:
* update eBPF include path for macros
* use TEST_GEN_PROGS and use the generic "clean" target
* add more verbose errors
* update the bpf/verifier files
* remove chain tests (from landlock and bpf/verifier)
* replace the whitelist tests with blacklist tests (because of stateless
  Landlock programs): remove "dotdot" tests and other depth tests
* sync the landlock Makefile with its bpf sibling directory and use
  bpf_load_program_xattr()

Changes since v7:
* update tests and add new ones for filesystem hierarchy and Landlock
  chains.

Changes since v6:
* use the new kselftest_harness.h
* use const variables
* replace ASSERT_STEP with ASSERT_*
* rename BPF_PROG_TYPE_LANDLOCK to BPF_PROG_TYPE_LANDLOCK_RULE
* force sample library rebuild
* fix install target

Changes since v5:
* add subtype test
* add ptrace tests
* split and rename files
* cleanup and rebase
---
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/bpf/test_verifier.c   |   1 +
 .../testing/selftests/bpf/verifier/landlock.c |  24 ++
 tools/testing/selftests/landlock/.gitignore   |   4 +
 tools/testing/selftests/landlock/Makefile     |  39 +++
 tools/testing/selftests/landlock/test.h       |  50 ++++
 tools/testing/selftests/landlock/test_base.c  |  24 ++
 tools/testing/selftests/landlock/test_fs.c    | 256 ++++++++++++++++++
 .../testing/selftests/landlock/test_ptrace.c  | 148 ++++++++++
 9 files changed, 547 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/landlock.c
 create mode 100644 tools/testing/selftests/landlock/.gitignore
 create mode 100644 tools/testing/selftests/landlock/Makefile
 create mode 100644 tools/testing/selftests/landlock/test.h
 create mode 100644 tools/testing/selftests/landlock/test_base.c
 create mode 100644 tools/testing/selftests/landlock/test_fs.c
 create mode 100644 tools/testing/selftests/landlock/test_ptrace.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 25b43a8c2b15..1949fbb3098e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -21,6 +21,7 @@ TARGETS += ir
 TARGETS += kcmp
 TARGETS += kexec
 TARGETS += kvm
+TARGETS += landlock
 TARGETS += lib
 TARGETS += livepatch
 TARGETS += membarrier
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index b0773291012a..b8542431c78b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -30,6 +30,7 @@
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/btf.h>
+#include <linux/landlock.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
diff --git a/tools/testing/selftests/bpf/verifier/landlock.c b/tools/testing/selftests/bpf/verifier/landlock.c
new file mode 100644
index 000000000000..eaf6dddbf208
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/landlock.c
@@ -0,0 +1,24 @@
+{
+	"landlock/fs_walk: always accept",
+	.insns = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK,
+	.expected_attach_type = BPF_LANDLOCK_FS_WALK,
+},
+{
+	"landlock/fs_pick: read context",
+	.insns = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6,
+			offsetof(struct landlock_ctx_fs_pick, inode)),
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK,
+	.expected_attach_type = BPF_LANDLOCK_FS_PICK,
+	.expected_attach_triggers = LANDLOCK_TRIGGER_FS_PICK_READ,
+},
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
new file mode 100644
index 000000000000..25b9cd834c3c
--- /dev/null
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -0,0 +1,4 @@
+/test_base
+/test_fs
+/test_ptrace
+/tmp_*
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
new file mode 100644
index 000000000000..7a253bf6d580
--- /dev/null
+++ b/tools/testing/selftests/landlock/Makefile
@@ -0,0 +1,39 @@
+LIBDIR := ../../../lib
+BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
+
+ifneq ($(wildcard $(GENHDR)),)
+  GENFLAGS := -DHAVE_GENHDR
+endif
+
+BPFOBJS := $(BPFDIR)/bpf.o $(BPFDIR)/nlattr.o
+LOADOBJ := ../../../../samples/bpf/bpf_load.o
+
+CFLAGS += -Wl,-no-as-needed -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDFLAGS += -lelf
+
+test_src = $(wildcard test_*.c)
+
+test_objs := $(test_src:.c=)
+
+TEST_GEN_PROGS := $(test_objs)
+
+.PHONY: all clean force
+
+all: $(test_objs)
+
+# force a rebuild of BPFOBJS when its dependencies are updated
+force:
+
+# rebuild bpf.o as a workaround for the samples/bpf bug
+$(BPFOBJS): $(LOADOBJ) force
+	$(MAKE) -C $(BPFDIR)
+
+$(LOADOBJ): force
+	$(MAKE) -C $(dir $(LOADOBJ))
+
+$(test_objs): $(BPFOBJS) $(LOADOBJ) ../kselftest_harness.h
+
+include ../lib.mk
diff --git a/tools/testing/selftests/landlock/test.h b/tools/testing/selftests/landlock/test.h
new file mode 100644
index 000000000000..e1e86a804180
--- /dev/null
+++ b/tools/testing/selftests/landlock/test.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock helpers
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019 ANSSI
+ */
+
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include <linux/landlock.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+#include "../../../../samples/bpf/bpf_load.h"
+
+#ifndef SECCOMP_PREPEND_LANDLOCK_PROG
+#define SECCOMP_PREPEND_LANDLOCK_PROG	4
+#endif
+
+#ifndef seccomp
+static int __attribute__((unused)) seccomp(unsigned int op, unsigned int flags,
+		void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
+/* bpf_load_program() with subtype */
+static int __attribute__((unused)) ll_bpf_load_program(
+		const struct bpf_insn *insns, size_t insns_cnt, char *log_buf,
+		size_t log_buf_sz, const enum bpf_attach_type attach_type,
+		__u64 attach_triggers)
+{
+	struct bpf_load_program_attr load_attr;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_LANDLOCK_HOOK;
+	load_attr.expected_attach_type = attach_type;
+	load_attr.expected_attach_triggers = attach_triggers;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = "GPL";
+
+	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
diff --git a/tools/testing/selftests/landlock/test_base.c b/tools/testing/selftests/landlock/test_base.c
new file mode 100644
index 000000000000..db46f39048cb
--- /dev/null
+++ b/tools/testing/selftests/landlock/test_base.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - base
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+
+#include "test.h"
+
+TEST(seccomp_landlock)
+{
+	int ret;
+
+	ret = seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Kernel does not support CONFIG_SECURITY_LANDLOCK");
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/test_fs.c b/tools/testing/selftests/landlock/test_fs.c
new file mode 100644
index 000000000000..f35b99fcb70f
--- /dev/null
+++ b/tools/testing/selftests/landlock/test_fs.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - file system
+ *
+ * Copyright © 2018-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#include <bpf/bpf.h> /* bpf_create_map() */
+#include <fcntl.h> /* O_DIRECTORY */
+#include <sys/stat.h> /* statbuf */
+#include <unistd.h> /* faccessat() */
+
+#include "test.h"
+
+#define TEST_PATH_TRIGGERS ( \
+		LANDLOCK_TRIGGER_FS_PICK_OPEN | \
+		LANDLOCK_TRIGGER_FS_PICK_READDIR | \
+		LANDLOCK_TRIGGER_FS_PICK_EXECUTE | \
+		LANDLOCK_TRIGGER_FS_PICK_GETATTR)
+
+static void test_path_rel(struct __test_metadata *_metadata, int dirfd,
+		const char *path, int ret)
+{
+	int fd;
+	struct stat statbuf;
+
+	ASSERT_EQ(ret, faccessat(dirfd, path, R_OK | X_OK, 0));
+	ASSERT_EQ(ret, fstatat(dirfd, path, &statbuf, 0));
+	fd = openat(dirfd, path, O_DIRECTORY);
+	if (ret) {
+		ASSERT_EQ(-1, fd);
+	} else {
+		ASSERT_NE(-1, fd);
+		EXPECT_EQ(0, close(fd));
+	}
+}
+
+static void test_path(struct __test_metadata *_metadata, const char *path,
+		int ret)
+{
+	return test_path_rel(_metadata, AT_FDCWD, path, ret);
+}
+
+static const char d1[] = "/usr";
+static const char d2[] = "/usr/share";
+static const char d3[] = "/usr/share/doc";
+
+TEST(fs_base)
+{
+	test_path(_metadata, d1, 0);
+	test_path(_metadata, d2, 0);
+	test_path(_metadata, d3, 0);
+}
+
+#define MAP_VALUE_DENY 1
+
+static int create_denied_inode_map(struct __test_metadata *_metadata,
+		const char *const dirs[])
+{
+	int map, key, dirs_len, i;
+	__u64 value = MAP_VALUE_DENY;
+
+	ASSERT_NE(NULL, dirs) {
+		TH_LOG("No directory list\n");
+	}
+	ASSERT_NE(NULL, dirs[0]) {
+		TH_LOG("Empty directory list\n");
+	}
+
+	/* get the number of dir entries */
+	for (dirs_len = 0; dirs[dirs_len]; dirs_len++);
+	map = bpf_create_map(BPF_MAP_TYPE_INODE, sizeof(key), sizeof(value),
+			dirs_len, BPF_F_RDONLY_PROG);
+	ASSERT_NE(-1, map) {
+		TH_LOG("Failed to create a map of %d elements: %s\n", dirs_len,
+				strerror(errno));
+	}
+
+	for (i = 0; dirs[i]; i++) {
+		key = open(dirs[i], O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+		ASSERT_NE(-1, key) {
+			TH_LOG("Failed to open directory \"%s\": %s\n", dirs[i],
+					strerror(errno));
+		}
+		ASSERT_EQ(0, bpf_map_update_elem(map, &key, &value, BPF_ANY)) {
+			TH_LOG("Failed to update the map with \"%s\": %s\n",
+					dirs[i], strerror(errno));
+		}
+		close(key);
+	}
+	return map;
+}
+
+static void enforce_map(struct __test_metadata *_metadata, int map,
+		bool subpath)
+{
+	const struct bpf_insn prog_deny[] = {
+		BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+		/* look for the requested inode in the map */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6,
+			offsetof(struct landlock_ctx_fs_walk, inode)),
+		BPF_LD_MAP_FD(BPF_REG_1, map), /* 2 instructions */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				BPF_FUNC_inode_map_lookup_elem),
+		/* if there is no mark, then allow access to this inode */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+		/* otherwise, deny access to this inode */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, MAP_VALUE_DENY, 2),
+		BPF_MOV32_IMM(BPF_REG_0, LANDLOCK_RET_ALLOW),
+		BPF_EXIT_INSN(),
+		BPF_MOV32_IMM(BPF_REG_0, LANDLOCK_RET_DENY),
+		BPF_EXIT_INSN(),
+	};
+	int fd_walk = -1, fd_pick;
+	char log[1024] = "";
+
+	if (subpath) {
+		fd_walk = ll_bpf_load_program((const struct bpf_insn *)&prog_deny,
+				sizeof(prog_deny) / sizeof(struct bpf_insn),
+				log, sizeof(log), BPF_LANDLOCK_FS_WALK, 0);
+		ASSERT_NE(-1, fd_walk) {
+			TH_LOG("Failed to load fs_walk program: %s\n%s",
+					strerror(errno), log);
+		}
+		ASSERT_EQ(0, seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &fd_walk)) {
+			TH_LOG("Failed to apply Landlock program: %s", strerror(errno));
+		}
+		EXPECT_EQ(0, close(fd_walk));
+	}
+
+	fd_pick = ll_bpf_load_program((const struct bpf_insn *)&prog_deny,
+			sizeof(prog_deny) / sizeof(struct bpf_insn), log,
+			sizeof(log), BPF_LANDLOCK_FS_PICK, TEST_PATH_TRIGGERS);
+	ASSERT_NE(-1, fd_pick) {
+		TH_LOG("Failed to load fs_pick program: %s\n%s",
+				strerror(errno), log);
+	}
+	ASSERT_EQ(0, seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &fd_pick)) {
+		TH_LOG("Failed to apply Landlock program: %s", strerror(errno));
+	}
+	EXPECT_EQ(0, close(fd_pick));
+}
+
+static void check_map_blacklist(struct __test_metadata *_metadata,
+		bool subpath)
+{
+	int map = create_denied_inode_map(_metadata, (const char *const [])
+			{ d2, NULL });
+	ASSERT_NE(-1, map);
+	enforce_map(_metadata, map, subpath);
+	test_path(_metadata, d1, 0);
+	test_path(_metadata, d2, -1);
+	test_path(_metadata, d3, subpath ? -1 : 0);
+	EXPECT_EQ(0, close(map));
+}
+
+TEST(fs_map_blacklist_literal)
+{
+	check_map_blacklist(_metadata, false);
+}
+
+TEST(fs_map_blacklist_subpath)
+{
+	check_map_blacklist(_metadata, true);
+}
+
+static const char r2[] = ".";
+static const char r3[] = "./doc";
+
+enum relative_access {
+	REL_OPEN,
+	REL_CHDIR,
+	REL_CHROOT,
+};
+
+static void check_access(struct __test_metadata *_metadata,
+		bool enforce, enum relative_access rel)
+{
+	int dirfd;
+	int map = -1;
+
+	if (rel == REL_CHROOT)
+		ASSERT_NE(-1, chdir(d2));
+	if (enforce) {
+		map = create_denied_inode_map(_metadata, (const char *const [])
+				{ d3, NULL });
+		ASSERT_NE(-1, map);
+		enforce_map(_metadata, map, true);
+	}
+	switch (rel) {
+	case REL_OPEN:
+		dirfd = open(d2, O_DIRECTORY);
+		ASSERT_NE(-1, dirfd);
+		break;
+	case REL_CHDIR:
+		ASSERT_NE(-1, chdir(d2));
+		dirfd = AT_FDCWD;
+		break;
+	case REL_CHROOT:
+		ASSERT_NE(-1, chroot(d2)) {
+			TH_LOG("Failed to chroot: %s\n", strerror(errno));
+		}
+		dirfd = AT_FDCWD;
+		break;
+	default:
+		ASSERT_TRUE(false);
+		return;
+	}
+
+	test_path_rel(_metadata, dirfd, r2, 0);
+	test_path_rel(_metadata, dirfd, r3, enforce ? -1 : 0);
+
+	if (rel == REL_OPEN)
+		EXPECT_EQ(0, close(dirfd));
+	if (enforce)
+		EXPECT_EQ(0, close(map));
+}
+
+TEST(fs_allow_open)
+{
+	/* no enforcement, via open */
+	check_access(_metadata, false, REL_OPEN);
+}
+
+TEST(fs_allow_chdir)
+{
+	/* no enforcement, via chdir */
+	check_access(_metadata, false, REL_CHDIR);
+}
+
+TEST(fs_allow_chroot)
+{
+	/* no enforcement, via chroot */
+	check_access(_metadata, false, REL_CHROOT);
+}
+
+TEST(fs_deny_open)
+{
+	/* enforcement without tag, via open */
+	check_access(_metadata, true, REL_OPEN);
+}
+
+TEST(fs_deny_chdir)
+{
+	/* enforcement without tag, via chdir */
+	check_access(_metadata, true, REL_CHDIR);
+}
+
+TEST(fs_deny_chroot)
+{
+	/* enforcement without tag, via chroot */
+	check_access(_metadata, true, REL_CHROOT);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/test_ptrace.c b/tools/testing/selftests/landlock/test_ptrace.c
new file mode 100644
index 000000000000..b190a809ceec
--- /dev/null
+++ b/tools/testing/selftests/landlock/test_ptrace.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - ptrace
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ */
+
+#define _GNU_SOURCE
+#include <signal.h> /* raise */
+#include <sys/ptrace.h>
+#include <sys/types.h> /* waitpid */
+#include <sys/wait.h> /* waitpid */
+#include <unistd.h> /* fork, pipe */
+
+#include "test.h"
+
+static void apply_null_sandbox(struct __test_metadata *_metadata)
+{
+	const struct bpf_insn prog_accept[] = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int prog;
+	char log[256] = "";
+
+	prog = ll_bpf_load_program((const struct bpf_insn *)&prog_accept,
+			sizeof(prog_accept) / sizeof(struct bpf_insn), log,
+			sizeof(log), BPF_LANDLOCK_FS_PICK, LANDLOCK_TRIGGER_FS_PICK_OPEN);
+	ASSERT_NE(-1, prog) {
+		TH_LOG("Failed to load minimal rule: %s\n%s",
+				strerror(errno), log);
+	}
+	ASSERT_EQ(0, seccomp(SECCOMP_PREPEND_LANDLOCK_PROG, 0, &prog)) {
+		TH_LOG("Failed to apply minimal rule: %s", strerror(errno));
+	}
+	EXPECT_EQ(0, close(prog));
+}
+
+/* PTRACE_TRACEME and PTRACE_ATTACH without Landlock rules effect */
+static void check_ptrace(struct __test_metadata *_metadata,
+		int sandbox_both, int sandbox_parent, int sandbox_child,
+		int expect_ptrace)
+{
+	pid_t child;
+	int status;
+	int pipefd[2];
+
+	ASSERT_EQ(0, pipe(pipefd));
+	if (sandbox_both)
+		apply_null_sandbox(_metadata);
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		char buf;
+
+		EXPECT_EQ(0, close(pipefd[1]));
+		if (sandbox_child)
+			apply_null_sandbox(_metadata);
+
+		/* test traceme */
+		ASSERT_EQ(expect_ptrace, ptrace(PTRACE_TRACEME));
+		if (expect_ptrace) {
+			ASSERT_EQ(EPERM, errno);
+		} else {
+			ASSERT_EQ(0, raise(SIGSTOP));
+		}
+
+		/* sync */
+		ASSERT_EQ(1, read(pipefd[0], &buf, 1)) {
+			TH_LOG("Failed to read() sync from parent");
+		}
+		ASSERT_EQ('.', buf);
+		_exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+	}
+
+	EXPECT_EQ(0, close(pipefd[0]));
+	if (sandbox_parent)
+		apply_null_sandbox(_metadata);
+
+	/* test traceme */
+	if (!expect_ptrace) {
+		ASSERT_EQ(child, waitpid(child, &status, 0));
+		ASSERT_EQ(1, WIFSTOPPED(status));
+		ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+	}
+	/* test attach */
+	ASSERT_EQ(expect_ptrace, ptrace(PTRACE_ATTACH, child, NULL, 0));
+	if (expect_ptrace) {
+		ASSERT_EQ(EPERM, errno);
+	} else {
+		ASSERT_EQ(child, waitpid(child, &status, 0));
+		ASSERT_EQ(1, WIFSTOPPED(status));
+		ASSERT_EQ(0, ptrace(PTRACE_CONT, child, NULL, 0));
+	}
+
+	/* sync */
+	ASSERT_EQ(1, write(pipefd[1], ".", 1)) {
+		TH_LOG("Failed to write() sync to child");
+	}
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	if (WIFSIGNALED(status) || WEXITSTATUS(status))
+		_metadata->passed = 0;
+}
+
+TEST(ptrace_allow_without_sandbox)
+{
+	/* no sandbox */
+	check_ptrace(_metadata, 0, 0, 0, 0);
+}
+
+TEST(ptrace_allow_with_one_sandbox)
+{
+	/* child sandbox */
+	check_ptrace(_metadata, 0, 0, 1, 0);
+}
+
+TEST(ptrace_allow_with_nested_sandbox)
+{
+	/* inherited and child sandbox */
+	check_ptrace(_metadata, 1, 0, 1, 0);
+}
+
+TEST(ptrace_deny_with_parent_sandbox)
+{
+	/* parent sandbox */
+	check_ptrace(_metadata, 0, 1, 0, -1);
+}
+
+TEST(ptrace_deny_with_nested_and_parent_sandbox)
+{
+	/* inherited and parent sandbox */
+	check_ptrace(_metadata, 1, 1, 0, -1);
+}
+
+TEST(ptrace_deny_with_forked_sandbox)
+{
+	/* inherited, parent and child sandbox */
+	check_ptrace(_metadata, 1, 1, 1, -1);
+}
+
+TEST(ptrace_deny_with_sibling_sandbox)
+{
+	/* parent and child sandbox */
+	check_ptrace(_metadata, 0, 1, 1, -1);
+}
+
+TEST_HARNESS_MAIN
-- 
2.22.0


^ permalink raw reply related

* [PATCH bpf-next v10 01/10] fs,security: Add a new file access type: MAY_CHROOT
From: Mickaël Salaün @ 2019-07-21 21:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: Mickaël Salaün, Alexander Viro, Alexei Starovoitov,
	Andrew Morton, Andy Lutomirski, Arnaldo Carvalho de Melo,
	Casey Schaufler, Daniel Borkmann, David Drysdale,
	David S . Miller, Eric W . Biederman, James Morris, Jann Horn,
	John Johansen, Jonathan Corbet, Kees Cook, Michael Kerrisk,
	Mickaël Salaün, Paul Moore, Sargun Dhillon,
	Serge E . Hallyn, Shuah Khan, Stephen Smalley, Tejun Heo,
	Tetsuo Handa, Thomas Graf, Tycho Andersen, Will Drewry,
	kernel-hardening, linux-api, linux-fsdevel, linux-security-module,
	netdev
In-Reply-To: <20190721213116.23476-1-mic@digikod.net>

For compatibility reason, MAY_CHROOT is always set with MAY_CHDIR.
However, this new flag enable to differentiate a chdir form a chroot.

This is needed for the Landlock LSM to be able to evaluate a new root
directory.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: James Morris <jmorris@namei.org>
Cc: John Johansen <john.johansen@canonical.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: linux-fsdevel@vger.kernel.org
---
 fs/open.c          | 3 ++-
 include/linux/fs.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..e8767318fd03 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -494,7 +494,8 @@ int ksys_chroot(const char __user *filename)
 	if (error)
 		goto out;
 
-	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+	error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR |
+			MAY_CHROOT);
 	if (error)
 		goto dput_and_out;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75f2ed289a3f..7a0d92b1da85 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -99,6 +99,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define MAY_CHDIR		0x00000040
 /* called from RCU mode, don't block */
 #define MAY_NOT_BLOCK		0x00000080
+#define MAY_CHROOT		0x00000100
 
 /*
  * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
-- 
2.22.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox