All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [f2fs-dev] [PATCH 3/6] f2fs: do not expose unwritten blocks to user by DIO
From: Chao Yu @ 2022-01-05 13:19 UTC (permalink / raw)
  To: Jaegeuk Kim, linux-kernel, linux-f2fs-devel
In-Reply-To: <20220104212419.1879225-3-jaegeuk@kernel.org>

On 2022/1/5 5:24, Jaegeuk Kim wrote:
> DIO preallocates physical blocks before writing data, but if an error occurs
> or power-cut happens, we can see block contents from the disk. This patch tries
> to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
> unwritten blocks from error or power-cut.
> 
> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>

Reviewed-by: Chao Yu <chao@kernel.org>

Thanks,

^ permalink raw reply

* Re: [meta-freescale] How to configure eth0 on fslc-image-network-full-cmdline
From: Fabio Estevam @ 2022-01-05 13:18 UTC (permalink / raw)
  To: Leo; +Cc: meta-freescale
In-Reply-To: <q58T.1637188857419907534.bliW@lists.yoctoproject.org>

Hi Leo,

On Wed, Nov 17, 2021 at 7:40 PM Leo <Leo.E.Meza@gmail.com> wrote:
>
> Hello,
>
> I'm using a wandboard (rev d1) and building the fslc-image-network-full-cmdline image (honister branch). I've got ethernet wired to a switch on my home network, but I cannot figure out how to set up this build to bring up eth0 via DHCP. Any help would be appreciated. Is this image supposed to bring up eth0 right out of the box?

There is a recent fix in 5.15.13 for Ethernet on wandboard
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/arch/arm/boot/dts/imx6qdl-wandboard.dtsi?h=v5.15.13&id=ef73e3b650b7b1ad4080b8e7aeba5e36e19f0932

After applying this fix, I can confirm Ethernet is functional.

To get an IP address via DHCP:
udhcpc -i eth0


^ permalink raw reply

* [PATCH v2 net-next 3/3] net: dsa: remove cross-chip support for HSR
From: Vladimir Oltean @ 2022-01-05 13:18 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Jakub Kicinski, Andrew Lunn, Vivien Didelot,
	Florian Fainelli, George McCollister
In-Reply-To: <20220105131813.2647558-1-vladimir.oltean@nxp.com>

The cross-chip notifiers for HSR are bypass operations, meaning that
even though all switches in a tree are notified, only the switch
specified in the info structure is targeted.

We can eliminate the unnecessary complexity by deleting the cross-chip
notifier logic and calling the ds->ops straight from port.c.

Cc: George McCollister <george.mccollister@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: George McCollister <george.mccollister@gmail.com>
---
v1->v2:
- delete leftover definition of struct dsa_notifier_hsr_info
- guard against absence of ds->ops->port_hsr_join and
  ds->ops->port_hsr_leave

 net/dsa/dsa_priv.h |  9 ---------
 net/dsa/port.c     | 29 +++++++++++++----------------
 net/dsa/switch.c   | 24 ------------------------
 3 files changed, 13 insertions(+), 49 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c593d56c94b3..760306f0012f 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -25,8 +25,6 @@ enum {
 	DSA_NOTIFIER_FDB_DEL,
 	DSA_NOTIFIER_HOST_FDB_ADD,
 	DSA_NOTIFIER_HOST_FDB_DEL,
-	DSA_NOTIFIER_HSR_JOIN,
-	DSA_NOTIFIER_HSR_LEAVE,
 	DSA_NOTIFIER_LAG_CHANGE,
 	DSA_NOTIFIER_LAG_JOIN,
 	DSA_NOTIFIER_LAG_LEAVE,
@@ -125,13 +123,6 @@ struct dsa_switchdev_event_work {
 	bool host_addr;
 };
 
-/* DSA_NOTIFIER_HSR_* */
-struct dsa_notifier_hsr_info {
-	struct net_device *hsr;
-	int sw_index;
-	int port;
-};
-
 struct dsa_slave_priv {
 	/* Copy of CPU port xmit for faster access in slave transmit hot path */
 	struct sk_buff *	(*xmit)(struct sk_buff *skb,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 05be4577b044..bd78192e0e47 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1317,16 +1317,15 @@ EXPORT_SYMBOL_GPL(dsa_port_get_phy_sset_count);
 
 int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
 {
-	struct dsa_notifier_hsr_info info = {
-		.sw_index = dp->ds->index,
-		.port = dp->index,
-		.hsr = hsr,
-	};
+	struct dsa_switch *ds = dp->ds;
 	int err;
 
+	if (!ds->ops->port_hsr_join)
+		return -EOPNOTSUPP;
+
 	dp->hsr_dev = hsr;
 
-	err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_JOIN, &info);
+	err = ds->ops->port_hsr_join(ds, dp->index, hsr);
 	if (err)
 		dp->hsr_dev = NULL;
 
@@ -1335,20 +1334,18 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr)
 
 void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
 {
-	struct dsa_notifier_hsr_info info = {
-		.sw_index = dp->ds->index,
-		.port = dp->index,
-		.hsr = hsr,
-	};
+	struct dsa_switch *ds = dp->ds;
 	int err;
 
 	dp->hsr_dev = NULL;
 
-	err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
-	if (err)
-		dev_err(dp->ds->dev,
-			"port %d failed to notify DSA_NOTIFIER_HSR_LEAVE: %pe\n",
-			dp->index, ERR_PTR(err));
+	if (ds->ops->port_hsr_leave) {
+		err = ds->ops->port_hsr_leave(ds, dp->index, hsr);
+		if (err)
+			dev_err(dp->ds->dev,
+				"port %d failed to leave HSR %s: %pe\n",
+				dp->index, hsr->name, ERR_PTR(err));
+	}
 }
 
 int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index a164ec02b4e9..e3c7d2627a61 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -437,24 +437,6 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	return dsa_port_do_fdb_del(dp, info->addr, info->vid);
 }
 
-static int dsa_switch_hsr_join(struct dsa_switch *ds,
-			       struct dsa_notifier_hsr_info *info)
-{
-	if (ds->index == info->sw_index && ds->ops->port_hsr_join)
-		return ds->ops->port_hsr_join(ds, info->port, info->hsr);
-
-	return -EOPNOTSUPP;
-}
-
-static int dsa_switch_hsr_leave(struct dsa_switch *ds,
-				struct dsa_notifier_hsr_info *info)
-{
-	if (ds->index == info->sw_index && ds->ops->port_hsr_leave)
-		return ds->ops->port_hsr_leave(ds, info->port, info->hsr);
-
-	return -EOPNOTSUPP;
-}
-
 static int dsa_switch_lag_change(struct dsa_switch *ds,
 				 struct dsa_notifier_lag_info *info)
 {
@@ -729,12 +711,6 @@ static int dsa_switch_event(struct notifier_block *nb,
 	case DSA_NOTIFIER_HOST_FDB_DEL:
 		err = dsa_switch_host_fdb_del(ds, info);
 		break;
-	case DSA_NOTIFIER_HSR_JOIN:
-		err = dsa_switch_hsr_join(ds, info);
-		break;
-	case DSA_NOTIFIER_HSR_LEAVE:
-		err = dsa_switch_hsr_leave(ds, info);
-		break;
 	case DSA_NOTIFIER_LAG_CHANGE:
 		err = dsa_switch_lag_change(ds, info);
 		break;
-- 
2.25.1


^ permalink raw reply related

* [PATCH v2 net-next 2/3] net: dsa: remove cross-chip support for MRP
From: Vladimir Oltean @ 2022-01-05 13:18 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Jakub Kicinski, Andrew Lunn, Vivien Didelot,
	Florian Fainelli, Horatiu Vultur
In-Reply-To: <20220105131813.2647558-1-vladimir.oltean@nxp.com>

The cross-chip notifiers for MRP are bypass operations, meaning that
even though all switches in a tree are notified, only the switch
specified in the info structure is targeted.

We can eliminate the unnecessary complexity by deleting the cross-chip
notifier logic and calling the ds->ops straight from port.c.

Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
v1->v2: delete leftover definitions of struct dsa_notifier_mrp_info and
        struct dsa_notifier_mrp_ring_role_info.

 net/dsa/dsa_priv.h | 18 -------------
 net/dsa/port.c     | 44 +++++++++++++++----------------
 net/dsa/switch.c   | 64 ----------------------------------------------
 3 files changed, 20 insertions(+), 106 deletions(-)

diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index b5ae21f172a8..c593d56c94b3 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -40,10 +40,6 @@ enum {
 	DSA_NOTIFIER_TAG_PROTO,
 	DSA_NOTIFIER_TAG_PROTO_CONNECT,
 	DSA_NOTIFIER_TAG_PROTO_DISCONNECT,
-	DSA_NOTIFIER_MRP_ADD,
-	DSA_NOTIFIER_MRP_DEL,
-	DSA_NOTIFIER_MRP_ADD_RING_ROLE,
-	DSA_NOTIFIER_MRP_DEL_RING_ROLE,
 	DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
 	DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
 };
@@ -107,20 +103,6 @@ struct dsa_notifier_tag_proto_info {
 	const struct dsa_device_ops *tag_ops;
 };
 
-/* DSA_NOTIFIER_MRP_* */
-struct dsa_notifier_mrp_info {
-	const struct switchdev_obj_mrp *mrp;
-	int sw_index;
-	int port;
-};
-
-/* DSA_NOTIFIER_MRP_* */
-struct dsa_notifier_mrp_ring_role_info {
-	const struct switchdev_obj_ring_role_mrp *mrp;
-	int sw_index;
-	int port;
-};
-
 /* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
 struct dsa_notifier_tag_8021q_vlan_info {
 	int tree_index;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 05677e016982..05be4577b044 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -907,49 +907,45 @@ int dsa_port_vlan_del(struct dsa_port *dp,
 int dsa_port_mrp_add(const struct dsa_port *dp,
 		     const struct switchdev_obj_mrp *mrp)
 {
-	struct dsa_notifier_mrp_info info = {
-		.sw_index = dp->ds->index,
-		.port = dp->index,
-		.mrp = mrp,
-	};
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->port_mrp_add)
+		return -EOPNOTSUPP;
 
-	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD, &info);
+	return ds->ops->port_mrp_add(ds, dp->index, mrp);
 }
 
 int dsa_port_mrp_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_mrp *mrp)
 {
-	struct dsa_notifier_mrp_info info = {
-		.sw_index = dp->ds->index,
-		.port = dp->index,
-		.mrp = mrp,
-	};
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->port_mrp_del)
+		return -EOPNOTSUPP;
 
-	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL, &info);
+	return ds->ops->port_mrp_del(ds, dp->index, mrp);
 }
 
 int dsa_port_mrp_add_ring_role(const struct dsa_port *dp,
 			       const struct switchdev_obj_ring_role_mrp *mrp)
 {
-	struct dsa_notifier_mrp_ring_role_info info = {
-		.sw_index = dp->ds->index,
-		.port = dp->index,
-		.mrp = mrp,
-	};
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->port_mrp_add_ring_role)
+		return -EOPNOTSUPP;
 
-	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_ADD_RING_ROLE, &info);
+	return ds->ops->port_mrp_add_ring_role(ds, dp->index, mrp);
 }
 
 int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
 			       const struct switchdev_obj_ring_role_mrp *mrp)
 {
-	struct dsa_notifier_mrp_ring_role_info info = {
-		.sw_index = dp->ds->index,
-		.port = dp->index,
-		.mrp = mrp,
-	};
+	struct dsa_switch *ds = dp->ds;
+
+	if (!ds->ops->port_mrp_del_ring_role)
+		return -EOPNOTSUPP;
 
-	return dsa_port_notify(dp, DSA_NOTIFIER_MRP_DEL_RING_ROLE, &info);
+	return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp);
 }
 
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 260d8e7d6e5a..a164ec02b4e9 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -701,58 +701,6 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds,
 	return 0;
 }
 
-static int dsa_switch_mrp_add(struct dsa_switch *ds,
-			      struct dsa_notifier_mrp_info *info)
-{
-	if (!ds->ops->port_mrp_add)
-		return -EOPNOTSUPP;
-
-	if (ds->index == info->sw_index)
-		return ds->ops->port_mrp_add(ds, info->port, info->mrp);
-
-	return 0;
-}
-
-static int dsa_switch_mrp_del(struct dsa_switch *ds,
-			      struct dsa_notifier_mrp_info *info)
-{
-	if (!ds->ops->port_mrp_del)
-		return -EOPNOTSUPP;
-
-	if (ds->index == info->sw_index)
-		return ds->ops->port_mrp_del(ds, info->port, info->mrp);
-
-	return 0;
-}
-
-static int
-dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
-			     struct dsa_notifier_mrp_ring_role_info *info)
-{
-	if (!ds->ops->port_mrp_add_ring_role)
-		return -EOPNOTSUPP;
-
-	if (ds->index == info->sw_index)
-		return ds->ops->port_mrp_add_ring_role(ds, info->port,
-						       info->mrp);
-
-	return 0;
-}
-
-static int
-dsa_switch_mrp_del_ring_role(struct dsa_switch *ds,
-			     struct dsa_notifier_mrp_ring_role_info *info)
-{
-	if (!ds->ops->port_mrp_del_ring_role)
-		return -EOPNOTSUPP;
-
-	if (ds->index == info->sw_index)
-		return ds->ops->port_mrp_del_ring_role(ds, info->port,
-						       info->mrp);
-
-	return 0;
-}
-
 static int dsa_switch_event(struct notifier_block *nb,
 			    unsigned long event, void *info)
 {
@@ -826,18 +774,6 @@ static int dsa_switch_event(struct notifier_block *nb,
 	case DSA_NOTIFIER_TAG_PROTO_DISCONNECT:
 		err = dsa_switch_disconnect_tag_proto(ds, info);
 		break;
-	case DSA_NOTIFIER_MRP_ADD:
-		err = dsa_switch_mrp_add(ds, info);
-		break;
-	case DSA_NOTIFIER_MRP_DEL:
-		err = dsa_switch_mrp_del(ds, info);
-		break;
-	case DSA_NOTIFIER_MRP_ADD_RING_ROLE:
-		err = dsa_switch_mrp_add_ring_role(ds, info);
-		break;
-	case DSA_NOTIFIER_MRP_DEL_RING_ROLE:
-		err = dsa_switch_mrp_del_ring_role(ds, info);
-		break;
 	case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD:
 		err = dsa_switch_tag_8021q_vlan_add(ds, info);
 		break;
-- 
2.25.1


^ permalink raw reply related

* [PATCH v2 net-next 1/3] net: dsa: fix incorrect function pointer check for MRP ring roles
From: Vladimir Oltean @ 2022-01-05 13:18 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Jakub Kicinski, Andrew Lunn, Vivien Didelot,
	Florian Fainelli, Horatiu Vultur
In-Reply-To: <20220105131813.2647558-1-vladimir.oltean@nxp.com>

The cross-chip notifier boilerplate code meant to check the presence of
ds->ops->port_mrp_add_ring_role before calling it, but checked
ds->ops->port_mrp_add instead, before calling
ds->ops->port_mrp_add_ring_role.

Therefore, a driver which implements one operation but not the other
would trigger a NULL pointer dereference.

There isn't any such driver in DSA yet, so there is no reason to
backport the change. Issue found through code inspection.

Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Fixes: c595c4330da0 ("net: dsa: add MRP support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
v1->v2: patch is new

 net/dsa/switch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 393f2d8a860a..260d8e7d6e5a 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -729,7 +729,7 @@ static int
 dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
 			     struct dsa_notifier_mrp_ring_role_info *info)
 {
-	if (!ds->ops->port_mrp_add)
+	if (!ds->ops->port_mrp_add_ring_role)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
@@ -743,7 +743,7 @@ static int
 dsa_switch_mrp_del_ring_role(struct dsa_switch *ds,
 			     struct dsa_notifier_mrp_ring_role_info *info)
 {
-	if (!ds->ops->port_mrp_del)
+	if (!ds->ops->port_mrp_del_ring_role)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-- 
2.25.1


^ permalink raw reply related

* [PATCH v2 net-next 0/3] DSA cross-chip notifier cleanup
From: Vladimir Oltean @ 2022-01-05 13:18 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Jakub Kicinski, Andrew Lunn, Vivien Didelot,
	Florian Fainelli, Horatiu Vultur, George McCollister

This series deletes the no-op cross-chip notifier support for MRP and
HSR, features which were introduced relatively recently and did not get
full review at the time. The new code is functionally equivalent, but
simpler.

Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Cc: George McCollister <george.mccollister@gmail.com>

Vladimir Oltean (3):
  net: dsa: fix incorrect function pointer check for MRP ring roles
  net: dsa: remove cross-chip support for MRP
  net: dsa: remove cross-chip support for HSR

 net/dsa/dsa_priv.h | 27 --------------
 net/dsa/port.c     | 73 +++++++++++++++++---------------------
 net/dsa/switch.c   | 88 ----------------------------------------------
 3 files changed, 33 insertions(+), 155 deletions(-)

-- 
2.25.1


^ permalink raw reply

* [PATCH] nitrogen6x: add missing pinctrl to fix mmc
From: Gary Bisson @ 2022-01-05 13:17 UTC (permalink / raw)
  To: u-boot; +Cc: troy.kisky, Gary Bisson

Since commit f7ac30b042d, the pin muxing for mmc was removed from the
board file to be managed by DM_MMC which requires PINCTRL to work. It
made the change for sabrelite but nitrogen configs were forgotten.

Signed-off-by: Gary Bisson <gary.bisson@boundarydevices.com>
---
 configs/nitrogen6dl2g_defconfig | 2 ++
 configs/nitrogen6dl_defconfig   | 2 ++
 configs/nitrogen6q2g_defconfig  | 2 ++
 configs/nitrogen6q_defconfig    | 2 ++
 configs/nitrogen6s1g_defconfig  | 2 ++
 configs/nitrogen6s_defconfig    | 2 ++
 6 files changed, 12 insertions(+)

diff --git a/configs/nitrogen6dl2g_defconfig b/configs/nitrogen6dl2g_defconfig
index 593a43e5e79..20c5d302577 100644
--- a/configs/nitrogen6dl2g_defconfig
+++ b/configs/nitrogen6dl2g_defconfig
@@ -68,6 +68,8 @@ CONFIG_PHY_MICREL=y
 CONFIG_PHY_MICREL_KSZ90X1=y
 CONFIG_FEC_MXC=y
 CONFIG_MII=y
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_IMX6=y
 CONFIG_MXC_UART=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
diff --git a/configs/nitrogen6dl_defconfig b/configs/nitrogen6dl_defconfig
index 4bcc6756801..796bd66bc23 100644
--- a/configs/nitrogen6dl_defconfig
+++ b/configs/nitrogen6dl_defconfig
@@ -68,6 +68,8 @@ CONFIG_PHY_MICREL=y
 CONFIG_PHY_MICREL_KSZ90X1=y
 CONFIG_FEC_MXC=y
 CONFIG_MII=y
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_IMX6=y
 CONFIG_MXC_UART=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
diff --git a/configs/nitrogen6q2g_defconfig b/configs/nitrogen6q2g_defconfig
index 76fc53d5154..b42220db064 100644
--- a/configs/nitrogen6q2g_defconfig
+++ b/configs/nitrogen6q2g_defconfig
@@ -70,6 +70,8 @@ CONFIG_PHY_MICREL=y
 CONFIG_PHY_MICREL_KSZ90X1=y
 CONFIG_FEC_MXC=y
 CONFIG_MII=y
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_IMX6=y
 CONFIG_MXC_UART=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
diff --git a/configs/nitrogen6q_defconfig b/configs/nitrogen6q_defconfig
index fca3e5f5311..cc085594967 100644
--- a/configs/nitrogen6q_defconfig
+++ b/configs/nitrogen6q_defconfig
@@ -70,6 +70,8 @@ CONFIG_PHY_MICREL=y
 CONFIG_PHY_MICREL_KSZ90X1=y
 CONFIG_FEC_MXC=y
 CONFIG_MII=y
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_IMX6=y
 CONFIG_MXC_UART=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
diff --git a/configs/nitrogen6s1g_defconfig b/configs/nitrogen6s1g_defconfig
index 8b720b0d600..17133c5cd60 100644
--- a/configs/nitrogen6s1g_defconfig
+++ b/configs/nitrogen6s1g_defconfig
@@ -68,6 +68,8 @@ CONFIG_PHY_MICREL=y
 CONFIG_PHY_MICREL_KSZ90X1=y
 CONFIG_FEC_MXC=y
 CONFIG_MII=y
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_IMX6=y
 CONFIG_MXC_UART=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
diff --git a/configs/nitrogen6s_defconfig b/configs/nitrogen6s_defconfig
index a9d239e9be9..242580e3e9f 100644
--- a/configs/nitrogen6s_defconfig
+++ b/configs/nitrogen6s_defconfig
@@ -68,6 +68,8 @@ CONFIG_PHY_MICREL=y
 CONFIG_PHY_MICREL_KSZ90X1=y
 CONFIG_FEC_MXC=y
 CONFIG_MII=y
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_IMX6=y
 CONFIG_MXC_UART=y
 CONFIG_SPI=y
 CONFIG_DM_SPI=y
-- 
2.34.1


^ permalink raw reply related

* Re: [PATCH] docs/system/ppc: Merge the PEF information into the pseries page
From: Daniel Henrique Barboza @ 2022-01-05 12:52 UTC (permalink / raw)
  To: Thomas Huth, Cédric Le Goater, qemu-devel
  Cc: Leonardo Garcia, qemu-ppc, Greg Kurz, David Gibson
In-Reply-To: <20220105103232.405204-1-thuth@redhat.com>



On 1/5/22 07:32, Thomas Huth wrote:
> The Protected Execution Facility is only available with the pseries
> machine, so let's merge the old ASCII text into the new RST file now.
> 
> Signed-off-by: Thomas Huth <thuth@redhat.com>
> ---

Well observed. Thanks for fixing it.


Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>


>   docs/papr-pef.txt           | 30 ------------------------------
>   docs/system/ppc/pseries.rst | 33 +++++++++++++++++++++++++++++++++
>   2 files changed, 33 insertions(+), 30 deletions(-)
>   delete mode 100644 docs/papr-pef.txt
> 
> diff --git a/docs/papr-pef.txt b/docs/papr-pef.txt
> deleted file mode 100644
> index 72550e9bf8..0000000000
> --- a/docs/papr-pef.txt
> +++ /dev/null
> @@ -1,30 +0,0 @@
> -POWER (PAPR) Protected Execution Facility (PEF)
> -===============================================
> -
> -Protected Execution Facility (PEF), also known as Secure Guest support
> -is a feature found on IBM POWER9 and POWER10 processors.
> -
> -If a suitable firmware including an Ultravisor is installed, it adds
> -an extra memory protection mode to the CPU.  The ultravisor manages a
> -pool of secure memory which cannot be accessed by the hypervisor.
> -
> -When this feature is enabled in QEMU, a guest can use ultracalls to
> -enter "secure mode".  This transfers most of its memory to secure
> -memory, where it cannot be eavesdropped by a compromised hypervisor.
> -
> -Launching
> ----------
> -
> -To launch a guest which will be permitted to enter PEF secure mode:
> -
> -# ${QEMU} \
> -    -object pef-guest,id=pef0 \
> -    -machine confidential-guest-support=pef0 \
> -    ...
> -
> -Live Migration
> -----------------
> -
> -Live migration is not yet implemented for PEF guests.  For
> -consistency, we currently prevent migration if the PEF feature is
> -enabled, whether or not the guest has actually entered secure mode.
> diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst
> index 72e315eff6..16394fa521 100644
> --- a/docs/system/ppc/pseries.rst
> +++ b/docs/system/ppc/pseries.rst
> @@ -230,6 +230,39 @@ nested. Combinations not shown in the table are not available.
>   
>   .. [3] Introduced on Power10 machines.
>   
> +
> +POWER (PAPR) Protected Execution Facility (PEF)
> +-----------------------------------------------
> +
> +Protected Execution Facility (PEF), also known as Secure Guest support
> +is a feature found on IBM POWER9 and POWER10 processors.
> +
> +If a suitable firmware including an Ultravisor is installed, it adds
> +an extra memory protection mode to the CPU.  The ultravisor manages a
> +pool of secure memory which cannot be accessed by the hypervisor.
> +
> +When this feature is enabled in QEMU, a guest can use ultracalls to
> +enter "secure mode".  This transfers most of its memory to secure
> +memory, where it cannot be eavesdropped by a compromised hypervisor.
> +
> +Launching
> +^^^^^^^^^
> +
> +To launch a guest which will be permitted to enter PEF secure mode::
> +
> +  $ qemu-system-ppc64 \
> +      -object pef-guest,id=pef0 \
> +      -machine confidential-guest-support=pef0 \
> +      ...
> +
> +Live Migration
> +^^^^^^^^^^^^^^
> +
> +Live migration is not yet implemented for PEF guests.  For
> +consistency, QEMU currently prevents migration if the PEF feature is
> +enabled, whether or not the guest has actually entered secure mode.
> +
> +
>   Maintainer contact information
>   ------------------------------
>   


^ permalink raw reply

* Re: [PATCH net-next v2] net/smc: Reduce overflow of smc clcsock listen queue
From: Karsten Graul @ 2022-01-05 13:17 UTC (permalink / raw)
  To: dust.li, D. Wythe; +Cc: kuba, davem, netdev, linux-s390, linux-rdma
In-Reply-To: <20220105085748.GD31579@linux.alibaba.com>

On 05/01/2022 09:57, dust.li wrote:
> On Wed, Jan 05, 2022 at 12:40:49PM +0800, D. Wythe wrote:
> I'm thinking maybe we can actively fall back to TCP in this case ? Not
> sure if this is a good idea.

I think it's a good decision to switch new connections to use the TCP fallback when the
current queue of connections waiting for an SMC handshake is too large.
With this the application is able to accept all incoming connections and they are not
dropped. The only thing that is different compared to TCP is that the order of the
accepted connections is changed: connections that came in later might reach the user space
application earlier than connections that still run the SMC handshake processing.
But I think that is semantically okay.

^ permalink raw reply

* Re: mkimage_fit_atf.sh: not found
From: Tom Rini @ 2022-01-05 13:16 UTC (permalink / raw)
  To: Fabio Estevam
  Cc: Tim Harvey, u-boot, Stefano Babic, Schrempf Frieder, Adam Ford,
	Marcel Ziswiler, Jagan Teki
In-Reply-To: <CAOMZO5B857fDgyBtBj0vt0Bmh5MfW5zrCQJeN8yUNtn2-bn8MA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1666 bytes --]

On Tue, Jan 04, 2022 at 07:54:38PM -0300, Fabio Estevam wrote:
> Hi Tim,
> 
> On Tue, Jan 4, 2022 at 7:48 PM Tim Harvey <tharvey@gateworks.com> wrote:
> >
> > Stefano and Fabio,
> >
> > I'm seeing the imx8mm_venice_defconfig target failing to build on
> > master due to mkimage_fit_atf.sh not found:
> > ./"arch/arm/mach-imx/mkimage_fit_atf.sh" \
> > arch/arm/dts/imx8mm-venice-gw71xx-0x.dtb
> > arch/arm/dts/imx8mm-venice-gw72xx-0x.dtb
> > arch/arm/dts/imx8mm-venice-gw73xx-0x.dtb
> > arch/arm/dts/imx8mm-venice-gw7901.dtb
> > arch/arm/dts/imx8mm-venice-gw7902.dtb > u-boot.its
> > /bin/sh: 1: ./arch/arm/mach-imx/mkimage_fit_atf.sh: not found
> >
> > As far as I can tell the other boards that are still using
> > SPL_FIT_GENERATOR also fail due to this (ie imx8mm_beacon_defconfig,
> > imx8mq_evk_defconfig, imx8mm-icore-mx8mm-edimm2.2_defconfig, etc).
> >
> > What is the state of the binman conversion? I submitted a series to
> > convert my boards to binman and it has just been sitting without any
> > response for months now [1].
> >
> > I'm not sure when the above breakage occurred but the conversion to
> > binman resolves it and other things.
> >
> > Please let me know what you expect me to do to resolve this as there
> > is a release pending.
> >
> > Best regards,
> >
> > Tim
> > [1] https://patchwork.ozlabs.org/project/uboot/patch/20211006201700.3018-1-tharvey@gateworks.com/
> 
> Stefano is on vacation. Tom, would you mind picking Tim's series?

Looking at the thread, there's a few pre-requisite patches?  And is this
also one of the "switch to binman" patches that then fails in CI?
Thanks.

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]

^ permalink raw reply

* [PATCH 1/2] platform: sifive_fu740: do not use a global in da9063_reset/shutdown
From: Nikita Shubin @ 2022-01-05 13:16 UTC (permalink / raw)
  To: opensbi
In-Reply-To: <20220105072039.2609845-1-aurelien@aurel32.net>

Hello Aurelien!

On Wed,  5 Jan 2022 08:20:38 +0100
Aurelien Jarno <aurelien@aurel32.net> wrote:

Seems good to me.

Reviewed-by: Nikita Shubin <n.shubin@yadro.com>

> da9063_reset() and da9063_shutdown() take the chip address in argument
> (like similar functions), but in practice use the da9063 global struct
> instead. Fix that.
> 
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
>  platform/generic/sifive_fu740.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/platform/generic/sifive_fu740.c
> b/platform/generic/sifive_fu740.c index 333b3fb..866e924 100644
> --- a/platform/generic/sifive_fu740.c
> +++ b/platform/generic/sifive_fu740.c
> @@ -81,32 +81,32 @@ static inline int da9063_sanity_check(struct
> i2c_adapter *adap, uint32_t reg) 
>  static inline int da9063_shutdown(struct i2c_adapter *adap, uint32_t
> reg) {
> -	int rc = i2c_adapter_reg_write(adap, da9063.reg,
> +	int rc = i2c_adapter_reg_write(adap, reg,
>  					DA9063_REG_PAGE_CON, 0x00);
>  
>  	if (rc)
>  		return rc;
>  
> -	return i2c_adapter_reg_write(adap, da9063.reg,
> +	return i2c_adapter_reg_write(adap, reg,
>  				     DA9063_REG_CONTROL_F,
>  				     DA9063_CONTROL_F_SHUTDOWN);
>  }
>  
>  static inline int da9063_reset(struct i2c_adapter *adap, uint32_t
> reg) {
> -	int rc = i2c_adapter_reg_write(adap, da9063.reg,
> +	int rc = i2c_adapter_reg_write(adap, reg,
>  					DA9063_REG_PAGE_CON, 0x00);
>  
>  	if (rc)
>  		return rc;
>  
> -	rc = i2c_adapter_reg_write(adap, da9063.reg,
> +	rc = i2c_adapter_reg_write(adap, reg,
>  				   DA9063_REG_CONTROL_F,
>  				   DA9063_CONTROL_F_WAKEUP);
>  	if (rc)
>  		return rc;
>  
> -	return i2c_adapter_reg_write(adap, da9063.reg,
> +	return i2c_adapter_reg_write(adap, reg,
>  				DA9063_REG_CONTROL_A,
>  				DA9063_CONTROL_A_M_POWER1_EN |
>  				DA9063_CONTROL_A_M_POWER_EN |



^ permalink raw reply

* drivers/remoteproc/remoteproc_debugfs.c:395:10: warning: incompatible integer to pointer conversion returning 'long' from a function with result type 'struct dentry *'
From: kernel test robot @ 2022-01-05 13:15 UTC (permalink / raw)
  To: Miaoqian Lin; +Cc: llvm, kbuild-all, linux-kernel, 0day robot

tree:   https://github.com/0day-ci/linux/commits/UPDATE-20220105-144328/Miaoqian-Lin/remoteproc-Fix-NULL-vs-IS_ERR-checking-in-rproc_create_trace_file/20211227-170725
head:   e2c26738760efa9568ce3fef6180fb4311e28d08
commit: e2c26738760efa9568ce3fef6180fb4311e28d08 remoteproc: Fix NULL vs IS_ERR() checking in rproc_create_trace_file
date:   6 hours ago
config: i386-randconfig-a016-20220105 (https://download.01.org/0day-ci/archive/20220105/202201052108.jYZEx2yr-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project d5b6e30ed3acad794dd0aec400e617daffc6cc3d)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/e2c26738760efa9568ce3fef6180fb4311e28d08
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review UPDATE-20220105-144328/Miaoqian-Lin/remoteproc-Fix-NULL-vs-IS_ERR-checking-in-rproc_create_trace_file/20211227-170725
        git checkout e2c26738760efa9568ce3fef6180fb4311e28d08
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/remoteproc/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/remoteproc/remoteproc_debugfs.c:395:10: warning: incompatible integer to pointer conversion returning 'long' from a function with result type 'struct dentry *' [-Wint-conversion]
                   return PTR_ERR(tfile);
                          ^~~~~~~~~~~~~~
   1 warning generated.


vim +395 drivers/remoteproc/remoteproc_debugfs.c

   385	
   386	struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
   387					       struct rproc_debug_trace *trace)
   388	{
   389		struct dentry *tfile;
   390	
   391		tfile = debugfs_create_file(name, 0400, rproc->dbg_dir, trace,
   392					    &trace_rproc_ops);
   393		if (IS_ERR(tfile)) {
   394			dev_err(&rproc->dev, "failed to create debugfs trace entry\n");
 > 395			return PTR_ERR(tfile);
   396		}
   397	
   398		return tfile;
   399	}
   400	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs
From: Christian König @ 2022-01-05 13:15 UTC (permalink / raw)
  To: Lazar, Lijo, Andrey Grodzovsky, dri-devel, amd-gfx; +Cc: horace.chen, Monk.Liu
In-Reply-To: <55065993-0e8d-30a5-b08f-733e5ba360b1@amd.com>

Am 05.01.22 um 14:11 schrieb Lazar, Lijo:
> On 1/5/2022 6:01 PM, Christian König wrote:
>> Am 05.01.22 um 10:54 schrieb Lazar, Lijo:
>>> On 12/23/2021 3:35 AM, Andrey Grodzovsky wrote:
>>>> Use reset domain wq also for non TDR gpu recovery triggers
>>>> such as sysfs and RAS. We must serialize all possible
>>>> GPU recoveries to guarantee no concurrency there.
>>>> For TDR call the original recovery function directly since
>>>> it's already executed from within the wq. For others just
>>>> use a wrapper to queue work and wait on it to finish.
>>>>
>>>> v2: Rename to amdgpu_recover_work_struct
>>>>
>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 
>>>> +++++++++++++++++++++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  2 +-
>>>>   3 files changed, 35 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index b5ff76aae7e0..8e96b9a14452 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct 
>>>> amdgpu_device *adev);
>>>>   bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
>>>>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>>                     struct amdgpu_job* job);
>>>> +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>>>> +                  struct amdgpu_job *job);
>>>>   void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
>>>>   int amdgpu_device_pci_reset(struct amdgpu_device *adev);
>>>>   bool amdgpu_device_need_post(struct amdgpu_device *adev);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 7c063fd37389..258ec3c0b2af 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>>    * Returns 0 for success or an error on failure.
>>>>    */
>>>>   -int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>> +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>>>>                     struct amdgpu_job *job)
>>>>   {
>>>>       struct list_head device_list, *device_list_handle = NULL;
>>>> @@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct 
>>>> amdgpu_device *adev,
>>>>       return r;
>>>>   }
>>>>   +struct amdgpu_recover_work_struct {
>>>> +    struct work_struct base;
>>>> +    struct amdgpu_device *adev;
>>>> +    struct amdgpu_job *job;
>>>> +    int ret;
>>>> +};
>>>> +
>>>> +static void amdgpu_device_queue_gpu_recover_work(struct 
>>>> work_struct *work)
>>>> +{
>>>> +    struct amdgpu_recover_work_struct *recover_work = 
>>>> container_of(work, struct amdgpu_recover_work_struct, base);
>>>> +
>>>> +    recover_work->ret = 
>>>> amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
>>>> +}
>>>> +/*
>>>> + * Serialize gpu recover into reset domain single threaded wq
>>>> + */
>>>> +int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>> +                    struct amdgpu_job *job)
>>>> +{
>>>> +    struct amdgpu_recover_work_struct work = {.adev = adev, .job = 
>>>> job};
>>>> +
>>>> +    INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
>>>> +
>>>> +    if (!queue_work(adev->reset_domain.wq, &work.base))
>>>> +        return -EAGAIN;
>>>> +
>>>
>>> The decision to schedule a reset is made at this point. Subsequent 
>>> accesses to hardware may not be reliable. So should the flag 
>>> in_reset be set here itself rather than waiting for the work to 
>>> start execution?
>>
>> No, when we race and lose the VM is completely lost and probably 
>> restarted by the hypervisor.
>>
>> And when we race and win we properly set the flag before signaling 
>> the hypervisor that it can continue with the reset.
>>
>
> I was talking about baremetal case. When this was synchronous, 
> in_reset flag is set as one of the first things and amdgpu_in_reset is 
> checked to prevent further hardware accesses. This design only changes 
> the recover part and doesn't change the hardware perspective. 

> Potential accesses from other processes need to be blocked as soon as 
> we determine a reset is required.

That's an incorrect assumption.

Accessing the hardware is perfectly ok as long as the reset hasn't 
started yet. In other words even when the hardware is locked up you can 
still happily read/write registers or access the VRAM BAR.

Only while the hardware is actually performing a reset must we not 
touch it, or there might be unfortunate consequences (usually a complete 
system lockup).

Regards,
Christian.

> Are we expecting the work to be immediately executed and set the flags?
>
> Thanks,
> Lijo
>
>>> Also, what about having the reset_active or in_reset flag in the 
>>> reset_domain itself?
>>
>> Of hand that sounds like a good idea.
>>
>> Regards,
>> Christian.
>>
>>>
>>> Thanks,
>>> Lijo
>>>
>>>> +    flush_work(&work.base);
>>>> +
>>>> +    return work.ret;
>>>> +}
>>>> +
>>>>   /**
>>>>    * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
>>>>    *
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>> index bfc47bea23db..38c9fd7b7ad4 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>> @@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat 
>>>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>>             ti.process_name, ti.tgid, ti.task_name, ti.pid);
>>>>         if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>>> -        amdgpu_device_gpu_recover(ring->adev, job);
>>>> +        amdgpu_device_gpu_recover_imp(ring->adev, job);
>>>>       } else {
>>>>           drm_sched_suspend_timeout(&ring->sched);
>>>>           if (amdgpu_sriov_vf(adev))
>>>>
>>


^ permalink raw reply

* Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs
From: Christian König @ 2022-01-05 13:15 UTC (permalink / raw)
  To: Lazar, Lijo, Andrey Grodzovsky, dri-devel, amd-gfx
  Cc: horace.chen, daniel, Monk.Liu
In-Reply-To: <55065993-0e8d-30a5-b08f-733e5ba360b1@amd.com>

Am 05.01.22 um 14:11 schrieb Lazar, Lijo:
> On 1/5/2022 6:01 PM, Christian König wrote:
>> Am 05.01.22 um 10:54 schrieb Lazar, Lijo:
>>> On 12/23/2021 3:35 AM, Andrey Grodzovsky wrote:
>>>> Use reset domain wq also for non TDR gpu recovery trigers
>>>> such as sysfs and RAS. We must serialize all possible
>>>> GPU recoveries to gurantee no concurrency there.
>>>> For TDR call the original recovery function directly since
>>>> it's already executed from within the wq. For others just
>>>> use a wrapper to qeueue work and wait on it to finish.
>>>>
>>>> v2: Rename to amdgpu_recover_work_struct
>>>>
>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 
>>>> +++++++++++++++++++++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  2 +-
>>>>   3 files changed, 35 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index b5ff76aae7e0..8e96b9a14452 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct 
>>>> amdgpu_device *adev);
>>>>   bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
>>>>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>>                     struct amdgpu_job* job);
>>>> +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>>>> +                  struct amdgpu_job *job);
>>>>   void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
>>>>   int amdgpu_device_pci_reset(struct amdgpu_device *adev);
>>>>   bool amdgpu_device_need_post(struct amdgpu_device *adev);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 7c063fd37389..258ec3c0b2af 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>>    * Returns 0 for success or an error on failure.
>>>>    */
>>>>   -int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>> +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>>>>                     struct amdgpu_job *job)
>>>>   {
>>>>       struct list_head device_list, *device_list_handle = NULL;
>>>> @@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct 
>>>> amdgpu_device *adev,
>>>>       return r;
>>>>   }
>>>>   +struct amdgpu_recover_work_struct {
>>>> +    struct work_struct base;
>>>> +    struct amdgpu_device *adev;
>>>> +    struct amdgpu_job *job;
>>>> +    int ret;
>>>> +};
>>>> +
>>>> +static void amdgpu_device_queue_gpu_recover_work(struct 
>>>> work_struct *work)
>>>> +{
>>>> +    struct amdgpu_recover_work_struct *recover_work = 
>>>> container_of(work, struct amdgpu_recover_work_struct, base);
>>>> +
>>>> +    recover_work->ret = 
>>>> amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
>>>> +}
>>>> +/*
>>>> + * Serialize gpu recover into reset domain single threaded wq
>>>> + */
>>>> +int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>> +                    struct amdgpu_job *job)
>>>> +{
>>>> +    struct amdgpu_recover_work_struct work = {.adev = adev, .job = 
>>>> job};
>>>> +
>>>> +    INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
>>>> +
>>>> +    if (!queue_work(adev->reset_domain.wq, &work.base))
>>>> +        return -EAGAIN;
>>>> +
>>>
>>> The decision to schedule a reset is made at this point. Subsequent 
>>> accesses to hardware may not be reliable. So should the flag 
>>> in_reset be set here itself rather than waiting for the work to 
>>> start execution?
>>
>> No, when we race and lose the VM is completely lost and probably 
>> restarted by the hypervisor.
>>
>> And when we race and win we properly set the flag before signaling 
>> the hypervisor that it can continue with the reset.
>>
>
> I was talking about baremetal case. When this was synchronous, 
> in_reset flag is set as one of the first things and amdgpu_in_reset is 
> checked to prevent further hardware accesses. This design only changes 
> the recover part and doesn't change the hardware perspective. 

> Potential accesses from other processes need to be blocked as soon as 
> we determine a reset is required.

That's an incorrect assumption.

Accessing the hardware is perfectly ok as long as the reset hasn't 
started yet. In other words even when the hardware is locked up you can 
still happily read/write registers or access the VRAM BAR.

Only while the hardware is actually performing a reset must we not 
touch it, or there might be unfortunate consequences (usually a complete 
system lockup).

Regards,
Christian.

> Are we expecting the work to be immediately executed and set the flags?
>
> Thanks,
> Lijo
>
>>> Also, what about having the reset_active or in_reset flag in the 
>>> reset_domain itself?
>>
>> Of hand that sounds like a good idea.
>>
>> Regards,
>> Christian.
>>
>>>
>>> Thanks,
>>> Lijo
>>>
>>>> +    flush_work(&work.base);
>>>> +
>>>> +    return work.ret;
>>>> +}
>>>> +
>>>>   /**
>>>>    * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
>>>>    *
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>> index bfc47bea23db..38c9fd7b7ad4 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>>> @@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat 
>>>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>>             ti.process_name, ti.tgid, ti.task_name, ti.pid);
>>>>         if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>>> -        amdgpu_device_gpu_recover(ring->adev, job);
>>>> +        amdgpu_device_gpu_recover_imp(ring->adev, job);
>>>>       } else {
>>>>           drm_sched_suspend_timeout(&ring->sched);
>>>>           if (amdgpu_sriov_vf(adev))
>>>>
>>


^ permalink raw reply

* [PATCH v2 2/2] update-index: refresh should rewrite index in case of racy timestamps
From: Marc Strapetz via GitGitGadget @ 2022-01-05 13:15 UTC (permalink / raw)
  To: git; +Cc: Marc Strapetz, Marc Strapetz
In-Reply-To: <pull.1105.v2.git.1641388523.gitgitgadget@gmail.com>

From: Marc Strapetz <marc.strapetz@syntevo.com>

'git update-index --refresh' and '--really-refresh' should force writing
of the index file if racy timestamps have been encountered, as
'git status' already does [1].

Note that calling 'git update-index --refresh' still does not guarantee
that there will be no more racy timestamps afterwards (the same holds
true for 'git status'):

- calling 'git update-index --refresh' immediately after touching and
  adding a file may still leave racy timestamps if all three operations
  occur within the racy-tolerance (usually 1 second unless USE_NSEC has
  been defined)

- calling 'git update-index --refresh' for timestamps which are set into
  the future will leave them racy

To guarantee that such racy timestamps will be resolved would require to
wait until the system clock has passed beyond these timestamps and only
then write the index file. Especially for future timestamps, this does
not seem feasible because of possibly long delays/hangs.

[1] https://lore.kernel.org/git/d3dd805c-7c1d-30a9-6574-a7bfcb7fc013@syntevo.com/

Signed-off-by: Marc Strapetz <marc.strapetz@syntevo.com>
---
 builtin/update-index.c               | 11 +++++
 cache.h                              |  1 +
 read-cache.c                         |  2 +-
 t/t2108-update-index-refresh-racy.sh | 64 ++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100755 t/t2108-update-index-refresh-racy.sh

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 187203e8bb5..7e0a0d9bf80 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -787,6 +787,17 @@ static int refresh(struct refresh_params *o, unsigned int flag)
 	setup_work_tree();
 	read_cache();
 	*o->has_errors |= refresh_cache(o->flags | flag);
+	if (has_racy_timestamp(&the_index)) {
+		/*
+		 * Even if nothing else has changed, updating the file
+		 * increases the chance that racy timestamps become
+		 * non-racy, helping future run-time performance.
+		 * We do that even in case of "errors" returned by
+		 * refresh_cache() as these are no actual errors.
+		 * cmd_status() does the same.
+		 */
+		active_cache_changed |= SOMETHING_CHANGED;
+	}
 	return 0;
 }
 
diff --git a/cache.h b/cache.h
index cfba463aa97..dd1932e2d0e 100644
--- a/cache.h
+++ b/cache.h
@@ -891,6 +891,7 @@ void *read_blob_data_from_index(struct index_state *, const char *, unsigned lon
 #define CE_MATCH_IGNORE_FSMONITOR 0X20
 int is_racy_timestamp(const struct index_state *istate,
 		      const struct cache_entry *ce);
+int has_racy_timestamp(struct index_state *istate);
 int ie_match_stat(struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
 int ie_modified(struct index_state *, const struct cache_entry *, struct stat *, unsigned int);
 
diff --git a/read-cache.c b/read-cache.c
index cbe73f14e5e..ed297635a33 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2775,7 +2775,7 @@ static int repo_verify_index(struct repository *repo)
 	return verify_index_from(repo->index, repo->index_file);
 }
 
-static int has_racy_timestamp(struct index_state *istate)
+int has_racy_timestamp(struct index_state *istate)
 {
 	int entries = istate->cache_nr;
 	int i;
diff --git a/t/t2108-update-index-refresh-racy.sh b/t/t2108-update-index-refresh-racy.sh
new file mode 100755
index 00000000000..171c37ebec9
--- /dev/null
+++ b/t/t2108-update-index-refresh-racy.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+test_description='update-index refresh tests related to racy timestamps'
+
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+reset_files () {
+	echo content >file &&
+	echo content >other &&
+	test-tool chmtime =1234567890 file &&
+	test-tool chmtime =1234567890 other
+}
+
+update_assert_changed () {
+	test-tool chmtime =1234567890 .git/index &&
+	test_might_fail git update-index "$1" &&
+	test-tool chmtime --get .git/index >.git/out &&
+	! grep ^1234567890 .git/out
+}
+
+test_expect_success 'setup' '
+	reset_files &&
+	# we are calling reset_files() a couple of times during tests;
+	# test-tool chmtime does not change the ctime; to not weaken
+	# or even break our tests, disable ctime-checks entirely
+	git config core.trustctime false &&
+	git add file other &&
+	git commit -m "initial import"
+'
+
+test_expect_success '--refresh has no racy timestamps to fix' '
+	reset_files &&
+	test-tool chmtime =1234567891 .git/index &&
+	git update-index --refresh &&
+	test-tool chmtime --get .git/index >.git/out &&
+	grep ^1234567891 .git/out
+'
+
+test_expect_success '--refresh should fix racy timestamp' '
+	reset_files &&
+	update_assert_changed --refresh
+'
+
+test_expect_success '--really-refresh should fix racy timestamp' '
+	reset_files &&
+	update_assert_changed --really-refresh
+'
+
+test_expect_success '--refresh should fix racy timestamp if other file needs update' '
+	reset_files &&
+	echo content2 >other &&
+	test-tool chmtime =1234567890 other &&
+	update_assert_changed --refresh
+'
+
+test_expect_success '--refresh should fix racy timestamp if racy file needs update' '
+	reset_files &&
+	echo content2 >file &&
+	test-tool chmtime =1234567890 file &&
+	update_assert_changed --refresh
+'
+
+test_done
-- 
gitgitgadget

^ permalink raw reply related

* Re: [PATCH v2 1/3] sched/pelt: Don't sync hardly util_sum with uti_avg
From: Dietmar Eggemann @ 2022-01-05 13:15 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: mingo, peterz, juri.lelli, rostedt, bsegall, mgorman, bristot,
	linux-kernel, rickyiu, odin, sachinp, naresh.kamboju
In-Reply-To: <CAKfTPtBR3BWCwEaJe0Cq6K5__zNxfU7FFo2f0bpOPkvzxKdiww@mail.gmail.com>

On 04/01/2022 14:42, Vincent Guittot wrote:
> On Tue, 4 Jan 2022 at 12:47, Dietmar Eggemann <dietmar.eggemann@arm.com> wrote:
>>
>> On 22/12/2021 10:38, Vincent Guittot wrote:

[...]

>> I still wonder whether the regression only comes from the changes in
>> update_cfs_rq_load_avg(), introduced by 1c35b07e6d39.
>> And could be fixed only by this part of the patch-set (A):
> 
> I have been able to trigger the warning even with (A) though It took
> much more time.
> And I have been able to catch wrong situations  (with additional
> traces) in the 3 places A, B and C

OK. By wrong situation you mean '_sum < _avg * MIN_DIVIDER' ?

>> @@ -3677,15 +3706,22 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq
>> *cfs_rq)
>>
>>     r = removed_load;
>>     sub_positive(&sa->load_avg, r);
>> -   sa->load_sum = sa->load_avg * divider;
>> +   sub_positive(&sa->load_sum, r * divider);
>> +   sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * MIN_DIVIDER);
>>
>>     r = removed_util;
>>     sub_positive(&sa->util_avg, r);
>> -   sa->util_sum = sa->util_avg * divider;
>> +   sub_positive(&sa->util_sum, r * divider);
>> +   sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * MIN_DIVIDER);
>>
>>     r = removed_runnable;
>>     sub_positive(&sa->runnable_avg, r);
>> -   sa->runnable_sum = sa->runnable_avg * divider;
>> +   sub_positive(&sa->runnable_sum, r * divider);
>> +   sa->runnable_sum = max_t(u32, sa->runnable_sum,
>> +                                 sa->runnable_avg * MIN_DIVIDER);
>>
>> i.e. w/o changing update_tg_cfs_X() (and
>> detach_entity_load_avg()/dequeue_load_avg()).
>>
>> update_load_avg()
>>   update_cfs_rq_load_avg()    <---
>>   propagate_entity_load_avg()
>>     update_tg_cfs_X()         <---
>>
>>
>> I didn't see the SCHED_WARN_ON() [cfs_rq_is_decayed()] when looping on
>> hackbench in several different sched group levels on
>> [Hikey620 (Arm64, 8 CPUs, SMP, 4 taskgroups: A/B C/D E/F G/H), >12h uptime].
> 
> IIRC, it was with hikey960 with cgroup v1
> As a side note, I never trigger the problem with dragonboard845 and cgroup v2

OK, just started a test on hikey960 cgroupv1. Let's see if I can catch it.

[...]

>>> @@ -3780,7 +3799,11 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
>>>
>>>       dequeue_load_avg(cfs_rq, se);
>>>       sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
>>> -     cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
>>> +     sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
>>> +     /* See update_tg_cfs_util() */
>>> +     cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
>>> +                                       cfs_rq->avg.util_avg * MIN_DIVIDER);
>>> +
>>
>> Maybe add a:
>>
>> Fixes: fcf6631f3736 ("sched/pelt: Ensure that *_sum is always synced
>> with *_avg")
> 
> I spent time thinking about adding fixes tag. There is no crash/warn
> so far so should we propagate it back in LTS for better performance ?

Not sure I understand. What do you mean by 'should we propagate it back
in LTS'?

[...]

>> This max_t() should make sure that `_sum is always >= _avg *
>> MIN_DIVIDER`. Which is not the case sometimes. Currently this is done in
>>
>> (1) update_cfs_rq_load_avg()
>> (2) detach_entity_load_avg() and dequeue_load_avg()
>> (3) update_tg_cfs_X()
>>
>> but not in attach_entity_load_avg(), enqueue_load_avg(). What's the
>> reason for this?
> 
> Main reason is that I have never seen the problem.
> Then, the problem comes from subtracting task's value whereas here we
> always add positive value

OK, I see. The add_positive()'s in update_tg_cfs_X() deal w/ `long` values.

^ permalink raw reply

* [PATCH v2 1/2] t7508: add tests capturing racy timestamp handling
From: Marc Strapetz via GitGitGadget @ 2022-01-05 13:15 UTC (permalink / raw)
  To: git; +Cc: Marc Strapetz, Marc Strapetz
In-Reply-To: <pull.1105.v2.git.1641388523.gitgitgadget@gmail.com>

From: Marc Strapetz <marc.strapetz@syntevo.com>

"git status" fixes racy timestamps regardless of the worktree being
dirty or not. The new test cases capture this behavior.

Signed-off-by: Marc Strapetz <marc.strapetz@syntevo.com>
---
 t/t7508-status.sh | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/t/t7508-status.sh b/t/t7508-status.sh
index 05c6c02435d..652cbb5ed2e 100755
--- a/t/t7508-status.sh
+++ b/t/t7508-status.sh
@@ -1656,4 +1656,32 @@ test_expect_success '--no-optional-locks prevents index update' '
 	! grep ^1234567890 out
 '
 
+test_expect_success 'racy timestamps will be fixed for clean worktree' '
+	echo content >racy-dirty &&
+	echo content >racy-racy &&
+	git add racy* &&
+	git commit -m "racy test files" &&
+	# let status rewrite the index, if necessary; after that we expect
+	# no more index writes unless caused by racy timestamps; note that
+	# timestamps may already be racy now (depending on previous tests)
+	git status &&
+	test-tool chmtime =1234567890 .git/index &&
+	test-tool chmtime --get .git/index >out &&
+	grep ^1234567890 out &&
+	git status &&
+	test-tool chmtime --get .git/index >out &&
+	! grep ^1234567890 out
+'
+
+test_expect_success 'racy timestamps will be fixed for dirty worktree' '
+	echo content2 >racy-dirty &&
+	git status &&
+	test-tool chmtime =1234567890 .git/index &&
+	test-tool chmtime --get .git/index >out &&
+	grep ^1234567890 out &&
+	git status &&
+	test-tool chmtime --get .git/index >out &&
+	! grep ^1234567890 out
+'
+
 test_done
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH v2 0/2] update-index: refresh should rewrite index in case of racy timestamps
From: Marc Strapetz via GitGitGadget @ 2022-01-05 13:15 UTC (permalink / raw)
  To: git; +Cc: Marc Strapetz
In-Reply-To: <pull.1105.git.1640181390841.gitgitgadget@gmail.com>

This patch makes update-index --refresh write the index if it contains racy
timestamps, as discussed at [1].

Changes since v1:

 * main commit message now uses 'git update-index', and the paragraph
   about combining --refresh/--really-refresh with --unresolve and
   --again was dropped
 * t/t7508-status.sh: two tests added which capture status racy handling
 * builtin/update-index.c: comment improved
 * t/t2108-update-index-refresh-racy.sh: major overhaul
   * one test case added
   * mtime-manipulations simplified and aligned to t7508
   * code style fixes, as discussed

[1]
https://lore.kernel.org/git/d3dd805c-7c1d-30a9-6574-a7bfcb7fc013@syntevo.com/

Marc Strapetz (2):
  t7508: add tests capturing racy timestamp handling
  update-index: refresh should rewrite index in case of racy timestamps

 builtin/update-index.c               | 11 +++++
 cache.h                              |  1 +
 read-cache.c                         |  2 +-
 t/t2108-update-index-refresh-racy.sh | 64 ++++++++++++++++++++++++++++
 t/t7508-status.sh                    | 28 ++++++++++++
 5 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100755 t/t2108-update-index-refresh-racy.sh


base-commit: dcc0cd074f0c639a0df20461a301af6d45bd582e
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1105%2Fmstrap%2Ffeature%2Fupdate-index-refresh-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1105/mstrap/feature/update-index-refresh-v2
Pull-Request: https://github.com/gitgitgadget/git/pull/1105

Range-diff vs v1:

 -:  ----------- > 1:  7d58f806111 t7508: add tests capturing racy timestamp handling
 1:  8f9618a44c5 ! 2:  dfeabf6af15 update-index: refresh should rewrite index in case of racy timestamps
     @@ Metadata
       ## Commit message ##
          update-index: refresh should rewrite index in case of racy timestamps
      
     -    update-index --refresh and --really-refresh should force writing of the
     -    index file if racy timestamps have been encountered, as status already
     -    does [1].
     +    'git update-index --refresh' and '--really-refresh' should force writing
     +    of the index file if racy timestamps have been encountered, as
     +    'git status' already does [1].
      
     -    Note that calling update-index still does not guarantee that there will
     -    be no more racy timestamps afterwards (the same holds true for status):
     +    Note that calling 'git update-index --refresh' still does not guarantee
     +    that there will be no more racy timestamps afterwards (the same holds
     +    true for 'git status'):
      
     -    - calling update-index immediately after touching and adding a file may
     -      still leave racy timestamps if all three operations occur within the
     -      racy-tolerance (usually 1 second unless USE_NSEC has been defined)
     +    - calling 'git update-index --refresh' immediately after touching and
     +      adding a file may still leave racy timestamps if all three operations
     +      occur within the racy-tolerance (usually 1 second unless USE_NSEC has
     +      been defined)
      
     -    - calling update-index for timestamps which are set into the future
     -      will leave them racy
     +    - calling 'git update-index --refresh' for timestamps which are set into
     +      the future will leave them racy
      
          To guarantee that such racy timestamps will be resolved would require to
          wait until the system clock has passed beyond these timestamps and only
          then write the index file. Especially for future timestamps, this does
          not seem feasible because of possibly long delays/hangs.
      
     -    Both --refresh and --really-refresh may in theory be used in
     -    combination with --unresolve and --again which may reset the
     -    "active_cache_changed" flag. There is no difference of whether we
     -    write the index due to racy timestamps or due to other
     -    reasons, like if --really-refresh has detected CE_ENTRY_CHANGED in
     -    refresh_cache(). Hence, we will set the "active_cache_changed" flag
     -    immediately after calling refresh_cache().
     -
          [1] https://lore.kernel.org/git/d3dd805c-7c1d-30a9-6574-a7bfcb7fc013@syntevo.com/
      
          Signed-off-by: Marc Strapetz <marc.strapetz@syntevo.com>
     @@ builtin/update-index.c: static int refresh(struct refresh_params *o, unsigned in
       	read_cache();
       	*o->has_errors |= refresh_cache(o->flags | flag);
      +	if (has_racy_timestamp(&the_index)) {
     -+		/* For racy timestamps we should set active_cache_changed immediately:
     -+		 * other callbacks may follow for which some of them may reset
     -+		 * active_cache_changed. */
     ++		/*
     ++		 * Even if nothing else has changed, updating the file
     ++		 * increases the chance that racy timestamps become
     ++		 * non-racy, helping future run-time performance.
     ++		 * We do that even in case of "errors" returned by
     ++		 * refresh_cache() as these are no actual errors.
     ++		 * cmd_status() does the same.
     ++		 */
      +		active_cache_changed |= SOMETHING_CHANGED;
      +	}
       	return 0;
     @@ t/t2108-update-index-refresh-racy.sh (new)
      +
      +test_description='update-index refresh tests related to racy timestamps'
      +
     ++TEST_PASSES_SANITIZE_LEAK=true
      +. ./test-lib.sh
      +
     -+reset_mtime() {
     -+	test-tool chmtime =$(test-tool chmtime --get .git/fs-tstamp) $1
     -+}
     -+
     -+update_assert_unchanged() {
     -+	local ts1=$(test-tool chmtime --get .git/index) &&
     -+	git update-index $1 &&
     -+	local ts2=$(test-tool chmtime --get .git/index) &&
     -+	[ $ts1 -eq $ts2 ]
     ++reset_files () {
     ++	echo content >file &&
     ++	echo content >other &&
     ++	test-tool chmtime =1234567890 file &&
     ++	test-tool chmtime =1234567890 other
      +}
      +
     -+update_assert_changed() {
     -+	local ts1=$(test-tool chmtime --get .git/index) &&
     -+	test_might_fail git update-index $1 &&
     -+	local ts2=$(test-tool chmtime --get .git/index) &&
     -+	[ $ts1 -ne $ts2 ]
     ++update_assert_changed () {
     ++	test-tool chmtime =1234567890 .git/index &&
     ++	test_might_fail git update-index "$1" &&
     ++	test-tool chmtime --get .git/index >.git/out &&
     ++	! grep ^1234567890 .git/out
      +}
      +
      +test_expect_success 'setup' '
     -+	touch .git/fs-tstamp &&
     -+	test-tool chmtime -1 .git/fs-tstamp &&
     -+	echo content >file &&
     -+	reset_mtime file &&
     -+
     -+	git add file &&
     ++	reset_files &&
     ++	# we are calling reset_files() a couple of times during tests;
     ++	# test-tool chmtime does not change the ctime; to not weaken
     ++	# or even break our tests, disable ctime-checks entirely
     ++	git config core.trustctime false &&
     ++	git add file other &&
      +	git commit -m "initial import"
      +'
      +
      +test_expect_success '--refresh has no racy timestamps to fix' '
     -+	reset_mtime .git/index &&
     -+	test-tool chmtime +1 .git/index &&
     -+	update_assert_unchanged --refresh
     ++	reset_files &&
     ++	test-tool chmtime =1234567891 .git/index &&
     ++	git update-index --refresh &&
     ++	test-tool chmtime --get .git/index >.git/out &&
     ++	grep ^1234567891 .git/out
      +'
      +
      +test_expect_success '--refresh should fix racy timestamp' '
     -+	reset_mtime .git/index &&
     ++	reset_files &&
      +	update_assert_changed --refresh
      +'
      +
      +test_expect_success '--really-refresh should fix racy timestamp' '
     -+	reset_mtime .git/index &&
     ++	reset_files &&
      +	update_assert_changed --really-refresh
      +'
      +
     -+test_expect_success '--refresh should fix racy timestamp even if needs update' '
     ++test_expect_success '--refresh should fix racy timestamp if other file needs update' '
     ++	reset_files &&
     ++	echo content2 >other &&
     ++	test-tool chmtime =1234567890 other &&
     ++	update_assert_changed --refresh
     ++'
     ++
     ++test_expect_success '--refresh should fix racy timestamp if racy file needs update' '
     ++	reset_files &&
      +	echo content2 >file &&
     -+	reset_mtime file &&
     -+	reset_mtime .git/index &&
     ++	test-tool chmtime =1234567890 file &&
      +	update_assert_changed --refresh
      +'
      +

-- 
gitgitgadget

^ permalink raw reply

* drivers/remoteproc/remoteproc_debugfs.c:395:10: warning: incompatible integer to pointer conversion returning 'long' from a function with result type 'struct dentry *'
From: kernel test robot @ 2022-01-05 13:15 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 2509 bytes --]

tree:   https://github.com/0day-ci/linux/commits/UPDATE-20220105-144328/Miaoqian-Lin/remoteproc-Fix-NULL-vs-IS_ERR-checking-in-rproc_create_trace_file/20211227-170725
head:   e2c26738760efa9568ce3fef6180fb4311e28d08
commit: e2c26738760efa9568ce3fef6180fb4311e28d08 remoteproc: Fix NULL vs IS_ERR() checking in rproc_create_trace_file
date:   6 hours ago
config: i386-randconfig-a016-20220105 (https://download.01.org/0day-ci/archive/20220105/202201052108.jYZEx2yr-lkp(a)intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project d5b6e30ed3acad794dd0aec400e617daffc6cc3d)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/e2c26738760efa9568ce3fef6180fb4311e28d08
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review UPDATE-20220105-144328/Miaoqian-Lin/remoteproc-Fix-NULL-vs-IS_ERR-checking-in-rproc_create_trace_file/20211227-170725
        git checkout e2c26738760efa9568ce3fef6180fb4311e28d08
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/remoteproc/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/remoteproc/remoteproc_debugfs.c:395:10: warning: incompatible integer to pointer conversion returning 'long' from a function with result type 'struct dentry *' [-Wint-conversion]
                   return PTR_ERR(tfile);
                          ^~~~~~~~~~~~~~
   1 warning generated.


vim +395 drivers/remoteproc/remoteproc_debugfs.c

   385	
   386	struct dentry *rproc_create_trace_file(const char *name, struct rproc *rproc,
   387					       struct rproc_debug_trace *trace)
   388	{
   389		struct dentry *tfile;
   390	
   391		tfile = debugfs_create_file(name, 0400, rproc->dbg_dir, trace,
   392					    &trace_rproc_ops);
   393		if (IS_ERR(tfile)) {
   394			dev_err(&rproc->dev, "failed to create debugfs trace entry\n");
 > 395			return PTR_ERR(tfile);
   396		}
   397	
   398		return tfile;
   399	}
   400	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply

* [PATCH net-next] net: macb: use .mac_select_pcs() interface
From: Russell King (Oracle) @ 2022-01-05 13:15 UTC (permalink / raw)
  To: Andrew Lunn, Heiner Kallweit
  Cc: David S. Miller, netdev, Nicolas Ferre, Claudiu Beznea,
	Jakub Kicinski

Convert the PCS selection to use mac_select_pcs, which allows the PCS
to perform any validation it needs.

We must use separate phylink_pcs instances for the USX and SGMII PCS,
rather than just changing the "ops" pointer before re-setting it to
phylink as this interface queries the PCS, rather than requesting it
to be changed.

Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 drivers/net/ethernet/cadence/macb.h      |  3 ++-
 drivers/net/ethernet/cadence/macb_main.c | 26 +++++++++++-------------
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 5620b97b3482..9ddbee7de72b 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1271,7 +1271,8 @@ struct macb {
 	struct mii_bus		*mii_bus;
 	struct phylink		*phylink;
 	struct phylink_config	phylink_config;
-	struct phylink_pcs	phylink_pcs;
+	struct phylink_pcs	phylink_usx_pcs;
+	struct phylink_pcs	phylink_sgmii_pcs;
 
 	u32			caps;
 	unsigned int		dma_burst_length;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index d4da9adf6777..a363da928e8b 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -510,7 +510,7 @@ static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 				 phy_interface_t interface, int speed,
 				 int duplex)
 {
-	struct macb *bp = container_of(pcs, struct macb, phylink_pcs);
+	struct macb *bp = container_of(pcs, struct macb, phylink_usx_pcs);
 	u32 config;
 
 	config = gem_readl(bp, USX_CONTROL);
@@ -524,7 +524,7 @@ static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode,
 static void macb_usx_pcs_get_state(struct phylink_pcs *pcs,
 				   struct phylink_link_state *state)
 {
-	struct macb *bp = container_of(pcs, struct macb, phylink_pcs);
+	struct macb *bp = container_of(pcs, struct macb, phylink_usx_pcs);
 	u32 val;
 
 	state->speed = SPEED_10000;
@@ -544,7 +544,7 @@ static int macb_usx_pcs_config(struct phylink_pcs *pcs,
 			       const unsigned long *advertising,
 			       bool permit_pause_to_mac)
 {
-	struct macb *bp = container_of(pcs, struct macb, phylink_pcs);
+	struct macb *bp = container_of(pcs, struct macb, phylink_usx_pcs);
 
 	gem_writel(bp, USX_CONTROL, gem_readl(bp, USX_CONTROL) |
 		   GEM_BIT(SIGNAL_OK));
@@ -727,28 +727,23 @@ static void macb_mac_link_up(struct phylink_config *config,
 	netif_tx_wake_all_queues(ndev);
 }
 
-static int macb_mac_prepare(struct phylink_config *config, unsigned int mode,
-			    phy_interface_t interface)
+static struct phylink_pcs *macb_mac_select_pcs(struct phylink_config *config,
+					       phy_interface_t interface)
 {
 	struct net_device *ndev = to_net_dev(config->dev);
 	struct macb *bp = netdev_priv(ndev);
 
 	if (interface == PHY_INTERFACE_MODE_10GBASER)
-		bp->phylink_pcs.ops = &macb_phylink_usx_pcs_ops;
+		return &bp->phylink_usx_pcs;
 	else if (interface == PHY_INTERFACE_MODE_SGMII)
-		bp->phylink_pcs.ops = &macb_phylink_pcs_ops;
+		return &bp->phylink_sgmii_pcs;
 	else
-		bp->phylink_pcs.ops = NULL;
-
-	if (bp->phylink_pcs.ops)
-		phylink_set_pcs(bp->phylink, &bp->phylink_pcs);
-
-	return 0;
+		return NULL;
 }
 
 static const struct phylink_mac_ops macb_phylink_ops = {
 	.validate = phylink_generic_validate,
-	.mac_prepare = macb_mac_prepare,
+	.mac_select_pcs = macb_mac_select_pcs,
 	.mac_config = macb_mac_config,
 	.mac_link_down = macb_mac_link_down,
 	.mac_link_up = macb_mac_link_up,
@@ -806,6 +801,9 @@ static int macb_mii_probe(struct net_device *dev)
 {
 	struct macb *bp = netdev_priv(dev);
 
+	bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops;
+	bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops;
+
 	bp->phylink_config.dev = &dev->dev;
 	bp->phylink_config.type = PHYLINK_NETDEV;
 
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH v7] usb: f_fs: Fix use-after-free for epfile
From: kernel test robot @ 2022-01-05 13:15 UTC (permalink / raw)
  To: kbuild-all
In-Reply-To: <1641364317-11916-1-git-send-email-quic_ugoswami@quicinc.com>

[-- Attachment #1: Type: text/plain, Size: 8498 bytes --]

Hi Udipto,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on peter-chen-usb/for-usb-next]
[cannot apply to usb/usb-testing balbi-usb/testing/next v5.16-rc8 next-20220105]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Udipto-Goswami/usb-f_fs-Fix-use-after-free-for-epfile/20220105-143439
base:   https://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git for-usb-next
config: alpha-allyesconfig (https://download.01.org/0day-ci/archive/20220105/202201052146.RZUTvDGn-lkp(a)intel.com/config)
compiler: alpha-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/0a319144fb2e68829c0d23f5b5505a19a207c906
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Udipto-Goswami/usb-f_fs-Fix-use-after-free-for-epfile/20220105-143439
        git checkout 0a319144fb2e68829c0d23f5b5505a19a207c906
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=alpha SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:17,
                    from include/linux/list.h:9,
                    from include/linux/rculist.h:10,
                    from include/linux/pid.h:5,
                    from include/linux/sched.h:14,
                    from include/linux/blkdev.h:5,
                    from drivers/usb/gadget/function/f_fs.c:17:
   drivers/usb/gadget/function/f_fs.c: In function 'ffs_epfiles_create':
>> drivers/usb/gadget/function/f_fs.c:1918:43: error: 'flags' undeclared (first use in this function)
    1918 |         spin_lock_irqsave(&ffs->eps_lock, flags);
         |                                           ^~~~~
   include/linux/typecheck.h:11:16: note: in definition of macro 'typecheck'
      11 |         typeof(x) __dummy2; \
         |                ^
   include/linux/spinlock.h:384:9: note: in expansion of macro 'raw_spin_lock_irqsave'
     384 |         raw_spin_lock_irqsave(spinlock_check(lock), flags);     \
         |         ^~~~~~~~~~~~~~~~~~~~~
   drivers/usb/gadget/function/f_fs.c:1918:9: note: in expansion of macro 'spin_lock_irqsave'
    1918 |         spin_lock_irqsave(&ffs->eps_lock, flags);
         |         ^~~~~~~~~~~~~~~~~
   drivers/usb/gadget/function/f_fs.c:1918:43: note: each undeclared identifier is reported only once for each function it appears in
    1918 |         spin_lock_irqsave(&ffs->eps_lock, flags);
         |                                           ^~~~~
   include/linux/typecheck.h:11:16: note: in definition of macro 'typecheck'
      11 |         typeof(x) __dummy2; \
         |                ^
   include/linux/spinlock.h:384:9: note: in expansion of macro 'raw_spin_lock_irqsave'
     384 |         raw_spin_lock_irqsave(spinlock_check(lock), flags);     \
         |         ^~~~~~~~~~~~~~~~~~~~~
   drivers/usb/gadget/function/f_fs.c:1918:9: note: in expansion of macro 'spin_lock_irqsave'
    1918 |         spin_lock_irqsave(&ffs->eps_lock, flags);
         |         ^~~~~~~~~~~~~~~~~
   include/linux/typecheck.h:12:25: warning: comparison of distinct pointer types lacks a cast
      12 |         (void)(&__dummy == &__dummy2); \
         |                         ^~
   include/linux/spinlock.h:251:17: note: in expansion of macro 'typecheck'
     251 |                 typecheck(unsigned long, flags);        \
         |                 ^~~~~~~~~
   include/linux/spinlock.h:384:9: note: in expansion of macro 'raw_spin_lock_irqsave'
     384 |         raw_spin_lock_irqsave(spinlock_check(lock), flags);     \
         |         ^~~~~~~~~~~~~~~~~~~~~
   drivers/usb/gadget/function/f_fs.c:1918:9: note: in expansion of macro 'spin_lock_irqsave'
    1918 |         spin_lock_irqsave(&ffs->eps_lock, flags);
         |         ^~~~~~~~~~~~~~~~~
   drivers/usb/gadget/function/f_fs.c: In function 'ffs_func_eps_enable':
>> drivers/usb/gadget/function/f_fs.c:1980:9: error: 'epfiles' undeclared (first use in this function); did you mean 'epfile'?
    1980 |         epfiles = ffs->epfiles;
         |         ^~~~~~~
         |         epfile


vim +/flags +1918 drivers/usb/gadget/function/f_fs.c

  1888	
  1889	static int ffs_epfiles_create(struct ffs_data *ffs)
  1890	{
  1891		struct ffs_epfile *epfile, *epfiles;
  1892		unsigned i, count;
  1893	
  1894		ENTER();
  1895	
  1896		count = ffs->eps_count;
  1897		epfiles = kcalloc(count, sizeof(*epfiles), GFP_KERNEL);
  1898		if (!epfiles)
  1899			return -ENOMEM;
  1900	
  1901		epfile = epfiles;
  1902		for (i = 1; i <= count; ++i, ++epfile) {
  1903			epfile->ffs = ffs;
  1904			mutex_init(&epfile->mutex);
  1905			if (ffs->user_flags & FUNCTIONFS_VIRTUAL_ADDR)
  1906				sprintf(epfile->name, "ep%02x", ffs->eps_addrmap[i]);
  1907			else
  1908				sprintf(epfile->name, "ep%u", i);
  1909			epfile->dentry = ffs_sb_create_file(ffs->sb, epfile->name,
  1910							 epfile,
  1911							 &ffs_epfile_operations);
  1912			if (!epfile->dentry) {
  1913				ffs_epfiles_destroy(epfiles, i - 1);
  1914				return -ENOMEM;
  1915			}
  1916		}
  1917	
> 1918		spin_lock_irqsave(&ffs->eps_lock, flags);
  1919		ffs->epfiles = epfiles;
  1920		spin_unlock_irqrestore(&ffs->eps_lock, flags);
  1921		return 0;
  1922	}
  1923	
  1924	static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count)
  1925	{
  1926		struct ffs_epfile *epfile = epfiles;
  1927	
  1928		ENTER();
  1929	
  1930		for (; count; --count, ++epfile) {
  1931			BUG_ON(mutex_is_locked(&epfile->mutex));
  1932			if (epfile->dentry) {
  1933				d_delete(epfile->dentry);
  1934				dput(epfile->dentry);
  1935				epfile->dentry = NULL;
  1936			}
  1937		}
  1938	
  1939		kfree(epfiles);
  1940	}
  1941	
  1942	static void ffs_func_eps_disable(struct ffs_function *func)
  1943	{
  1944		struct ffs_ep *ep;
  1945		struct ffs_epfile *epfile;
  1946		unsigned short count;
  1947		unsigned long flags;
  1948	
  1949		spin_lock_irqsave(&func->ffs->eps_lock, flags);
  1950		count = func->ffs->eps_count;
  1951		epfile = func->ffs->epfiles;
  1952		ep = func->eps;
  1953		while (count--) {
  1954			/* pending requests get nuked */
  1955			if (ep->ep)
  1956				usb_ep_disable(ep->ep);
  1957			++ep;
  1958	
  1959			if (epfile) {
  1960				epfile->ep = NULL;
  1961				__ffs_epfile_read_buffer_free(epfile);
  1962				++epfile;
  1963			}
  1964		}
  1965		spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
  1966	}
  1967	
  1968	static int ffs_func_eps_enable(struct ffs_function *func)
  1969	{
  1970		struct ffs_data *ffs;
  1971		struct ffs_ep *ep;
  1972		struct ffs_epfile *epfile;
  1973		unsigned count;
  1974		unsigned long flags;
  1975		int ret = 0;
  1976	
  1977		spin_lock_irqsave(&func->ffs->eps_lock, flags);
  1978		ffs = func->ffs;
  1979		ep = func->eps;
> 1980		epfiles = ffs->epfiles;
  1981		count = ffs->eps_count;
  1982		while(count--) {
  1983			ep->ep->driver_data = ep;
  1984	
  1985			ret = config_ep_by_speed(func->gadget, &func->function, ep->ep);
  1986			if (ret) {
  1987				pr_err("%s: config_ep_by_speed(%s) returned %d\n",
  1988						__func__, ep->ep->name, ret);
  1989				break;
  1990			}
  1991	
  1992			ret = usb_ep_enable(ep->ep);
  1993			if (!ret) {
  1994				epfile->ep = ep;
  1995				epfile->in = usb_endpoint_dir_in(ep->ep->desc);
  1996				epfile->isoc = usb_endpoint_xfer_isoc(ep->ep->desc);
  1997			} else {
  1998				break;
  1999			}
  2000	
  2001			++ep;
  2002			++epfile;
  2003		}
  2004	
  2005		wake_up_interruptible(&ffs->wait);
  2006		spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
  2007	
  2008		return ret;
  2009	}
  2010	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply

* [PATCH v2] crypto: qat - use signed variable to store status and error checking
From: Muhammad Usama Anjum @ 2022-01-05 13:14 UTC (permalink / raw)
  To: Giovanni Cabiddu, Herbert Xu, David S. Miller,
	open list:QAT DRIVER, open list:CRYPTO API, open list
  Cc: usama.anjum

ret should be signed. adf_cfg_get_param_value() and match_string()
return signed statuses. The return status may be saved wrongly in
unsigned ret variable. Correct the data type of ret to signed int.

Acked-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
---
Changes in v2:
	Updated commit message
---
 drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index 6d10edc40aca..68d39c833332 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -52,7 +52,7 @@ static const char *const dev_cfg_services[] = {
 static int get_service_enabled(struct adf_accel_dev *accel_dev)
 {
 	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
-	u32 ret;
+	int ret;
 
 	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
 				      ADF_SERVICES_ENABLED, services);
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH] cifs: invalidate dns resolver keys after use
From: David Howells @ 2022-01-05 13:12 UTC (permalink / raw)
  To: Enzo Matsumiya
  Cc: dhowells, Shyam Prasad N, Steve French, Paulo Alcantara, CIFS
In-Reply-To: <20211223203327.mvzmj3mtlpke3wxn@cyberdelia>

Enzo Matsumiya <ematsumiya@suse.de> wrote:

> I'm not sure I understand. I'm using res_nquery() on my to-be-proposed
> patch and it works fine.

You're supposed to use getaddrinfo() these days, apparently.  The info you're
looking for might not be in the DNS.

David


^ permalink raw reply

* Re: [PATCH] debugfs: lockdown: Allow reading debugfs files that are not world readable
From: Michal Suchánek @ 2022-01-05 13:12 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: linux-kernel, Rafael J. Wysocki, Matthew Garrett, James Morris,
	David Howells, Andy Shevchenko, acpi4asus-user,
	platform-driver-x86, Thomas Gleixner
In-Reply-To: <YdWGQ+Kxeo9Q7Kli@kroah.com>

Hello,

On Wed, Jan 05, 2022 at 12:51:31PM +0100, Greg Kroah-Hartman wrote:
> On Tue, Jan 04, 2022 at 06:05:05PM +0100, Michal Suchanek wrote:
> > 
> > When the kernel is locked down the kernel allows reading only debugfs
> > files with mode 444. Mode 400 is also valid but is not allowed.
> > 
> > Make the 444 into a mask.
> > 
> > Fixes: 5496197f9b08 ("debugfs: Restrict debugfs when the kernel is locked down")
> > Signed-off-by: Michal Suchanek <msuchanek@suse.de>
> > ---
> >  fs/debugfs/file.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Why has it taken so long for anyone to notice this (2 years!)?
> 
> Is that because no one uses the lockdown mode and tries to read debugfs
> files?

It's because people use those LTSS kernels that don't have this change.

> > 
> > diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
> > index 7d162b0efbf0..950c63fa4d0b 100644
> > --- a/fs/debugfs/file.c
> > +++ b/fs/debugfs/file.c
> > @@ -147,7 +147,7 @@ static int debugfs_locked_down(struct inode *inode,
> >  			       struct file *filp,
> >  			       const struct file_operations *real_fops)
> >  {
> > -	if ((inode->i_mode & 07777) == 0444 &&
> > +	if ((inode->i_mode & 07777 & ~0444) == 0 &&
> 
> You are now allowing more than just 0400, is that intentional?

The intent is to allow files that have permissions that are a subset of
0444. The only one that makes sense and people complain about is 0400
but if you had 0440 or 0004 it would be permitted as well.

> I never understood why files that were 0666 were not able to be read
> here as well, why not allow that as well?  What was magic about 0444
> files?

I don't understand that either but I am not really trying to challenge
that part.

Thanks

Michal

^ permalink raw reply

* Re: [RFC v2 4/8] drm/amdgpu: Serialize non TDR gpu recovery with TDRs
From: Lazar, Lijo @ 2022-01-05 13:11 UTC (permalink / raw)
  To: Christian König, Andrey Grodzovsky, dri-devel, amd-gfx
  Cc: horace.chen, Monk.Liu
In-Reply-To: <9dc55576-19b1-d5e3-f4da-eede4db8b289@amd.com>



On 1/5/2022 6:01 PM, Christian König wrote:
> Am 05.01.22 um 10:54 schrieb Lazar, Lijo:
>> On 12/23/2021 3:35 AM, Andrey Grodzovsky wrote:
>>> Use reset domain wq also for non TDR gpu recovery triggers
>>> such as sysfs and RAS. We must serialize all possible
>>> GPU recoveries to guarantee no concurrency there.
>>> For TDR call the original recovery function directly since
>>> it's already executed from within the wq. For others just
>>> use a wrapper to queue work and wait on it to finish.
>>>
>>> v2: Rename to amdgpu_recover_work_struct
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 33 +++++++++++++++++++++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    |  2 +-
>>>   3 files changed, 35 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index b5ff76aae7e0..8e96b9a14452 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1296,6 +1296,8 @@ bool amdgpu_device_has_job_running(struct 
>>> amdgpu_device *adev);
>>>   bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
>>>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>                     struct amdgpu_job* job);
>>> +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>>> +                  struct amdgpu_job *job);
>>>   void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
>>>   int amdgpu_device_pci_reset(struct amdgpu_device *adev);
>>>   bool amdgpu_device_need_post(struct amdgpu_device *adev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 7c063fd37389..258ec3c0b2af 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -4979,7 +4979,7 @@ static void amdgpu_device_recheck_guilty_jobs(
>>>    * Returns 0 for success or an error on failure.
>>>    */
>>>   -int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>> +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
>>>                     struct amdgpu_job *job)
>>>   {
>>>       struct list_head device_list, *device_list_handle =  NULL;
>>> @@ -5237,6 +5237,37 @@ int amdgpu_device_gpu_recover(struct 
>>> amdgpu_device *adev,
>>>       return r;
>>>   }
>>>   +struct amdgpu_recover_work_struct {
>>> +    struct work_struct base;
>>> +    struct amdgpu_device *adev;
>>> +    struct amdgpu_job *job;
>>> +    int ret;
>>> +};
>>> +
>>> +static void amdgpu_device_queue_gpu_recover_work(struct work_struct 
>>> *work)
>>> +{
>>> +    struct amdgpu_recover_work_struct *recover_work = 
>>> container_of(work, struct amdgpu_recover_work_struct, base);
>>> +
>>> +    recover_work->ret = 
>>> amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
>>> +}
>>> +/*
>>> + * Serialize gpu recover into reset domain single threaded wq
>>> + */
>>> +int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>> +                    struct amdgpu_job *job)
>>> +{
>>> +    struct amdgpu_recover_work_struct work = {.adev = adev, .job = 
>>> job};
>>> +
>>> +    INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
>>> +
>>> +    if (!queue_work(adev->reset_domain.wq, &work.base))
>>> +        return -EAGAIN;
>>> +
>>
>> The decision to schedule a reset is made at this point. Subsequent 
>> accesses to hardware may not be reliable. So should the flag in_reset 
>> be set here itself rather than waiting for the work to start execution?
> 
> No, when we race and lose the VM is completely lost and probably 
> restarted by the hypervisor.
> 
> And when we race and win we properly set the flag before signaling the 
> hypervisor that it can continue with the reset.
> 

I was talking about the bare-metal case. When this was synchronous, in_reset 
flag is set as one of the first things and amdgpu_in_reset is checked to 
prevent further hardware accesses. This design only changes the recover 
part and doesn't change the hardware perspective. Potential accesses 
from other processes need to be blocked as soon as we determine a reset 
is required. Are we expecting the work to be immediately executed and 
set the flags?

Thanks,
Lijo

>> Also, what about having the reset_active or in_reset flag in the 
>> reset_domain itself?
> 
> Offhand that sounds like a good idea.
> 
> Regards,
> Christian.
> 
>>
>> Thanks,
>> Lijo
>>
>>> +    flush_work(&work.base);
>>> +
>>> +    return work.ret;
>>> +}
>>> +
>>>   /**
>>>    * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
>>>    *
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> index bfc47bea23db..38c9fd7b7ad4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>>> @@ -63,7 +63,7 @@ static enum drm_gpu_sched_stat 
>>> amdgpu_job_timedout(struct drm_sched_job *s_job)
>>>             ti.process_name, ti.tgid, ti.task_name, ti.pid);
>>>         if (amdgpu_device_should_recover_gpu(ring->adev)) {
>>> -        amdgpu_device_gpu_recover(ring->adev, job);
>>> +        amdgpu_device_gpu_recover_imp(ring->adev, job);
>>>       } else {
>>>           drm_sched_suspend_timeout(&ring->sched);
>>>           if (amdgpu_sriov_vf(adev))
>>>
> 

^ permalink raw reply


This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.