All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ieee1275/ofdisk: retry on open and read failure
@ 2023-03-28  5:30 Mukesh Kumar Chaurasiya
  2023-03-28  8:34 ` Michael Chang
  0 siblings, 1 reply; 6+ messages in thread
From: Mukesh Kumar Chaurasiya @ 2023-03-28  5:30 UTC (permalink / raw)
  To: grub-devel; +Cc: meghanaprakash, avnish, brking, mamatha4, mchauras

Sometimes, when booting from a very busy SAN, access to the
disk can fail, and grub will then eventually drop to the grub
prompt. This scenario is more frequent when deploying many
machines at the same time using the same SAN.
This patch makes the ofdisk module retry the open or read
function after it fails. Retries are bounded by a timeout rather
than a retry count: the ofdisk_retry_timeout environment variable
gives the retry window in milliseconds, and RETRY_DEFAULT_TIMEOUT
(15000 ms) is used when it is unset or zero. Once the window has
elapsed, the failure is returned to the caller.

Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.vnet.ibm.com>
---
 grub-core/disk/ieee1275/ofdisk.c | 65 +++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/grub-core/disk/ieee1275/ofdisk.c b/grub-core/disk/ieee1275/ofdisk.c
index c6cba0c8a..f4183a531 100644
--- a/grub-core/disk/ieee1275/ofdisk.c
+++ b/grub-core/disk/ieee1275/ofdisk.c
@@ -24,6 +24,9 @@
 #include <grub/ieee1275/ofdisk.h>
 #include <grub/i18n.h>
 #include <grub/time.h>
+#include <grub/env.h>
+
+#define RETRY_DEFAULT_TIMEOUT 15000
 
 static char *last_devpath;
 static grub_ieee1275_ihandle_t last_ihandle;
@@ -452,7 +455,7 @@ compute_dev_path (const char *name)
 }
 
 static grub_err_t
-grub_ofdisk_open (const char *name, grub_disk_t disk)
+grub_ofdisk_open_real (const char *name, grub_disk_t disk)
 {
   grub_ieee1275_phandle_t dev;
   char *devpath;
@@ -525,6 +528,41 @@ grub_ofdisk_open (const char *name, grub_disk_t disk)
   return 0;
 }
 
+static grub_uint64_t
+grub_ofdisk_disk_timeout(void)
+{
+   if(grub_env_get("ofdisk_retry_timeout") != NULL)
+     {
+	grub_uint64_t retry = grub_strtoul(grub_env_get("ofdisk_retry_timeout"), 0, 10);
+	if(retry)
+	  return retry;
+     }
+
+   return RETRY_DEFAULT_TIMEOUT;
+}
+
+static grub_err_t
+grub_ofdisk_open (const char *name, grub_disk_t disk)
+{
+  grub_err_t err;
+  grub_uint64_t timeout = grub_get_time_ms () + grub_ofdisk_disk_timeout();
+
+ retry:
+  err = grub_ofdisk_open_real (name, disk);
+
+  if (err == GRUB_ERR_UNKNOWN_DEVICE)
+    {
+      if (grub_get_time_ms () < timeout)
+        {
+          grub_dprintf ("ofdisk","Failed to open disk %s. Retrying...\n", name);
+          grub_errno = GRUB_ERR_NONE;
+          goto retry;
+	}
+    }
+
+  return err;
+}
+
 static void
 grub_ofdisk_close (grub_disk_t disk)
 {
@@ -568,7 +606,7 @@ grub_ofdisk_prepare (grub_disk_t disk, grub_disk_addr_t sector)
 }
 
 static grub_err_t
-grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
+grub_ofdisk_read_real (grub_disk_t disk, grub_disk_addr_t sector,
 		  grub_size_t size, char *buf)
 {
   grub_err_t err;
@@ -587,6 +625,29 @@ grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
   return 0;
 }
 
+static grub_err_t
+grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
+		  grub_size_t size, char *buf)
+{
+  grub_err_t err;
+  grub_uint64_t timeout = grub_get_time_ms () + grub_ofdisk_disk_timeout();
+
+ retry:
+  err = grub_ofdisk_read_real (disk, sector, size, buf);
+
+  if (err == GRUB_ERR_READ_ERROR)
+    {
+      if (grub_get_time_ms () < timeout)
+        {
+          grub_dprintf ("ofdisk","Failed to read disk %s. Retrying...\n", (char*)disk->data);
+          grub_errno = GRUB_ERR_NONE;
+          goto retry;
+	}
+    }
+
+  return err;
+}
+
 static grub_err_t
 grub_ofdisk_write (grub_disk_t disk, grub_disk_addr_t sector,
 		   grub_size_t size, const char *buf)
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread
* [PATCH] ieee1275/ofdisk: retry on open and read failure
@ 2021-05-07 14:15 Diego Domingos
  0 siblings, 0 replies; 6+ messages in thread
From: Diego Domingos @ 2021-05-07 14:15 UTC (permalink / raw)
  To: grub-devel

Sometimes, when booting from a very busy SAN, access to the
disk can fail, and grub will then eventually drop to the grub
prompt. This scenario is more frequent when deploying many
machines at the same time using the same SAN.
This patch makes the ofdisk module retry the open or read
function after it fails. We use MAX_RETRIES to specify the
number of times it will try to open the disk before it finally
gives up; failed reads are likewise retried a fixed number of
times.

---
 grub-core/disk/ieee1275/ofdisk.c | 27 +++++++++++++++++++++------
 include/grub/ieee1275/ofdisk.h   |  8 ++++++++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/grub-core/disk/ieee1275/ofdisk.c b/grub-core/disk/ieee1275/ofdisk.c
index d887d4b..777ae63 100644
--- a/grub-core/disk/ieee1275/ofdisk.c
+++ b/grub-core/disk/ieee1275/ofdisk.c
@@ -225,7 +225,9 @@ dev_iterate (const struct grub_ieee1275_devalias *alias)
       char *buf, *bufptr;
       unsigned i;
 
-      if (grub_ieee1275_open (alias->path, &ihandle))
+
+      RETRY_IEEE1275_OFDISK_OPEN(alias->path, &ihandle)
+      if (! ihandle)
 	return;
 
       /* This method doesn't need memory allocation for the table. Open
@@ -305,7 +307,9 @@ dev_iterate (const struct grub_ieee1275_devalias *alias)
           return;
         }
 
-      if (grub_ieee1275_open (alias->path, &ihandle))
+      RETRY_IEEE1275_OFDISK_OPEN(alias->path, &ihandle)
+
+      if (! ihandle)
         {
           grub_free (buf);
           grub_free (table);
@@ -555,7 +559,7 @@ grub_ofdisk_prepare (grub_disk_t disk, grub_disk_addr_t sector)
       last_ihandle = 0;
       last_devpath = NULL;
 
-      grub_ieee1275_open (disk->data, &last_ihandle);
+      RETRY_IEEE1275_OFDISK_OPEN(disk->data, &last_ihandle)
       if (! last_ihandle)
 	return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
       last_devpath = disk->data;      
@@ -582,12 +586,23 @@ grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
     return err;
   grub_ieee1275_read (last_ihandle, buf, size  << disk->log_sector_size,
 		      &actual);
-  if (actual != (grub_ssize_t) (size  << disk->log_sector_size))
+  int i = 0;
+  while(actual != (grub_ssize_t) (size  << disk->log_sector_size)){
+    if (i>10){
     return grub_error (GRUB_ERR_READ_ERROR, N_("failure reading sector 0x%llx "
 					       "from `%s'"),
 		       (unsigned long long) sector,
 		       disk->name);
-
+    }
+    grub_dprintf("ofdisk","Read failed. Retrying...\n");
+    last_devpath = NULL;
+    err = grub_ofdisk_prepare (disk, sector);
+    if (err)
+      return err;
+    grub_ieee1275_read (last_ihandle, buf, size  << disk->log_sector_size,
+                      &actual);
+    i++;
+  }
   return 0;
 }
 
@@ -704,7 +719,7 @@ grub_ofdisk_get_block_size (const char *device, grub_uint32_t *block_size,
   last_ihandle = 0;
   last_devpath = NULL;
 
-  grub_ieee1275_open (device, &last_ihandle);
+  RETRY_IEEE1275_OFDISK_OPEN (device, &last_ihandle)
   if (! last_ihandle)
     return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "can't open device");
 
diff --git a/include/grub/ieee1275/ofdisk.h b/include/grub/ieee1275/ofdisk.h
index 2f69e3f..124e297 100644
--- a/include/grub/ieee1275/ofdisk.h
+++ b/include/grub/ieee1275/ofdisk.h
@@ -22,4 +22,12 @@
 extern void grub_ofdisk_init (void);
 extern void grub_ofdisk_fini (void);
 
+#define MAX_RETRIES 20
+
+
+#define RETRY_IEEE1275_OFDISK_OPEN(device, last_ihandle) unsigned retry_i=0;for(retry_i=0; retry_i < MAX_RETRIES; retry_i++){ \
+						if(!grub_ieee1275_open(device, last_ihandle)) \
+						break; \
+						grub_dprintf("ofdisk","Opening disk %s failed. Retrying...\n",device); }
+
 #endif /* ! GRUB_INIT_HEADER */
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-04-05 16:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-28  5:30 [PATCH] ieee1275/ofdisk: retry on open and read failure Mukesh Kumar Chaurasiya
2023-03-28  8:34 ` Michael Chang
2023-03-28 16:08   ` Robbie Harwood
2023-03-29  5:30   ` [PATCH V2] " Mukesh Kumar Chaurasiya
2023-04-05 16:41     ` Daniel Kiper
  -- strict thread matches above, loose matches on Subject: below --
2021-05-07 14:15 [PATCH] " Diego Domingos

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.