Linux NFS development
 help / color / mirror / Atom feed
* [bug report] pNFS/flexfiles: don't attempt pnfs on fatal DS errors
@ 2025-08-28 11:08 Dan Carpenter
  2025-08-28 11:18 ` Mkrtchyan, Tigran
  0 siblings, 1 reply; 2+ messages in thread
From: Dan Carpenter @ 2025-08-28 11:08 UTC (permalink / raw)
  To: Tigran Mkrtchyan; +Cc: linux-nfs

Hello Tigran Mkrtchyan,

Commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS
errors") from Jun 27, 2025 (linux-next), leads to the following
Smatch static checker warning:

	fs/nfs/flexfilelayout/flexfilelayout.c:880 ff_layout_pg_init_read()
	error: uninitialized symbol 'ds_idx'.

We recently changed ff_layout_choose_ds_for_read() from returning NULL to
returning error pointers.  There are two bugs.  One error path in
ff_layout_choose_ds_for_read() accidentally returns success.  And some
of the callers are still checking for NULL instead of error pointers.

Btw, I'm always promoting my blog about error pointers and NULL:
https://staticthinking.wordpress.com/2022/08/01/mixing-error-pointers-and-null/
It's not really related here, since we should not mix error pointers
and NULL but I still link to it because that's what they taught me in
my Trump Wealth Institute course on Search Engine Optimization.

Here is what ff_layout_choose_ds_for_read() looks like:
fs/nfs/flexfilelayout/flexfilelayout.c
   758  static struct nfs4_pnfs_ds *
   759  ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
   760                               u32 start_idx, u32 *best_idx,
   761                               bool check_device)
   762  {
   763          struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
   764          struct nfs4_ff_layout_mirror *mirror;
   765          struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
   766          u32 idx;
   767  
   768          /* mirrors are initially sorted by efficiency */
   769          for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
   770                  mirror = FF_LAYOUT_COMP(lseg, idx);
   771                  ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
   772                  if (IS_ERR(ds))
   773                          continue;
   774  
   775                  if (check_device &&
   776                      nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
   777                          continue;

Bug #1:  If we hit this continue on the last iteration through the loop
then we return success and *best_idx is not initialized.  It should be:

		if (check_device &&
		    nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) {
			ds = ERR_PTR(-EINVAL);
			continue;
		}

   778  
   779                  *best_idx = idx;
   780                  break;
   781          }
   782  
   783          return ds;
   784  }

Bug #2: The whole rest of the call tree expects NULL and not error pointers.
For example, ff_layout_get_ds_for_read():

fs/nfs/flexfilelayout/flexfilelayout.c
   812  static struct nfs4_pnfs_ds *
   813  ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
   814                            u32 *best_idx)
   815  {
   816          struct pnfs_layout_segment *lseg = pgio->pg_lseg;
   817          struct nfs4_pnfs_ds *ds;
   818  
   819          ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
   820                                                 best_idx);
   821          if (ds || !pgio->pg_mirror_idx)
                    ^^
   822                  return ds;

This needs to check for error pointers.  But also if pgio->pg_mirror_idx
is zero, is that a success return?  I don't know...

   823          return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
   824  }

fs/nfs/flexfilelayout/flexfilelayout.c
    842 static void
    843 ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
    844                         struct nfs_page *req)
    845 {
    846         struct nfs_pgio_mirror *pgm;
    847         struct nfs4_ff_layout_mirror *mirror;
    848         struct nfs4_pnfs_ds *ds;
    849         u32 ds_idx;
    850 
    851         if (NFS_SERVER(pgio->pg_inode)->flags &
    852                         (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
    853                 pgio->pg_maxretrans = io_maxretrans;
    854 retry:
    855         pnfs_generic_pg_check_layout(pgio, req);
    856         /* Use full layout for now */
    857         if (!pgio->pg_lseg) {
    858                 ff_layout_pg_get_read(pgio, req, false);
    859                 if (!pgio->pg_lseg)
    860                         goto out_nolseg;
    861         }
    862         if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
    863                 ff_layout_pg_get_read(pgio, req, true);
    864                 if (!pgio->pg_lseg)
    865                         goto out_nolseg;
    866         }
    867         /* Reset wb_nio, since getting layout segment was successful */
    868         req->wb_nio = 0;
    869 
    870         ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
    871         if (!ds) {
                ^^^^^^^^^^
This doesn't check for error pointers either.

    872                 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
    873                         goto out_mds;
    874                 pnfs_generic_pg_cleanup(pgio);
    875                 /* Sleep for 1 second before retrying */
    876                 ssleep(1);
    877                 goto retry;
    878         }
    879 
--> 880         mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
                                                       ^^^^^^
Uninitialized.


regards,
dan carpenter

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [bug report] pNFS/flexfiles: don't attempt pnfs on fatal DS errors
  2025-08-28 11:08 [bug report] pNFS/flexfiles: don't attempt pnfs on fatal DS errors Dan Carpenter
@ 2025-08-28 11:18 ` Mkrtchyan, Tigran
  0 siblings, 0 replies; 2+ messages in thread
From: Mkrtchyan, Tigran @ 2025-08-28 11:18 UTC (permalink / raw)
  To: Dan Carpenter; +Cc: linux-nfs

[-- Attachment #1: Type: text/plain, Size: 5858 bytes --]

Hi Dan Carpenter,

Indeed, the behavior looks incorrect. I will look at it deeper and submit a fix.

Best regards,
    Tigran.

----- Original Message -----
> From: "Dan Carpenter" <dan.carpenter@linaro.org>
> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> Cc: "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Thursday, 28 August, 2025 13:08:13
> Subject: [bug report] pNFS/flexfiles: don't attempt pnfs on fatal DS errors

> Hello Tigran Mkrtchyan,
> 
> Commit f06bedfa62d5 ("pNFS/flexfiles: don't attempt pnfs on fatal DS
> errors") from Jun 27, 2025 (linux-next), leads to the following
> Smatch static checker warning:
> 
>	fs/nfs/flexfilelayout/flexfilelayout.c:880 ff_layout_pg_init_read()
>	error: uninitialized symbol 'ds_idx'.
> 
> We recently changed ff_layout_choose_ds_for_read() from returning NULL to
> returning error pointers.  There are two bugs.  One error path in
> ff_layout_choose_ds_for_read() accidentally returns success.  And some
> of the callers are still checking for NULL instead of error pointers.
> 
> Btw, I'm always promoting my blog about error pointers and NULL:
> https://staticthinking.wordpress.com/2022/08/01/mixing-error-pointers-and-null/
> It's not really related here, since we should not mix error pointers
> and NULL but I still link to it because that's what they taught me in
> my Trump Wealth Institute course on Search Engine Optimization.
> 
> Here is what ff_layout_choose_ds_for_read() looks like:
> fs/nfs/flexfilelayout/flexfilelayout.c
>   758  static struct nfs4_pnfs_ds *
>   759  ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
>   760                               u32 start_idx, u32 *best_idx,
>   761                               bool check_device)
>   762  {
>   763          struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
>   764          struct nfs4_ff_layout_mirror *mirror;
>   765          struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
>   766          u32 idx;
>   767
>   768          /* mirrors are initially sorted by efficiency */
>   769          for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
>   770                  mirror = FF_LAYOUT_COMP(lseg, idx);
>   771                  ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
>   772                  if (IS_ERR(ds))
>   773                          continue;
>   774
>   775                  if (check_device &&
>   776
>   nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
>   777                          continue;
> 
> Bug #1:  If we hit this continue on the last iteration through the loop
> then we return success and *best_idx is not initialized.  It should be:
> 
>		if (check_device &&
>		    nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) {
>			ds = ERR_PTR(-EINVAL);
>			continue;
>		}
> 
>   778
>   779                  *best_idx = idx;
>   780                  break;
>   781          }
>   782
>   783          return ds;
>   784  }
> 
> Bug #2: The whole rest of the call tree expects NULL and not error pointers.
> For example, ff_layout_get_ds_for_read():
> 
> fs/nfs/flexfilelayout/flexfilelayout.c
>   812  static struct nfs4_pnfs_ds *
>   813  ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
>   814                            u32 *best_idx)
>   815  {
>   816          struct pnfs_layout_segment *lseg = pgio->pg_lseg;
>   817          struct nfs4_pnfs_ds *ds;
>   818
>   819          ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
>   820                                                 best_idx);
>   821          if (ds || !pgio->pg_mirror_idx)
>                    ^^
>   822                  return ds;
> 
> This needs to check for error pointers.  But also if pgio->pg_mirror_idx
> is zero, is that a success return?  I don't know...
> 
>   823          return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
>   824  }
> 
> fs/nfs/flexfilelayout/flexfilelayout.c
>    842 static void
>    843 ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
>    844                         struct nfs_page *req)
>    845 {
>    846         struct nfs_pgio_mirror *pgm;
>    847         struct nfs4_ff_layout_mirror *mirror;
>    848         struct nfs4_pnfs_ds *ds;
>    849         u32 ds_idx;
>    850
>    851         if (NFS_SERVER(pgio->pg_inode)->flags &
>    852                         (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
>    853                 pgio->pg_maxretrans = io_maxretrans;
>    854 retry:
>    855         pnfs_generic_pg_check_layout(pgio, req);
>    856         /* Use full layout for now */
>    857         if (!pgio->pg_lseg) {
>    858                 ff_layout_pg_get_read(pgio, req, false);
>    859                 if (!pgio->pg_lseg)
>    860                         goto out_nolseg;
>    861         }
>    862         if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) {
>    863                 ff_layout_pg_get_read(pgio, req, true);
>    864                 if (!pgio->pg_lseg)
>    865                         goto out_nolseg;
>    866         }
>    867         /* Reset wb_nio, since getting layout segment was successful */
>    868         req->wb_nio = 0;
>    869
>    870         ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
>    871         if (!ds) {
>                ^^^^^^^^^^
> This doesn't check for error pointers either.
> 
>    872                 if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
>    873                         goto out_mds;
>    874                 pnfs_generic_pg_cleanup(pgio);
>    875                 /* Sleep for 1 second before retrying */
>    876                 ssleep(1);
>    877                 goto retry;
>    878         }
>    879
> --> 880         mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
>                                                       ^^^^^^
> Uninitialized.
> 
> 
> regards,
> dan carpenter

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 2826 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-08-28 11:28 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-28 11:08 [bug report] pNFS/flexfiles: don't attempt pnfs on fatal DS errors Dan Carpenter
2025-08-28 11:18 ` Mkrtchyan, Tigran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox