linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Martin K. Petersen" <martin.petersen@oracle.com>
To: linux-fsdevel@vger.kernel.org
Cc: Eric Sandeen <sandeen@redhat.com>,
	Andreas Dilger <adilger@sun.com>, Jim Meyering <jim@meyering.net>,
	jens.axboe@oracle.com
Subject: Topology ioctls
Date: Wed, 23 Sep 2009 10:26:27 -0400	[thread overview]
Message-ID: <yq1pr9hu48c.fsf@sermon.lab.mkp.net> (raw)


The original rationale for exporting the topology information via sysfs
was that we intended to support multiple heterogeneous regions within a
block device.  And that fit poorly with an ioctl approach.

However, with a single region per device it is trivial to provide the
topology.  And while mkfs.* will continue to use the libblkid interface,
there are users that would like to get access to this information
without having to traverse sysfs and stitch things together manually.

Example:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/ioctl.h>

/*
 * Block-device ioctl request codes used by this example (ioctl type 0x12).
 * Codes 120-123 are the ones introduced by the patch further down in this
 * message; they are spelled out here so the example is self-contained
 * (presumably because installed headers do not carry them yet).
 */
/* Logical block size of the device. */
#define BLKSSZGET 	_IO(0x12,104)
/* Preferred random I/O size (the kernel side returns bdev_io_min()). */
#define BLKIOMIN	_IO(0x12,120)
/* Preferred sustained I/O size (the kernel side returns bdev_io_opt()). */
#define BLKIOOPT	_IO(0x12,121)
/* Alignment offset; the kernel side returns -1 when the device is misaligned. */
#define BLKALIGNOFF	_IO(0x12,122)
/* Physical block size of the device. */
#define BLKPBSZGET	_IO(0x12,123)

/*
 * Print a printf-style message to stderr and terminate the process with
 * EXIT_FAILURE.
 *
 * If errno is non-zero on entry, ": " followed by the strerror() text for
 * that value is appended.  A trailing newline is always written.
 *
 * fmt, ...: printf-style format string and its arguments.
 *
 * The function is declared to return int but never returns, because it
 * always ends in exit().
 *
 * strerror() is declared in <string.h>; without that header the call would
 * be an implicit declaration whose int return value truncates the returned
 * pointer on LP64 targets.
 */
static int die(const char *fmt, ...)
{
        /* Snapshot errno first: the stdio calls below may overwrite it. */
        int err = errno;
        va_list val;

        va_start(val, fmt);
        vfprintf(stderr, fmt, val);
        va_end(val);

        if (err != 0)
                fprintf(stderr, ": %s", strerror(err));
        fprintf(stderr, "\n");

        exit(EXIT_FAILURE);
}

/*
 * Query the topology of the block device named by argv[1] through the
 * BLK* ioctls and print the results to stdout.
 *
 * Exits through die() with EXIT_FAILURE on a usage error, if the device
 * cannot be opened, or if any ioctl fails; exits with EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, char *argv[])
{
	/*
	 * The three size values are held as unsigned int and printed with %u.
	 * The logical block size and the alignment offset stay signed and are
	 * printed with %d, so every conversion specifier matches its argument
	 * type.  align must be signed because -1 is the "misaligned" marker.
	 */
	int fd, lbs, align;
	unsigned int pbs, min, opt;

	if (argc != 2)
		die("Usage: %s <dev>", argv[0]);

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		die("Can't open %s", argv[1]);

	if (ioctl(fd, BLKSSZGET, &lbs) < 0)
		die("Can't get logical block size");

	if (ioctl(fd, BLKPBSZGET, &pbs) < 0)
		die("Can't get physical block size");

	if (ioctl(fd, BLKIOMIN, &min) < 0)
		die("Can't get preferred random I/O size");

	if (ioctl(fd, BLKIOOPT, &opt) < 0)
		die("Can't get preferred sustained I/O size");

	if (ioctl(fd, BLKALIGNOFF, &align) < 0)
		die("Can't get alignment offset");

	printf("%s:\n", argv[1]);
	printf("\tlogical block size: %d\n", lbs);
	printf("\tphysical block size: %u\n", pbs);
	printf("\trandom I/O size: %u\n", min);
	printf("\tsustained I/O size: %u\n", opt);

	/* -1 is what the kernel hands back for a misaligned device. */
	if (align == -1)
		printf("\talignment offset: inconsistent\n");
	else
		printf("\talignment offset: %d\n", align);

	/* The descriptor is left for exit() to close. */
	exit(EXIT_SUCCESS);
}


Patch:

block: Topology ioctls

Not all users of the topology information want to use libblkid.  Provide
the topology information through bdev ioctls.

Also clarify sector size comments for existing BLK ioctls.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

---

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7865a34..bcc8bec 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -734,6 +734,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	switch (cmd) {
 	case HDIO_GETGEO:
 		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
+	case BLKPBSZGET:
+		return compat_put_int(arg, bdev_physical_block_size(bdev));
+	case BLKIOMIN:
+		return compat_put_int(arg, bdev_io_min(bdev));
+	case BLKIOOPT:
+		return compat_put_int(arg, bdev_io_opt(bdev));
+	case BLKALIGNOFF:
+		return compat_put_int(arg, bdev_alignment_offset(bdev));
 	case BLKFLSBUF:
 	case BLKROSET:
 	case BLKDISCARD:
diff --git a/block/ioctl.c b/block/ioctl.c
index d3e6b58..fea6f2c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -263,10 +263,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
 	case BLKROGET:
 		return put_int(arg, bdev_read_only(bdev) != 0);
-	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
 		return put_int(arg, block_size(bdev));
-	case BLKSSZGET: /* get block device hardware sector size */
+	case BLKSSZGET: /* get block device logical block size */
 		return put_int(arg, bdev_logical_block_size(bdev));
+	case BLKPBSZGET: /* get block device physical block size */
+		return put_int(arg, bdev_physical_block_size(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
@@ -309,6 +311,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return put_ulong(arg, size >> 9);
 	case BLKGETSIZE64:
 		return put_u64(arg, bdev->bd_inode->i_size);
+	case BLKIOMIN:
+		return put_int(arg, bdev_io_min(bdev));
+	case BLKIOOPT:
+		return put_int(arg, bdev_io_opt(bdev));
+	case BLKALIGNOFF:
+		return put_int(arg, bdev_alignment_offset(bdev));
 	case BLKTRACESTART:
 	case BLKTRACESTOP:
 	case BLKTRACESETUP:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e23a86c..935bcb0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1080,16 +1080,31 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q)
 	return q->limits.physical_block_size;
 }
 
+static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
+{
+	return queue_physical_block_size(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_min(struct request_queue *q)
 {
 	return q->limits.io_min;
 }
 
+static inline unsigned int bdev_io_min(struct block_device *bdev)
+{
+	return queue_io_min(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_opt(struct request_queue *q)
 {
 	return q->limits.io_opt;
 }
 
+static inline unsigned int bdev_io_opt(struct block_device *bdev)
+{
+	return queue_io_opt(bdev_get_queue(bdev));
+}
+
 static inline int queue_alignment_offset(struct request_queue *q)
 {
 	if (q && q->limits.misaligned)
@@ -1108,6 +1123,19 @@ static inline int queue_sector_alignment_offset(struct request_queue *q,
 		& (q->limits.io_min - 1);
 }
 
+static inline int bdev_alignment_offset(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (!q || q->limits.misaligned)
+		return -1;
+
+	if (bdev != bdev->bd_contains)
+		return bdev->bd_part->alignment_offset;
+
+	return q->limits.alignment_offset;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 90162fb..3f401fc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -300,6 +300,10 @@ struct inodes_stat_t {
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
 #define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */

             reply	other threads:[~2009-09-23 14:26 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-23 14:26 Martin K. Petersen [this message]
2009-09-23 18:28 ` Topology ioctls Jamie Lokier
2009-09-24  4:06   ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=yq1pr9hu48c.fsf@sermon.lab.mkp.net \
    --to=martin.petersen@oracle.com \
    --cc=adilger@sun.com \
    --cc=jens.axboe@oracle.com \
    --cc=jim@meyering.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=sandeen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).