From: Sage Weil <sage@newdream.net>
To: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Sage Weil <sage@newdream.net>
Subject: [PATCH 24/26] ceph: ioctls
Date: Tue, 2 Mar 2010 16:46:55 -0800 [thread overview]
Message-ID: <1267577217-31923-25-git-send-email-sage@newdream.net> (raw)
In-Reply-To: <1267577217-31923-1-git-send-email-sage@newdream.net>
A few Ceph ioctls for getting and setting file layout (striping)
parameters, and learning the identity and network address of the OSD a
given region of a file is stored on.
Signed-off-by: Sage Weil <sage@newdream.net>
---
Documentation/ioctl/ioctl-number.txt | 1 +
fs/ceph/ioctl.c | 160 ++++++++++++++++++++++++++++++++++
fs/ceph/ioctl.h | 40 +++++++++
3 files changed, 201 insertions(+), 0 deletions(-)
create mode 100644 fs/ceph/ioctl.c
create mode 100644 fs/ceph/ioctl.h
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 35cf64d..e07e5b5 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -292,6 +292,7 @@ Code Seq#(hex) Include File Comments
0x92 00-0F drivers/usb/mon/mon_bin.c
0x93 60-7F linux/auto_fs.h
0x94 all fs/btrfs/ioctl.h
+0x97 00-7F fs/ceph/ioctl.h Ceph file system
0x99 00-0F 537-Addinboard driver
<mailto:buk@buks.ipn.de>
0xA0 all linux/sdp/sdp.h Industrial Device Project
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
new file mode 100644
index 0000000..8a5bcae
--- /dev/null
+++ b/fs/ceph/ioctl.c
@@ -0,0 +1,160 @@
+#include <linux/in.h>
+
+#include "ioctl.h"
+#include "super.h"
+#include "ceph_debug.h"
+
+
+/*
+ * ioctls
+ */
+
+/*
+ * get and set the file layout
+ */
+static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
+{
+ struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
+ struct ceph_ioctl_layout l;
+ int err;
+
+ err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
+ if (!err) {
+ l.stripe_unit = ceph_file_layout_su(ci->i_layout);
+ l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+ l.object_size = ceph_file_layout_object_size(ci->i_layout);
+ l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
+ l.preferred_osd =
+ (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
+ if (copy_to_user(arg, &l, sizeof(l)))
+ return -EFAULT;
+ }
+
+ return err;
+}
+
+static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
+ struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc;
+ struct ceph_mds_request *req;
+ struct ceph_ioctl_layout l;
+ int err, i;
+
+ /* copy and validate */
+ if (copy_from_user(&l, arg, sizeof(l)))
+ return -EFAULT;
+
+ if ((l.object_size & ~PAGE_MASK) ||
+ (l.stripe_unit & ~PAGE_MASK) ||
+ !l.stripe_unit ||
+ (l.object_size &&
+ (unsigned)l.object_size % (unsigned)l.stripe_unit))
+ return -EINVAL;
+
+ /* make sure it's a valid data pool */
+ if (l.data_pool > 0) {
+ mutex_lock(&mdsc->mutex);
+ err = -EINVAL;
+ for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++)
+ if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) {
+ err = 0;
+ break;
+ }
+ mutex_unlock(&mdsc->mutex);
+ if (err)
+ return err;
+ }
+
+ req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETLAYOUT,
+ USE_AUTH_MDS);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+ req->r_inode = igrab(inode);
+ req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
+
+ req->r_args.setlayout.layout.fl_stripe_unit =
+ cpu_to_le32(l.stripe_unit);
+ req->r_args.setlayout.layout.fl_stripe_count =
+ cpu_to_le32(l.stripe_count);
+ req->r_args.setlayout.layout.fl_object_size =
+ cpu_to_le32(l.object_size);
+ req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
+ req->r_args.setlayout.layout.fl_pg_preferred =
+ cpu_to_le32(l.preferred_osd);
+
+ err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ ceph_mdsc_put_request(req);
+ return err;
+}
+
+/*
+ * Return object name, size/offset information, and location (OSD
+ * number, network address) for a given file offset.
+ */
+static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
+{
+ struct ceph_ioctl_dataloc dl;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_osd_client *osdc = &ceph_client(inode->i_sb)->osdc;
+ u64 len = 1, olen;
+ u64 tmp;
+ struct ceph_object_layout ol;
+ struct ceph_pg pgid;
+
+ /* copy and validate */
+ if (copy_from_user(&dl, arg, sizeof(dl)))
+ return -EFAULT;
+
+ down_read(&osdc->map_sem);
+ ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
+ &dl.object_no, &dl.object_offset, &olen);
+ dl.file_offset -= dl.object_offset;
+ dl.object_size = ceph_file_layout_object_size(ci->i_layout);
+ dl.block_size = ceph_file_layout_su(ci->i_layout);
+
+ /* block_offset = object_offset % block_size */
+ tmp = dl.object_offset;
+ dl.block_offset = do_div(tmp, dl.block_size);
+
+ snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
+ ceph_ino(inode), dl.object_no);
+ ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
+ osdc->osdmap);
+
+ pgid = ol.ol_pgid;
+ dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
+ if (dl.osd >= 0) {
+ struct ceph_entity_addr *a =
+ ceph_osd_addr(osdc->osdmap, dl.osd);
+ if (a)
+ memcpy(&dl.osd_addr, &a->in_addr, sizeof(dl.osd_addr));
+ } else {
+ memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
+ }
+ up_read(&osdc->map_sem);
+
+ /* send result back to user */
+ if (copy_to_user(arg, &dl, sizeof(dl)))
+ return -EFAULT;
+
+ return 0;
+}
+
+long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
+ switch (cmd) {
+ case CEPH_IOC_GET_LAYOUT:
+ return ceph_ioctl_get_layout(file, (void __user *)arg);
+
+ case CEPH_IOC_SET_LAYOUT:
+ return ceph_ioctl_set_layout(file, (void __user *)arg);
+
+ case CEPH_IOC_GET_DATALOC:
+ return ceph_ioctl_get_dataloc(file, (void __user *)arg);
+ }
+ return -ENOTTY;
+}
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
new file mode 100644
index 0000000..25e4f1a
--- /dev/null
+++ b/fs/ceph/ioctl.h
@@ -0,0 +1,40 @@
+#ifndef FS_CEPH_IOCTL_H
+#define FS_CEPH_IOCTL_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define CEPH_IOCTL_MAGIC 0x97
+
+/* just use u64 to align sanely on all archs */
+struct ceph_ioctl_layout {
+ __u64 stripe_unit, stripe_count, object_size;
+ __u64 data_pool;
+ __s64 preferred_osd;
+};
+
+#define CEPH_IOC_GET_LAYOUT _IOR(CEPH_IOCTL_MAGIC, 1, \
+ struct ceph_ioctl_layout)
+#define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \
+ struct ceph_ioctl_layout)
+
+/*
+ * Extract identity, address of the OSD and object storing a given
+ * file offset.
+ */
+struct ceph_ioctl_dataloc {
+ __u64 file_offset; /* in+out: file offset */
+ __u64 object_offset; /* out: offset in object */
+ __u64 object_no; /* out: object # */
+ __u64 object_size; /* out: object size */
+ char object_name[64]; /* out: object name */
+ __u64 block_offset; /* out: offset in block */
+ __u64 block_size; /* out: block length */
+ __s64 osd; /* out: osd # */
+ struct sockaddr_storage osd_addr; /* out: osd address */
+};
+
+#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \
+ struct ceph_ioctl_dataloc)
+
+#endif
--
1.7.0
next prev parent reply other threads:[~2010-03-03 0:46 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-03 0:46 [PATCH 00/26] ceph distributed file system client Sage Weil
2010-03-03 0:46 ` [PATCH 01/26] ceph: documentation Sage Weil
2010-03-03 0:46 ` [PATCH 02/26] ceph: on-wire types Sage Weil
2010-03-03 0:46 ` [PATCH 03/26] ceph: client types Sage Weil
2010-03-03 0:46 ` [PATCH 04/26] ceph: hash function Sage Weil
2010-03-03 0:46 ` [PATCH 05/26] ceph: ref counted buffer Sage Weil
2010-03-03 0:46 ` [PATCH 06/26] ceph: dynamic pagelist buffer Sage Weil
2010-03-03 0:46 ` [PATCH 07/26] ceph: super.c Sage Weil
2010-03-03 0:46 ` [PATCH 08/26] ceph: inode operations Sage Weil
2010-03-03 0:46 ` [PATCH 09/26] ceph: directory operations Sage Weil
2010-03-03 0:46 ` [PATCH 10/26] ceph: file operations Sage Weil
2010-03-03 0:46 ` [PATCH 11/26] ceph: address space operations Sage Weil
2010-03-03 0:46 ` [PATCH 12/26] ceph: MDS client Sage Weil
2010-03-03 0:46 ` [PATCH 13/26] ceph: OSD client Sage Weil
2010-03-03 0:46 ` [PATCH 14/26] ceph: CRUSH mapping algorithm Sage Weil
2010-03-03 0:46 ` [PATCH 15/26] ceph: monitor client Sage Weil
2010-03-03 0:46 ` [PATCH 16/26] ceph: authentication interface Sage Weil
2010-03-03 0:46 ` [PATCH 17/26] ceph: trivial 'auth_none' authentication scheme Sage Weil
2010-03-03 0:46 ` [PATCH 18/26] ceph: 'auth_x' " Sage Weil
2010-03-03 0:46 ` [PATCH 19/26] ceph: capability management Sage Weil
2010-03-03 0:46 ` [PATCH 20/26] ceph: snapshot management Sage Weil
2010-03-03 0:46 ` [PATCH 21/26] ceph: messenger library Sage Weil
2010-03-03 0:46 ` [PATCH 22/26] ceph: message pools Sage Weil
2010-03-03 0:46 ` [PATCH 23/26] ceph: nfs re-export support Sage Weil
2010-03-03 7:48 ` Christoph Hellwig
2010-03-03 21:37 ` Sage Weil
2010-03-04 11:50 ` Miklos Szeredi
2010-03-03 0:46 ` Sage Weil [this message]
2010-03-03 0:46 ` [PATCH 25/26] ceph: debugfs Sage Weil
2010-03-03 0:46 ` [PATCH 26/26] ceph: Kconfig, Makefile, MAINTAINERS entry Sage Weil
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1267577217-31923-25-git-send-email-sage@newdream.net \
--to=sage@newdream.net \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).