From: Peng Tao <bergwolf@gmail.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-kernel@vger.kernel.org,
Thomas Leibovici <thomas.leibovici@cea.fr>,
Peng Tao <tao.peng@emc.com>,
Andreas Dilger <andreas.dilger@intel.com>
Subject: [PATCH 34/48] staging/lustre/lmv: support DNE with HSM.
Date: Tue, 23 Jul 2013 00:06:55 +0800 [thread overview]
Message-ID: <1374509230-3324-35-git-send-email-bergwolf@gmail.com> (raw)
In-Reply-To: <1374509230-3324-1-git-send-email-bergwolf@gmail.com>
From: Thomas Leibovici <thomas.leibovici@cea.fr>
Send HSM requests to the appropriate MDT. Split lists of fids of HSM
actions into one list per MDT.
Move kuc registration/unregistration from MDC to LMV as this is not
MDT related.
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3365
Lustre-change: http://review.whamcloud.com/6714
Signed-off-by: Thomas Leibovici <thomas.leibovici@cea.fr>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: jacques-Charles Lafoucriere <jacques-charles.lafoucriere@cea.fr>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
---
drivers/staging/lustre/lustre/lmv/lmv_obd.c | 200 ++++++++++++++++++++++-
drivers/staging/lustre/lustre/mdc/mdc_request.c | 18 +-
2 files changed, 205 insertions(+), 13 deletions(-)
diff --git a/drivers/staging/lustre/lustre/lmv/lmv_obd.c b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
index d10f7fc..9e68eea 100644
--- a/drivers/staging/lustre/lustre/lmv/lmv_obd.c
+++ b/drivers/staging/lustre/lustre/lmv/lmv_obd.c
@@ -44,6 +44,7 @@
#include <asm/div64.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
+#include <asm/uaccess.h>
#include <lustre/lustre_idl.h>
#include <obd_support.h>
@@ -783,6 +784,125 @@ out_fid2path:
RETURN(rc);
}
+/*
+ * Count how many items of an HSM user request must be sent to one target.
+ *
+ * Each item's FID is mapped to a target with lmv_find_target(); the item is
+ * counted when that target's UUID equals the UUID of @tgt_mds.
+ *
+ * Items whose FID cannot be mapped (lmv_find_target() returned an error
+ * pointer) are not counted.  They are skipped rather than reported because
+ * the visible caller stores the result in an unsigned counter and uses it
+ * to size a buffer, so a negative return value would be misinterpreted.
+ *
+ * Returns the number of matching items (never negative).
+ */
+static int lmv_hsm_req_count(struct lmv_obd *lmv,
+			     const struct hsm_user_request *hur,
+			     const struct lmv_tgt_desc *tgt_mds)
+{
+	unsigned int i;
+	int nr = 0;
+	struct lmv_tgt_desc *curr_tgt;
+
+	/* count how many requests must be sent to the given target */
+	for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
+		curr_tgt = lmv_find_target(lmv, &hur->hur_user_item[i].hui_fid);
+		/* never dereference an ERR_PTR: no target for this FID */
+		if (IS_ERR(curr_tgt))
+			continue;
+		if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid))
+			nr++;
+	}
+	return nr;
+}
+
+/*
+ * Build the HSM user request to be sent to a single target.
+ *
+ * Copies the request header of @hur_in into @hur_out, then copies every
+ * item whose FID maps (via lmv_find_target()) to a target with the same
+ * UUID as @tgt_mds, and finally copies the opaque data area
+ * (hr_data_len bytes) unchanged.
+ *
+ * Items whose FID cannot be mapped (lmv_find_target() returned an error
+ * pointer) are left out, so that no error pointer is ever dereferenced.
+ *
+ * @hur_out must be large enough for the selected items plus the data area;
+ * the visible caller sizes it from lmv_hsm_req_count() for the same target.
+ */
+static void lmv_hsm_req_build(struct lmv_obd *lmv,
+			      struct hsm_user_request *hur_in,
+			      const struct lmv_tgt_desc *tgt_mds,
+			      struct hsm_user_request *hur_out)
+{
+	unsigned int i, nr_out = 0;
+	struct lmv_tgt_desc *curr_tgt;
+
+	/* build the hsm_user_request for the given target */
+	hur_out->hur_request = hur_in->hur_request;
+	for (i = 0; i < hur_in->hur_request.hr_itemcount; i++) {
+		curr_tgt = lmv_find_target(lmv,
+					   &hur_in->hur_user_item[i].hui_fid);
+		/* never dereference an ERR_PTR: no target for this FID */
+		if (IS_ERR(curr_tgt))
+			continue;
+		if (obd_uuid_equals(&curr_tgt->ltd_uuid, &tgt_mds->ltd_uuid)) {
+			hur_out->hur_user_item[nr_out] =
+				hur_in->hur_user_item[i];
+			nr_out++;
+		}
+	}
+	/* NOTE(review): hr_itemcount is set before hur_data(hur_out) is
+	 * evaluated; presumably hur_data() locates the data area from the
+	 * item count - keep this order. */
+	hur_out->hur_request.hr_itemcount = nr_out;
+	memcpy(hur_data(hur_out), hur_data(hur_in),
+	       hur_in->hur_request.hr_data_len);
+}
+
+/*
+ * Unregister a copytool: forward the ioctl to every target, then remove
+ * the copytool from its kuc group.
+ *
+ * Forwarding is best effort: the result of each per-target obd_iocontrol()
+ * is deliberately ignored.  Target slots that are empty or have no export
+ * are skipped instead of being dereferenced.
+ *
+ * Returns the result of libcfs_kkuc_group_rem().
+ */
+static int lmv_hsm_ct_unregister(struct lmv_obd *lmv, unsigned int cmd, int len,
+				 struct lustre_kernelcomm *lk, void *uarg)
+{
+	int i, rc;
+	ENTRY;
+
+	/* unregister request (call from llapi_hsm_copytool_fini) */
+	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		/* nothing to talk to for this slot */
+		if (tgt == NULL || tgt->ltd_exp == NULL)
+			continue;
+
+		/* best effort: try to clean as much as possible
+		 * (continue on error) */
+		obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg);
+	}
+
+	/* Whatever the result, remove copytool from kuc groups.
+	 * Unreached coordinators will get EPIPE on next requests
+	 * and will unregister automatically.
+	 */
+	rc = libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group);
+	RETURN(rc);
+}
+
+/*
+ * Register a copytool: forward the ioctl to every target, then add the
+ * copytool's file descriptor to its kuc group.
+ *
+ * A failure on a target marked active (ltd_active) is treated as
+ * permanent: LK_FLG_STOP is set in lk->lk_flags (and left set on return),
+ * the ioctl is re-sent to the targets tried earlier so they unregister,
+ * and the error is returned.  A failure on an inactive target is ignored.
+ * Target slots that are empty or have no export are skipped instead of
+ * being dereferenced.
+ *
+ * Returns:
+ *   the target's error code on a permanent failure;
+ *   -ENOTCONN if no target accepted the registration;
+ *   -EBADF if lk->lk_wfd does not refer to an open file;
+ *   otherwise the result of libcfs_kkuc_group_add().
+ */
+static int lmv_hsm_ct_register(struct lmv_obd *lmv, unsigned int cmd, int len,
+			       struct lustre_kernelcomm *lk, void *uarg)
+{
+	struct file *filp;
+	int i, j, err;
+	int rc = 0;
+	bool any_set = false;
+	ENTRY;
+
+	/* All or nothing: try to register to all MDS.
+	 * In case of failure, unregister from previous MDS,
+	 * except if it is because of an inactive target. */
+	for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+		struct lmv_tgt_desc *tgt = lmv->tgts[i];
+
+		/* nothing to talk to for this slot */
+		if (tgt == NULL || tgt->ltd_exp == NULL)
+			continue;
+
+		err = obd_iocontrol(cmd, tgt->ltd_exp, len, lk, uarg);
+		if (err == 0) {
+			any_set = true;
+			continue;
+		}
+
+		/* transient error on an inactive target:
+		 * kuc will register to the missing MDT
+		 * when it is back */
+		if (!tgt->ltd_active)
+			continue;
+
+		/* permanent error */
+		CERROR("error: iocontrol MDC %s on MDT "
+		       "idx %d cmd %x: err = %d\n",
+		       tgt->ltd_uuid.uuid, i, cmd, err);
+		rc = err;
+		/* same cmd with LK_FLG_STOP set means "unregister" */
+		lk->lk_flags |= LK_FLG_STOP;
+		/* unregister from previous MDS */
+		for (j = 0; j < i; j++) {
+			struct lmv_tgt_desc *prev = lmv->tgts[j];
+
+			if (prev == NULL || prev->ltd_exp == NULL)
+				continue;
+			obd_iocontrol(cmd, prev->ltd_exp, len, lk, uarg);
+		}
+		RETURN(rc);
+	}
+
+	if (!any_set)
+		/* no registration done: return error */
+		RETURN(-ENOTCONN);
+
+	/* at least one registration done, with no failure */
+	/* NOTE(review): if fget() or libcfs_kkuc_group_add() fails below,
+	 * the registrations already made on the targets are not undone;
+	 * confirm whether a rollback is wanted here. */
+	filp = fget(lk->lk_wfd);
+	if (filp == NULL)
+		RETURN(-EBADF);
+
+	rc = libcfs_kkuc_group_add(filp, lk->lk_uid, lk->lk_group, lk->lk_data);
+	/* on failure the file reference taken by fget() is dropped here */
+	if (rc != 0)
+		fput(filp);
+	RETURN(rc);
+}
+
+
+
+
static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
int len, void *karg, void *uarg)
{
@@ -908,7 +1028,77 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
}
case LL_IOC_HSM_STATE_GET:
case LL_IOC_HSM_STATE_SET:
- case LL_IOC_HSM_ACTION:
+ case LL_IOC_HSM_ACTION: {
+ struct md_op_data *op_data = karg;
+ struct lmv_tgt_desc *tgt;
+
+ tgt = lmv_find_target(lmv, &op_data->op_fid1);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+
+ if (tgt->ltd_exp == NULL)
+ RETURN(-EINVAL);
+
+ rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
+ break;
+ }
+ case LL_IOC_HSM_PROGRESS: {
+ const struct hsm_progress_kernel *hpk = karg;
+ struct lmv_tgt_desc *tgt;
+
+ tgt = lmv_find_target(lmv, &hpk->hpk_fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+ rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
+ break;
+ }
+ case LL_IOC_HSM_REQUEST: {
+ struct hsm_user_request *hur = karg;
+ struct lmv_tgt_desc *tgt;
+ unsigned int reqcount = hur->hur_request.hr_itemcount;
+
+ if (reqcount == 0)
+ RETURN(0);
+
+ /* if the request is about a single fid
+ * or if there is a single MDS, no need to split
+ * the request. */
+ if (reqcount == 1 || count == 1) {
+ tgt = lmv_find_target(lmv,
+ &hur->hur_user_item[0].hui_fid);
+ if (IS_ERR(tgt))
+ RETURN(PTR_ERR(tgt));
+ rc = obd_iocontrol(cmd, tgt->ltd_exp, len, karg, uarg);
+ } else {
+ /* split fid list to their respective MDS */
+ for (i = 0; i < count; i++) {
+ unsigned int nr, reqlen;
+ int rc1;
+ struct hsm_user_request *req;
+
+ nr = lmv_hsm_req_count(lmv, hur, lmv->tgts[i]);
+ if (nr == 0) /* nothing for this MDS */
+ continue;
+
+ /* build a request with fids for this MDS */
+ reqlen = offsetof(typeof(*hur),
+ hur_user_item[nr])
+ + hur->hur_request.hr_data_len;
+ OBD_ALLOC_LARGE(req, reqlen);
+ if (req == NULL)
+ RETURN(-ENOMEM);
+
+ lmv_hsm_req_build(lmv, hur, lmv->tgts[i], req);
+
+ rc1 = obd_iocontrol(cmd, lmv->tgts[i]->ltd_exp,
+ reqlen, req, uarg);
+ if (rc1 != 0 && rc == 0)
+ rc = rc1;
+ OBD_FREE_LARGE(req, reqlen);
+ }
+ }
+ break;
+ }
case LL_IOC_LOV_SWAP_LAYOUTS: {
struct md_op_data *op_data = karg;
struct lmv_tgt_desc *tgt1, *tgt2;
@@ -931,6 +1121,14 @@ static int lmv_iocontrol(unsigned int cmd, struct obd_export *exp,
rc = obd_iocontrol(cmd, tgt1->ltd_exp, len, karg, uarg);
break;
}
+ case LL_IOC_HSM_CT_START: {
+ struct lustre_kernelcomm *lk = karg;
+ if (lk->lk_flags & LK_FLG_STOP)
+ rc = lmv_hsm_ct_unregister(lmv, cmd, len, lk, uarg);
+ else
+ rc = lmv_hsm_ct_register(lmv, cmd, len, lk, uarg);
+ break;
+ }
default:
for (i = 0; i < count; i++) {
struct obd_device *mdc_obd;
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index b8bf2dc..191edc6 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -1771,6 +1771,9 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
GOTO(out, rc);
case LL_IOC_HSM_CT_START:
rc = mdc_ioc_hsm_ct_start(exp, karg);
+ /* ignore if it was already registered on this MDS. */
+ if (rc == -EEXIST)
+ rc = 0;
GOTO(out, rc);
case LL_IOC_HSM_PROGRESS:
rc = mdc_ioc_hsm_progress(exp, karg);
@@ -1988,19 +1991,10 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
lk->lk_uid, lk->lk_group, lk->lk_flags);
if (lk->lk_flags & LK_FLG_STOP) {
- rc = libcfs_kkuc_group_rem(lk->lk_uid, lk->lk_group);
/* Unregister with the coordinator */
- if (rc == 0)
- rc = mdc_ioc_hsm_ct_unregister(imp);
+ rc = mdc_ioc_hsm_ct_unregister(imp);
} else {
- struct file *fp = fget(lk->lk_wfd);
-
- rc = libcfs_kkuc_group_add(fp, lk->lk_uid, lk->lk_group,
- lk->lk_data);
- if (rc && fp)
- fput(fp);
- if (rc == 0)
- rc = mdc_ioc_hsm_ct_register(imp, archive);
+ rc = mdc_ioc_hsm_ct_register(imp, archive);
}
return rc;
@@ -2325,7 +2319,7 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
}
case IMP_EVENT_ACTIVE:
rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
- /* restore re-establish kuc registration after reconnecting */
+ /* redo the kuc registration after reconnecting */
if (rc == 0)
rc = mdc_kuc_reregister(imp);
break;
--
1.7.9.5
next prev parent reply other threads:[~2013-07-22 16:12 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-22 16:06 [PATCH 00/48] staging/lustre: minor cleanup and Intel Lustre sync Peng Tao
2013-07-22 16:06 ` [PATCH 01/48] staging/lustre: remove bogus ifndef EXPORT_SYMBOL Peng Tao
2013-07-22 16:06 ` [PATCH 02/48] staging/lustre/obdclass: restore lu_ref Peng Tao
2013-07-23 19:37 ` Greg Kroah-Hartman
2013-07-23 20:31 ` Dilger, Andreas
2013-07-23 20:52 ` Greg Kroah-Hartman
2013-07-24 6:06 ` Peng Tao
2013-07-22 16:06 ` [PATCH 03/48] staging/lustre/llite: use READ, WRITE around ll_rw_stats_tally() Peng Tao
2013-07-22 16:06 ` [PATCH 04/48] staging/lustre/llite: missing last bit in ll_have_md_lock Peng Tao
2013-07-22 16:06 ` [PATCH 05/48] staging/lustre: fix 'program hangs' errors Peng Tao
2013-07-22 16:06 ` [PATCH 06/48] staging/lustre/llite: check alloc in ll_file_data_get, ll_dir_ioctl Peng Tao
2013-07-22 16:06 ` [PATCH 07/48] staging/lustre/llite: A not locked mutex can be unlocked Peng Tao
2013-07-22 16:06 ` [PATCH 08/48] staging/lustre/llite: check ll_prep_md_op_data() using IS_ERR() Peng Tao
2013-07-22 16:06 ` [PATCH 09/48] staging/lustre/ldlm: print FID in lvbo_init(), lvbo_update Peng Tao
2013-07-22 16:06 ` [PATCH 10/48] staging/lustre/ptlrpc: race in pinger (use-after-free situation) Peng Tao
2013-07-22 16:06 ` [PATCH 11/48] staging/lustre/ptlrpc: Translate between host and network errnos Peng Tao
2013-07-22 16:29 ` Paul Bolle
2013-07-22 16:36 ` Peng Tao
2013-07-22 16:06 ` [PATCH 12/48] staging/lustre/mdc: layout lock rpc must not take rpc_lock Peng Tao
2013-07-22 16:06 ` [PATCH 13/48] staging/lustre/ldlm: split client namespaces into active and inactive Peng Tao
2013-07-22 16:06 ` [PATCH 14/48] staging/lustre: Only wake up ldlm_poold as frequently as the check interval Peng Tao
2013-07-22 16:06 ` [PATCH 15/48] staging/lustre: Make quota namespace refcounting consistent Peng Tao
2013-07-22 16:06 ` [PATCH 16/48] staging/lustre/dlmlock: compress out unused space Peng Tao
2013-07-22 16:06 ` [PATCH 17/48] staging/lustre/md: fix lu_ucred.c boilerplate Peng Tao
2013-07-22 16:06 ` [PATCH 18/48] staging/lustre/layout: introduce new layout for released files Peng Tao
2013-07-22 16:06 ` [PATCH 19/48] staging/lustre/mdt: add macros for fid string len Peng Tao
2013-07-22 16:06 ` [PATCH 20/48] staging/lustre/llapi: add user space method for lov_user_md Peng Tao
2013-07-22 16:06 ` [PATCH 21/48] staging/lustre/obdclass: use a dummy structure for lu_ref_link Peng Tao
2013-07-23 20:35 ` Greg Kroah-Hartman
2013-07-24 6:10 ` Peng Tao
2013-07-22 16:06 ` [PATCH 22/48] staging/lustre: fix 'code maintainability' errors Peng Tao
2013-07-22 16:06 ` [PATCH 23/48] staging/lustre/fid: prepare FID module for client server split Peng Tao
2013-07-22 16:06 ` [PATCH 24/48] staging/lustre/llite: force lvb_data update after layout change Peng Tao
2013-07-22 16:06 ` [PATCH 25/48] staging/lustre/lfsck: LFSCK 1.5 technical debts (3) Peng Tao
2013-07-22 16:06 ` [PATCH 26/48] staging/lustre/osc: Check return code for lu_kmem_init Peng Tao
2013-07-22 16:06 ` [PATCH 27/48] staging/lustre/ptlrpc: Race between start and stop service threads Peng Tao
2013-07-22 16:06 ` [PATCH 28/48] staging/lustre/crypto: add crc32c module loading to libcfs Peng Tao
2013-07-22 16:06 ` [PATCH 29/48] staging/lustre/mdt: duplicate link names in directory Peng Tao
2013-07-22 16:06 ` [PATCH 30/48] staging/lustre/llite: call simple_setattr() from ll_md_setattr() Peng Tao
2013-07-22 16:06 ` [PATCH 31/48] staging/lustre/ldlm: Fix flock deadlock detection race Peng Tao
2013-07-22 16:06 ` [PATCH 32/48] staging/lustre/lnet: remove empty file lnet/lnet/api-errno.c Peng Tao
2013-07-22 16:06 ` [PATCH 33/48] staging/lustre/fld: prepare FLD module for client server split Peng Tao
2013-07-22 16:06 ` Peng Tao [this message]
2013-07-22 16:06 ` [PATCH 35/48] staging/lustre/lmv: support DNE with HSM Peng Tao
2013-07-23 20:39 ` Greg Kroah-Hartman
2013-07-22 16:06 ` [PATCH 36/48] staging/lustre/obdclass: add obd_target.h Peng Tao
2013-07-23 20:41 ` Greg Kroah-Hartman
2013-07-22 16:06 ` [PATCH 37/48] staging/lustre/procfs: return -ENOMEM from lprocfs_register() Peng Tao
2013-07-22 16:06 ` [PATCH 38/48] staging/lustre/lmv: fix duplicate directory entries Peng Tao
2013-07-22 16:07 ` [PATCH 39/48] staging/lustre/obdclass: be more careful processing server name Peng Tao
2013-07-22 16:07 ` [PATCH 40/48] staging/lustre/llite: return valid fsid for statfs Peng Tao
2013-07-22 16:07 ` [PATCH 41/48] staging/lustre/llite: error of listxattr when buffer is small Peng Tao
2013-07-22 16:07 ` [PATCH 42/48] staging/lustre/llite: Anonymous dentry incorrectly identified as root Peng Tao
2013-07-22 16:07 ` [PATCH 43/48] staging/lustre/build: fix 'data race condition' issues Peng Tao
2013-07-22 16:07 ` [PATCH 44/48] " Peng Tao
2013-07-23 20:44 ` Greg Kroah-Hartman
2013-07-22 16:07 ` [PATCH 45/48] staging/lustre/style: removes obsolete EXPORT_SYMTAB macros v2 Peng Tao
2013-07-22 16:07 ` [PATCH 46/48] staging/lustre/mdt: HSM on disk actions record Peng Tao
2013-07-22 16:07 ` [PATCH 47/48] staging/lustre/scrub: purge inconsistenct objects after OI scrub Peng Tao
2013-07-22 16:07 ` [PATCH 48/48] staging/lustre/mdc: Keep resend FLocks Peng Tao
2013-07-23 20:51 ` [PATCH 00/48] staging/lustre: minor cleanup and Intel Lustre sync Greg Kroah-Hartman
2013-07-24 6:05 ` Peng Tao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1374509230-3324-35-git-send-email-bergwolf@gmail.com \
--to=bergwolf@gmail.com \
--cc=andreas.dilger@intel.com \
--cc=gregkh@linuxfoundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tao.peng@emc.com \
--cc=thomas.leibovici@cea.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox