From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records
Date: Sun, 4 Apr 2021 20:50:51 -0400 [thread overview]
Message-ID: <1617583870-32029-23-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1617583870-32029-1-git-send-email-jsimmons@infradead.org>
From: Alex Zhuravlev <bzzz@whamcloud.com>
If an llog's header or record is found to be corrupted, ignore the
remaining records in the current chunk and continue with the next chunk.
WC-bug-id: https://jira.whamcloud.com/browse/LU-14098
Lustre-commit: 910eb97c1b43a44 ("LU-14098 obdclass: try to skip corrupted llog records")
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/40754
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
fs/lustre/obdclass/llog.c | 76 ++++++++++++++++++++++++++++++--------
fs/lustre/obdclass/llog_cat.c | 14 +++----
fs/lustre/obdclass/llog_internal.h | 5 +++
3 files changed, 72 insertions(+), 23 deletions(-)
diff --git a/fs/lustre/obdclass/llog.c b/fs/lustre/obdclass/llog.c
index e172ebc..7668d51 100644
--- a/fs/lustre/obdclass/llog.c
+++ b/fs/lustre/obdclass/llog.c
@@ -184,7 +184,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
(llh->llh_flags & LLOG_F_IS_CAT &&
flags & LLOG_F_IS_PLAIN))) {
CERROR("%s: llog type is %s but initializing %s\n",
- handle->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(handle),
llh->llh_flags & LLOG_F_IS_CAT ?
"catalog" : "plain",
flags & LLOG_F_IS_CAT ? "catalog" : "plain");
@@ -206,7 +206,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
if (unlikely(uuid &&
!obd_uuid_equals(uuid, &llh->llh_tgtuuid))) {
CERROR("%s: llog uuid mismatch: %s/%s\n",
- handle->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(handle),
(char *)uuid->uuid,
(char *)llh->llh_tgtuuid.uuid);
rc = -EEXIST;
@@ -220,8 +220,8 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
llh->llh_flags |= LLOG_F_IS_FIXSIZE;
} else if (!(flags & LLOG_F_IS_PLAIN)) {
CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
- handle->lgh_ctxt->loc_obd->obd_name,
- flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
+ loghandle2name(handle), flags, LLOG_F_IS_CAT,
+ LLOG_F_IS_PLAIN);
rc = -EINVAL;
}
llh->llh_flags |= fmt;
@@ -234,6 +234,29 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
}
EXPORT_SYMBOL(llog_init_handle);
+int llog_verify_record(const struct llog_handle *llh, struct llog_rec_hdr *rec)
+{
+ int chunk_size = llh->lgh_hdr->llh_hdr.lrh_len;
+
+ if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) {
+ CERROR("%s: record is too large: %d > %d\n",
+ loghandle2name(llh), rec->lrh_len, chunk_size);
+ return -EINVAL;
+ }
+ if (rec->lrh_index >= LLOG_HDR_BITMAP_SIZE(llh->lgh_hdr)) {
+ CERROR("%s: index is too high: %d\n",
+ loghandle2name(llh), rec->lrh_index);
+ return -EINVAL;
+ }
+ if ((rec->lrh_type & LLOG_OP_MASK) != LLOG_OP_MAGIC) {
+ CERROR("%s: magic %x is bad\n",
+ loghandle2name(llh), rec->lrh_type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int llog_process_thread(void *arg)
{
struct llog_process_info *lpi = arg;
@@ -247,6 +270,7 @@ static int llog_process_thread(void *arg)
int saved_index = 0;
int last_called_index = 0;
bool repeated = false;
+ bool refresh_idx = false;
if (!llh)
return -EINVAL;
@@ -380,12 +404,21 @@ static int llog_process_thread(void *arg)
repeated = false;
- if (!rec->lrh_len || rec->lrh_len > chunk_size) {
- CWARN("invalid length %d in llog record for index %d/%d\n",
- rec->lrh_len,
- rec->lrh_index, index);
- rc = -EINVAL;
- goto out;
+ rc = llog_verify_record(loghandle, rec);
+ if (rc) {
+ CERROR("%s: invalid record in llog "DFID" record for index %d/%d: rc = %d\n",
+ loghandle2name(loghandle),
+ PFID(&loghandle->lgh_id.lgl_oi.oi_fid),
+ rec->lrh_len, index, rc);
+ /*
+ * the block seems to be corrupted, let's try
+ * with the next one. Reset rc to go to the
+ * next chunk.
+ */
+ refresh_idx = true;
+ index = 0;
+ rc = 0;
+ goto repeat;
}
if (rec->lrh_index < index) {
@@ -395,11 +428,22 @@ static int llog_process_thread(void *arg)
}
if (rec->lrh_index != index) {
- CERROR("%s: Invalid record: index %u but expected %u\n",
- loghandle->lgh_ctxt->loc_obd->obd_name,
- rec->lrh_index, index);
- rc = -ERANGE;
- goto out;
+ /*
+ * the last time we couldn't parse the block due
+ * to corruption and thus have no idea about the
+ * next index; take it from the block, once.
+ */
+ if (refresh_idx) {
+ refresh_idx = false;
+ index = rec->lrh_index;
+ } else {
+ CERROR("%s: "DFID" Invalid record: index %u but expected %u\n",
+ loghandle2name(loghandle),
+ PFID(&loghandle->lgh_id.lgl_oi.oi_fid),
+ rec->lrh_index, index);
+ rc = -ERANGE;
+ goto out;
+ }
}
CDEBUG(D_OTHER,
@@ -501,7 +545,7 @@ int llog_process_or_fork(const struct lu_env *env,
if (IS_ERR(task)) {
rc = PTR_ERR(task);
CERROR("%s: cannot start thread: rc = %d\n",
- loghandle->lgh_ctxt->loc_obd->obd_name, rc);
+ loghandle2name(loghandle), rc);
goto out_lpi;
}
wait_for_completion(&lpi->lpi_completion);
diff --git a/fs/lustre/obdclass/llog_cat.c b/fs/lustre/obdclass/llog_cat.c
index 9298808..b67e7a2b 100644
--- a/fs/lustre/obdclass/llog_cat.c
+++ b/fs/lustre/obdclass/llog_cat.c
@@ -80,7 +80,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
ostid_seq(&cgl->lgl_oi) == ostid_seq(&logid->lgl_oi)) {
if (cgl->lgl_ogen != logid->lgl_ogen) {
CWARN("%s: log " DFID " generation %x != %x\n",
- loghandle->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(loghandle),
PFID(&logid->lgl_oi.oi_fid),
cgl->lgl_ogen, logid->lgl_ogen);
continue;
@@ -88,7 +88,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
*res = llog_handle_get(loghandle);
if (!*res) {
CERROR("%s: log "DFID" refcount is zero!\n",
- loghandle->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(loghandle),
PFID(&logid->lgl_oi.oi_fid));
continue;
}
@@ -103,8 +103,8 @@ static int llog_cat_id2handle(const struct lu_env *env,
LLOG_OPEN_EXISTS);
if (rc < 0) {
CERROR("%s: error opening log id " DFID ":%x: rc = %d\n",
- cathandle->lgh_ctxt->loc_obd->obd_name,
- PFID(&logid->lgl_oi.oi_fid), logid->lgl_ogen, rc);
+ loghandle2name(cathandle), PFID(&logid->lgl_oi.oi_fid),
+ logid->lgl_ogen, rc);
return rc;
}
@@ -155,7 +155,7 @@ static int llog_cat_process_common(const struct lu_env *env,
if (rec->lrh_type != le32_to_cpu(LLOG_LOGID_MAGIC)) {
rc = -EINVAL;
CWARN("%s: invalid record in catalog " DFID ":%x: rc = %d\n",
- cat_llh->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(cat_llh),
PFID(&cat_llh->lgh_id.lgl_oi.oi_fid),
cat_llh->lgh_id.lgl_ogen, rc);
@@ -170,7 +170,7 @@ static int llog_cat_process_common(const struct lu_env *env,
rc = llog_cat_id2handle(env, cat_llh, llhp, &lir->lid_id);
if (rc) {
CWARN("%s: can't find llog handle " DFID ":%x: rc = %d\n",
- cat_llh->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(cat_llh),
PFID(&lir->lid_id.lgl_oi.oi_fid),
lir->lid_id.lgl_ogen, rc);
@@ -235,7 +235,7 @@ static int llog_cat_process_or_fork(const struct lu_env *env,
struct llog_process_cat_data cd;
CWARN("%s: catlog " DFID " crosses index zero\n",
- cat_llh->lgh_ctxt->loc_obd->obd_name,
+ loghandle2name(cat_llh),
PFID(&cat_llh->lgh_id.lgl_oi.oi_fid));
/*startcat = 0 is default value for general processing */
if ((startcat != LLOG_CAT_FIRST &&
diff --git a/fs/lustre/obdclass/llog_internal.h b/fs/lustre/obdclass/llog_internal.h
index c34adfe..41ac4f0 100644
--- a/fs/lustre/obdclass/llog_internal.h
+++ b/fs/lustre/obdclass/llog_internal.h
@@ -74,4 +74,9 @@ static inline struct llog_rec_hdr *llog_rec_hdr_next(struct llog_rec_hdr *rec)
{
return (struct llog_rec_hdr *)((char *)rec + rec->lrh_len);
}
+
+static inline char *loghandle2name(const struct llog_handle *lgh)
+{
+ return lgh->lgh_ctxt->loc_obd->obd_name;
+}
#endif
--
1.8.3.1
_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org
next prev parent reply other threads:[~2021-04-05 0:52 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-04-05 0:50 [lustre-devel] [PATCH 00/41] lustre: sync to OpenSFS branch as of March 1 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 01/41] lustre: llite: data corruption due to RPC reordering James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 02/41] lustre: llite: make readahead aware of hints James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 03/41] lustre: lov: avoid NULL dereference in cleanup James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 04/41] lustre: llite: quiet spurious ioctl warning James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 05/41] lustre: ptlrpc: do not output error when imp_sec is freed James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 06/41] lustre: update version to 2.14.0 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 07/41] lnet: UDSP storage and marshalled structs James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 08/41] lnet: foundation patch for selection mod James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 09/41] lnet: Preferred gateway selection James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 10/41] lnet: Select NI/peer NI with highest prio James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 11/41] lnet: select best peer and local net James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 12/41] lnet: UDSP handling James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 13/41] lnet: Apply UDSP on local and remote NIs James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 14/41] lnet: Add the kernel level Marshalling API James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 15/41] lnet: Add the kernel level De-Marshalling API James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 16/41] lnet: Add the ioctl handler for "add policy" James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 17/41] lnet: ioctl handler for "delete policy" James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 18/41] lnet: ioctl handler for get policy info James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 19/41] lustre: update version to 2.14.50 James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 20/41] lustre: gss: handle empty reqmsg in sptlrpc_req_ctx_switch James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 21/41] lustre: sec: file ioctls to handle encryption policies James Simmons
2021-04-05 0:50 ` James Simmons [this message]
2021-04-05 0:50 ` [lustre-devel] [PATCH 23/41] lustre: lov: fix layout generation inc for mirror split James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 24/41] lnet: modify assertion in lnet_post_send_locked James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 26/41] lustre: lov: grant deadlock if same OSC in two components James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 27/41] lustre: change EWOULDBLOCK to EAGAIN James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 28/41] lsutre: ldlm: return error from ldlm_namespace_new() James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 29/41] lustre: llite: remove unused ll_teardown_mmaps() James Simmons
2021-04-05 0:50 ` [lustre-devel] [PATCH 30/41] lustre: lov: style cleanups in lov_set_osc_active() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 31/41] lustre: change various operations structs to const James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 32/41] lustre: mark strings in char arrays as const James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 33/41] lustre: convert snprintf to scnprintf as appropriate James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 34/41] lustre: remove non-static 'inline' markings James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 35/41] lustre: llite: use is_root_inode() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 36/41] lnet: libcfs: discard cfs_firststr James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 37/41] lnet: place wire protocol data int own headers James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 38/41] lnet: libcfs: use wait_event_timeout() in tracefiled() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 39/41] lnet: use init_wait() rather than init_waitqueue_entry() James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 40/41] lnet: discard LNET_MD_PHYS James Simmons
2021-04-05 0:51 ` [lustre-devel] [PATCH 41/41] lnet: o2iblnd: convert peers hash table to hashtable.h James Simmons
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1617583870-32029-23-git-send-email-jsimmons@infradead.org \
--to=jsimmons@infradead.org \
--cc=adilger@whamcloud.com \
--cc=green@whamcloud.com \
--cc=lustre-devel@lists.lustre.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).