From mboxrd@z Thu Jan 1 00:00:00 1970 From: Sunil Mushran Date: Tue, 10 May 2011 10:17:31 -0700 Subject: [Ocfs2-devel] [PATCH 3/3] ocfs2/dlm: Do not migrate resource to a node that is leaving the domain In-Reply-To: <20110510061933.GA7881@laptop.jp.oracle.com> References: <1303859005-29529-1-git-send-email-sunil.mushran@oracle.com> <1303859005-29529-3-git-send-email-sunil.mushran@oracle.com> <20110510061933.GA7881@laptop.jp.oracle.com> Message-ID: <4DC9732B.7000702@oracle.com> List-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: ocfs2-devel@oss.oracle.com Yes, this series addresses that problem. On 05/09/2011 11:19 PM, Wengang Wang wrote: > Hi Sunil, > > Your this series of patches fix a problem in mmap(and could also other) > test in ocfs2-test. > The problem is that when all the three nodes do unmount at the same time, > two of them keeps migrating a lockres to each other. So the umount on these > two nodes hang there. I have idea why dlm_run_purge_list have no change to > purge it. Your patches fix it :) > > thanks, > wengang. > > On 11-04-26 16:03, Sunil Mushran wrote: >> During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that >> have no locks and references are freed. Ones that have locks and/or references >> are migrated to another node. >> >> The first task in migration is finding a target. Currently we scan the lock >> resource and find one node that either has a lock or a reference. This is not >> very efficient in a parallel umount case as we might end up migrating the >> lock resource to a node which itself may have to migrate it to a third node. >> >> The patch scans the dlm->exit_domain_map to ensure the target node is not >> leaving the domain. If no valid target node is found, o2dlm does not migrate >> the resource but instead waits for the unlock and deref messages that will >> allow it to free the resource. >> >> Signed-off-by: Sunil Mushran >> --- >> fs/ocfs2/dlm/dlmdomain.c | 10 ++- >> fs/ocfs2/dlm/dlmmaster.c | 139 ++++++++++++++++----------------------------- >> 2 files changed, 57 insertions(+), 92 deletions(-) >> >> diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c >> index 3aff23f..6ed6b95 100644 >> --- a/fs/ocfs2/dlm/dlmdomain.c >> +++ b/fs/ocfs2/dlm/dlmdomain.c >> @@ -451,14 +451,18 @@ redo_bucket: >> dropped = dlm_empty_lockres(dlm, res); >> >> spin_lock(&res->spinlock); >> - __dlm_lockres_calc_usage(dlm, res); >> - iter = res->hash_node.next; >> + if (dropped) >> + __dlm_lockres_calc_usage(dlm, res); >> + else >> + iter = res->hash_node.next; >> spin_unlock(&res->spinlock); >> >> dlm_lockres_put(res); >> >> - if (dropped) >> + if (dropped) { >> + cond_resched_lock(&dlm->spinlock); >> goto redo_bucket; >> + } >> } >> cond_resched_lock(&dlm->spinlock); >> num += n; >> diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c >> index 3e59ff9..4499d86 100644 >> --- a/fs/ocfs2/dlm/dlmmaster.c >> +++ b/fs/ocfs2/dlm/dlmmaster.c >> @@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, >> >> >> static int dlm_migrate_lockres(struct dlm_ctxt *dlm, >> - struct dlm_lock_resource *res, >> - u8 target) >> + struct dlm_lock_resource *res, u8 target) >> { >> struct dlm_master_list_entry *mle = NULL; >> struct dlm_master_list_entry *oldmle = NULL; >> @@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, >> if (!dlm_grab(dlm)) >> return -EINVAL; >> >> + BUG_ON(target == O2NM_MAX_NODES); >> + >> name = res->lockname.name; >> namelen = res->lockname.len; >> >> - mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); >> - >> - /* Ensure this lockres is a proper candidate for migration */ >> - spin_lock(&res->spinlock); >> - ret = dlm_is_lockres_migrateable(dlm, res); >> - spin_unlock(&res->spinlock); >> - >> - /* No work to do */ >> - if (!ret) >> - goto leave; >> - >> - /* >> - * preallocate up front >> - * if this fails, abort >> - */ >> + mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, >> + target); >> >> + /* preallocate up front. if this fails, abort */ >> ret = -ENOMEM; >> mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); >> if (!mres) { >> @@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, >> ret = 0; >> >> /* >> - * find a node to migrate the lockres to >> - */ >> - >> - spin_lock(&dlm->spinlock); >> - /* pick a new node */ >> - if (!test_bit(target, dlm->domain_map) || >> - target>= O2NM_MAX_NODES) { >> - target = dlm_pick_migration_target(dlm, res); >> - } >> - mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, >> - namelen, name, target); >> - >> - if (target>= O2NM_MAX_NODES || >> - !test_bit(target, dlm->domain_map)) { >> - /* target chosen is not alive */ >> - ret = -EINVAL; >> - } >> - >> - if (ret) { >> - spin_unlock(&dlm->spinlock); >> - goto fail; >> - } >> - >> - mlog(0, "continuing with target = %u\n", target); >> - >> - /* >> * clear any existing master requests and >> * add the migration mle to the list >> */ >> + spin_lock(&dlm->spinlock); >> spin_lock(&dlm->master_lock); >> ret = dlm_add_migration_mle(dlm, res, mle,&oldmle, name, >> namelen, target, dlm->node_num); >> @@ -2514,6 +2478,7 @@ fail: >> dlm_put_mle(mle); >> } else if (mle) { >> kmem_cache_free(dlm_mle_cache, mle); >> + mle = NULL; >> } >> goto leave; >> } >> @@ -2632,7 +2597,6 @@ leave: >> if (wake) >> wake_up(&res->wq); >> >> - /* TODO: cleanup */ >> if (mres) >> free_page((unsigned long)mres); >> >> @@ -2657,28 +2621,28 @@ leave: >> */ >> int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) >> { >> - int mig, ret; >> + int ret; >> int lock_dropped = 0; >> + u8 target = O2NM_MAX_NODES; >> >> assert_spin_locked(&dlm->spinlock); >> >> spin_lock(&res->spinlock); >> - mig = dlm_is_lockres_migrateable(dlm, res); >> + if (dlm_is_lockres_migrateable(dlm, res)) >> + target = dlm_pick_migration_target(dlm, res); >> spin_unlock(&res->spinlock); >> - if (!mig) >> + >> + if (target == O2NM_MAX_NODES) >> goto leave; >> >> /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ >> spin_unlock(&dlm->spinlock); >> lock_dropped = 1; >> - while (1) { >> - ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); >> - if (ret>= 0) >> - break; >> - mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, >> - res->lockname.len, res->lockname.name); >> - msleep(DLM_MIGRATION_RETRY_MS); >> - } >> + ret = dlm_migrate_lockres(dlm, res, target); >> + if (ret) >> + mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", >> + dlm->name, res->lockname.len, res->lockname.name, >> + target, ret); >> spin_lock(&dlm->spinlock); >> leave: >> return lock_dropped; >> @@ -2862,61 +2826,58 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, >> } >> } >> >> -/* for now this is not too intelligent. we will >> - * need stats to make this do the right thing. >> - * this just finds the first lock on one of the >> - * queues and uses that node as the target. */ >> +/* >> + * Pick a node to migrate the lock resource to. This function selects a >> + * potential target based first on the locks and then on refmap. It skips >> + * nodes that are in the process of exiting the domain. >> + */ >> static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, >> struct dlm_lock_resource *res) >> { >> - int i; >> + enum dlm_lockres_list idx; >> struct list_head *queue =&res->granted; >> struct dlm_lock *lock; >> int nodenum; >> >> assert_spin_locked(&dlm->spinlock); >> + assert_spin_locked(&res->spinlock); >> >> - spin_lock(&res->spinlock); >> - for (i=0; i<3; i++) { >> + /* Go through all the locks */ >> + for (idx = DLM_GRANTED_LIST; idx<= DLM_BLOCKED_LIST; idx++) { >> + queue = dlm_list_idx_to_ptr(res, idx); >> list_for_each_entry(lock, queue, list) { >> - /* up to the caller to make sure this node >> - * is alive */ >> - if (lock->ml.node != dlm->node_num) { >> - spin_unlock(&res->spinlock); >> - return lock->ml.node; >> - } >> + if (lock->ml.node == dlm->node_num) >> + continue; >> + if (test_bit(lock->ml.node, dlm->exit_domain_map)) >> + continue; >> + if (!test_bit(lock->ml.node, dlm->domain_map)) >> + continue; >> + nodenum = lock->ml.node; >> + goto bail; >> } >> - queue++; >> - } >> - >> - nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); >> - if (nodenum< O2NM_MAX_NODES) { >> - spin_unlock(&res->spinlock); >> - return nodenum; >> } >> - spin_unlock(&res->spinlock); >> - mlog(0, "have not found a suitable target yet! checking domain map\n"); >> >> - /* ok now we're getting desperate. pick anyone alive. */ >> + /* Go thru the refmap */ >> nodenum = -1; >> while (1) { >> - nodenum = find_next_bit(dlm->domain_map, >> - O2NM_MAX_NODES, nodenum+1); >> - mlog(0, "found %d in domain map\n", nodenum); >> + nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, >> + nodenum + 1); >> if (nodenum>= O2NM_MAX_NODES) >> break; >> - if (nodenum != dlm->node_num) { >> - mlog(0, "picking %d\n", nodenum); >> - return nodenum; >> - } >> + if (nodenum == dlm->node_num) >> + continue; >> + if (test_bit(nodenum, dlm->exit_domain_map)) >> + continue; >> + if (!test_bit(lock->ml.node, dlm->domain_map)) >> + continue; >> + goto bail; >> } >> >> - mlog(0, "giving up. no master to migrate to\n"); >> - return DLM_LOCK_RES_OWNER_UNKNOWN; >> + nodenum = O2NM_MAX_NODES; >> +bail: >> + return nodenum; >> } >> >> - >> - >> /* this is called by the new master once all lockres >> * data has been received */ >> static int dlm_do_migrate_request(struct dlm_ctxt *dlm, >> -- >> 1.7.1 >> >> >> _______________________________________________ >> Ocfs2-devel mailing list >> Ocfs2-devel at oss.oracle.com >> http://oss.oracle.com/mailman/listinfo/ocfs2-devel