[DLM] fix grant_after_purge softlockup
David Teigland [Tue, 2 May 2006 17:34:03 +0000 (13:34 -0400)]
In dlm_grant_after_purge() we were holding a hash table read_lock while
calling put_rsb(), which can remove the rsb from the hash table and, to do
so, takes the same lock for writing.  Fix this by flagging, ahead of time,
each rsb that has had locks purged.  Then iteratively read_lock the hash
table, find a flagged rsb, unlock, and process that rsb.

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>

fs/dlm/dlm_internal.h
fs/dlm/lock.c
fs/dlm/lock.h

index c329902..149106f 100644 (file)
@@ -280,6 +280,7 @@ enum rsb_flags {
        RSB_NEW_MASTER,
        RSB_NEW_MASTER2,
        RSB_RECOVER_CONVERT,
+       RSB_LOCKS_PURGED,
 };
 
 static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
index 85a0e73..5f69639 100644 (file)
@@ -3278,6 +3278,7 @@ static void purge_queue(struct dlm_rsb *r, struct list_head *queue,
 
        list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) {
                if (test(ls, lkb)) {
+                       rsb_set_flag(r, RSB_LOCKS_PURGED);
                        del_lkb(r, lkb);
                        /* this put should free the lkb */
                        if (!dlm_put_lkb(lkb))
@@ -3334,27 +3335,40 @@ int dlm_purge_locks(struct dlm_ls *ls)
        return 0;
 }
 
-int dlm_grant_after_purge(struct dlm_ls *ls)
+static struct dlm_rsb *find_purged_rsb(struct dlm_ls *ls, int bucket)
+{      /* find one rsb in this bucket flagged RSB_LOCKS_PURGED; NULL if none */
+       struct dlm_rsb *r, *r_ret = NULL;
+
+       read_lock(&ls->ls_rsbtbl[bucket].lock); /* read lock covers only the list scan */
+       list_for_each_entry(r, &ls->ls_rsbtbl[bucket].list, res_hashchain) {
+               if (!rsb_flag(r, RSB_LOCKS_PURGED))
+                       continue;
+               hold_rsb(r);    /* caller is expected to put_rsb() the result */
+               rsb_clear_flag(r, RSB_LOCKS_PURGED);    /* so a later scan skips this rsb */
+               r_ret = r;
+               break;  /* at most one rsb is returned per call */
+       }
+       read_unlock(&ls->ls_rsbtbl[bucket].lock);
+       return r_ret;
+}
+
+/* Grant pending locks on every rsb flagged RSB_LOCKS_PURGED, bucket by bucket. */
+void dlm_grant_after_purge(struct dlm_ls *ls)
 {
        struct dlm_rsb *r;
        int i;
 
        for (i = 0; i < ls->ls_rsbtbl_size; i++) {
-               read_lock(&ls->ls_rsbtbl[i].lock);
-               list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) {
-                       hold_rsb(r);
-                       lock_rsb(r);
-                       if (is_master(r)) {
-                               grant_pending_locks(r);
-                               confirm_master(r, 0);
-                       }
-                       unlock_rsb(r);
-                       put_rsb(r);
+               while ((r = find_purged_rsb(ls, i))) {  /* a bucket may hold several purged rsbs; the helper returns one per call */
+                       lock_rsb(r);
+                       if (is_master(r)) {
+                               grant_pending_locks(r);
+                               confirm_master(r, 0);
+                       }
+                       unlock_rsb(r);
+                       put_rsb(r);     /* hash table lock is not held here */
                }
-               read_unlock(&ls->ls_rsbtbl[i].lock);
        }
-
-       return 0;
 }
 
 static struct dlm_lkb *search_remid_list(struct list_head *head, int nodeid,
index bffab9c..56cdc07 100644 (file)
@@ -25,7 +25,7 @@ void dlm_scan_rsbs(struct dlm_ls *ls);
 
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
-int dlm_grant_after_purge(struct dlm_ls *ls);
+void dlm_grant_after_purge(struct dlm_ls *ls);
 int dlm_recover_waiters_post(struct dlm_ls *ls);
 void dlm_recover_waiters_pre(struct dlm_ls *ls);
 int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);