xfs: add xlog_grant_head_wake_all
[linux-2.6.git] / fs / xfs / xfs_dquot.c
index 0c5fe66..71e615f 100644 (file)
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 
-
 /*
-   LOCK ORDER
-
-   inode lock              (ilock)
-   dquot hash-chain lock    (hashlock)
-   xqm dquot freelist lock  (freelistlock
-   mount's dquot list lock  (mplistlock)
-   user dquot lock - lock ordering among dquots is based on the uid or gid
-   group dquot lock - similar to udquots. Between the two dquots, the udquot
-                     has to be locked first.
-   pin lock - the dquot lock must be held to take this lock.
-   flush lock - ditto.
-*/
+ * Lock order:
+ *
+ * ip->i_lock
+ *   qh->qh_lock
+ *     qi->qi_dqlist_lock
+ *       dquot->q_qlock (xfs_dqlock() and friends)
+ *         dquot->q_flush (xfs_dqflock() and friends)
+ *         xfs_Gqm->qm_dqfrlist_lock
+ *
+ * If two dquots need to be locked the order is user before group/project,
+ * otherwise by the lowest id first, see xfs_dqlock2.
+ */
 
 #ifdef DEBUG
 xfs_buftarg_t *xfs_dqerror_target;
@@ -64,82 +63,6 @@ int xfs_dqerror_mod = 33;
 static struct lock_class_key xfs_dquot_other_class;
 
 /*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-       xfs_mount_t  *mp,
-       xfs_dqid_t   id,
-       uint         type)
-{
-       xfs_dquot_t     *dqp;
-       boolean_t       brandnewdquot;
-
-       brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-       dqp->dq_flags = type;
-       dqp->q_core.d_id = cpu_to_be32(id);
-       dqp->q_mount = mp;
-
-       /*
-        * No need to re-initialize these if this is a reclaimed dquot.
-        */
-       if (brandnewdquot) {
-               INIT_LIST_HEAD(&dqp->q_freelist);
-               mutex_init(&dqp->q_qlock);
-               init_waitqueue_head(&dqp->q_pinwait);
-
-               /*
-                * Because we want to use a counting completion, complete
-                * the flush completion once to allow a single access to
-                * the flush completion without blocking.
-                */
-               init_completion(&dqp->q_flush);
-               complete(&dqp->q_flush);
-
-               trace_xfs_dqinit(dqp);
-       } else {
-               /*
-                * Only the q_core portion was zeroed in dqreclaim_one().
-                * So, we need to reset others.
-                */
-               dqp->q_nrefs = 0;
-               dqp->q_blkno = 0;
-               INIT_LIST_HEAD(&dqp->q_mplist);
-               INIT_LIST_HEAD(&dqp->q_hashlist);
-               dqp->q_bufoffset = 0;
-               dqp->q_fileoffset = 0;
-               dqp->q_transp = NULL;
-               dqp->q_gdquot = NULL;
-               dqp->q_res_bcount = 0;
-               dqp->q_res_icount = 0;
-               dqp->q_res_rtbcount = 0;
-               atomic_set(&dqp->q_pincount, 0);
-               dqp->q_hash = NULL;
-               ASSERT(list_empty(&dqp->q_freelist));
-
-               trace_xfs_dqreuse(dqp);
-       }
-
-       /*
-        * In either case we need to make sure group quotas have a different
-        * lock class than user quotas, to make sure lockdep knows we can
-        * locks of one of each at the same time.
-        */
-       if (!(type & XFS_DQ_USER))
-               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
-       /*
-        * log item gets initialized later
-        */
-       return (dqp);
-}
-
-/*
  * This is called to free all the memory associated with a dquot
  */
 void
@@ -155,24 +78,6 @@ xfs_qm_dqdestroy(
 }
 
 /*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
-       xfs_dqid_t      id,
-       uint            type,
-       xfs_dqblk_t     *d)
-{
-       /*
-        * Caller has zero'd the entire dquot 'chunk' already.
-        */
-       d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-       d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-       d->dd_diskdq.d_id = cpu_to_be32(id);
-       d->dd_diskdq.d_flags = type;
-}
-
-/*
  * If default limits are in force, push them into the dquot now.
  * We overwrite the dquot limits only if they are zero and this
  * is not the root dquot.
@@ -234,10 +139,10 @@ xfs_qm_adjust_dqtimers(
 
        if (!d->d_btimer) {
                if ((d->d_blk_softlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
+                    (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_softlimit))) ||
                    (d->d_blk_hardlimit &&
-                    (be64_to_cpu(d->d_bcount) >=
+                    (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_btimelimit);
@@ -246,10 +151,10 @@ xfs_qm_adjust_dqtimers(
                }
        } else {
                if ((!d->d_blk_softlimit ||
-                    (be64_to_cpu(d->d_bcount) <
+                    (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_softlimit))) &&
                    (!d->d_blk_hardlimit ||
-                   (be64_to_cpu(d->d_bcount) <
+                   (be64_to_cpu(d->d_bcount) <=
                     be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = 0;
                }
@@ -257,10 +162,10 @@ xfs_qm_adjust_dqtimers(
 
        if (!d->d_itimer) {
                if ((d->d_ino_softlimit &&
-                    (be64_to_cpu(d->d_icount) >=
+                    (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_softlimit))) ||
                    (d->d_ino_hardlimit &&
-                    (be64_to_cpu(d->d_icount) >=
+                    (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_itimelimit);
@@ -269,10 +174,10 @@ xfs_qm_adjust_dqtimers(
                }
        } else {
                if ((!d->d_ino_softlimit ||
-                    (be64_to_cpu(d->d_icount) <
+                    (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_softlimit)))  &&
                    (!d->d_ino_hardlimit ||
-                    (be64_to_cpu(d->d_icount) <
+                    (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = 0;
                }
@@ -280,10 +185,10 @@ xfs_qm_adjust_dqtimers(
 
        if (!d->d_rtbtimer) {
                if ((d->d_rtb_softlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
+                    (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_softlimit))) ||
                    (d->d_rtb_hardlimit &&
-                    (be64_to_cpu(d->d_rtbcount) >=
+                    (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_rtbtimelimit);
@@ -292,10 +197,10 @@ xfs_qm_adjust_dqtimers(
                }
        } else {
                if ((!d->d_rtb_softlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
+                    (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_softlimit))) &&
                    (!d->d_rtb_hardlimit ||
-                    (be64_to_cpu(d->d_rtbcount) <
+                    (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = 0;
                }
@@ -328,8 +233,13 @@ xfs_qm_init_dquot_blk(
        curid = id - (id % q->qi_dqperchunk);
        ASSERT(curid >= 0);
        memset(d, 0, BBTOB(q->qi_dqchunklen));
-       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
-               xfs_qm_dqinit_core(curid, type, d);
+       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
+               d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+               d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+               d->dd_diskdq.d_id = cpu_to_be32(curid);
+               d->dd_diskdq.d_flags = type;
+       }
+
        xfs_trans_dquot_buf(tp, bp,
                            (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
                            ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
@@ -372,21 +282,19 @@ xfs_qm_dqalloc(
         * Return if this type of quotas is turned off while we didn't
         * have an inode lock
         */
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+       if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                xfs_iunlock(quotip, XFS_ILOCK_EXCL);
                return (ESRCH);
        }
 
-       xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
        nmaps = 1;
-       if ((error = xfs_bmapi(tp, quotip,
-                             offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
-                             XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
-                             &firstblock,
-                             XFS_QM_DQALLOC_SPACE_RES(mp),
-                             &map, &nmaps, &flist))) {
+       error = xfs_bmapi_write(tp, quotip, offset_fsb,
+                               XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+                               &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
+                               &map, &nmaps, &flist);
+       if (error)
                goto error0;
-       }
        ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
        ASSERT(nmaps == 1);
        ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -476,7 +384,7 @@ xfs_qm_dqtobp(
        dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
        xfs_ilock(quotip, XFS_ILOCK_SHARED);
-       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
+       if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
                /*
                 * Return if this type of quotas is turned off while we
                 * didn't have the quota inode lock.
@@ -488,9 +396,8 @@ xfs_qm_dqtobp(
        /*
         * Find the block map; no allocations yet
         */
-       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-                         NULL, 0, &map, &nmaps, NULL);
+       error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
+                              XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
 
        xfs_iunlock(quotip, XFS_ILOCK_SHARED);
        if (error)
@@ -567,36 +474,87 @@ xfs_qm_dqtobp(
  * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
  * and release the buffer immediately.
  *
+ * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
  */
-/* ARGSUSED */
-STATIC int
+int
 xfs_qm_dqread(
-       xfs_trans_t     **tpp,
-       xfs_dqid_t      id,
-       xfs_dquot_t     *dqp,   /* dquot to get filled in */
-       uint            flags)
+       struct xfs_mount        *mp,
+       xfs_dqid_t              id,
+       uint                    type,
+       uint                    flags,
+       struct xfs_dquot        **O_dqpp)
 {
-       xfs_disk_dquot_t *ddqp;
-       xfs_buf_t        *bp;
-       int              error;
-       xfs_trans_t      *tp;
+       struct xfs_dquot        *dqp;
+       struct xfs_disk_dquot   *ddqp;
+       struct xfs_buf          *bp;
+       struct xfs_trans        *tp = NULL;
+       int                     error;
+       int                     cancelflags = 0;
+
+
+       dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+
+       dqp->dq_flags = type;
+       dqp->q_core.d_id = cpu_to_be32(id);
+       dqp->q_mount = mp;
+       INIT_LIST_HEAD(&dqp->q_freelist);
+       mutex_init(&dqp->q_qlock);
+       init_waitqueue_head(&dqp->q_pinwait);
+
+       /*
+        * Because we want to use a counting completion, complete
+        * the flush completion once to allow a single access to
+        * the flush completion without blocking.
+        */
+       init_completion(&dqp->q_flush);
+       complete(&dqp->q_flush);
+
+       /*
+        * Make sure group quotas have a different lock class than user
+        * quotas.
+        */
+       if (!(type & XFS_DQ_USER))
+               lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
 
-       ASSERT(tpp);
+       atomic_inc(&xfs_Gqm->qm_totaldquots);
 
        trace_xfs_dqread(dqp);
 
+       if (flags & XFS_QMOPT_DQALLOC) {
+               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
+               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+                               XFS_WRITE_LOG_RES(mp) +
+                               /*
+                                * Round the chunklen up to the next multiple
+                                * of 128 (buf log item chunk size)).
+                                */
+                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
+                               0,
+                               XFS_TRANS_PERM_LOG_RES,
+                               XFS_WRITE_LOG_COUNT);
+               if (error)
+                       goto error1;
+               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
+       }
+
        /*
         * get a pointer to the on-disk dquot and the buffer containing it
         * dqp already knows its own type (GROUP/USER).
         */
-       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
-               return (error);
+       error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
+       if (error) {
+               /*
+                * This can happen if quotas got turned off (ESRCH),
+                * or if the dquot didn't exist on disk and we ask to
+                * allocate (ENOENT).
+                */
+               trace_xfs_dqread_fail(dqp);
+               cancelflags |= XFS_TRANS_ABORT;
+               goto error1;
        }
-       tp = *tpp;
 
        /* copy everything from disk dquot to the incore dquot */
        memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
        xfs_qm_dquot_logitem_init(dqp);
 
        /*
@@ -608,7 +566,7 @@ xfs_qm_dqread(
        dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
 
        /* Mark the buf so that this will stay incore a little longer */
-       XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
+       xfs_buf_set_ref(bp, XFS_DQUOT_REF);
 
        /*
         * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
@@ -625,77 +583,22 @@ xfs_qm_dqread(
        ASSERT(xfs_buf_islocked(bp));
        xfs_trans_brelse(tp, bp);
 
-       return (error);
-}
-
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
-       xfs_mount_t     *mp,
-       xfs_dqid_t      id,      /* gid or uid, depending on type */
-       uint            type,    /* UDQUOT or GDQUOT */
-       uint            flags,   /* DQALLOC, DQREPAIR */
-       xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
-{
-       xfs_dquot_t     *dqp;
-       int             error;
-       xfs_trans_t     *tp;
-       int             cancelflags=0;
-
-       dqp = xfs_qm_dqinit(mp, id, type);
-       tp = NULL;
-       if (flags & XFS_QMOPT_DQALLOC) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-                               XFS_WRITE_LOG_RES(mp) +
-                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
-                               128,
-                               0,
-                               XFS_TRANS_PERM_LOG_RES,
-                               XFS_WRITE_LOG_COUNT);
-               if (error) {
-                       cancelflags = 0;
-                       goto error0;
-               }
-               cancelflags = XFS_TRANS_RELEASE_LOG_RES;
-       }
-
-       /*
-        * Read it from disk; xfs_dqread() takes care of
-        * all the necessary initialization of dquot's fields (locks, etc)
-        */
-       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
-               /*
-                * This can happen if quotas got turned off (ESRCH),
-                * or if the dquot didn't exist on disk and we ask to
-                * allocate (ENOENT).
-                */
-               trace_xfs_dqread_fail(dqp);
-               cancelflags |= XFS_TRANS_ABORT;
-               goto error0;
-       }
        if (tp) {
-               if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
-                       goto error1;
+               error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+               if (error)
+                       goto error0;
        }
 
        *O_dqpp = dqp;
-       return (0);
+       return error;
 
- error0:
-       ASSERT(error);
+error1:
        if (tp)
                xfs_trans_cancel(tp, cancelflags);
- error1:
+error0:
        xfs_qm_dqdestroy(dqp);
        *O_dqpp = NULL;
-       return (error);
+       return error;
 }
 
 /*
@@ -713,12 +616,9 @@ xfs_qm_dqlookup(
        xfs_dquot_t             **O_dqpp)
 {
        xfs_dquot_t             *dqp;
-       uint                    flist_locked;
 
        ASSERT(mutex_is_locked(&qh->qh_lock));
 
-       flist_locked = B_FALSE;
-
        /*
         * Traverse the hashchain looking for a match
         */
@@ -728,70 +628,31 @@ xfs_qm_dqlookup(
                 * dqlock to look at the id field of the dquot, since the
                 * id can't be modified without the hashlock anyway.
                 */
-               if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
-                       trace_xfs_dqlookup_found(dqp);
+               if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
+                       continue;
 
-                       /*
-                        * All in core dquots must be on the dqlist of mp
-                        */
-                       ASSERT(!list_empty(&dqp->q_mplist));
-
-                       xfs_dqlock(dqp);
-                       if (dqp->q_nrefs == 0) {
-                               ASSERT(!list_empty(&dqp->q_freelist));
-                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-                                       trace_xfs_dqlookup_want(dqp);
-
-                                       /*
-                                        * We may have raced with dqreclaim_one()
-                                        * (and lost). So, flag that we don't
-                                        * want the dquot to be reclaimed.
-                                        */
-                                       dqp->dq_flags |= XFS_DQ_WANT;
-                                       xfs_dqunlock(dqp);
-                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       xfs_dqlock(dqp);
-                                       dqp->dq_flags &= ~(XFS_DQ_WANT);
-                               }
-                               flist_locked = B_TRUE;
-                       }
+               trace_xfs_dqlookup_found(dqp);
 
-                       /*
-                        * id couldn't have changed; we had the hashlock all
-                        * along
-                        */
-                       ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
-                       if (flist_locked) {
-                               if (dqp->q_nrefs != 0) {
-                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                                       flist_locked = B_FALSE;
-                               } else {
-                                       /* take it off the freelist */
-                                       trace_xfs_dqlookup_freelist(dqp);
-                                       list_del_init(&dqp->q_freelist);
-                                       xfs_Gqm->qm_dqfrlist_cnt--;
-                               }
-                       }
+               xfs_dqlock(dqp);
+               if (dqp->dq_flags & XFS_DQ_FREEING) {
+                       *O_dqpp = NULL;
+                       xfs_dqunlock(dqp);
+                       return -1;
+               }
 
-                       XFS_DQHOLD(dqp);
+               dqp->q_nrefs++;
 
-                       if (flist_locked)
-                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-                       /*
-                        * move the dquot to the front of the hashchain
-                        */
-                       ASSERT(mutex_is_locked(&qh->qh_lock));
-                       list_move(&dqp->q_hashlist, &qh->qh_list);
-                       trace_xfs_dqlookup_done(dqp);
-                       *O_dqpp = dqp;
-                       return 0;
-               }
+               /*
+                * move the dquot to the front of the hashchain
+                */
+               list_move(&dqp->q_hashlist, &qh->qh_list);
+               trace_xfs_dqlookup_done(dqp);
+               *O_dqpp = dqp;
+               return 0;
        }
 
        *O_dqpp = NULL;
-       ASSERT(mutex_is_locked(&qh->qh_lock));
-       return (1);
+       return 1;
 }
 
 /*
@@ -811,7 +672,7 @@ xfs_qm_dqget(
        uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
        xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
 {
-       xfs_dquot_t     *dqp;
+       xfs_dquot_t     *dqp, *dqp1;
        xfs_dqhash_t    *h;
        uint            version;
        int             error;
@@ -832,29 +693,30 @@ xfs_qm_dqget(
                        return (EIO);
                }
        }
-#endif
 
- again:
-
-#ifdef DEBUG
        ASSERT(type == XFS_DQ_USER ||
               type == XFS_DQ_PROJ ||
               type == XFS_DQ_GROUP);
        if (ip) {
                ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-               if (type == XFS_DQ_USER)
-                       ASSERT(ip->i_udquot == NULL);
-               else
-                       ASSERT(ip->i_gdquot == NULL);
+               ASSERT(xfs_inode_dquot(ip, type) == NULL);
        }
 #endif
+
+restart:
        mutex_lock(&h->qh_lock);
 
        /*
         * Look in the cache (hashtable).
         * The chain is kept locked during lookup.
         */
-       if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
+       switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
+       case -1:
+               XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
+               mutex_unlock(&h->qh_lock);
+               delay(1);
+               goto restart;
+       case 0:
                XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
                /*
                 * The dquot was found, moved to the front of the chain,
@@ -865,9 +727,11 @@ xfs_qm_dqget(
                ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
                mutex_unlock(&h->qh_lock);
                trace_xfs_dqget_hit(*O_dqpp);
-               return (0);     /* success */
+               return 0;       /* success */
+       default:
+               XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
+               break;
        }
-       XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
 
        /*
         * Dquot cache miss. We don't want to keep the inode lock across
@@ -885,69 +749,34 @@ xfs_qm_dqget(
        version = h->qh_version;
        mutex_unlock(&h->qh_lock);
 
-       /*
-        * Allocate the dquot on the kernel heap, and read the ondisk
-        * portion off the disk. Also, do all the necessary initialization
-        * This can return ENOENT if dquot didn't exist on disk and we didn't
-        * ask it to allocate; ESRCH if quotas got turned off suddenly.
-        */
-       if ((error = xfs_qm_idtodq(mp, id, type,
-                                 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
-                                          XFS_QMOPT_DOWARN),
-                                 &dqp))) {
-               if (ip)
-                       xfs_ilock(ip, XFS_ILOCK_EXCL);
-               return (error);
-       }
+       error = xfs_qm_dqread(mp, id, type, flags, &dqp);
 
-       /*
-        * See if this is mount code calling to look at the overall quota limits
-        * which are stored in the id == 0 user or group's dquot.
-        * Since we may not have done a quotacheck by this point, just return
-        * the dquot without attaching it to any hashtables, lists, etc, or even
-        * taking a reference.
-        * The caller must dqdestroy this once done.
-        */
-       if (flags & XFS_QMOPT_DQSUSER) {
-               ASSERT(id == 0);
-               ASSERT(! ip);
-               goto dqret;
-       }
+       if (ip)
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       if (error)
+               return error;
 
        /*
         * Dquot lock comes after hashlock in the lock ordering
         */
        if (ip) {
-               xfs_ilock(ip, XFS_ILOCK_EXCL);
-
                /*
                 * A dquot could be attached to this inode by now, since
                 * we had dropped the ilock.
                 */
-               if (type == XFS_DQ_USER) {
-                       if (!XFS_IS_UQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_udquot) {
+               if (xfs_this_quota_on(mp, type)) {
+                       dqp1 = xfs_inode_dquot(ip, type);
+                       if (dqp1) {
                                xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_udquot;
+                               dqp = dqp1;
                                xfs_dqlock(dqp);
                                goto dqret;
                        }
                } else {
-                       if (!XFS_IS_OQUOTA_ON(mp)) {
-                               /* inode stays locked on return */
-                               xfs_qm_dqdestroy(dqp);
-                               return XFS_ERROR(ESRCH);
-                       }
-                       if (ip->i_gdquot) {
-                               xfs_qm_dqdestroy(dqp);
-                               dqp = ip->i_gdquot;
-                               xfs_dqlock(dqp);
-                               goto dqret;
-                       }
+                       /* inode stays locked on return */
+                       xfs_qm_dqdestroy(dqp);
+                       return XFS_ERROR(ESRCH);
                }
        }
 
@@ -964,16 +793,21 @@ xfs_qm_dqget(
                 * lock order between the two dquots here since dqp isn't
                 * on any findable lists yet.
                 */
-               if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
+               switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
+               case 0:
+               case -1:
                        /*
-                        * Duplicate found. Just throw away the new dquot
-                        * and start over.
+                        * Duplicate found, either in cache or on its way out.
+                        * Just throw away the new dquot and start over.
                         */
-                       xfs_qm_dqput(tmpdqp);
+                       if (tmpdqp)
+                               xfs_qm_dqput(tmpdqp);
                        mutex_unlock(&h->qh_lock);
                        xfs_qm_dqdestroy(dqp);
                        XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
-                       goto again;
+                       goto restart;
+               default:
+                       break;
                }
        }
 
@@ -1018,67 +852,49 @@ xfs_qm_dqget(
  */
 void
 xfs_qm_dqput(
-       xfs_dquot_t     *dqp)
+       struct xfs_dquot        *dqp)
 {
-       xfs_dquot_t     *gdqp;
+       struct xfs_dquot        *gdqp;
 
        ASSERT(dqp->q_nrefs > 0);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
        trace_xfs_dqput(dqp);
 
-       if (dqp->q_nrefs != 1) {
-               dqp->q_nrefs--;
+recurse:
+       if (--dqp->q_nrefs > 0) {
                xfs_dqunlock(dqp);
                return;
        }
 
+       trace_xfs_dqput_free(dqp);
+
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+       if (list_empty(&dqp->q_freelist)) {
+               list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+               xfs_Gqm->qm_dqfrlist_cnt++;
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+
        /*
-        * drop the dqlock and acquire the freelist and dqlock
-        * in the right order; but try to get it out-of-order first
+        * If we just added a udquot to the freelist, then we want to release
+        * the gdquot reference that it (probably) has. Otherwise it'll keep
+        * the gdquot from getting reclaimed.
         */
-       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
-               trace_xfs_dqput_wait(dqp);
-               xfs_dqunlock(dqp);
-               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-               xfs_dqlock(dqp);
+       gdqp = dqp->q_gdquot;
+       if (gdqp) {
+               xfs_dqlock(gdqp);
+               dqp->q_gdquot = NULL;
        }
+       xfs_dqunlock(dqp);
 
-       while (1) {
-               gdqp = NULL;
-
-               /* We can't depend on nrefs being == 1 here */
-               if (--dqp->q_nrefs == 0) {
-                       trace_xfs_dqput_free(dqp);
-
-                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-                       xfs_Gqm->qm_dqfrlist_cnt++;
-
-                       /*
-                        * If we just added a udquot to the freelist, then
-                        * we want to release the gdquot reference that
-                        * it (probably) has. Otherwise it'll keep the
-                        * gdquot from getting reclaimed.
-                        */
-                       if ((gdqp = dqp->q_gdquot)) {
-                               /*
-                                * Avoid a recursive dqput call
-                                */
-                               xfs_dqlock(gdqp);
-                               dqp->q_gdquot = NULL;
-                       }
-               }
-               xfs_dqunlock(dqp);
-
-               /*
-                * If we had a group quota inside the user quota as a hint,
-                * release it now.
-                */
-               if (! gdqp)
-                       break;
+       /*
+        * If we had a group quota hint, release it now.
+        */
+       if (gdqp) {
                dqp = gdqp;
+               goto recurse;
        }
-       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 }
 
 /*
@@ -1172,7 +988,7 @@ xfs_qm_dqflush(
         * If not dirty, or it's pinned and we are not supposed to block, nada.
         */
        if (!XFS_DQ_IS_DIRTY(dqp) ||
-           (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
+           ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
                xfs_dqfunlock(dqp);
                return 0;
        }
@@ -1260,40 +1076,6 @@ xfs_qm_dqflush(
 
 }
 
-int
-xfs_qm_dqlock_nowait(
-       xfs_dquot_t *dqp)
-{
-       return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_lock(&dqp->q_qlock);
-}
-
-void
-xfs_dqunlock(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-       if (dqp->q_logitem.qli_dquot == dqp) {
-               /* Once was dqp->q_mount, but might just have been cleared */
-               xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
-                                       (xfs_log_item_t*)&(dqp->q_logitem));
-       }
-}
-
-
-void
-xfs_dqunlock_nonotify(
-       xfs_dquot_t *dqp)
-{
-       mutex_unlock(&(dqp->q_qlock));
-}
-
 /*
  * Lock two xfs_dquot structures.
  *
@@ -1322,43 +1104,18 @@ xfs_dqlock2(
        }
 }
 
-
 /*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
+ * Take a dquot out of the mount's dqlist as well as the hashlist.  This is
+ * called via unmount as well as quotaoff, and the purge will always succeed.
  */
-/* ARGSUSED */
-int
+void
 xfs_qm_dqpurge(
-       xfs_dquot_t     *dqp)
+       struct xfs_dquot        *dqp)
 {
-       xfs_dqhash_t    *qh = dqp->q_hash;
-       xfs_mount_t     *mp = dqp->q_mount;
-
-       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
-       ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_dqhash       *qh = dqp->q_hash;
 
        xfs_dqlock(dqp);
-       /*
-        * We really can't afford to purge a dquot that is
-        * referenced, because these are hard refs.
-        * It shouldn't happen in general because we went thru _all_ inodes in
-        * dqrele_all_inodes before calling this and didn't let the mountlock go.
-        * However it is possible that we have dquots with temporary
-        * references that are not attached to an inode. e.g. see xfs_setattr().
-        */
-       if (dqp->q_nrefs != 0) {
-               xfs_dqunlock(dqp);
-               mutex_unlock(&dqp->q_hash->qh_lock);
-               return (1);
-       }
-
-       ASSERT(!list_empty(&dqp->q_freelist));
 
        /*
         * If we're turning off quotas, we have to make sure that, for
@@ -1373,23 +1130,18 @@ xfs_qm_dqpurge(
                 * Block on the flush lock after nudging dquot buffer,
                 * if it is incore.
                 */
-               xfs_qm_dqflock_pushbuf_wait(dqp);
+               xfs_dqflock_pushbuf_wait(dqp);
        }
 
        /*
-        * XXXIf we're turning this type of quotas off, we don't care
+        * If we are turning this type of quotas off, we don't care
         * about the dirty metadata sitting in this dquot. OTOH, if
         * we're unmounting, we do care, so we flush it and wait.
         */
        if (XFS_DQ_IS_DIRTY(dqp)) {
                int     error;
 
-               /* dqflush unlocks dqflock */
                /*
-                * Given that dqpurge is a very rare occurrence, it is OK
-                * that we're holding the hashlist and mplist locks
-                * across the disk write. But, ... XXXsup
-                *
                 * We don't care about getting disk errors here. We need
                 * to purge this dquot anyway, so we go ahead regardless.
                 */
@@ -1399,38 +1151,44 @@ xfs_qm_dqpurge(
                                __func__, dqp);
                xfs_dqflock(dqp);
        }
+
        ASSERT(atomic_read(&dqp->q_pincount) == 0);
        ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
               !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
 
+       xfs_dqfunlock(dqp);
+       xfs_dqunlock(dqp);
+
+       mutex_lock(&qh->qh_lock);
        list_del_init(&dqp->q_hashlist);
        qh->qh_version++;
+       mutex_unlock(&qh->qh_lock);
+
+       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
        list_del_init(&dqp->q_mplist);
        mp->m_quotainfo->qi_dqreclaims++;
        mp->m_quotainfo->qi_dquots--;
+       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+
        /*
-        * XXX Move this to the front of the freelist, if we can get the
-        * freelist lock.
+        * We move dquots to the freelist as soon as their reference count
+        * hits zero, so it really should be on the freelist here.
         */
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
        ASSERT(!list_empty(&dqp->q_freelist));
+       list_del_init(&dqp->q_freelist);
+       xfs_Gqm->qm_dqfrlist_cnt--;
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 
-       dqp->q_mount = NULL;
-       dqp->q_hash = NULL;
-       dqp->dq_flags = XFS_DQ_INACTIVE;
-       memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-       xfs_dqfunlock(dqp);
-       xfs_dqunlock(dqp);
-       mutex_unlock(&qh->qh_lock);
-       return (0);
+       xfs_qm_dqdestroy(dqp);
 }
 
-
 /*
  * Give the buffer a little push if it is incore and
  * wait on the flush lock.
  */
 void
-xfs_qm_dqflock_pushbuf_wait(
+xfs_dqflock_pushbuf_wait(
        xfs_dquot_t     *dqp)
 {
        xfs_mount_t     *mp = dqp->q_mount;